Repository: tobymao/sqlglot Branch: main Commit: f7bb6bea86c1 Files: 264 Total size: 7.3 MB Directory structure: gitextract_0kn1s5v0/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── scripts/ │ │ ├── get_integration_test_params.py │ │ └── integration_tests_sync.sh │ └── workflows/ │ ├── benchmark-sqlglot.yml │ ├── package-publish.yml │ ├── package-test.yml │ └── run-integration-tests.yml ├── .gitignore ├── .gitmodules ├── .gitpod.yml ├── .pre-commit-config.yaml ├── AGENTS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmarks/ │ ├── __init__.py │ ├── compare.py │ ├── optimize.py │ └── parse.py ├── pdoc/ │ ├── cli.py │ └── templates/ │ └── module.html.jinja2 ├── posts/ │ ├── ast_primer.md │ ├── onboarding.md │ ├── python_sql_engine.md │ └── sql_diff.md ├── pyproject.toml ├── setup.cfg ├── setup.py ├── sqlglot/ │ ├── __init__.py │ ├── __main__.py │ ├── _typing.py │ ├── dialects/ │ │ ├── __init__.py │ │ ├── athena.py │ │ ├── bigquery.py │ │ ├── clickhouse.py │ │ ├── databricks.py │ │ ├── dialect.py │ │ ├── doris.py │ │ ├── dremio.py │ │ ├── drill.py │ │ ├── druid.py │ │ ├── duckdb.py │ │ ├── dune.py │ │ ├── exasol.py │ │ ├── fabric.py │ │ ├── hive.py │ │ ├── materialize.py │ │ ├── mysql.py │ │ ├── oracle.py │ │ ├── postgres.py │ │ ├── presto.py │ │ ├── prql.py │ │ ├── redshift.py │ │ ├── risingwave.py │ │ ├── singlestore.py │ │ ├── snowflake.py │ │ ├── solr.py │ │ ├── spark.py │ │ ├── spark2.py │ │ ├── sqlite.py │ │ ├── starrocks.py │ │ ├── tableau.py │ │ ├── teradata.py │ │ ├── trino.py │ │ └── tsql.py │ ├── diff.py │ ├── errors.py │ ├── executor/ │ │ ├── __init__.py │ │ ├── context.py │ │ ├── env.py │ │ ├── python.py │ │ └── table.py │ ├── expressions/ │ │ ├── __init__.py │ │ ├── aggregate.py │ │ ├── array.py │ │ ├── builders.py │ │ ├── constraints.py │ │ ├── core.py │ │ ├── datatypes.py │ │ ├── ddl.py │ │ ├── dml.py │ │ ├── functions.py │ │ ├── json.py │ │ ├── math.py │ 
│ ├── properties.py │ │ ├── query.py │ │ ├── string.py │ │ └── temporal.py │ ├── generator.py │ ├── helper.py │ ├── jsonpath.py │ ├── lineage.py │ ├── optimizer/ │ │ ├── __init__.py │ │ ├── annotate_types.py │ │ ├── canonicalize.py │ │ ├── eliminate_ctes.py │ │ ├── eliminate_joins.py │ │ ├── eliminate_subqueries.py │ │ ├── isolate_table_selects.py │ │ ├── merge_subqueries.py │ │ ├── normalize.py │ │ ├── normalize_identifiers.py │ │ ├── optimize_joins.py │ │ ├── optimizer.py │ │ ├── pushdown_predicates.py │ │ ├── pushdown_projections.py │ │ ├── qualify.py │ │ ├── qualify_columns.py │ │ ├── qualify_tables.py │ │ ├── resolver.py │ │ ├── scope.py │ │ ├── simplify.py │ │ └── unnest_subqueries.py │ ├── parser.py │ ├── parsers/ │ │ ├── __init__.py │ │ ├── athena.py │ │ ├── base.py │ │ ├── bigquery.py │ │ ├── clickhouse.py │ │ ├── databricks.py │ │ ├── doris.py │ │ ├── dremio.py │ │ ├── drill.py │ │ ├── druid.py │ │ ├── duckdb.py │ │ ├── dune.py │ │ ├── exasol.py │ │ ├── fabric.py │ │ ├── hive.py │ │ ├── materialize.py │ │ ├── mysql.py │ │ ├── oracle.py │ │ ├── postgres.py │ │ ├── presto.py │ │ ├── prql.py │ │ ├── redshift.py │ │ ├── risingwave.py │ │ ├── singlestore.py │ │ ├── snowflake.py │ │ ├── solr.py │ │ ├── spark.py │ │ ├── spark2.py │ │ ├── sqlite.py │ │ ├── starrocks.py │ │ ├── tableau.py │ │ ├── teradata.py │ │ ├── trino.py │ │ └── tsql.py │ ├── planner.py │ ├── py.typed │ ├── schema.py │ ├── serde.py │ ├── time.py │ ├── tokenizer_core.py │ ├── tokens.py │ ├── transforms.py │ ├── trie.py │ └── typing/ │ ├── __init__.py │ ├── bigquery.py │ ├── clickhouse.py │ ├── duckdb.py │ ├── hive.py │ ├── mysql.py │ ├── presto.py │ ├── redshift.py │ ├── snowflake.py │ ├── spark.py │ ├── spark2.py │ └── tsql.py ├── sqlglotc/ │ ├── MANIFEST.in │ ├── pyproject.toml │ └── setup.py └── tests/ ├── __init__.py ├── dialects/ │ ├── __init__.py │ ├── test_athena.py │ ├── test_bigquery.py │ ├── test_clickhouse.py │ ├── test_databricks.py │ ├── test_dialect.py │ ├── test_doris.py │ ├── 
test_dremio.py │ ├── test_drill.py │ ├── test_druid.py │ ├── test_duckdb.py │ ├── test_dune.py │ ├── test_exasol.py │ ├── test_fabric.py │ ├── test_hive.py │ ├── test_materialize.py │ ├── test_mysql.py │ ├── test_oracle.py │ ├── test_pipe_syntax.py │ ├── test_postgres.py │ ├── test_presto.py │ ├── test_prql.py │ ├── test_redshift.py │ ├── test_risingwave.py │ ├── test_singlestore.py │ ├── test_snowflake.py │ ├── test_solr.py │ ├── test_spark.py │ ├── test_sqlite.py │ ├── test_starrocks.py │ ├── test_tableau.py │ ├── test_teradata.py │ ├── test_trino.py │ └── test_tsql.py ├── fixtures/ │ ├── identity.sql │ ├── jsonpath/ │ │ ├── LICENSE │ │ └── cts.json │ ├── optimizer/ │ │ ├── annotate_functions.sql │ │ ├── annotate_types.sql │ │ ├── canonicalize.sql │ │ ├── eliminate_ctes.sql │ │ ├── eliminate_joins.sql │ │ ├── eliminate_subqueries.sql │ │ ├── isolate_table_selects.sql │ │ ├── merge_subqueries.sql │ │ ├── normalize.sql │ │ ├── normalize_identifiers.sql │ │ ├── optimize_joins.sql │ │ ├── optimizer.sql │ │ ├── pushdown_cte_alias_columns.sql │ │ ├── pushdown_predicates.sql │ │ ├── pushdown_projections.sql │ │ ├── qualify_columns.sql │ │ ├── qualify_columns__invalid.sql │ │ ├── qualify_columns__with_invisible.sql │ │ ├── qualify_columns_ddl.sql │ │ ├── qualify_tables.sql │ │ ├── quote_identifiers.sql │ │ ├── simplify.sql │ │ ├── tpc-ds/ │ │ │ └── tpc-ds.sql │ │ ├── tpc-h/ │ │ │ └── tpc-h.sql │ │ └── unnest_subqueries.sql │ ├── partial.sql │ └── pretty.sql ├── gen_fixtures.py ├── helpers.py ├── test_build.py ├── test_dialect_entry_points.py ├── test_dialect_imports.py ├── test_diff.py ├── test_docs.py ├── test_errors.py ├── test_executor.py ├── test_expressions.py ├── test_generator.py ├── test_helper.py ├── test_integration_loader.py ├── test_jsonpath.py ├── test_lineage.py ├── test_optimizer.py ├── test_parser.py ├── test_schema.py ├── test_serde.py ├── test_time.py ├── test_tokens.py ├── test_transforms.py └── test_transpile.py 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Before you file an issue** - Make sure you specify the "read" dialect eg. `parse_one(sql, read="spark")` - Make sure you specify the "write" dialect eg. `ast.sql(dialect="duckdb")` - Check if the issue still exists on main **Fully reproducible code snippet** Please include a fully reproducible code snippet or the input sql, dialect, and expected output. **Official Documentation** Please include links to official SQL documentation related to your issue. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/scripts/get_integration_test_params.py ================================================ #!/usr/bin/env python3 """ This script is intended to be used as part of a GitHub Actions workflow in order to decide if the integration tests should: a) be triggered at all b) if they should be triggered, should they be triggered for a subset of dialects or all dialects? 
The tests can be triggered manually by using the following directive in the PR description: /integration-tests To limit them to a certain dialect or dialects, you can specify: /integration-tests dialects=bigquery,duckdb If you specify nothing, a `git diff` will be performed between your PR branch and the base branch. If any files modified contain one of the SUPPORTED_DIALECTS in the filename, that dialect will be added to the list of dialects to test. If no files match, the integration tests will be skipped. Note that integration tests in the remote workflow are only implemented for a subset of dialects. If new ones are added, update the SUPPORTED_DIALECTS constant below. Each dialect is tested against itself (roundtrip) and duckdb (transpilation). Supplying a dialect not in this list will cause the tests to get skipped. """ import typing as t import os import sys import json import subprocess from pathlib import Path TRIGGER = "/integration-test" SUPPORTED_DIALECTS = ["duckdb", "bigquery", "snowflake"] def get_dialects_from_manual_trigger(trigger: str) -> t.Set[str]: """ Takes a trigger string and returns the set of supported dialects it requests. Dialects that are not in SUPPORTED_DIALECTS are dropped. /integration-test -> set() /integration-test dialects=bigquery -> {"bigquery"} /integration-test dialects=bigquery,duckdb -> {"bigquery", "duckdb"} /integration-test dialects=exasol,duckdb -> {"duckdb"} Raises ValueError if the trigger string does not start with TRIGGER. """ if not trigger.startswith(TRIGGER): raise ValueError(f"Invalid trigger: {trigger}") # trim off start at first space (to cover both /integration-test and /integration-tests) trigger_parts = trigger.split(" ")[1:] print(f"Parsing trigger args: {trigger_parts}") dialects: t.List[str] = [] for part in trigger_parts: # try to parse key=value pairs maybe_kv = part.split("=", maxsplit=1) if len(maybe_kv) >= 2: k, v = maybe_kv[0], maybe_kv[1] if k.lower().startswith("dialect"): dialects.extend([d.lower().strip() for d in v.split(",")]) return {d for d in dialects if d in SUPPORTED_DIALECTS} def get_dialects_from_git(base_ref: str, current_ref: 
str) -> t.Set[str]: """ Takes two git refs and runs `git diff --name-only <base_ref> <current_ref>`. If any of the returned file names contains a dialect from SUPPORTED_DIALECTS as a substring (file names are lower-cased before comparing), that dialect is included in the returned set. Raises ValueError if the git process exits with a non-zero return code. """ print(f"Checking for files changed between '{base_ref}' and '{current_ref}'") result = subprocess.run( ["git", "diff", "--name-only", base_ref, current_ref], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) output = result.stdout.decode("utf8") if result.returncode != 0: raise ValueError(f"Git process failed with exit code {result.returncode}:\n{output}") print(f"Git output:\n{output}") matching_dialects = [] for l in output.splitlines(): l = l.strip().lower() matching_dialects.extend([d for d in SUPPORTED_DIALECTS if d in l]) return set(matching_dialects) if __name__ == "__main__": github_event_path = os.environ.get("GITHUB_EVENT_PATH") github_output = os.environ.get("GITHUB_OUTPUT") if not os.environ.get("GITHUB_ACTIONS") or not github_event_path or not github_output: print("This script needs to run within GitHub Actions") sys.exit(1) github_event_path = Path(github_event_path) github_output = Path(github_output) with github_event_path.open("r") as f: event: t.Dict[str, t.Any] = json.load(f) print("Handling event: \n" + json.dumps(event, indent=2)) # for pull_request events, the body is located at github.event.pull_request.body pr_description: str = event.get("pull_request", {}).get("body") or "" dialects = [] should_run = False pr_description_lines = [l.strip().lower() for l in pr_description.splitlines()] if trigger_line := [l for l in pr_description_lines if l.startswith(TRIGGER)]: # if the user has explicitly requested /integration-tests then use that print(f"Handling trigger line: {trigger_line[0]}") dialects = get_dialects_from_manual_trigger(trigger_line[0]) should_run = True else: # otherwise, do a git diff and inspect the changed files print("Explicit trigger line not detected; performing git diff") pull_request_base_ref = 
event.get("pull_request", {}).get("base", {}).get("sha") if not pull_request_base_ref: raise ValueError("Unable to determine base ref") current_ref = event.get("pull_request", {}).get("head", {}).get("sha") if not current_ref: raise ValueError("Unable to determine current/head ref") print(f"Comparing '{current_ref}' against '{pull_request_base_ref}'") # otherwise, look at git files changed and only trigger if a file relating # to a supported dialect has changed dialects = get_dialects_from_git(base_ref=pull_request_base_ref, current_ref=current_ref) if dialects: should_run = True if should_run: dialects_str = ( f"the following dialects: {', '.join(dialects)}" if dialects else "all supported dialects" ) print(f"Conclusion: should run tests for {dialects_str}") else: print("Conclusion: No tests to run") # write output variables lines = [] if should_run: lines.append("skip=false") if dialects: lines.append(f"dialects={','.join(dialects)}") else: lines.append("skip=true") with github_output.open("a") as f: f.writelines(f"{l}\n" for l in lines) ================================================ FILE: .github/scripts/integration_tests_sync.sh ================================================ #!/usr/bin/env bash set -euo pipefail # Unset git env vars that leak from parent hook context into submodule commands unset GIT_INDEX_FILE GIT_DIR SUBMODULE_DIR="sqlglot-integration-tests" # Graceful no-op when the submodule is absent (public contributors) [ -e "$SUBMODULE_DIR/.git" ] || exit 0 # Ensure the submodule is on a branch matching the parent's branch name. # Creates the branch if it doesn't exist. 
ensure_branch() { local branch branch=$(git rev-parse --abbrev-ref HEAD) [ "$branch" = "HEAD" ] && return 0 local current current=$(git -C "$SUBMODULE_DIR" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "") if [ "$current" != "$branch" ]; then # Stash any uncommitted/untracked changes so branch switch doesn't fail local stashed=0 if [ -n "$(git -C "$SUBMODULE_DIR" status --porcelain)" ]; then git -C "$SUBMODULE_DIR" stash push --include-untracked --quiet 2>/dev/null && stashed=1 fi if git -C "$SUBMODULE_DIR" show-ref --verify --quiet "refs/heads/$branch"; then git -C "$SUBMODULE_DIR" checkout "$branch" --quiet else git -C "$SUBMODULE_DIR" checkout -b "$branch" --quiet fi # Restore stashed changes on the new branch if [ "$stashed" = "1" ]; then git -C "$SUBMODULE_DIR" stash pop --quiet 2>/dev/null || true fi fi } case "${1:-}" in checkout) # No-op: branch creation is deferred to commit/post-commit to avoid # creating empty branches when switching parent branches. ;; commit) # Skip if submodule has no changes [ -n "$(git -C "$SUBMODULE_DIR" status --porcelain)" ] || exit 0 # Only auto-commit submodule changes when the submodule pointer is already # staged in the parent (i.e. the user explicitly `git add`-ed it). This # prevents surprise commits of unrelated WIP in the submodule working tree. git diff --cached --quiet -- "$SUBMODULE_DIR" && exit 0 ensure_branch if [ -n "$(git -C "$SUBMODULE_DIR" status --porcelain)" ]; then BRANCH=$(git rev-parse --abbrev-ref HEAD) git -C "$SUBMODULE_DIR" add -A git -C "$SUBMODULE_DIR" commit -m "Sync: $BRANCH" fi # Stage the updated submodule pointer in the parent if ! git diff --quiet -- "$SUBMODULE_DIR"; then git add "$SUBMODULE_DIR" fi ;; post-commit) # The pre-commit framework's stashing undoes submodule changes made during # pre-commit. Re-commit the submodule and amend the parent to include it. # Only act if the most recent parent commit references the submodule. 
This # prevents the post-commit hook from auto-committing unrelated submodule WIP # (e.g. when switching branches or pulling on main). if ! git diff-tree --no-commit-id --name-only -r HEAD | grep -q "^${SUBMODULE_DIR}$"; then exit 0 fi # Skip if submodule has no changes and pointer is up to date if [ -z "$(git -C "$SUBMODULE_DIR" status --porcelain)" ] && git diff --quiet -- "$SUBMODULE_DIR"; then exit 0 fi ensure_branch if [ -n "$(git -C "$SUBMODULE_DIR" status --porcelain)" ]; then BRANCH=$(git rev-parse --abbrev-ref HEAD) git -C "$SUBMODULE_DIR" add -A git -C "$SUBMODULE_DIR" commit -m "Sync: $BRANCH" fi if ! git diff --quiet -- "$SUBMODULE_DIR"; then git add "$SUBMODULE_DIR" git commit --amend --no-edit --no-verify fi ;; push) BRANCH=$(git rev-parse --abbrev-ref HEAD) case "$BRANCH" in main|master) exit 0 ;; esac ensure_branch cd "$SUBMODULE_DIR" # Check if there are local commits to push if git rev-parse --verify "@{upstream}" >/dev/null 2>&1; then UNPUSHED=$(git rev-list --count "@{upstream}..HEAD") else UNPUSHED=$(git rev-list --count "origin/main..HEAD" 2>/dev/null || echo "0") fi [ "$UNPUSHED" = "0" ] && exit 0 # Push the branch, rebasing first if needed git push -u origin "$BRANCH" 2>/dev/null || { git pull --rebase --autostash || { git rebase --abort 2>/dev/null || true echo "ERROR: Conflicts pushing integration tests. Resolve with:" >&2 echo " cd $SUBMODULE_DIR && git pull --rebase" >&2 exit 1 } git push -u origin "$BRANCH" } # Create a PR if one doesn't exist (requires gh CLI) if command -v gh >/dev/null 2>&1; then EXISTING=$(gh pr list --head "$BRANCH" --json number --jq 'length' 2>/dev/null || echo "0") if [ "$EXISTING" = "0" ]; then PARENT_URL=$(git -C .. 
remote get-url origin 2>/dev/null | sed 's/\.git$//' | sed 's|git@github.com:|https://github.com/|') BODY="Parent branch: ${PARENT_URL}/tree/${BRANCH}" gh pr create \ --title "$BRANCH" \ --body "$BODY" \ --head "$BRANCH" 2>/dev/null || true fi fi ;; merge) # Sync submodule to the parent's pointer git submodule update --init 2>/dev/null || true ensure_branch cd "$SUBMODULE_DIR" BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "HEAD") [ "$BRANCH" = "HEAD" ] && exit 0 git fetch origin --quiet 2>/dev/null || exit 0 BEHIND=$(git rev-list --count "HEAD..origin/$BRANCH" 2>/dev/null || echo "0") [ "$BEHIND" = "0" ] && exit 0 if ! git pull --rebase --autostash --quiet 2>/dev/null; then git rebase --abort 2>/dev/null || true echo "WARNING: Rebase conflicts in $SUBMODULE_DIR." >&2 echo "Resolve with: cd $SUBMODULE_DIR && git pull --rebase" >&2 fi ;; *) echo "Usage: $0 {checkout|commit|post-commit|push|merge}" >&2 exit 1 ;; esac ================================================ FILE: .github/workflows/benchmark-sqlglot.yml ================================================ name: Benchmark pull requests on: pull_request: types: [opened] issue_comment: types: [created] jobs: benchmark: name: benchmark runs-on: ubuntu-latest permissions: pull-requests: write if: > (github.event_name == 'pull_request') || (github.event_name == 'issue_comment' && github.event.issue.pull_request && contains(github.event.comment.body, '/benchmark')) steps: - name: Checkout PR branch uses: actions/checkout@v5 with: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha || '' }} - name: Checkout PR branch (comment trigger) if: github.event_name == 'issue_comment' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | PR_NUMBER=${{ github.event.issue.number }} PR_HEAD=$(gh pr view $PR_NUMBER --json headRefName -q .headRefName) git fetch origin "$PR_HEAD" git checkout "$PR_HEAD" - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.14" - name: Install dependencies (PR) run: 
| python -m venv .venv source .venv/bin/activate pip install -e ".[dev]" make install-devc-release - name: Benchmark PR branch run: | source .venv/bin/activate python benchmarks/parse.py --json bench_pr.json --quiet --sqlglot-only - name: Checkout main branch run: | git fetch origin main git worktree add main-branch origin/main - name: Install dependencies (main) run: | rm -rf .venv python -m venv .venv source .venv/bin/activate cd main-branch pip install -e ".[dev]" make install-devc-release - name: Benchmark main branch run: | source .venv/bin/activate cd main-branch python benchmarks/parse.py --json ../bench_main.json --quiet --sqlglot-only - name: Compare results run: | source .venv/bin/activate python benchmarks/compare.py bench_main.json bench_pr.json > benchmark_comment.md - name: Comment on PR uses: peter-evans/create-or-update-comment@v4 with: issue-number: ${{ github.event.issue.number || github.event.pull_request.number }} body-path: benchmark_comment.md ================================================ FILE: .github/workflows/package-publish.yml ================================================ name: Publish sqlglot to PyPI on: push: tags: - "v*" permissions: contents: read jobs: # Build mypyc wheels for each platform and Python version. 
build-wheels: name: Build wheels (${{ matrix.platform.os }}, ${{ matrix.platform.archs }}, ${{ matrix.python }}) runs-on: ${{ matrix.platform.os }} strategy: matrix: python: ["cp39", "cp310", "cp311", "cp312", "cp313", "cp314"] platform: - os: ubuntu-latest archs: x86_64 - os: ubuntu-24.04-arm archs: aarch64 - os: macos-latest archs: universal2 - os: windows-latest archs: AMD64 steps: - uses: actions/checkout@v5 with: fetch-depth: 0 - uses: pypa/cibuildwheel@v3.3.1 with: package-dir: sqlglotc output-dir: wheelhouse env: CIBW_BUILD: ${{ matrix.python }}-* CIBW_SKIP: "*-musllinux_*" CIBW_ARCHS: ${{ matrix.platform.archs }} - uses: actions/upload-artifact@v4 with: name: wheels-${{ matrix.platform.os }}-${{ matrix.platform.archs }}-${{ matrix.python }} path: ./wheelhouse/*.whl # Build the sqlglotc sdist (source-only, no wheels — always compiles on install). sdist-c: runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.x" - name: Install build tools run: pip install build - name: Build sdist run: | cd sqlglotc python -m build --sdist - name: Upload sdist uses: actions/upload-artifact@v4 with: name: sqlglotc-sdist path: sqlglotc/dist/*.tar.gz # Publish sqlglotc wheels and sdist to PyPI. publish-sqlglotc: needs: [build-wheels, sdist-c] runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 with: pattern: wheels-* path: dist/ merge-multiple: true - uses: actions/download-artifact@v4 with: name: sqlglotc-sdist path: dist/ - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} # Publish the main sqlglot package. 
deploy: needs: publish-sqlglotc runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v5 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v3 with: python-version: "3.10" - name: Install dependencies run: | python -m venv .venv source ./.venv/bin/activate python -m pip install --upgrade pip pip install build twine make install-dev - name: Build and publish env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | source ./.venv/bin/activate python -m build twine upload dist/* - name: Update CHANGELOG id: changelog continue-on-error: true uses: requarks/changelog-action@v1 with: token: ${{ github.token }} tag: ${{ github.ref_name }} - name: Commit CHANGELOG.md continue-on-error: true uses: stefanzweifel/git-auto-commit-action@v4 with: branch: main commit_message: "Update CHANGELOG.md for ${{ github.ref_name }} [skip ci]" file_pattern: "CHANGELOG.md" - name: Update API docs run: | source ./.venv/bin/activate make docs echo "sqlglot.com" > docs/CNAME mv docs /tmp/generated-docs git checkout -B api-docs origin/main rm -rf docs mv /tmp/generated-docs docs - name: Commit API docs uses: stefanzweifel/git-auto-commit-action@v4 with: branch: api-docs commit_message: 'Update API docs for ${{ github.ref_name }} [skip ci]' file_pattern: 'docs' push_options: '--force' ================================================ FILE: .github/workflows/package-test.yml ================================================ name: Run tests and linter checks on: push: branches: [ main ] pull_request: branches: [ main ] jobs: run-checks: runs-on: ubuntu-22.04 strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: pip - name: Create a virtual environment run: | python -m venv .venv - name: Install dependencies run: | source 
./.venv/bin/activate python -m pip install --upgrade pip make install-dev - name: Run tests and linter checks run: | source ./.venv/bin/activate make check ================================================ FILE: .github/workflows/run-integration-tests.yml ================================================ name: Run Integration Tests on: pull_request: types: [opened, synchronize, reopened, edited] jobs: should-run: name: Check if integration tests should run runs-on: ubuntu-latest outputs: skip: ${{ steps.test-parameters.outputs.skip }} dialects: ${{ steps.test-parameters.outputs.dialects }} steps: - name: Print debugging info run: | cat <<'EOF' Github event name: ${{ github.event_name }} Github event ${{ toJSON(github.event) }} Github event comment body: ${{ github.event.comment.body }} Github event pr body: ${{ github.event.pull_request.body }} Generic Number: ${{ github.event.number }} PR number: ${{ github.event.pull_request.number }} Issue number: ${{ github.event.issue.number }} SHA: ${{ github.sha }} Head Ref ${{ github.head_ref }} Ref Name: ${{ github.ref_name }} EOF - name: Checkout Code uses: actions/checkout@v5 with: # we need to checkout all refs so we can run `git diff` fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.13' - name: Check if integration tests should be run id: test-parameters run: | python .github/scripts/get_integration_test_params.py run-integration-tests: name: Run Integration Tests runs-on: ubuntu-latest needs: should-run if: needs.should-run.outputs.skip == 'false' && github.event.pull_request.head.repo.full_name == github.repository steps: - name: Acquire credentials id: app-token uses: actions/create-github-app-token@v2 with: app-id: ${{ vars.INTEGRATION_TEST_CLIENT_ID }} private-key: ${{ secrets.INTEGRATION_TEST_PRIVATE_KEY }} owner: fivetran repositories: sqlglot-integration-tests - name: Run integration tests id: run-remote env: GH_TOKEN: ${{ steps.app-token.outputs.token }} run: | set -e 
CORRELATION_ID=$(python -c 'import uuid;print(uuid.uuid4())') REMOTE_REPO="fivetran/sqlglot-integration-tests" echo "Triggering remote workflow" gh workflow run run-tests.yml \ --repo $REMOTE_REPO \ --ref main \ -f sqlglot_ref=${{ github.sha }} \ -f sqlglot_pr_number=${{ github.event.number || github.event.issue.number }} \ -f sqlglot_branch_name=${{ github.head_ref || github.ref_name }} \ -f correlation_id="$CORRELATION_ID" \ -f dialects="${{ needs.should-run.outputs.dialects }}" echo "Triggered workflow using correlation id: $CORRELATION_ID" # poll for run id RUN_ID="" ATTEMPTS=0 while [ "$RUN_ID" == "" ]; do sleep 5 ATTEMPTS=$((ATTEMPTS + 1)) if [ $ATTEMPTS -gt 10 ]; then echo "Timed out waiting for matching run to start" exit 1 fi echo "Checking for run" RUN_ID=$(gh run list \ --repo $REMOTE_REPO \ --event workflow_dispatch \ --workflow run-tests.yml \ --user sqlglot-integration-tests \ --json displayTitle,databaseId \ --limit 20 \ -q '.[] | select(.displayTitle | contains("Correlation ID: '"$CORRELATION_ID"'")) | .databaseId') done echo "Using Run ID: ${RUN_ID}" echo "remote_run_id=$RUN_ID" >> $GITHUB_OUTPUT echo "Waiting for completion" gh run watch $RUN_ID \ --repo fivetran/sqlglot-integration-tests \ --interval 10 \ --compact \ --exit-status # Fail the workflow on this side if the remote workflow fails exit $? 
- name: Fetch outputs id: fetch-outputs uses: actions/download-artifact@v5 with: github-token: ${{ steps.app-token.outputs.token }} repository: fivetran/sqlglot-integration-tests run-id: ${{ steps.run-remote.outputs.remote_run_id }} name: summary - name: Write summary as comment uses: actions/github-script@v8 # only do this when on PR branches, main builds dont have anywhere to write comments if: github.event_name == 'pull_request' || github.event.issue.pull_request with: script: | // summary.json is downloaded from the remote workflow in the previous step const summary = require("./summary.json"); // Add a unique, invisible identifier (an HTML comment) to find this comment later; it must be non-empty, because an empty string is contained in every comment body and would match an unrelated comment const commentIdentifier = "<!-- sqlglot-integration-test-summary -->"; const body = `${commentIdentifier}\n${summary.msg}`; // Find existing comment const { data: comments } = await github.rest.issues.listComments({ issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, }); const existingComment = comments.find(comment => comment.body.includes(commentIdentifier) ); if (existingComment) { // Update existing comment await github.rest.issues.updateComment({ comment_id: existingComment.id, owner: context.repo.owner, repo: context.repo.repo, body: body }); } else { // Create new comment await github.rest.issues.createComment({ issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, body: body }); } ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook *.ipynb .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ venv*/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # PyCharm .idea/ # Visual Studio Code .vscode .DS_STORE metastore_db spark_warehouse # Version file sqlglot/_version.py # Emacs files *~ ================================================ FILE: .gitmodules ================================================ [submodule "sqlglot-integration-tests"] path = sqlglot-integration-tests url = git@github.com:fivetran/sqlglot-integration-tests.git ================================================ FILE: .gitpod.yml ================================================ image: gitpod/workspace-python-3.11 tasks: - name: sqlglot init: | python -m venv .venv source .venv/bin/activate make install-dev command: | clear source .venv/bin/activate 
================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: local hooks: - id: sync-integration-tests-commit name: sync-integration-tests-commit entry: .github/scripts/integration_tests_sync.sh commit language: script stages: [pre-commit] always_run: true pass_filenames: false - id: sync-integration-tests-checkout name: sync-integration-tests-checkout entry: .github/scripts/integration_tests_sync.sh checkout language: script stages: [post-checkout] always_run: true pass_filenames: false - id: sync-integration-tests-push name: sync-integration-tests-push entry: .github/scripts/integration_tests_sync.sh push language: script stages: [pre-push] always_run: true pass_filenames: false - id: sync-integration-tests-merge name: sync-integration-tests-merge entry: .github/scripts/integration_tests_sync.sh merge language: script stages: [post-merge] always_run: true pass_filenames: false - id: ruff name: ruff description: "Run 'ruff' for extremely fast Python linting" entry: ruff check --force-exclude --fix language: python types_or: [python, pyi] require_serial: true additional_dependencies: [] - id: ruff-format name: ruff-format description: "Run 'ruff format' for extremely fast Python formatting" entry: ruff format language: python types_or: [python, pyi] require_serial: true - id: mypy name: mypy entry: mypy sqlglot tests language: system types: [ python ] files: ^(sqlglot/|tests/) pass_filenames: false ================================================ FILE: AGENTS.md ================================================ # Contributing to [SQLGlot](https://github.com/tobymao/sqlglot/blob/main/README.md) ## About SQLGlot SQLGlot is a no-dependency SQL parser, transpiler, optimizer, and engine written in pure Python. It supports 31+ SQL dialects and can transpile between them while preserving semantics. 
The codebase is performance-critical despite being pure Python, with an optional mypyc-compiled C extension for speed improvements (`sqlglotc/`). ## Development Commands Before contributing, read CONTRIBUTING.md. ### Installation ```bash # Basic installation make install # Development installation (Python only, no C extension) make install-dev # Development installation with mypyc C extension make install-devc # Install pre-commit hooks make install-pre-commit # With uv (faster): UV=1 make install-dev ``` ### Testing ```bash # Run all tests (pure Python, hides .so files during run) make test # Run all tests with mypyc C extension (builds extension first) make testc # Run only unit tests (skip integration tests, pure Python) make unit # Run only unit tests with C extension make unitc # Run specific test file python -m unittest tests.test_expressions # Run specific test class python -m unittest tests.test_expressions.TestExpressions # Run specific test method python -m unittest tests.test_expressions.TestExpressions.test_alias ``` ### Linting & Type Checking ```bash # Run linter and formatter only make style # Run full checks (style + pure Python tests + C extension tests) make check ``` ### Benchmarks ```bash # Run parsing benchmark make bench # Run optimization benchmark make bench-optimize ``` ## Architecture Overview SQLGlot follows a classic compiler architecture with three main phases (tokenizing, parsing, and generating), supported by the additional components described below: ### 1. Tokenizer (`tokens.py`) - Converts SQL strings into a sequence of tokens (lexical analysis) - Pure Python implementation in `tokens.py`; core logic in `tokenizer_core.py` (mypyc-compiled when using `[c]` extra) - Maps lexemes to `TokenType` enum values via `KEYWORDS` and `SINGLE_TOKENS` dictionaries - Dialects can override tokenizer behavior by customizing these mappings ### 2.
Parser (`parser.py`) - Converts tokens into an Abstract Syntax Tree (AST) - Uses recursive descent parsing approach - Parsing methods follow `_parse_*` naming convention (e.g., `_parse_create()`, `_parse_select()`) - Token matching methods: `_match()`, `_match_set()`, `_match_text_seq()`, `_match_texts()` - Helper methods for common patterns: `_parse_csv()`, `_parse_wrapped()`, `_parse_wrapped_csv()` - Maintains index/cursor with `_advance()` and `_retreat()` methods - Falls back to `exp.Command` for unparseable SQL (preserves original text) ### 3. Generator (`generator.py`) - Converts AST back to SQL strings - Traverses AST recursively, generating SQL for each expression node - Two ways to customize generation: - `TRANSFORMS` dictionary for single-line generations - `<expr_name>_sql()` methods for complex generations - Helper methods: `expressions()`, `func()`, `rename_func()` - Use `sep()` and `seg()` for proper whitespace/newline handling in pretty-printed output ### 4. Expressions (`expressions/`) - Defines all AST node types as Python classes inheriting from `Expression` - Each expression represents a semantic SQL concept (e.g., `Select`, `Join`, `Column`) - Expressions can be traversed using `.find()`, `.find_all()`, `.walk()`, `.transform()` - Building SQL programmatically: use helper functions like `select()`, `from_()`, `where()`, etc. ### 5. Dialects (`dialects/`) - 34 dialect implementations in `dialects/<dialect>.py` - Each dialect subclasses base `Dialect` and can override Tokenizer, Parser, and Generator - Base "sqlglot" dialect acts as a superset to minimize duplication - Dialect customization via: - Feature flags (e.g., `SUPPORTS_IMPLICIT_UNNEST`) - Token sets (e.g., `RESERVED_TOKENS`) - `token -> Callable` mappings (e.g., `FUNCTIONS`, `STATEMENTS`) - `Expression -> str` mappings in Generator ### 6. Optimizer (`optimizer/`) - Canonicalizes and optimizes queries while preserving semantics - Applies sequential optimization rules (order matters!)
- Key rules: - `qualify`: Normalizes identifiers and qualifies all tables/columns (most important rule) - `annotate_types`: Infers data types throughout the AST - `pushdown_predicates`, `pushdown_projections`: Optimization rewrites - `simplify`: Simplifies boolean expressions and arithmetic - Rules depend on schema information for best results - Optimizer performs logical optimization only (not physical/performance) ### 7. Schema (`schema.py`) - Represents database structure (tables, columns, types) - Used by optimizer and lineage analysis - `MappingSchema` takes nested dict: `{"table": {"col": "type"}}` ### 8. Lineage (`lineage.py`) - Traces column-level lineage through queries - Requires target query, upstream queries, and root table schemas - Builds linked list of `Node` objects representing data flow - Can visualize with `node.to_html()` ## Key Concepts ### The "sqlglot" Dialect - Base dialect that accommodates common syntax across all dialects - All other dialects extend this base - When adding multi-dialect features, prefer adding to base dialect to avoid duplication - Only add dialect-specific features to individual dialect classes ### AST-First Approach - SQLGlot preserves _semantics_ not syntax - Parse SQL → AST (semantic representation) → Generate SQL in target dialect - This enables accurate cross-dialect transpilation - Comments are preserved on best-effort basis - See `posts/ast_primer.md` for detailed AST tutorial ### Testing Philosophy - Comprehensive test suite in `tests/` directory - Dialect-specific tests in `tests/dialects/` - Tests are critical - "robust test suite" is a core feature - Use `tests/fixtures/` for test data - `tests/helpers.py` contains test utilities ### Parser/Generator Symmetry - Parser: `token -> Callable` mappings (builds AST from tokens) - Generator: `Expression -> str` mappings (builds SQL from AST) - Customization follows similar patterns in both ### Type Annotations - Type inference is crucial for some transpilations 
(e.g., `+` can mean addition or concatenation) - Optimizer's `annotate_types` rule propagates type information through AST - Requires schema information to work effectively ## Common Usage Patterns ### Reading SQL ```python import sqlglot expression = sqlglot.parse_one("SELECT * FROM table", dialect="spark") ``` ### Validate Function Expression ```python import sqlglot tree = sqlglot.parse_one("SELECT NULLIF(1, 2)", dialect="snowflake") if "Anonymous" in repr(tree): print("Function expression does not exist") else: print("Function expression exists") ``` ### Writing SQL ```python expression.sql(dialect="duckdb", pretty=True) ``` ### Building SQL Programmatically ```python from sqlglot import select, condition select("*").from_("y").where(condition("x=1").and_("y=1")).sql() ``` ### Traversing AST ```python from sqlglot import parse_one, exp tree = parse_one("SELECT a, b + 1 AS c FROM d", dialect="dialect") for column in tree.find_all(exp.Column): print(column.alias_or_name) ``` ### Transforming AST ```python def transformer(node): if isinstance(node, exp.Column) and node.name == "a": return parse_one("FUN(a)", dialect="dialect") return node transformed = tree.transform(transformer) ``` ## Development Guidelines - Follow [Conventional Commits](https://www.conventionalcommits.org/) for PR titles - If you are an agent, you must add your model [CLAUDE, CODEX, etc...] to the end of PR titles and commit messages. - Keep PRs minimal in scope - one well-defined change per PR - Add tests for non-trivial changes - Update docstrings if APIs change - Run `make check` before submitting - Use comments for complex logic only - Don't change license or license files in setup.py, there's nothing wrong with it.
## Important Files - `posts/ast_primer.md`: Detailed AST tutorial - `posts/onboarding.md`: Architecture deep-dive (HIGHLY RECOMMENDED) - `.pre-commit-config.yaml`: Pre-commit hooks (ruff, ruff-format, mypy) - `pyproject.toml`: Project metadata and build config - `Makefile`: All development commands ## Performance Considerations - Pure Python implementation with optional mypyc-compiled C extension (`sqlglotc/`) - Install with `pip install "sqlglot[c]"` for C extension speed boost - The `[c]` extra compiles core modules (`expression_core`, `tokenizer_core`, `parser_core`, etc.) via mypyc - Performance is a key feature despite Python implementation - Benchmarks compare against other SQL parsers - see `benchmarks/` - Avoid use of typing.Protocol, prefer Union Type and Duck Typing - Never suggest Pratt parsing as an optimization for the parser. The recursive descent approach is intentional and preferred. --- ## SQLGlot Coding Rules The following patterns are based on PR review feedback. Follow these to minimize review iterations. ### 1. Use Automatic Naming Convention for Generator Methods **Don't do this (module-level function with TRANSFORMS):** ```python def _my_func_sql(self: MyDialect.Generator, expression: exp.MyFunc) -> str: ... class Generator: TRANSFORMS = { exp.MyFunc: _my_func_sql, } ``` **Don't do this (method with TRANSFORMS):** ```python class Generator: TRANSFORMS = { exp.MyFunc: lambda self, e: self._my_func_sql(e), } def _my_func_sql(self, expression): ... ``` **Do this (auto-discovered method):** ```python class Generator: # No TRANSFORMS entry needed - automatic discovery by name def myfunc_sql(self, expression: exp.MyFunc) -> str: ... ``` Generator methods named `<expr_name>_sql` (e.g., `myfunc_sql` for `exp.MyFunc`) are automatically discovered. Important: Only use TRANSFORMS for simple one-liners like `rename_func("OTHER_NAME")` or lambdas or functions with multiple entry points. For any single entry point function, always use an auto-discovered method inside the Generator class.
SQLGlot automatically applies transformations based on the structure of the name, but when this fails, you must rename the function. This is only necessary when the SQL name is not covered by auto mapping: **Do this:** ```python class Generator: TRANSFORMS = { exp.ArrayLength: rename_func("LENGTH"), } ``` **Don't do this:** ```python exp.ArrayLength: lambda self, e: self.func("LENGTH", e.this), ``` ### 2. Use Existing Expression Classes, Not Anonymous **Don't do this:** ```python from_base64 = exp.Anonymous(this="FROM_BASE64", expressions=[input_expr]) ``` **Do this:** ```python from_base64 = exp.FromBase64(this=input_expr) ``` Always check if an expression class exists in the `sqlglot/expressions/` package before using `exp.Anonymous`. Anonymous should only be used for functions that don't have a dedicated class. Search for the function name in `sqlglot/expressions/` first. ### 3. SQL Generation: Choose the Right Approach Use the appropriate method based on complexity. From simplest to most complex: #### Level 1: Generator Helper Methods For generating function calls in generator methods, use `self.func()`: ```python def myfunc_sql(self, expression): # Don't: return self.sql(exp.Func(this="MY_FUNC", expressions=[expression.this])) # Do: return self.func("MY_FUNC", expression.this) ``` #### Level 2: Expression Builders For building expressions, use helper functions instead of direct class construction: | Helper | Instead of | Benefits | |--------|-----------|----------| | `exp.func("name", *args)` | `exp.Anonymous(...)` | Finds proper Func class | | `exp.array(e1, e2, ...)` | `exp.Array(expressions=[...])` | Parses automatically | | `exp.and_(e1, e2, ...)` | `exp.And(this=..., expression=...)` | Handles nesting | | `exp.or_(e1, e2, ...)` | `exp.Or(this=..., expression=...)` | Handles nesting | | `exp.case().when(cond, val).else_(default)` | `exp.Case(ifs=[...])` | Fluent interface | | `exp.cast(expr, "TYPE")` | `exp.Cast(this=..., to=...)` | Builds DataType | | `exp.column("col", "table")` |
`exp.Column(...)` | Handles identifiers | | `exp.null()` | `exp.Null()` | Simple factory | Also use expression operators for cleaner code: ```python # Arithmetic: exp.column("x") + 1 instead of exp.Add(this=..., expression=...) # Indexing: arr[index] instead of exp.Bracket(this=arr, expressions=[index]) # Comparison: arg.is_(exp.Null()) instead of exp.Is(this=arg, expression=exp.Null()) ``` #### Level 3: SQL Templates When expressions become complex, use templates with `exp.maybe_parse()` and `exp.replace_placeholders()`: ```python # Define template with :placeholder syntax MY_TEMPLATE: exp.Expression = exp.maybe_parse( "CASE WHEN :arg IS NULL THEN NULL ELSE :result END" ) # In generator method def myfunc_sql(self, expression): result = exp.replace_placeholders( self.MY_TEMPLATE.copy(), arg=expression.this, result=some_expression, ) return self.sql(result) ``` #### Avoid: F-strings with SQL Fragments You should rarely, if ever, build SQL with f-strings - it breaks quoting, escaping, and dialect handling: ```python # NEVER do this: def my_func_sql(self, expression): return f"CAST({self.sql(expression.this)} AS TIME)" # Do this instead: def my_func_sql(self, expression): return self.sql(exp.cast(expression.this, "TIME")) ``` ### 4. Type Checking: `is_string` vs `is_type()` These serve **different purposes**: **`is_type()`** - Semantic type check: ```python # Returns True if expression's type is text (columns, function results, etc.) # Requires annotate_types() to populate type info if arg.is_type(*exp.DataType.TEXT_TYPES): ... 
``` **`is_string`** - Syntactic check for string literals: ```python # Returns True only for literal strings like 'hello' # Works without type annotation if arg.is_string: value = arg.name # Extract the string value ``` **When to use each:** | Use Case | Method | |----------|--------| | Check if node is a string literal to extract its value | `is_string` | | Check if node is a literal vs column/expression | `is_string` | | Check semantic type (works for columns, functions) | `is_type()` | | Cover both literals and typed expressions | `is_string or is_type()` | **Combined pattern (from `length_sql`):** ```python # Fast check for string literals (no annotation needed) if arg.is_string: return self.func("LENGTH", arg) # For non-literals, get type info if needed if not arg.type: arg = annotate_types(arg, dialect=self.dialect) # Then check semantic type if arg.is_type(*exp.DataType.TEXT_TYPES): return self.func("LENGTH", arg) ``` **Don't do direct type comparisons:** ```python # Bad if input_expr.type and input_expr.type.this in exp.DataType.TEXT_TYPES: # Good if input_expr.is_type(*exp.DataType.TEXT_TYPES): ``` ### 5. Use `to_py()` for Literal Value Extraction **Don't do this:** ```python if isinstance(arg, exp.Literal): value = int(arg.this.strip("'")) ``` **Do this:** ```python if isinstance(arg, exp.Literal) and arg.is_number: value = int(arg.to_py()) ``` ### 6. Avoid Compile-Time NULL Checks Don't check for `exp.Null()` or literal NULL values in Python during transpilation. NULL handling should happen at query time in the generated SQL using `IS NULL` checks. **Don't do this:** ```python def myfunc_sql(self, expression): # Bad: checking for literal NULL at transpile time if any(isinstance(arg, exp.Null) for arg in expression.expressions): return self.sql(exp.Null()) ``` **Do this:** ```python # Good: generate SQL that handles NULL at query time TEMPLATE = exp.maybe_parse("CASE WHEN :arg IS NULL THEN NULL ELSE ... 
END") ``` Compile-time checks only handle literal `NULL` values in the SQL text, not NULL values that come from columns, parameters, or expressions at runtime. Generate SQL with `IS NULL` checks to handle all cases. ### 7. Type Annotations in Tests When transpilation depends on `is_type()` checks, tests need `annotate_types()`: ```python from sqlglot.optimizer import annotate_types # Without annotation - is_type() returns False for literals expr = self.validate_identity("SELECT BASE64_ENCODE('Hello World')") # With annotation - types are inferred, is_type() works annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "SELECT TO_BASE64(ENCODE('Hello World'))") ``` ### 8. Use `find_ancestor` with Scope Boundaries When searching for ancestors, include scope boundaries to avoid crossing into parent queries: ```python # Stop at Select to stay within current query scope ancestor = expression.find_ancestor(exp.Where, exp.Having, exp.Select) if ancestor and not isinstance(ancestor, exp.Select): # Found restricted context within current scope ... ``` ### 9. Use @unsupported_args for unsupported arguments When arguments are not supported do this: ```python @unsupported_args("ins_cost", "del_cost", "sub_cost") def levenshtein_sql(self, expression: exp.Levenshtein) -> str: ``` ### 10. Keep Code Minimal - Remove unused imports, variables, and dead code - Don't add comments for obvious code - Don't add docstrings unless the function is complex or public API - Prefer inline expressions over intermediate variables when readable - Don't add backwards-compatibility shims for removed code ### 11. Test Patterns - Add tests to the appropriate dialect test file (e.g., `tests/dialects/test_snowflake.py`) - Use `self.validate_all()` for cross-dialect tests - Use `self.validate_identity()` for round-trip tests - Don't add tests for functionality that already has coverage ### 12. 
Ensure Test Validity - Make sure all tests added to tests/dialects/*.py actually run in the relevant databases, such as snowflake or duckdb ================================================ FILE: CHANGELOG.md ================================================ Changelog ========= ## [v30.0.3] - 2026-03-19 ### :zap: Performance Improvements - [`f87ebe0`](https://github.com/tobymao/sqlglot/commit/f87ebe02103b249ec5fa2c93e019e465f77630be) - use mypyc i64 for parser index fields (~1.6% faster) *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`52bca33`](https://github.com/tobymao/sqlglot/commit/52bca33e9395c4f6f621649180f2576eb8591dba) - **lineage**: improve error message when column source index is out of range *(PR [#7336](https://github.com/tobymao/sqlglot/pull/7336) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7332](https://github.com/tobymao/sqlglot/issues/7332) opened by [@paultiq](https://github.com/paultiq)* ## [v30.0.2] - 2026-03-19 ### :boom: BREAKING CHANGES - due to [`936617e`](https://github.com/tobymao/sqlglot/commit/936617e749f969b04da318ec02e1086a01212e92) - escape comment markers in sanitize_comment for all dialects *(PR [#7301](https://github.com/tobymao/sqlglot/pull/7301) by [@llimllib](https://github.com/llimllib))*: escape comment markers in sanitize_comment for all dialects (#7301) - due to [`4f6bcd3`](https://github.com/tobymao/sqlglot/commit/4f6bcd3d21cf34346db4c7fc9936302d34a802e2) - Add transpilation support for ARRAY_TO_STRING function *(PR [#7289](https://github.com/tobymao/sqlglot/pull/7289) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add transpilation support for ARRAY_TO_STRING function (#7289) ### :sparkles: New Features - [`4f6bcd3`](https://github.com/tobymao/sqlglot/commit/4f6bcd3d21cf34346db4c7fc9936302d34a802e2) - **duckdb**: Add transpilation support for ARRAY_TO_STRING function *(PR 
[#7289](https://github.com/tobymao/sqlglot/pull/7289) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - [`b41a99a`](https://github.com/tobymao/sqlglot/commit/b41a99a1405a5749a5876a64a559bd040ba618a5) - **duckdb**: FROM pipe syntax in subquery *(PR [#7311](https://github.com/tobymao/sqlglot/pull/7311) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7305](https://github.com/tobymao/sqlglot/issues/7305) opened by [@paultiq](https://github.com/paultiq)* - [`936617e`](https://github.com/tobymao/sqlglot/commit/936617e749f969b04da318ec02e1086a01212e92) - escape comment markers in sanitize_comment for all dialects *(PR [#7301](https://github.com/tobymao/sqlglot/pull/7301) by [@llimllib](https://github.com/llimllib))* - [`6ddaee3`](https://github.com/tobymao/sqlglot/commit/6ddaee3ccb92f660327501884acf103fad782e07) - **expressions**: restore Expression.alias behaviour for non-Identifier alias nodes *(PR [#7310](https://github.com/tobymao/sqlglot/pull/7310) by [@treff7es](https://github.com/treff7es))* - [`ce08047`](https://github.com/tobymao/sqlglot/commit/ce0804717876889c731f2ab64035c70a85f9b294) - **snowflake**: ILIKE/LIKE ANY/ALL with single element transpilation to duckdb *(PR [#7314](https://github.com/tobymao/sqlglot/pull/7314) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7306](https://github.com/tobymao/sqlglot/issues/7306) opened by [@ultrabear](https://github.com/ultrabear)* - [`8a52de6`](https://github.com/tobymao/sqlglot/commit/8a52de63fb908b55df0143e3ec63f3ae37aa4fd8) - **parser**: Add builder for ARRAY_INTERSECT *(PR [#7328](https://github.com/tobymao/sqlglot/pull/7328) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#7326](https://github.com/tobymao/sqlglot/issues/7326) opened by [@ADBond](https://github.com/ADBond)* - 
[`f8a7ab2`](https://github.com/tobymao/sqlglot/commit/f8a7ab2724cbb75d40355c79c8f68003ee2a5c7e) - **parser**: Do not consume constraints following UNIQUE *(PR [#7330](https://github.com/tobymao/sqlglot/pull/7330) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#7327](https://github.com/tobymao/sqlglot/issues/7327) opened by [@chunyangfeng](https://github.com/chunyangfeng)* - [`64509a2`](https://github.com/tobymao/sqlglot/commit/64509a2a3e7606eab57d714e63424f235c78a6b5) - sqlglotc sdist install fails when ../sqlglot dir doesn't exist *(PR [#7337](https://github.com/tobymao/sqlglot/pull/7337) by [@tobymao](https://github.com/tobymao))* - :arrow_lower_right: *fixes issue [#7333](https://github.com/tobymao/sqlglot/issues/7333) opened by [@ZipBrandon](https://github.com/ZipBrandon)* ### :zap: Performance Improvements - [`0ac52aa`](https://github.com/tobymao/sqlglot/commit/0ac52aa80241a8ed4049948f2ee0c19c8dc64279) - move instance variables to __init__ for perf *(commit by [@tobymao](https://github.com/tobymao))* - [`c95ae50`](https://github.com/tobymao/sqlglot/commit/c95ae50fa122e01c914c92e9d847ff390516e1a4) - optimize parser for nested function calls (-41% on nested_funct… *(PR [#7307](https://github.com/tobymao/sqlglot/pull/7307) by [@tobymao](https://github.com/tobymao))* - [`1697bc3`](https://github.com/tobymao/sqlglot/commit/1697bc3a67c10314c5640ed7d52c40063db70f10) - optimize parser fast path for simple table references *(commit by [@tobymao](https://github.com/tobymao))* - [`18f15ca`](https://github.com/tobymao/sqlglot/commit/18f15ca96c7bdebe5798e407993495c0a9e9bd43) - inline token parsing for massive gains *(PR [#7335](https://github.com/tobymao/sqlglot/pull/7335) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`fe7874f`](https://github.com/tobymao/sqlglot/commit/fe7874f3cb5dd7899a438249f65b51b684a056f6) - update changelog with v30 release notes *(PR [#7309](https://github.com/tobymao/sqlglot/pull/7309) by 
[@georgesittas](https://github.com/georgesittas))* - [`8a914f0`](https://github.com/tobymao/sqlglot/commit/8a914f09ed3534ecc9998b6ce84f74e2f15909e4) - use ProcessPoolExecutor in test_executor *(PR [#7313](https://github.com/tobymao/sqlglot/pull/7313) by [@georgesittas](https://github.com/georgesittas))* - [`604fe3f`](https://github.com/tobymao/sqlglot/commit/604fe3f1770715baf8b461344cffa44eaf017fc2) - improve integration test submodule automations *(PR [#7320](https://github.com/tobymao/sqlglot/pull/7320) by [@georgesittas](https://github.com/georgesittas))* - [`c7b55c1`](https://github.com/tobymao/sqlglot/commit/c7b55c1998e7aaea7a4950e8ee347db7b0ec4af3) - Fix uv sync failing for sqlglotc *(PR [#7322](https://github.com/tobymao/sqlglot/pull/7322) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#7321](https://github.com/tobymao/sqlglot/issues/7321) opened by [@OutSquareCapital](https://github.com/OutSquareCapital)* - [`158424c`](https://github.com/tobymao/sqlglot/commit/158424cda0cc908c9f3687cc466194fa31cafb1b) - update readme *(commit by [@tobymao](https://github.com/tobymao))* ## [v30.0.1] - 2026-03-16 ### :bug: Bug Fixes - [`7bcad5c`](https://github.com/tobymao/sqlglot/commit/7bcad5c61673a8567e9c22d059ed4326e6698457) - auto pin sqlglotc version to sqlglot so there's no version mismatch closes [#7304](https://github.com/tobymao/sqlglot/pull/7304) *(commit by [@tobymao](https://github.com/tobymao))* ## [v30.0.0] - 2026-03-16 SQLGlot v30 is a major release focused on performance and compilation. Some of the core components of the library are now fully compilable by [mypyc](https://mypyc.readthedocs.io/), delivering significant speedups when installed with the `[c]` extra. This required restructuring several internal modules, which introduces breaking changes for users who depend on internal APIs, subclass parsers, or import from internal paths. 
> **If you only use the public API** (`sqlglot.parse`, `sqlglot.parse_one`, `sqlglot.transpile`, `sqlglot.exp.*`, `sqlglot.optimizer.*`), most code will work without changes. --- ## Migration Guide ### 1. Rust tokenizer removed — use `[c]` instead of `[rs]` The Rust-based tokenizer (`sqlglotrs`) has been removed (since v29) and replaced with a mypyc-compiled C extension (`sqlglotc`). ```bash # Before pip install "sqlglot[rs]" # After pip install "sqlglot[c]" ``` The `[rs]` extra still installs but is now a deprecated no-op stub. The following APIs are removed: - `use_rs_tokenizer` parameter and attribute on `Tokenizer` - `RsTokenizer`, `RsTokenizerSettings`, `RsTokenTypeSettings` imports - `USE_RS_TOKENIZER` constant from `tokens.py` ### 2. `expressions.py` split into a package The monolithic `sqlglot/expressions.py` has been split into `sqlglot/expressions/` with submodules: | Module | Contents | |---|---| | `core.py` | `Expr`, `Expression`, `Condition`, `Func`, `AggFunc`, `Column`, `Literal`, etc. | | `datatypes.py` | `DataType`, `DType`, `DataTypeParam`, `Interval` | | `query.py` | `Select`, `Query`, `SetOperation`, `UDTF`, `Subquery` | | `ddl.py` | `Create`, `Alter`, `Drop`, DDL statements | | `dml.py` | `Insert`, `Update`, `Delete`, `Merge` | | `properties.py` | All `*Property` classes, `PropertiesLocation` | | `constraints.py` | All `*ColumnConstraint` classes | | `math.py` | Arithmetic operators (`Add`, `Sub`, `Mul`, `Div`, etc.) | | `string.py` | String functions (`Concat`, `Length`, `Upper`, etc.) | | `temporal.py` | Date/time functions (`DateAdd`, `DateDiff`, etc.) | | `aggregate.py` | Aggregate functions (`Count`, `Sum`, `Avg`, etc.) | | `array.py` | Array functions (`ArrayAgg`, `Explode`, etc.) | | `json.py` | JSON functions (`JSONExtract`, etc.) | | `functions.py` | Other functions (`Coalesce`, `If`, `Case`, `Cast`, etc.) | | `builders.py` | Builder helpers (`select()`, `from_()`, `condition()`, etc.) 
| **Backwards-compatibility:** `from sqlglot.expressions import *` and `from sqlglot import expressions as exp` still work, because everything is re-exported from `expressions/__init__.py`. However, if you were importing from `sqlglot.expressions` by relying on it being a single file (e.g., inspecting `__file__`), that will break. ### 3. `Parser.expression()` no longer accepts `**kwargs` This affects anyone subclassing `Parser` or calling `self.expression()` in custom parse methods. ```python # Before self.expression(exp.Select, distinct=True, expressions=cols) # After self.expression(exp.Select(distinct=True, expressions=cols)) ``` The expression instance is now constructed by the caller and passed directly. This eliminates `**kwargs` dict allocation overhead. ### 4. Scope traversal: `bfs` parameter removed The `bfs` parameter has been removed from all scope traversal functions. Traversal is now always depth-first (DFS). ```python # Before scope.walk(bfs=True) scope.find(exp.Column, bfs=False) walk_in_scope(expr, bfs=True) # After scope.walk() scope.find(exp.Column) walk_in_scope(expr) ``` **Behavioral change:** The old default was `bfs=True`. Now traversal is always DFS. Code that depended on BFS ordering from these functions will get results in a different order. Affected functions: `Scope.walk()`, `Scope.find()`, `Scope.find_all()`, `walk_in_scope()`, `find_in_scope()`, `find_all_in_scope()`. ### 5. Dialect metaclass no longer mutates Parser token sets Previously, the `_Dialect` metaclass dynamically modified parser token sets (`ID_VAR_TOKENS`, `TABLE_ALIAS_TOKENS`, `NO_PAREN_FUNCTIONS`) at class creation time based on dialect flags like `SUPPORTS_SEMI_ANTI_JOIN`. These mutations are removed — each parser now declares its token sets statically. - `Dialect.SUPPORTS_SEMI_ANTI_JOIN` has been removed. - `SHOW_TRIE` / `SET_TRIE` are no longer auto-computed from `SHOW_PARSERS` / `SET_PARSERS`. ### 6. 
Use `Expr` instead of `Expression` for generic `isinstance` checks Base classes like `Func`, `Condition`, `Binary`, and other traits now inherit from `Expr` directly, not from `Expression`. This means `isinstance(node, exp.Expression)` will **not** match these trait classes. If your code uses `isinstance` to check for "any AST node", switch to `exp.Expr`: ```python # Before isinstance(node, exp.Expression) # After isinstance(node, exp.Expr) ``` ### 7. Compiled classes cannot be subclassed (when using `[c]`) When `sqlglot[c]` is installed, many core classes are compiled via mypyc. **Compiled classes cannot be subclassed at runtime** — class definition succeeds, but instantiation raises `TypeError: interpreted classes cannot inherit from compiled`. **Affected classes (compiled):** | Class | Subclassable? | |---|---| | All parsers (`BigQueryParser`, `SnowflakeParser`, etc.) | No | | `Parser` (base) | No | | `Expression`, `Expr`, and all AST nodes (`Select`, `Column`, `Func`, etc.) | No | | `MappingSchema`, `AbstractMappingSchema` | No | | `Scope` | No | | Optimizer rules (`scope.py`, `qualify.py`, `qualify_columns.py`, etc.) | No | **Not compiled (still subclassable):** | Class | Subclassable? 
| |---|---| | `Generator` and all dialect generators | Yes | | `Tokenizer` and all dialect tokenizers | Yes | | `Dialect` and all dialect classes | Yes | If you need to subclass compiled classes (parsers, expressions, schema, etc.), install the pure Python version instead: ```bash pip install sqlglot # pure Python — full subclassing support pip install "sqlglot[c]" # compiled — faster, but no subclassing ``` ### :boom: BREAKING CHANGES - due to [`8ee0646`](https://github.com/tobymao/sqlglot/commit/8ee0646baa6dfae7e96ca86e2c1af5d53fc04290) - Transpile numeric literals with underscores from ClickHouse to other dialects *(PR [#7132](https://github.com/tobymao/sqlglot/pull/7132) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Transpile numeric literals with underscores from ClickHouse to other dialects (#7132) - due to [`81a3763`](https://github.com/tobymao/sqlglot/commit/81a37636c374690d12fe0b57d78adf2310daf3cb) - cast string literals to TIMESTAMP in TO_CHAR generation *(PR [#7127](https://github.com/tobymao/sqlglot/pull/7127) by [@marconae](https://github.com/marconae))*: cast string literals to TIMESTAMP in TO_CHAR generation (#7127) - due to [`dff662a`](https://github.com/tobymao/sqlglot/commit/dff662a1389bdfbe6c331ca31dd37f76a6353429) - add transpilation support for ARRAY_GENERATE_RANGE function *(PR [#7107](https://github.com/tobymao/sqlglot/pull/7107) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: add transpilation support for ARRAY_GENERATE_RANGE function (#7107) - due to [`8e6b61f`](https://github.com/tobymao/sqlglot/commit/8e6b61f18e465b24ad9e20a8e1509486177fbb32) - transpilation support MAP_DELETE *(PR [#7139](https://github.com/tobymao/sqlglot/pull/7139) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support MAP_DELETE (#7139) - due to [`351e958`](https://github.com/tobymao/sqlglot/commit/351e958c3cd8a1395826ef624979275b246490f8) - fix 
parsing error in json_extract for exasol *(PR [#7098](https://github.com/tobymao/sqlglot/pull/7098) by [@nnamdi16](https://github.com/nnamdi16))*: fix parsing error in json_extract for exasol (#7098) - due to [`1b1db57`](https://github.com/tobymao/sqlglot/commit/1b1db5728903d6468eba3d07da2a468395bf628b) - transpilation support MAP_SIZE *(PR [#7146](https://github.com/tobymao/sqlglot/pull/7146) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support MAP_SIZE (#7146) - due to [`0b4e26b`](https://github.com/tobymao/sqlglot/commit/0b4e26b8e7045bcf68992b6a2c8a5fd51b8262d7) - annotate EXTRACT(expr) for DuckDB *(PR [#7154](https://github.com/tobymao/sqlglot/pull/7154) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate EXTRACT(expr) for DuckDB (#7154) - due to [`95d7d20`](https://github.com/tobymao/sqlglot/commit/95d7d2052b1ed8fc64a99d557b8b085ad466100e) - annotate `TO_TIMESTAMP` as `TIMESTAMPTZ` fixes [#7155](https://github.com/tobymao/sqlglot/pull/7155) *(commit by [@georgesittas](https://github.com/georgesittas))*: annotate `TO_TIMESTAMP` as `TIMESTAMPTZ` fixes #7155 - due to [`7cc4332`](https://github.com/tobymao/sqlglot/commit/7cc43327ba72b3a1af6d8f2f489a97b997748ee9) - support transpilation of function RIGHT from Snowflake to DuckDB *(PR [#7148](https://github.com/tobymao/sqlglot/pull/7148) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of function RIGHT from Snowflake to DuckDB (#7148) - due to [`ad9d114`](https://github.com/tobymao/sqlglot/commit/ad9d114aea7f7553485631372d99ec5e5cf85045) - Enable transpilation for ARRAY_POSITION function *(PR [#7153](https://github.com/tobymao/sqlglot/pull/7153) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Enable transpilation for ARRAY_POSITION function (#7153) - due to [`684ff4a`](https://github.com/tobymao/sqlglot/commit/684ff4a13b1220fd5d3c0ec597cbdc630a3b9c03) - support arrayExcept for 
ClickHouse *(PR [#7161](https://github.com/tobymao/sqlglot/pull/7161) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support arrayExcept for ClickHouse (#7161) - due to [`74fd80c`](https://github.com/tobymao/sqlglot/commit/74fd80cdf055c828cedfac43b4b54132d18558bb) - split up expressions.py *(PR [#7160](https://github.com/tobymao/sqlglot/pull/7160) by [@tobymao](https://github.com/tobymao))*: split up expressions.py (#7160) - due to [`d5840c5`](https://github.com/tobymao/sqlglot/commit/d5840c53f6a359def002c0b634a48706519b11e7) - support transpilation of RANDOM from Snowflake to DuckDB *(PR [#7163](https://github.com/tobymao/sqlglot/pull/7163) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of RANDOM from Snowflake to DuckDB (#7163) - due to [`07ef171`](https://github.com/tobymao/sqlglot/commit/07ef1711fdff0fa8f877b93f5be8921424eed438) - Add support for multiple-suffix combined aggregate functions in Clickhouse dialect *(PR [#7109](https://github.com/tobymao/sqlglot/pull/7109) by [@emanb29](https://github.com/emanb29))*: Add support for multiple-suffix combined aggregate functions in Clickhouse dialect (#7109) - due to [`280e247`](https://github.com/tobymao/sqlglot/commit/280e24726be22a4f06261168d5dfc74b361dd04d) - Add transpilation support for NULLs and multiset semantics in ARRAY_INTERSECTION *(PR [#7145](https://github.com/tobymao/sqlglot/pull/7145) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add transpilation support for NULLs and multiset semantics in ARRAY_INTERSECTION (#7145) - due to [`542e392`](https://github.com/tobymao/sqlglot/commit/542e3920fb7d232b523e950820750e549a8d909a) - Add transpilation support for NULLs and multiset semantics in ARRAY_INTERSECTION *(PR [#7145](https://github.com/tobymao/sqlglot/pull/7145) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add transpilation support for NULLs and multiset semantics in 
ARRAY_INTERSECTION (#7145) - due to [`363167c`](https://github.com/tobymao/sqlglot/commit/363167c6609fa80b6014851b4415adca17b00df4) - parse FILE column *(PR [#7184](https://github.com/tobymao/sqlglot/pull/7184) by [@geooo109](https://github.com/geooo109))*: parse FILE column (#7184) - due to [`f630d75`](https://github.com/tobymao/sqlglot/commit/f630d7579231f29fa5637b48f1be0b5665eb36b3) - support dotcolon with JSON *(PR [#7191](https://github.com/tobymao/sqlglot/pull/7191) by [@geooo109](https://github.com/geooo109))*: support dotcolon with JSON (#7191) - due to [`29399bb`](https://github.com/tobymao/sqlglot/commit/29399bbed44a74d95257040fd36f0a0f6de7c7d8) - remove invalid group by distinct during custom transformation of group by all *(PR [#7197](https://github.com/tobymao/sqlglot/pull/7197) by [@nnamdi16](https://github.com/nnamdi16))*: remove invalid group by distinct during custom transformation of group by all (#7197) - due to [`e0947ad`](https://github.com/tobymao/sqlglot/commit/e0947adcaeb1e3cb829e584e0b071c598c64cfa9) - set default window frame for certain Snowflake ranking functions during transpilation *(PR [#7195](https://github.com/tobymao/sqlglot/pull/7195) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: set default window frame for certain Snowflake ranking functions during transpilation (#7195) - due to [`4a3254f`](https://github.com/tobymao/sqlglot/commit/4a3254fbd1bfee7aa6787d3fc31832d8e9771932) - transpilation support MAP_PICK *(PR [#7189](https://github.com/tobymao/sqlglot/pull/7189) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support MAP_PICK (#7189) - due to [`9431fd4`](https://github.com/tobymao/sqlglot/commit/9431fd457dc4dbad7a963828c5303889573079d3) - annotate CURRENT_TIMESTAMP for TSQL *(PR [#7208](https://github.com/tobymao/sqlglot/pull/7208) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate CURRENT_TIMESTAMP for TSQL (#7208) - due to 
[`4087a15`](https://github.com/tobymao/sqlglot/commit/4087a152c5d4372ab53644061980b81a9174db1e) - properly handle GENERATED ALWAYS/BY DEFAULT *(PR [#7210](https://github.com/tobymao/sqlglot/pull/7210) by [@anna-stepien](https://github.com/anna-stepien))*: properly handle GENERATED ALWAYS/BY DEFAULT (#7210) - due to [`a39d3e9`](https://github.com/tobymao/sqlglot/commit/a39d3e999e707114226edb806061b80e0164489a) - Implement transpilation for ARRAYS_OVERLAP function *(PR [#7200](https://github.com/tobymao/sqlglot/pull/7200) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Implement transpilation for ARRAYS_OVERLAP function (#7200) - due to [`ed5e179`](https://github.com/tobymao/sqlglot/commit/ed5e1792a30e5172620e263edcae65f2f892f55b) - Added tests for to_array *(PR [#7201](https://github.com/tobymao/sqlglot/pull/7201) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Added tests for to_array (#7201) - due to [`3ef5db9`](https://github.com/tobymao/sqlglot/commit/3ef5db9ca10527e237340cc24612d63d4d1bcf1b) - support DETACH *(PR [#7215](https://github.com/tobymao/sqlglot/pull/7215) by [@geooo109](https://github.com/geooo109))*: support DETACH (#7215) - due to [`b5f888e`](https://github.com/tobymao/sqlglot/commit/b5f888e403b20dc9729eb7f01f5cc5227f173ce2) - handle NULL discrepancy during transpilation of SPLIT from Snowflake to DuckDB *(PR [#7216](https://github.com/tobymao/sqlglot/pull/7216) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: handle NULL discrepancy during transpilation of SPLIT from Snowflake to DuckDB (#7216) - due to [`5599478`](https://github.com/tobymao/sqlglot/commit/55994785bae54dd37ffc40b085878d186780f033) - Compile base & BigQuery's parser with mypyc *(PR [#7206](https://github.com/tobymao/sqlglot/pull/7206) by [@VaggelisD](https://github.com/VaggelisD))*: Compile base & BigQuery's parser with mypyc (#7206) - due to 
[`3c02ea8`](https://github.com/tobymao/sqlglot/commit/3c02ea8a2c515d53e89c0d7455392a2b8fac2d8a) - handle empty separator for SPLIT transpilation (Snowflake -> Duckdb) *(PR [#7224](https://github.com/tobymao/sqlglot/pull/7224) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: handle empty separator for SPLIT transpilation (Snowflake -> Duckdb) (#7224) - due to [`c34bc2d`](https://github.com/tobymao/sqlglot/commit/c34bc2d17c2467b433d49e8e84611ec6acb39580) - transpilation support MAP_INSERT *(PR [#7190](https://github.com/tobymao/sqlglot/pull/7190) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support MAP_INSERT (#7190) - due to [`8256e08`](https://github.com/tobymao/sqlglot/commit/8256e08e55bb12ac3598e2b1f936e5ef380e2cf8) - Extract Spark parser for mypyc compilation *(PR [#7235](https://github.com/tobymao/sqlglot/pull/7235) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Spark parser for mypyc compilation (#7235) - due to [`2379947`](https://github.com/tobymao/sqlglot/commit/237994706746d6a294bfae0b413a0ec479645c2c) - Extract SingleStore parser for mypyc compilation *(PR [#7250](https://github.com/tobymao/sqlglot/pull/7250) by [@VaggelisD](https://github.com/VaggelisD))*: Extract SingleStore parser for mypyc compilation (#7250) - due to [`27b6f56`](https://github.com/tobymao/sqlglot/commit/27b6f56a3871a0d2152e3eb26ac26dd56a4b5ff3) - Extract Doris parser for mypyc compilation *(PR [#7249](https://github.com/tobymao/sqlglot/pull/7249) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Doris parser for mypyc compilation (#7249) - due to [`5c8b003`](https://github.com/tobymao/sqlglot/commit/5c8b0037e2a6a8ccd5588234bb591d42904e8a02) - Extract StarRocks parser for mypyc compilation *(PR [#7248](https://github.com/tobymao/sqlglot/pull/7248) by [@VaggelisD](https://github.com/VaggelisD))*: Extract StarRocks parser for mypyc compilation (#7248) - due to 
[`669bc3f`](https://github.com/tobymao/sqlglot/commit/669bc3f7711253d4ecf044c2ec956c0f38a74463) - Extract Materialize parser for mypyc compilation *(PR [#7247](https://github.com/tobymao/sqlglot/pull/7247) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Materialize parser for mypyc compilation (#7247) - due to [`5b51c64`](https://github.com/tobymao/sqlglot/commit/5b51c64a0d620469e696fcb7a2e86915b7f9a925) - Extract RisingWave parser for mypyc compilation *(PR [#7246](https://github.com/tobymao/sqlglot/pull/7246) by [@VaggelisD](https://github.com/VaggelisD))*: Extract RisingWave parser for mypyc compilation (#7246) - due to [`6982d44`](https://github.com/tobymao/sqlglot/commit/6982d442ecebbeb38ee3cda1ab530b813dcff988) - Extract Solr parser for mypyc compilation *(PR [#7244](https://github.com/tobymao/sqlglot/pull/7244) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Solr parser for mypyc compilation (#7244) - due to [`a57b632`](https://github.com/tobymao/sqlglot/commit/a57b632c512ff8be65629ea30dc9cd0fe69cb1d3) - Extract Redshift parser for mypyc compilation *(PR [#7245](https://github.com/tobymao/sqlglot/pull/7245) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Redshift parser for mypyc compilation (#7245) - due to [`364fca7`](https://github.com/tobymao/sqlglot/commit/364fca74025eab10f8ba34f2498f9545321e8a3f) - Extract Tableau parser for mypyc compilation *(PR [#7243](https://github.com/tobymao/sqlglot/pull/7243) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Tableau parser for mypyc compilation (#7243) - due to [`ab6331f`](https://github.com/tobymao/sqlglot/commit/ab6331fc920636e47c6bb03825086642d9425b77) - Extract SQLite parser for mypyc compilation *(PR [#7240](https://github.com/tobymao/sqlglot/pull/7240) by [@VaggelisD](https://github.com/VaggelisD))*: Extract SQLite parser for mypyc compilation (#7240) - due to [`df6f052`](https://github.com/tobymao/sqlglot/commit/df6f05268e15f5de1685552ea87c69d2f5bd48c6) - Extract Drill 
parser for mypyc compilation *(PR [#7242](https://github.com/tobymao/sqlglot/pull/7242) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Drill parser for mypyc compilation (#7242) - due to [`125ea72`](https://github.com/tobymao/sqlglot/commit/125ea7216a01f0d5288517b1aa7203c43b4e737a) - Extract Dremio parser for mypyc compilation *(PR [#7241](https://github.com/tobymao/sqlglot/pull/7241) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Dremio parser for mypyc compilation (#7241) - due to [`a485038`](https://github.com/tobymao/sqlglot/commit/a485038a64dd560de093153b23c43f33526d4bf1) - Extract Exasol parser for mypyc compilation *(PR [#7239](https://github.com/tobymao/sqlglot/pull/7239) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Exasol parser for mypyc compilation (#7239) - due to [`113447f`](https://github.com/tobymao/sqlglot/commit/113447f587891d431af063fa4f18f021033bcb88) - Extract PRQL parser for mypyc compilation *(PR [#7238](https://github.com/tobymao/sqlglot/pull/7238) by [@VaggelisD](https://github.com/VaggelisD))*: Extract PRQL parser for mypyc compilation (#7238) - due to [`0872db7`](https://github.com/tobymao/sqlglot/commit/0872db71adc1da464d0d7022dbb58b064bd96abb) - Extract Teradata parser for mypyc compilation *(PR [#7237](https://github.com/tobymao/sqlglot/pull/7237) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Teradata parser for mypyc compilation (#7237) - due to [`0fe1afd`](https://github.com/tobymao/sqlglot/commit/0fe1afd39a5105bc9406ecac0ea78c26254d7f9e) - Extract Oracle parser for mypyc compilation *(PR [#7236](https://github.com/tobymao/sqlglot/pull/7236) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Oracle parser for mypyc compilation (#7236) - due to [`47e75e5`](https://github.com/tobymao/sqlglot/commit/47e75e5bb29c6ecd142ca4c51e27ac4ed9996f74) - Extract Snowflake parser for mypyc compilation *(PR [#7229](https://github.com/tobymao/sqlglot/pull/7229) by 
[@VaggelisD](https://github.com/VaggelisD))*: Extract Snowflake parser for mypyc compilation (#7229) - due to [`ea035e1`](https://github.com/tobymao/sqlglot/commit/ea035e15793c047df58af9f9a901c7b7c8ed07e2) - Rename Parser to Parser and auto-discover parsers in setup.py *(PR [#7252](https://github.com/tobymao/sqlglot/pull/7252) by [@VaggelisD](https://github.com/VaggelisD))*: Rename Parser to Parser and auto-discover parsers in setup.py (#7252) - due to [`e8d0dab`](https://github.com/tobymao/sqlglot/commit/e8d0dabf10d4afe22f2277d46cf931c39409063e) - Extract Databricks parser for mypyc compilation *(PR [#7253](https://github.com/tobymao/sqlglot/pull/7253) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Databricks parser for mypyc compilation (#7253) - due to [`8c799ad`](https://github.com/tobymao/sqlglot/commit/8c799adf70cc0136f1c6647c3212239c83f6cbe1) - Extract Fabric parser for mypyc compilation *(PR [#7254](https://github.com/tobymao/sqlglot/pull/7254) by [@VaggelisD](https://github.com/VaggelisD))*: Extract Fabric parser for mypyc compilation (#7254) - due to [`b155a29`](https://github.com/tobymao/sqlglot/commit/b155a29574fbc57720637343529623c02b0db43a) - expression to not use **kwargs because it allocates multiple dicts *(PR [#7256](https://github.com/tobymao/sqlglot/pull/7256) by [@tobymao](https://github.com/tobymao))*: expression to not use **kwargs because it allocates multiple dicts (#7256) - due to [`17c2fc7`](https://github.com/tobymao/sqlglot/commit/17c2fc774fda32eb2f1c1baed354db10d4d11e3d) - JSON path with brackets containing non literals *(PR [#7251](https://github.com/tobymao/sqlglot/pull/7251) by [@geooo109](https://github.com/geooo109))*: JSON path with brackets containing non literals (#7251) - due to [`3d7bbb5`](https://github.com/tobymao/sqlglot/commit/3d7bbb5fd2689aed1b2f659abd8d1b18db421104) - parse single-arg TO_{GEOMETRY_GEOGRAPHY} as Cast *(PR [#7270](https://github.com/tobymao/sqlglot/pull/7270) by 
[@georgesittas](https://github.com/georgesittas))*: parse single-arg TO_{GEOMETRY_GEOGRAPHY} as Cast (#7270) - due to [`5a9a522`](https://github.com/tobymao/sqlglot/commit/5a9a52212f6ba975e1f671b712a89befe9c2d606) - support transpilation of SPLIT_PART from snowflake to duckdb *(PR [#7258](https://github.com/tobymao/sqlglot/pull/7258) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of SPLIT_PART from snowflake to duckdb (#7258) - due to [`a13995e`](https://github.com/tobymao/sqlglot/commit/a13995e7c3441bcd8442b8a210cadd2055199492) - Move `ParserCore` back to `Parser` *(PR [#7268](https://github.com/tobymao/sqlglot/pull/7268) by [@VaggelisD](https://github.com/VaggelisD))*: Move `ParserCore` back to `Parser` (#7268) - due to [`07f6893`](https://github.com/tobymao/sqlglot/commit/07f68932e39f148c267ce1b12086b85d4d485bf7) - Fully compile schema *(PR [#7276](https://github.com/tobymao/sqlglot/pull/7276) by [@VaggelisD](https://github.com/VaggelisD))*: Fully compile schema (#7276) - due to [`79d72db`](https://github.com/tobymao/sqlglot/commit/79d72dbd2c770c53f9ec9c36dd67ae63860ba4fe) - Transpilation support for to_variant *(PR [#7262](https://github.com/tobymao/sqlglot/pull/7262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Transpilation support for to_variant (#7262) - due to [`3f94428`](https://github.com/tobymao/sqlglot/commit/3f94428507d7207ca99e76cebef9375bd3648f4d) - Transpilation support for HASH_AGG *(commit by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Transpilation support for HASH_AGG - due to [`1bd952b`](https://github.com/tobymao/sqlglot/commit/1bd952bfc68926405e5fe7efb1bb833dd6dd6dfb) - Transpilation support for HASH_AGG *(commit by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Transpilation support for HASH_AGG - due to [`bfa4818`](https://github.com/tobymao/sqlglot/commit/bfa48188e45601491f981c4cea8a4bd8a2c6a0a2) - Implement transpilation for 
ARRAY_SORT function *(PR [#7223](https://github.com/tobymao/sqlglot/pull/7223) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Implement transpilation for ARRAY_SORT function (#7223) - due to [`4b64898`](https://github.com/tobymao/sqlglot/commit/4b648985a166ac091b8a46af1590caf59f9bc31b) - robust support for IGNORE NULLS *(PR [#7288](https://github.com/tobymao/sqlglot/pull/7288) by [@geooo109](https://github.com/geooo109))*: robust support for IGNORE NULLS (#7288) ### :sparkles: New Features - [`8ee0646`](https://github.com/tobymao/sqlglot/commit/8ee0646baa6dfae7e96ca86e2c1af5d53fc04290) - **clickhouse**: Transpile numeric literals with underscores from ClickHouse to other dialects *(PR [#7132](https://github.com/tobymao/sqlglot/pull/7132) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`68da927`](https://github.com/tobymao/sqlglot/commit/68da927d3418eccfe261c22c6e39d8742f52f01c) - **exasol**: support REGEXP_LIKE binary predicate *(PR [#7136](https://github.com/tobymao/sqlglot/pull/7136) by [@marconae](https://github.com/marconae))* - [`1b5a7d7`](https://github.com/tobymao/sqlglot/commit/1b5a7d7e2a65665bb3cb0e8efc11062bf891ee28) - **exasol**: transpile FROM_UNIXTIME to FROM_POSIX_TIME *(PR [#7133](https://github.com/tobymao/sqlglot/pull/7133) by [@marconae](https://github.com/marconae))* - [`dff662a`](https://github.com/tobymao/sqlglot/commit/dff662a1389bdfbe6c331ca31dd37f76a6353429) - **duckdb**: add transpilation support for ARRAY_GENERATE_RANGE function *(PR [#7107](https://github.com/tobymao/sqlglot/pull/7107) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8e6b61f`](https://github.com/tobymao/sqlglot/commit/8e6b61f18e465b24ad9e20a8e1509486177fbb32) - **snowflake**: transpilation support MAP_DELETE *(PR [#7139](https://github.com/tobymao/sqlglot/pull/7139) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - 
[`483791d`](https://github.com/tobymao/sqlglot/commit/483791d66302a463041094c668421a1d4cc8061e) - **clickhouse**: support nested JSON subcolumn access *(PR [#7140](https://github.com/tobymao/sqlglot/pull/7140) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* - [`260f116`](https://github.com/tobymao/sqlglot/commit/260f116df06a4e5e7eb81f87713b84e551e012c8) - **clickhouse**: support JSON type arguments *(PR [#7141](https://github.com/tobymao/sqlglot/pull/7141) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* - [`1b1db57`](https://github.com/tobymao/sqlglot/commit/1b1db5728903d6468eba3d07da2a468395bf628b) - **snowflake**: transpilation support MAP_SIZE *(PR [#7146](https://github.com/tobymao/sqlglot/pull/7146) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`0b4e26b`](https://github.com/tobymao/sqlglot/commit/0b4e26b8e7045bcf68992b6a2c8a5fd51b8262d7) - **optimizer**: annotate EXTRACT(expr) for DuckDB *(PR [#7154](https://github.com/tobymao/sqlglot/pull/7154) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b2e736c`](https://github.com/tobymao/sqlglot/commit/b2e736cd4ae1dd949b5ac59ae263599f8d6f259c) - postgres -> sqlite transpilation improvements *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c91e8d9`](https://github.com/tobymao/sqlglot/commit/c91e8d9dd8a9cdcfaec791144326cf6038edf414) - **clickhouse**: ANY/ALL joins *(PR [#7157](https://github.com/tobymao/sqlglot/pull/7157) by [@geooo109](https://github.com/geooo109))* - [`7cc4332`](https://github.com/tobymao/sqlglot/commit/7cc43327ba72b3a1af6d8f2f489a97b997748ee9) - **duckdb**: support transpilation of function RIGHT from Snowflake to DuckDB *(PR [#7148](https://github.com/tobymao/sqlglot/pull/7148) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`63f2981`](https://github.com/tobymao/sqlglot/commit/63f29812e41e1d7d4bb3de349f33c42ee8103498) - **trino**: Add support for ARRAY_FIRST(array, x -> predicate) *(PR 
[#7147](https://github.com/tobymao/sqlglot/pull/7147) by [@gertjanal](https://github.com/gertjanal))* - [`d76dc36`](https://github.com/tobymao/sqlglot/commit/d76dc36a0c2b15ff795725f1a71431ba247eda96) - **clickhouse**: add support for sql security property *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ad9d114`](https://github.com/tobymao/sqlglot/commit/ad9d114aea7f7553485631372d99ec5e5cf85045) - **DuckDB**: Enable transpilation for ARRAY_POSITION function *(PR [#7153](https://github.com/tobymao/sqlglot/pull/7153) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`684ff4a`](https://github.com/tobymao/sqlglot/commit/684ff4a13b1220fd5d3c0ec597cbdc630a3b9c03) - **clickhouse**: support arrayExcept for ClickHouse *(PR [#7161](https://github.com/tobymao/sqlglot/pull/7161) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b97da8d`](https://github.com/tobymao/sqlglot/commit/b97da8dea15c39867b23db01624464e7d15427e7) - **spark, dbx**: robust SET support *(PR [#7166](https://github.com/tobymao/sqlglot/pull/7166) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7165](https://github.com/tobymao/sqlglot/issues/7165) opened by [@aersam](https://github.com/aersam)* - [`c74049c`](https://github.com/tobymao/sqlglot/commit/c74049c2c573522c398b3e179a09a7b49b52f54f) - **databricks**: Add support for HANDLER and PARAMETER STYLE properties *(PR [#7150](https://github.com/tobymao/sqlglot/pull/7150) by [@aersam](https://github.com/aersam))* - [`d5840c5`](https://github.com/tobymao/sqlglot/commit/d5840c53f6a359def002c0b634a48706519b11e7) - **duckdb**: support transpilation of RANDOM from Snowflake to DuckDB *(PR [#7163](https://github.com/tobymao/sqlglot/pull/7163) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`07ef171`](https://github.com/tobymao/sqlglot/commit/07ef1711fdff0fa8f877b93f5be8921424eed438) - **Clickhouse**: Add support for multiple-suffix combined 
aggregate functions in Clickhouse dialect *(PR [#7109](https://github.com/tobymao/sqlglot/pull/7109) by [@emanb29](https://github.com/emanb29))* - [`e18a24e`](https://github.com/tobymao/sqlglot/commit/e18a24e108976910e55b77d863fa4b5eeb622684) - **exasol**: Custom Transformation of GROUP BY ALL in exasol dialect *(PR [#7151](https://github.com/tobymao/sqlglot/pull/7151) by [@nnamdi16](https://github.com/nnamdi16))* - [`280e247`](https://github.com/tobymao/sqlglot/commit/280e24726be22a4f06261168d5dfc74b361dd04d) - **duckdb**: Add transpilation support for NULLs and multiset semantics in ARRAY_INTERSECTION *(PR [#7145](https://github.com/tobymao/sqlglot/pull/7145) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`542e392`](https://github.com/tobymao/sqlglot/commit/542e3920fb7d232b523e950820750e549a8d909a) - **duckdb**: Add transpilation support for NULLs and multiset semantics in ARRAY_INTERSECTION *(PR [#7145](https://github.com/tobymao/sqlglot/pull/7145) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ff61214`](https://github.com/tobymao/sqlglot/commit/ff61214ab3f3ee48a413020867144a7f4d0b39c3) - **databricks**: Add support for DECLARE OR REPLACE *(PR [#7169](https://github.com/tobymao/sqlglot/pull/7169) by [@aersam](https://github.com/aersam))* - :arrow_lower_right: *addresses issue [#7168](https://github.com/tobymao/sqlglot/issues/7168) opened by [@aersam](https://github.com/aersam)* - [`90e8cab`](https://github.com/tobymao/sqlglot/commit/90e8cab0a54c225698f1cfa33d0ddbc92793fa15) - **clickhouse**: support robust ASSUME/CHECK constraints *(PR [#7170](https://github.com/tobymao/sqlglot/pull/7170) by [@geooo109](https://github.com/geooo109))* - [`4273991`](https://github.com/tobymao/sqlglot/commit/427399151aa69ff934c802b3452bd4ad4f7010e7) - **clickhouse**: support DROP with SYNC *(PR [#7172](https://github.com/tobymao/sqlglot/pull/7172) by [@geooo109](https://github.com/geooo109))* - 
[`0c5c3f1`](https://github.com/tobymao/sqlglot/commit/0c5c3f17637df659004036c37c569ad42da21dd4) - **duckdb**: support GROUPS for WINDOW *(PR [#7185](https://github.com/tobymao/sqlglot/pull/7185) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7176](https://github.com/tobymao/sqlglot/issues/7176) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`af05677`](https://github.com/tobymao/sqlglot/commit/af05677fe08a2830bb0a18587f0c0f8dfbc5b9bd) - make Query and DerivedTable inherit from Selectable *(commit by [@tobymao](https://github.com/tobymao))* - [`cab0f24`](https://github.com/tobymao/sqlglot/commit/cab0f24ad212eea592f532628f4fc10489fc32f7) - compile scope for mypy *(PR [#7192](https://github.com/tobymao/sqlglot/pull/7192) by [@tobymao](https://github.com/tobymao))* - [`8db0323`](https://github.com/tobymao/sqlglot/commit/8db03233f88f106f0339e53237a5f054c4e61b3a) - compile qualify and resolver *(PR [#7193](https://github.com/tobymao/sqlglot/pull/7193) by [@tobymao](https://github.com/tobymao))* - [`3de5d29`](https://github.com/tobymao/sqlglot/commit/3de5d29ef5fd19e2f1ac0b2681e29e8a6c6fbd48) - **duckdb**: Add transpilation support for ARRAY_SLICE function *(PR [#7188](https://github.com/tobymao/sqlglot/pull/7188) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`e0947ad`](https://github.com/tobymao/sqlglot/commit/e0947adcaeb1e3cb829e584e0b071c598c64cfa9) - **Snowflake**: set default window frame for certain Snowflake ranking functions during transpilation *(PR [#7195](https://github.com/tobymao/sqlglot/pull/7195) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`4a3254f`](https://github.com/tobymao/sqlglot/commit/4a3254fbd1bfee7aa6787d3fc31832d8e9771932) - **snowflake**: transpilation support MAP_PICK *(PR [#7189](https://github.com/tobymao/sqlglot/pull/7189) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - 
[`9431fd4`](https://github.com/tobymao/sqlglot/commit/9431fd457dc4dbad7a963828c5303889573079d3) - **optimizer**: annotate CURRENT_TIMESTAMP for TSQL *(PR [#7208](https://github.com/tobymao/sqlglot/pull/7208) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`cc8724d`](https://github.com/tobymao/sqlglot/commit/cc8724d791938e440c8d72d72c4ec301ca02eee9) - **clickhouse**: support cityHash64 *(PR [#7209](https://github.com/tobymao/sqlglot/pull/7209) by [@geooo109](https://github.com/geooo109))* - [`a39d3e9`](https://github.com/tobymao/sqlglot/commit/a39d3e999e707114226edb806061b80e0164489a) - **duckdb**: Implement transpilation for ARRAYS_OVERLAP function *(PR [#7200](https://github.com/tobymao/sqlglot/pull/7200) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`3ef5db9`](https://github.com/tobymao/sqlglot/commit/3ef5db9ca10527e237340cc24612d63d4d1bcf1b) - **clickhouse**: support DETACH *(PR [#7215](https://github.com/tobymao/sqlglot/pull/7215) by [@geooo109](https://github.com/geooo109))* - [`b5f888e`](https://github.com/tobymao/sqlglot/commit/b5f888e403b20dc9729eb7f01f5cc5227f173ce2) - **Snowflake**: handle NULL discrepancy during transpilation of SPLIT from Snowflake to DuckDB *(PR [#7216](https://github.com/tobymao/sqlglot/pull/7216) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`5599478`](https://github.com/tobymao/sqlglot/commit/55994785bae54dd37ffc40b085878d186780f033) - Compile base & BigQuery's parser with mypyc *(PR [#7206](https://github.com/tobymao/sqlglot/pull/7206) by [@VaggelisD](https://github.com/VaggelisD))* - [`319f359`](https://github.com/tobymao/sqlglot/commit/319f3591e53013b972c57057c65479d7024a4388) - **tsql**: Move parser out and enable compilation *(PR [#7221](https://github.com/tobymao/sqlglot/pull/7221) by [@VaggelisD](https://github.com/VaggelisD))* - [`3c02ea8`](https://github.com/tobymao/sqlglot/commit/3c02ea8a2c515d53e89c0d7455392a2b8fac2d8a) - **snowflake**: handle 
empty separator for SPLIT transpilation (Snowflake -> Duckdb) *(PR [#7224](https://github.com/tobymao/sqlglot/pull/7224) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`c34bc2d`](https://github.com/tobymao/sqlglot/commit/c34bc2d17c2467b433d49e8e84611ec6acb39580) - **snowflake**: transpilation support MAP_INSERT *(PR [#7190](https://github.com/tobymao/sqlglot/pull/7190) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4ae6c3d`](https://github.com/tobymao/sqlglot/commit/4ae6c3d8fe774c57647491572b764a4c2767704a) - fast path for tokenizer *(PR [#7226](https://github.com/tobymao/sqlglot/pull/7226) by [@tobymao](https://github.com/tobymao))* - [`5a9a522`](https://github.com/tobymao/sqlglot/commit/5a9a52212f6ba975e1f671b712a89befe9c2d606) - **Snowflake**: support transpilation of SPLIT_PART from snowflake to duckdb *(PR [#7258](https://github.com/tobymao/sqlglot/pull/7258) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`dc4a7ef`](https://github.com/tobymao/sqlglot/commit/dc4a7ef7a1739ecc6b4ff8ff1ab018d61d7bcffa) - support multi-word DESCRIBE kinds in Snowflake *(PR [#7260](https://github.com/tobymao/sqlglot/pull/7260) by [@sabir-akhadov-localstack](https://github.com/sabir-akhadov-localstack))* - [`f15b8b0`](https://github.com/tobymao/sqlglot/commit/f15b8b018ddb0aa1bd26683c439ab2d063ca9cd8) - transpile postgres GREATEST(x,y) to MAX in sqlite *(PR [#7274](https://github.com/tobymao/sqlglot/pull/7274) by [@treysp](https://github.com/treysp))* - [`79d72db`](https://github.com/tobymao/sqlglot/commit/79d72dbd2c770c53f9ec9c36dd67ae63860ba4fe) - **snowflake**: Transpilation support for to_variant *(PR [#7262](https://github.com/tobymao/sqlglot/pull/7262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`bfa4818`](https://github.com/tobymao/sqlglot/commit/bfa48188e45601491f981c4cea8a4bd8a2c6a0a2) - **duckdb**: Implement transpilation for ARRAY_SORT function *(PR 
[#7223](https://github.com/tobymao/sqlglot/pull/7223) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - [`81a3763`](https://github.com/tobymao/sqlglot/commit/81a37636c374690d12fe0b57d78adf2310daf3cb) - **exasol**: cast string literals to TIMESTAMP in TO_CHAR generation *(PR [#7127](https://github.com/tobymao/sqlglot/pull/7127) by [@marconae](https://github.com/marconae))* - [`351e958`](https://github.com/tobymao/sqlglot/commit/351e958c3cd8a1395826ef624979275b246490f8) - **exasol**: fix parsing error in json_extract for exasol *(PR [#7098](https://github.com/tobymao/sqlglot/pull/7098) by [@nnamdi16](https://github.com/nnamdi16))* - [`95d7d20`](https://github.com/tobymao/sqlglot/commit/95d7d2052b1ed8fc64a99d557b8b085ad466100e) - **redshift**: annotate `TO_TIMESTAMP` as `TIMESTAMPTZ` fixes [#7155](https://github.com/tobymao/sqlglot/pull/7155) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`50d58a5`](https://github.com/tobymao/sqlglot/commit/50d58a5843cfc02f11acf0fe28b2ad7c9a59e252) - **parser**: support more DICTIONARY properties *(PR [#7158](https://github.com/tobymao/sqlglot/pull/7158) by [@geooo109](https://github.com/geooo109))* - [`363167c`](https://github.com/tobymao/sqlglot/commit/363167c6609fa80b6014851b4415adca17b00df4) - **duckdb**: parse FILE column *(PR [#7184](https://github.com/tobymao/sqlglot/pull/7184) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7175](https://github.com/tobymao/sqlglot/issues/7175) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`baa9974`](https://github.com/tobymao/sqlglot/commit/baa9974b8042eaef7897537772b1002c30e503b8) - **duckdb**: fix IGNORE NULLS in AGG FUNC *(PR [#7187](https://github.com/tobymao/sqlglot/pull/7187) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7179](https://github.com/tobymao/sqlglot/issues/7179) opened by 
[@denis-komarov](https://github.com/denis-komarov)* - [`f630d75`](https://github.com/tobymao/sqlglot/commit/f630d7579231f29fa5637b48f1be0b5665eb36b3) - **clickhouse**: support dotcolon with JSON *(PR [#7191](https://github.com/tobymao/sqlglot/pull/7191) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7183](https://github.com/tobymao/sqlglot/issues/7183) opened by [@telperions](https://github.com/telperions)* - [`eea5880`](https://github.com/tobymao/sqlglot/commit/eea58807411edf962a1cdc28e02337a428866665) - **snowflake**: support positional `GENERATOR` args *(commit by [@georgesittas](https://github.com/georgesittas))* - [`29399bb`](https://github.com/tobymao/sqlglot/commit/29399bbed44a74d95257040fd36f0a0f6de7c7d8) - **exasol**: remove invalid group by distinct during custom transformation of group by all *(PR [#7197](https://github.com/tobymao/sqlglot/pull/7197) by [@nnamdi16](https://github.com/nnamdi16))* - [`3ee3db5`](https://github.com/tobymao/sqlglot/commit/3ee3db59de23de0bbaed85c8779b6663b435a1e9) - **clickhouse**: support nested field for INSERT *(PR [#7199](https://github.com/tobymao/sqlglot/pull/7199) by [@geooo109](https://github.com/geooo109))* - [`81bc810`](https://github.com/tobymao/sqlglot/commit/81bc8102a7fbb33188869694800e8db9ae84541a) - **clickhouse**: make ArrayDistinct transpilable *(commit by [@timoha](https://github.com/timoha))* - [`6b21d7a`](https://github.com/tobymao/sqlglot/commit/6b21d7a5932afdc6ba117c808203777db9f329a2) - **clickhouse**: empty brackets handling *(PR [#7211](https://github.com/tobymao/sqlglot/pull/7211) by [@geooo109](https://github.com/geooo109))* - [`4087a15`](https://github.com/tobymao/sqlglot/commit/4087a152c5d4372ab53644061980b81a9174db1e) - **databricks**: properly handle GENERATED ALWAYS/BY DEFAULT *(PR [#7210](https://github.com/tobymao/sqlglot/pull/7210) by [@anna-stepien](https://github.com/anna-stepien))* - 
[`37b0f9f`](https://github.com/tobymao/sqlglot/commit/37b0f9f6b897d4289d87dfd78d42ca3df64870ba) - **spark, dbx**: support RECURSIVE ctes *(PR [#7214](https://github.com/tobymao/sqlglot/pull/7214) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7213](https://github.com/tobymao/sqlglot/issues/7213) opened by [@aersam](https://github.com/aersam)* - [`e990b67`](https://github.com/tobymao/sqlglot/commit/e990b671120ce66036ffdc4c65e5db8b36eaec5f) - **redshift**: avoid warning for IGNORE/RESPECT NULLS *(PR [#7222](https://github.com/tobymao/sqlglot/pull/7222) by [@geooo109](https://github.com/geooo109))* - [`172e399`](https://github.com/tobymao/sqlglot/commit/172e399f6211f73708ba9e423cf90c9ff83ffba3) - **hive, spark, dbx**: dash in json path *(PR [#7257](https://github.com/tobymao/sqlglot/pull/7257) by [@geooo109](https://github.com/geooo109))* - [`17c2fc7`](https://github.com/tobymao/sqlglot/commit/17c2fc774fda32eb2f1c1baed354db10d4d11e3d) - **snowflake**: JSON path with brackets containing non literals *(PR [#7251](https://github.com/tobymao/sqlglot/pull/7251) by [@geooo109](https://github.com/geooo109))* - [`fd87f53`](https://github.com/tobymao/sqlglot/commit/fd87f53f7ab5d68904555e9fae0a025c134faeaf) - **duckdb**: transpile DATE_TRUNC from bigquery *(PR [#7263](https://github.com/tobymao/sqlglot/pull/7263) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7261](https://github.com/tobymao/sqlglot/issues/7261) opened by [@MaxHalford](https://github.com/MaxHalford)* - [`3d7bbb5`](https://github.com/tobymao/sqlglot/commit/3d7bbb5fd2689aed1b2f659abd8d1b18db421104) - **snowflake**: parse single-arg TO_{GEOMETRY_GEOGRAPHY} as Cast *(PR [#7270](https://github.com/tobymao/sqlglot/pull/7270) by [@georgesittas](https://github.com/georgesittas))* - [`3e16da6`](https://github.com/tobymao/sqlglot/commit/3e16da64c0c32bd970b2ab5fd5ff2fdc3a134feb) - `make unit` silently failing to move .so *(PR
[#7272](https://github.com/tobymao/sqlglot/pull/7272) by [@VaggelisD](https://github.com/VaggelisD))* - [`c3eb8e2`](https://github.com/tobymao/sqlglot/commit/c3eb8e22c8ed83029a38640ae569d829c7258d0d) - **optimizer**: `qualify_tables` overwriting FQN alias mapping for duplicate tables *(PR [#7278](https://github.com/tobymao/sqlglot/pull/7278) by [@cg-roling](https://github.com/cg-roling))* - [`0527315`](https://github.com/tobymao/sqlglot/commit/0527315a23dc44c736de80fa753125da764e95ba) - Drop schema with if exists displacing catalog *(PR [#7285](https://github.com/tobymao/sqlglot/pull/7285) by [@themisvaltinos](https://github.com/themisvaltinos))* - [`a2964dd`](https://github.com/tobymao/sqlglot/commit/a2964ddb88ac518a57e19c620358e7e4a03e611e) - **duckdb**: complete ARRAY_SORT transpilation logic *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6d6250d`](https://github.com/tobymao/sqlglot/commit/6d6250db9b8bf257d86a59266c48a02fe10cd1b3) - **bigquery**: no warning for window funcs with NULL order *(PR [#7280](https://github.com/tobymao/sqlglot/pull/7280) by [@geooo109](https://github.com/geooo109))* - [`4b64898`](https://github.com/tobymao/sqlglot/commit/4b648985a166ac091b8a46af1590caf59f9bc31b) - **hive, spark**: robust support for IGNORE NULLS *(PR [#7288](https://github.com/tobymao/sqlglot/pull/7288) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7282](https://github.com/tobymao/sqlglot/issues/7282) opened by [@catlynkong](https://github.com/catlynkong)* - [`4cb1a7f`](https://github.com/tobymao/sqlglot/commit/4cb1a7faf3e5a431ea482f34e197485a1611d8c4) - handle single-element `RANGE` during duckdb->spark transpilation *(PR [#7294](https://github.com/tobymao/sqlglot/pull/7294) by [@ShubhamKapoor992](https://github.com/ShubhamKapoor992))* - :arrow_lower_right: *fixes issue [#7291](https://github.com/tobymao/sqlglot/issues/7291) opened by [@huydo862003](https://github.com/huydo862003)* - 
[`002bdaf`](https://github.com/tobymao/sqlglot/commit/002bdafbd5315d7e7fa67903478ed888fb1b1229) - don't warn for sqlglotrs if sqlglotc is found *(PR [#7290](https://github.com/tobymao/sqlglot/pull/7290) by [@rolandwalker](https://github.com/rolandwalker))* - [`0eb5aae`](https://github.com/tobymao/sqlglot/commit/0eb5aae8e348774ae5f12bd7e0140da6f8e16da4) - **optimizer**: add forward-reference guard to pushdown_dnf, fixing cycle error *(PR [#7299](https://github.com/tobymao/sqlglot/pull/7299) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#7297](https://github.com/tobymao/sqlglot/issues/7297) opened by [@snovik75](https://github.com/snovik75)* - [`6f471f1`](https://github.com/tobymao/sqlglot/commit/6f471f1bfb466b32f04c814a0beb6fa23e045eff) - unnest_subqueries crashes when correlated subquery is inside a function in SELECT *(PR [#7300](https://github.com/tobymao/sqlglot/pull/7300) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#7295](https://github.com/tobymao/sqlglot/issues/7295) opened by [@snovik75](https://github.com/snovik75)* ### :zap: Performance Improvements - [`489ba1d`](https://github.com/tobymao/sqlglot/commit/489ba1dd0b6024f2d876b474b451b5e6fa8d27de) - **parser**: fast path for column reference parsing *(PR [#7293](https://github.com/tobymao/sqlglot/pull/7293) by [@tobymao](https://github.com/tobymao))* ### :recycle: Refactors - [`74fd80c`](https://github.com/tobymao/sqlglot/commit/74fd80cdf055c828cedfac43b4b54132d18558bb) - split up expressions.py *(PR [#7160](https://github.com/tobymao/sqlglot/pull/7160) by [@tobymao](https://github.com/tobymao))* - [`8256e08`](https://github.com/tobymao/sqlglot/commit/8256e08e55bb12ac3598e2b1f936e5ef380e2cf8) - Extract Spark parser for mypyc compilation *(PR [#7235](https://github.com/tobymao/sqlglot/pull/7235) by [@VaggelisD](https://github.com/VaggelisD))* - 
[`2379947`](https://github.com/tobymao/sqlglot/commit/237994706746d6a294bfae0b413a0ec479645c2c) - Extract SingleStore parser for mypyc compilation *(PR [#7250](https://github.com/tobymao/sqlglot/pull/7250) by [@VaggelisD](https://github.com/VaggelisD))* - [`27b6f56`](https://github.com/tobymao/sqlglot/commit/27b6f56a3871a0d2152e3eb26ac26dd56a4b5ff3) - Extract Doris parser for mypyc compilation *(PR [#7249](https://github.com/tobymao/sqlglot/pull/7249) by [@VaggelisD](https://github.com/VaggelisD))* - [`5c8b003`](https://github.com/tobymao/sqlglot/commit/5c8b0037e2a6a8ccd5588234bb591d42904e8a02) - Extract StarRocks parser for mypyc compilation *(PR [#7248](https://github.com/tobymao/sqlglot/pull/7248) by [@VaggelisD](https://github.com/VaggelisD))* - [`669bc3f`](https://github.com/tobymao/sqlglot/commit/669bc3f7711253d4ecf044c2ec956c0f38a74463) - Extract Materialize parser for mypyc compilation *(PR [#7247](https://github.com/tobymao/sqlglot/pull/7247) by [@VaggelisD](https://github.com/VaggelisD))* - [`5b51c64`](https://github.com/tobymao/sqlglot/commit/5b51c64a0d620469e696fcb7a2e86915b7f9a925) - Extract RisingWave parser for mypyc compilation *(PR [#7246](https://github.com/tobymao/sqlglot/pull/7246) by [@VaggelisD](https://github.com/VaggelisD))* - [`6982d44`](https://github.com/tobymao/sqlglot/commit/6982d442ecebbeb38ee3cda1ab530b813dcff988) - Extract Solr parser for mypyc compilation *(PR [#7244](https://github.com/tobymao/sqlglot/pull/7244) by [@VaggelisD](https://github.com/VaggelisD))* - [`a57b632`](https://github.com/tobymao/sqlglot/commit/a57b632c512ff8be65629ea30dc9cd0fe69cb1d3) - Extract Redshift parser for mypyc compilation *(PR [#7245](https://github.com/tobymao/sqlglot/pull/7245) by [@VaggelisD](https://github.com/VaggelisD))* - [`364fca7`](https://github.com/tobymao/sqlglot/commit/364fca74025eab10f8ba34f2498f9545321e8a3f) - Extract Tableau parser for mypyc compilation *(PR [#7243](https://github.com/tobymao/sqlglot/pull/7243) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`ab6331f`](https://github.com/tobymao/sqlglot/commit/ab6331fc920636e47c6bb03825086642d9425b77) - Extract SQLite parser for mypyc compilation *(PR [#7240](https://github.com/tobymao/sqlglot/pull/7240) by [@VaggelisD](https://github.com/VaggelisD))* - [`df6f052`](https://github.com/tobymao/sqlglot/commit/df6f05268e15f5de1685552ea87c69d2f5bd48c6) - Extract Drill parser for mypyc compilation *(PR [#7242](https://github.com/tobymao/sqlglot/pull/7242) by [@VaggelisD](https://github.com/VaggelisD))* - [`125ea72`](https://github.com/tobymao/sqlglot/commit/125ea7216a01f0d5288517b1aa7203c43b4e737a) - Extract Dremio parser for mypyc compilation *(PR [#7241](https://github.com/tobymao/sqlglot/pull/7241) by [@VaggelisD](https://github.com/VaggelisD))* - [`a485038`](https://github.com/tobymao/sqlglot/commit/a485038a64dd560de093153b23c43f33526d4bf1) - Extract Exasol parser for mypyc compilation *(PR [#7239](https://github.com/tobymao/sqlglot/pull/7239) by [@VaggelisD](https://github.com/VaggelisD))* - [`113447f`](https://github.com/tobymao/sqlglot/commit/113447f587891d431af063fa4f18f021033bcb88) - Extract PRQL parser for mypyc compilation *(PR [#7238](https://github.com/tobymao/sqlglot/pull/7238) by [@VaggelisD](https://github.com/VaggelisD))* - [`0872db7`](https://github.com/tobymao/sqlglot/commit/0872db71adc1da464d0d7022dbb58b064bd96abb) - Extract Teradata parser for mypyc compilation *(PR [#7237](https://github.com/tobymao/sqlglot/pull/7237) by [@VaggelisD](https://github.com/VaggelisD))* - [`0fe1afd`](https://github.com/tobymao/sqlglot/commit/0fe1afd39a5105bc9406ecac0ea78c26254d7f9e) - Extract Oracle parser for mypyc compilation *(PR [#7236](https://github.com/tobymao/sqlglot/pull/7236) by [@VaggelisD](https://github.com/VaggelisD))* - [`47e75e5`](https://github.com/tobymao/sqlglot/commit/47e75e5bb29c6ecd142ca4c51e27ac4ed9996f74) - Extract Snowflake parser for mypyc compilation *(PR 
[#7229](https://github.com/tobymao/sqlglot/pull/7229) by [@VaggelisD](https://github.com/VaggelisD))* - [`ea035e1`](https://github.com/tobymao/sqlglot/commit/ea035e15793c047df58af9f9a901c7b7c8ed07e2) - Rename Parser to Parser and auto-discover parsers in setup.py *(PR [#7252](https://github.com/tobymao/sqlglot/pull/7252) by [@VaggelisD](https://github.com/VaggelisD))* - [`e8d0dab`](https://github.com/tobymao/sqlglot/commit/e8d0dabf10d4afe22f2277d46cf931c39409063e) - Extract Databricks parser for mypyc compilation *(PR [#7253](https://github.com/tobymao/sqlglot/pull/7253) by [@VaggelisD](https://github.com/VaggelisD))* - [`8c799ad`](https://github.com/tobymao/sqlglot/commit/8c799adf70cc0136f1c6647c3212239c83f6cbe1) - Extract Fabric parser for mypyc compilation *(PR [#7254](https://github.com/tobymao/sqlglot/pull/7254) by [@VaggelisD](https://github.com/VaggelisD))* - [`b155a29`](https://github.com/tobymao/sqlglot/commit/b155a29574fbc57720637343529623c02b0db43a) - expression to not use **kwargs because it allocates multiple dicts *(PR [#7256](https://github.com/tobymao/sqlglot/pull/7256) by [@tobymao](https://github.com/tobymao))* - [`07f6893`](https://github.com/tobymao/sqlglot/commit/07f68932e39f148c267ce1b12086b85d4d485bf7) - Fully compile schema *(PR [#7276](https://github.com/tobymao/sqlglot/pull/7276) by [@VaggelisD](https://github.com/VaggelisD))* ### :white_check_mark: Tests - [`ed5e179`](https://github.com/tobymao/sqlglot/commit/ed5e1792a30e5172620e263edcae65f2f892f55b) - **snowflake**: Added tests for to_array *(PR [#7201](https://github.com/tobymao/sqlglot/pull/7201) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`3f94428`](https://github.com/tobymao/sqlglot/commit/3f94428507d7207ca99e76cebef9375bd3648f4d) - **snowflake**: Transpilation support for HASH_AGG *(commit by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`1bd952b`](https://github.com/tobymao/sqlglot/commit/1bd952bfc68926405e5fe7efb1bb833dd6dd6dfb) 
- **snowflake**: Transpilation support for HASH_AGG *(commit by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* ### :wrench: Chores - [`76c7eeb`](https://github.com/tobymao/sqlglot/commit/76c7eeb7657a5435f1086c16317927b2780c6ad9) - cleanup makefile *(commit by [@tobymao](https://github.com/tobymao))* - [`588e565`](https://github.com/tobymao/sqlglot/commit/588e5650467f5cb9903ac1fa5ece59c3bfb42c8e) - **exasol**: DAYS_BETWEEN tests *(PR [#7135](https://github.com/tobymao/sqlglot/pull/7135) by [@marconae](https://github.com/marconae))* - [`ea424bf`](https://github.com/tobymao/sqlglot/commit/ea424bf865aa4d7bbca62b834f6994c05232fdf0) - **tokenizer**: Replace SPACE_CHARS with str.isspace *(PR [#7134](https://github.com/tobymao/sqlglot/pull/7134) by [@VaggelisD](https://github.com/VaggelisD))* - [`e1c26c5`](https://github.com/tobymao/sqlglot/commit/e1c26c5425a602030e5fefd5fe76614081fe3991) - keep api-docs in sync with main, only add docs/ on top *(PR [#7137](https://github.com/tobymao/sqlglot/pull/7137) by [@georgesittas](https://github.com/georgesittas))* - [`f69a152`](https://github.com/tobymao/sqlglot/commit/f69a152d51666abd832a497299db1fefdfaa3ec9) - add tests for transpiling LAG from snowflake to duckdb *(PR [#7138](https://github.com/tobymao/sqlglot/pull/7138) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`842462e`](https://github.com/tobymao/sqlglot/commit/842462e2fc36f7d69ecc4989d0c8d4600e013c61) - move ruff config to pyproject.toml *(PR [#7149](https://github.com/tobymao/sqlglot/pull/7149) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* - [`4a955cb`](https://github.com/tobymao/sqlglot/commit/4a955cb993ae24d91a5094b7849ceb60ab963419) - scan all modules under sqlglot/ for doc tests *(PR [#7162](https://github.com/tobymao/sqlglot/pull/7162) by [@georgesittas](https://github.com/georgesittas))* - [`3a930da`](https://github.com/tobymao/sqlglot/commit/3a930dad611743a4b6b1d647e27af2324db4f755) - add integration 
test automations *(PR [#7167](https://github.com/tobymao/sqlglot/pull/7167) by [@georgesittas](https://github.com/georgesittas))* - [`151f961`](https://github.com/tobymao/sqlglot/commit/151f961f6575e9cdd54a1cab3dad66fa2a12774c) - update parse_one description *(PR [#7181](https://github.com/tobymao/sqlglot/pull/7181) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7173](https://github.com/tobymao/sqlglot/issues/7173) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`ab8331c`](https://github.com/tobymao/sqlglot/commit/ab8331c67dd43ee2add85520507211203e85817f) - consolidate `SecurityProperty` and `SqlSecurityProperty` *(PR [#7174](https://github.com/tobymao/sqlglot/pull/7174) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* - [`4f5abdb`](https://github.com/tobymao/sqlglot/commit/4f5abdb3fb0cfcaaca9763ced38a7920c2eab548) - refactor SQL SECURITY property location logic in MySQL *(PR [#7186](https://github.com/tobymao/sqlglot/pull/7186) by [@georgesittas](https://github.com/georgesittas))* - [`a3fecc5`](https://github.com/tobymao/sqlglot/commit/a3fecc5dbc827c0873cd83cfc8388b081bcec75b) - qol improvements to integration test workflow *(PR [#7198](https://github.com/tobymao/sqlglot/pull/7198) by [@georgesittas](https://github.com/georgesittas))* - [`95f6c35`](https://github.com/tobymao/sqlglot/commit/95f6c354b06482d202e496b3d7df3e1dd9bbdcaf) - minor refactor for snowflake window gen *(commit by [@geooo109](https://github.com/geooo109))* - [`175360e`](https://github.com/tobymao/sqlglot/commit/175360ecf17c839c67e6dc83a7da97823a406e0f) - clickhouse test ARRAY_DISTINCT refactor *(commit by [@geooo109](https://github.com/geooo109))* - [`55594ed`](https://github.com/tobymao/sqlglot/commit/55594edfa998f4174570268d88595f08a7078c66) - cleanup MAP_PICK tests in duckdb *(commit by [@geooo109](https://github.com/geooo109))* - [`c341a3d`](https://github.com/tobymao/sqlglot/commit/c341a3d7351e619d8935866611ffb23a32de24f4) - 
**test**: add test for SPLIT transpilation from Snowflake to DuckDB *(PR [#7212](https://github.com/tobymao/sqlglot/pull/7212) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`03d2a4b`](https://github.com/tobymao/sqlglot/commit/03d2a4bf87efed98df98927cca705cc9dcfc9d76) - cleanup duckdb ARRAY_OVERLAPS *(commit by [@geooo109](https://github.com/geooo109))* - [`7a74228`](https://github.com/tobymao/sqlglot/commit/7a7422896e75c9c260c3737982597438f5128e0b) - duckdb tests style *(commit by [@geooo109](https://github.com/geooo109))* - [`7df5bd4`](https://github.com/tobymao/sqlglot/commit/7df5bd487d942eeee3f6cf1ab26777405ce90b94) - **perf**: lineage cte memoization *(PR [#7207](https://github.com/tobymao/sqlglot/pull/7207) by [@treff7es](https://github.com/treff7es))* - [`0787c74`](https://github.com/tobymao/sqlglot/commit/0787c74f5351d7eeb825eba75e6112241aeb1d98) - remove read-only flag in lineage, always use shared refs *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7f3a18b`](https://github.com/tobymao/sqlglot/commit/7f3a18bcda811a5c259d6524e64eea2ed78ea83a) - **parser**: Add backwards compatibility tests *(PR [#7255](https://github.com/tobymao/sqlglot/pull/7255) by [@VaggelisD](https://github.com/VaggelisD))* - [`fa42909`](https://github.com/tobymao/sqlglot/commit/fa4290956b922f173042170b7ff85fd29eb1f0d4) - do not run integration tests on make unit *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6da7d90`](https://github.com/tobymao/sqlglot/commit/6da7d9061508c2b04ae81a6de2af82736879da38) - replace greek characters in `DATETIME_DELTA` type variable fixes [#7264](https://github.com/tobymao/sqlglot/pull/7264) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a13995e`](https://github.com/tobymao/sqlglot/commit/a13995e7c3441bcd8442b8a210cadd2055199492) - **parser**: Move `ParserCore` back to `Parser` *(PR [#7268](https://github.com/tobymao/sqlglot/pull/7268) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`4efd393`](https://github.com/tobymao/sqlglot/commit/4efd393b143fddc1e8a6481e36c333d89c0025a4) - update benchmarks *(commit by [@tobymao](https://github.com/tobymao))* - [`20ebebf`](https://github.com/tobymao/sqlglot/commit/20ebebf40805e25f7e2496326b595480e10b7a32) - get rid of unused `read_only` kwarg in lineage *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b8cbea0`](https://github.com/tobymao/sqlglot/commit/b8cbea02713989174a2bad32a17a85d763539f58) - make rs depend on sqlglotc *(commit by [@tobymao](https://github.com/tobymao))* - [`d5e6d96`](https://github.com/tobymao/sqlglot/commit/d5e6d965288c0929e0a4ef9a9db292fb28bbf3d1) - clean up VARIANT tests for duckdb and sf *(commit by [@geooo109](https://github.com/geooo109))* - [`ea76ca5`](https://github.com/tobymao/sqlglot/commit/ea76ca55405efdb09a1ea0dbb2d02d1892260b71) - unpin ruff *(PR [#7287](https://github.com/tobymao/sqlglot/pull/7287) by [@georgesittas](https://github.com/georgesittas))* ## [v29.0.1] - 2026-02-23 ### :boom: BREAKING CHANGES - due to [`fdfdfb1`](https://github.com/tobymao/sqlglot/commit/fdfdfb1703f1f408ad01453147e3d269f0911fef) - support GET_CURRENT_TIME() for DuckDB *(PR [#7126](https://github.com/tobymao/sqlglot/pull/7126) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support GET_CURRENT_TIME() for DuckDB (#7126) ### :sparkles: New Features - [`fdfdfb1`](https://github.com/tobymao/sqlglot/commit/fdfdfb1703f1f408ad01453147e3d269f0911fef) - **duckdb**: support GET_CURRENT_TIME() for DuckDB *(PR [#7126](https://github.com/tobymao/sqlglot/pull/7126) by [@AbhishekASLK](https://github.com/AbhishekASLK))* ### :wrench: Chores - [`21a2a57`](https://github.com/tobymao/sqlglot/commit/21a2a5773717f675963dddbdba3df9343da60abe) - actually emit warning *(commit by [@tobymao](https://github.com/tobymao))* ## [v29.0.0] - 2026-02-23 ### :boom: BREAKING CHANGES - due to 
[`c8ddcc3`](https://github.com/tobymao/sqlglot/commit/c8ddcc383bab07b807ed1d6b6f9bef91417e43c1) - Annotate COLLATION(expr) for Spark/DBX *(PR [#6957](https://github.com/tobymao/sqlglot/pull/6957) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate COLLATION(expr) for Spark/DBX (#6957) - due to [`fc4b332`](https://github.com/tobymao/sqlglot/commit/fc4b3326a14a1b42bc954914ce43b8dad7ef23b2) - Annotate BITMAP_COUNT(expr) for Spark/DBX *(PR [#6956](https://github.com/tobymao/sqlglot/pull/6956) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate BITMAP_COUNT(expr) for Spark/DBX (#6956) - due to [`8725010`](https://github.com/tobymao/sqlglot/commit/87250100eb2a1d2c206a26cce276f7babec0e409) - add exp.Trunc for numeric truncation *(PR [#6923](https://github.com/tobymao/sqlglot/pull/6923) by [@doripo](https://github.com/doripo))*: add exp.Trunc for numeric truncation (#6923) - due to [`1418494`](https://github.com/tobymao/sqlglot/commit/1418494f777358f4b6bd1e05ee5cb02591d92c74) - Annotate FORMAT_STRING(expr) for Spark/DBX *(PR [#6962](https://github.com/tobymao/sqlglot/pull/6962) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate FORMAT_STRING(expr) for Spark/DBX (#6962) - due to [`37fa84e`](https://github.com/tobymao/sqlglot/commit/37fa84e389b6bcbc94326d3defb4664d0826fb3f) - support `CURRENT_VERSION()` transpilation for Spark *(PR [#6964](https://github.com/tobymao/sqlglot/pull/6964) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support `CURRENT_VERSION()` transpilation for Spark (#6964) - due to [`51d3ebd`](https://github.com/tobymao/sqlglot/commit/51d3ebdca83e114449590d9f337ae6902659a8b4) - transpile `CURRENT_VERSION()` to MySQL *(PR [#6965](https://github.com/tobymao/sqlglot/pull/6965) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: transpile `CURRENT_VERSION()` to MySQL (#6965) - due to [`9008553`](https://github.com/tobymao/sqlglot/commit/90085534eb8863f588003bdf65d96771729889aa) - transpile CURRENT_VERSION() to 
ClickHouse, Postgres, Trino, Redshift *(PR [#6966](https://github.com/tobymao/sqlglot/pull/6966) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: transpile CURRENT_VERSION() to ClickHouse, Postgres, Trino, Redshift (#6966) - due to [`e8b379e`](https://github.com/tobymao/sqlglot/commit/e8b379eb67d034f829d2fd50daefea2a98b83976) - Map SQLITE_VERSION() to exp.CurrentVersion expression *(PR [#6967](https://github.com/tobymao/sqlglot/pull/6967) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Map SQLITE_VERSION() to exp.CurrentVersion expression (#6967) - due to [`dfd299f`](https://github.com/tobymao/sqlglot/commit/dfd299fcfaf7a61d13b073e7b59d6bdd0748c7b8) - Annotate `RANDSTR(expr)` for Spark/DBX *(PR [#6971](https://github.com/tobymao/sqlglot/pull/6971) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `RANDSTR(expr)` for Spark/DBX (#6971) - due to [`969e45d`](https://github.com/tobymao/sqlglot/commit/969e45d3ba1db25f4561b122b9401b5608356f58) - Annotate REPEAT(expr) for Hive, Spark, DBX *(PR [#6974](https://github.com/tobymao/sqlglot/pull/6974) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate REPEAT(expr) for Hive, Spark, DBX (#6974) - due to [`ade3639`](https://github.com/tobymao/sqlglot/commit/ade3639b337d0222a00feec7ac9762571586f7ab) - transpilation support current_database *(PR [#6973](https://github.com/tobymao/sqlglot/pull/6973) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support current_database (#6973) - due to [`57093d1`](https://github.com/tobymao/sqlglot/commit/57093d15d5bbc2217366ace42db109e215dca79f) - Annotate `OVERLAY(expr)` for Spark/DBX *(PR [#6970](https://github.com/tobymao/sqlglot/pull/6970) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `OVERLAY(expr)` for Spark/DBX (#6970) - due to [`f5b2328`](https://github.com/tobymao/sqlglot/commit/f5b23281b6829bace426808f0a55e73590b70bbd) - Annotate RIGHT(expr) for Spark/DBX *(PR 
[#6980](https://github.com/tobymao/sqlglot/pull/6980) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate RIGHT(expr) for Spark/DBX (#6980) - due to [`61a0d3f`](https://github.com/tobymao/sqlglot/commit/61a0d3f05be478dd4552e6559b6781891d4a3447) - add support for JAROWINKLER_SIMILARITY *(PR [#6977](https://github.com/tobymao/sqlglot/pull/6977) by [@kyle-cheung](https://github.com/kyle-cheung))*: add support for JAROWINKLER_SIMILARITY (#6977) - due to [`9d1f4e0`](https://github.com/tobymao/sqlglot/commit/9d1f4e0ea6f8b66b022a5263320275ed43efb5f3) - transpilation support current_schema *(PR [#6976](https://github.com/tobymao/sqlglot/pull/6976) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support current_schema (#6976) - due to [`0d345aa`](https://github.com/tobymao/sqlglot/commit/0d345aafd037b047808716dfdb60cc554d47941d) - Annotate REPLACE(expr) for Hive, Spark and DBX *(PR [#6975](https://github.com/tobymao/sqlglot/pull/6975) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate REPLACE(expr) for Hive, Spark and DBX (#6975) - due to [`19f9000`](https://github.com/tobymao/sqlglot/commit/19f900031c9abe26bebb541e8907ca263454055c) - transpilation support current_version *(PR [#6960](https://github.com/tobymao/sqlglot/pull/6960) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support current_version (#6960) - due to [`f103a16`](https://github.com/tobymao/sqlglot/commit/f103a166aca95da726ac9281816181e53b916dc3) - support parsing `VERSION()` for ClickHouse *(PR [#6986](https://github.com/tobymao/sqlglot/pull/6986) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support parsing `VERSION()` for ClickHouse (#6986) - due to [`2751c8f`](https://github.com/tobymao/sqlglot/commit/2751c8ff1d6c4acc1a0d407e601d572886ceffc3) - parse support for `VERSION()` *(PR [#6985](https://github.com/tobymao/sqlglot/pull/6985) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: parse support 
for `VERSION()` (#6985) - due to [`37db91c`](https://github.com/tobymao/sqlglot/commit/37db91c5cea14488654a2b69aceab13b6c6a98b7) - Annotate `REVERSE(expr)` for Hive, Spark and DBX *(PR [#6979](https://github.com/tobymao/sqlglot/pull/6979) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `REVERSE(expr)` for Hive, Spark and DBX (#6979) - due to [`f4d53a2`](https://github.com/tobymao/sqlglot/commit/f4d53a2d0f9aadf7fc63e484d64821cddf5d1f17) - support parsing VERSION() for Postgres/Redshift *(PR [#6987](https://github.com/tobymao/sqlglot/pull/6987) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support parsing VERSION() for Postgres/Redshift (#6987) - due to [`71bb0c3`](https://github.com/tobymao/sqlglot/commit/71bb0c3f7947c1959160c8b401354ceddee2e8ce) - support for version() for trino *(PR [#6988](https://github.com/tobymao/sqlglot/pull/6988) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support for version() for trino (#6988) - due to [`fbca704`](https://github.com/tobymao/sqlglot/commit/fbca7040cd3ae9eb0bc599b5ce656724fccafab1) - Annotate SPLIT(expr) for Hive/Spark/DBX *(PR [#6990](https://github.com/tobymao/sqlglot/pull/6990) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SPLIT(expr) for Hive/Spark/DBX (#6990) - due to [`264e95f`](https://github.com/tobymao/sqlglot/commit/264e95f04d95f2cd7bcf255ee7ae160db36882a7) - Move TRANSLATE(expr) annotator to base *(PR [#6992](https://github.com/tobymao/sqlglot/pull/6992) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move TRANSLATE(expr) annotator to base (#6992) - due to [`eb8ad51`](https://github.com/tobymao/sqlglot/commit/eb8ad518142dc91e25d37310cb9cbfa33c44fe34) - Annotate FILTER(expr, func) for Spark/DBX *(PR [#6995](https://github.com/tobymao/sqlglot/pull/6995) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate FILTER(expr, func) for Spark/DBX (#6995) - due to 
[`12e0869`](https://github.com/tobymao/sqlglot/commit/12e0869ff6820b35884e189b2b4f29aef56c3a51) - annotate CURRENT_TIMESTAMP for MySQL *(PR [#7004](https://github.com/tobymao/sqlglot/pull/7004) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate CURRENT_TIMESTAMP for MySQL (#7004) - due to [`cbdad37`](https://github.com/tobymao/sqlglot/commit/cbdad3762dd6935d75405a0c33a1656cab8c2d1e) - support CURTIME() for MySQL/SingleStore *(PR [#7005](https://github.com/tobymao/sqlglot/pull/7005) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support CURTIME() for MySQL/SingleStore (#7005) - due to [`d16cc62`](https://github.com/tobymao/sqlglot/commit/d16cc62ea8342bec91092f8c7cf2504364581a7e) - annotate ADD_MONTH(expr) for Hive/Spark/DBX *(PR [#7003](https://github.com/tobymao/sqlglot/pull/7003) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate ADD_MONTH(expr) for Hive/Spark/DBX (#7003) - due to [`6381c48`](https://github.com/tobymao/sqlglot/commit/6381c4825c1929da56363035be2c4ae7a90336dd) - support NOW() for exasol *(PR [#7006](https://github.com/tobymao/sqlglot/pull/7006) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support NOW() for exasol (#7006) - due to [`d9ea168`](https://github.com/tobymao/sqlglot/commit/d9ea1683a98252ad43948be32fbd7cf77d17b67c) - annotate CURRENT_USER to base *(PR [#7007](https://github.com/tobymao/sqlglot/pull/7007) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate CURRENT_USER to base (#7007) - due to [`7a2a777`](https://github.com/tobymao/sqlglot/commit/7a2a777fb8c215b51436942645965792257b8dc9) - annotate FROM_UTC_TIMESTAMP(expr) for Spark/DBX *(PR [#7008](https://github.com/tobymao/sqlglot/pull/7008) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate FROM_UTC_TIMESTAMP(expr) for Spark/DBX (#7008) - due to [`8be32fd`](https://github.com/tobymao/sqlglot/commit/8be32fde55c8d256e73fd504246f695bf550f4cb) - support MAKE_TIMESTAMP(expr) for Spark/DBX *(PR 
[#7009](https://github.com/tobymao/sqlglot/pull/7009) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support MAKE_TIMESTAMP(expr) for Spark/DBX (#7009) - due to [`b951d74`](https://github.com/tobymao/sqlglot/commit/b951d740a934a8f46ce2c96caf7d8ae80b61604c) - annotate NEXT_DAY(expr) for Hive/Spark/DBX *(PR [#7010](https://github.com/tobymao/sqlglot/pull/7010) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate NEXT_DAY(expr) for Hive/Spark/DBX (#7010) - due to [`404797a`](https://github.com/tobymao/sqlglot/commit/404797acfb1a9f860bd87880fecacd79cb1b2161) - Move `CURRENT_SCHEMA()` to Base Annotator *(PR [#7021](https://github.com/tobymao/sqlglot/pull/7021) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move `CURRENT_SCHEMA()` to Base Annotator (#7021) - due to [`994d3a3`](https://github.com/tobymao/sqlglot/commit/994d3a37983791d4e8484d6d39b819a1cff2f774) - robust STAR with EXCLUDE (redshift) *(PR [#6972](https://github.com/tobymao/sqlglot/pull/6972) by [@geooo109](https://github.com/geooo109))*: robust STAR with EXCLUDE (redshift) (#6972) - due to [`3ea80fb`](https://github.com/tobymao/sqlglot/commit/3ea80fb86482e257c4565ab7876dd6cdd60a7be2) - annotate REVERSE(str) for DuckDB *(PR [#7018](https://github.com/tobymao/sqlglot/pull/7018) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate REVERSE(str) for DuckDB (#7018) - due to [`352fb94`](https://github.com/tobymao/sqlglot/commit/352fb94c46e5dd0dfd824b4472b03cccf21d3f56) - annotate ISODOW(expr) for DuckDB *(PR [#7016](https://github.com/tobymao/sqlglot/pull/7016) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate ISODOW(expr) for DuckDB (#7016) - due to [`c2e5954`](https://github.com/tobymao/sqlglot/commit/c2e59545d8030b1d2e7859631c9d75ea0f6df883) - annotate COUNTIF(expr) for DuckDB *(PR [#7012](https://github.com/tobymao/sqlglot/pull/7012) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate COUNTIF(expr) for DuckDB (#7012) - due to 
[`e35ee14`](https://github.com/tobymao/sqlglot/commit/e35ee143b08f671175e730e602e9a5dcd9155fde) - annotate CountIf(expr) for ClickHouse *(PR [#7013](https://github.com/tobymao/sqlglot/pull/7013) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate CountIf(expr) for ClickHouse (#7013) - due to [`b73be3e`](https://github.com/tobymao/sqlglot/commit/b73be3e8c95b44f2cd71498592bd4b5b63ba02d9) - support `today()` for duckdb *(PR [#7015](https://github.com/tobymao/sqlglot/pull/7015) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support `today()` for duckdb (#7015) - due to [`36e3310`](https://github.com/tobymao/sqlglot/commit/36e3310959260a7b1124a60589cdc90a3e631624) - support current_schema as no_param *(PR [#7000](https://github.com/tobymao/sqlglot/pull/7000) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support current_schema as no_param (#7000) - due to [`fd8860b`](https://github.com/tobymao/sqlglot/commit/fd8860b8b8d5e0c29c53597485c656923375e1d9) - annotate FORMAT(expr) for DuckDB *(PR [#7017](https://github.com/tobymao/sqlglot/pull/7017) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate FORMAT(expr) for DuckDB (#7017) - due to [`eecdfa1`](https://github.com/tobymao/sqlglot/commit/eecdfa1b15ac1808f93107d2ad6a51f52ffaf7cc) - Annotate `DAYOFWEEK(expr)`, `DAYOFMONTH(expr)` for Hive/Spark/DBX *(PR [#6996](https://github.com/tobymao/sqlglot/pull/6996) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `DAYOFWEEK(expr)`, `DAYOFMONTH(expr)` for Hive/Spark/DBX (#6996) - due to [`4fc26f0`](https://github.com/tobymao/sqlglot/commit/4fc26f086701cebb3d3974b762d12e1435f4a195) - annotate `TIME_BUCKET(expr)` for DuckDB *(PR [#7014](https://github.com/tobymao/sqlglot/pull/7014) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate `TIME_BUCKET(expr)` for DuckDB (#7014) - due to [`00e6d9a`](https://github.com/tobymao/sqlglot/commit/00e6d9af02971aad6e7102cae3af2a7192fa7070) - annotate `UNIX_DATE(expr)` for Spark/DBX *(PR 
[#7011](https://github.com/tobymao/sqlglot/pull/7011) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate `UNIX_DATE(expr)` for Spark/DBX (#7011) - due to [`86a5509`](https://github.com/tobymao/sqlglot/commit/86a5509bfcb8df6a8cf8b0971d9d12ae3204f2af) - support user for exasol *(PR [#7001](https://github.com/tobymao/sqlglot/pull/7001) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support user for exasol (#7001) - due to [`b27c163`](https://github.com/tobymao/sqlglot/commit/b27c163fcce0a4b0a4f75d131cdc105353e95464) - support `CURDATE` for Exasol *(PR [#6999](https://github.com/tobymao/sqlglot/pull/6999) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support `CURDATE` for Exasol (#6999) - due to [`47dc558`](https://github.com/tobymao/sqlglot/commit/47dc5589f8b165d9f0296e6aa48de337f556f1a4) - annotate ARRAY_COMPACT(expr) for Spark/DBX *(PR [#7034](https://github.com/tobymao/sqlglot/pull/7034) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate ARRAY_COMPACT(expr) for Spark/DBX (#7034) - due to [`9d7db06`](https://github.com/tobymao/sqlglot/commit/9d7db06cf8ef66583f11b6d54af573bb28f4434b) - Generator for ARRAY_INSERT(expr) *(PR [#7036](https://github.com/tobymao/sqlglot/pull/7036) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Generator for ARRAY_INSERT(expr) (#7036) - due to [`235b8ac`](https://github.com/tobymao/sqlglot/commit/235b8ac24d41324239d6581b5636ad19ec7b9376) - annotate `ARRAY_INTERSECT(expr)` for Hive/Spark/DBX *(PR [#7037](https://github.com/tobymao/sqlglot/pull/7037) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate `ARRAY_INTERSECT(expr)` for Hive/Spark/DBX (#7037) - due to [`f476d07`](https://github.com/tobymao/sqlglot/commit/f476d071a1412fb2d9cd6f39067380252ab4c15a) - update transpilation of SEQ functions and GENERATOR for DuckDB *(PR [#7029](https://github.com/tobymao/sqlglot/pull/7029) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: update transpilation of SEQ functions 
and GENERATOR for DuckDB (#7029) - due to [`643a6c7`](https://github.com/tobymao/sqlglot/commit/643a6c7d97292eb29ba1bac6523747e161544a1a) - Transpilation support for Snowflake REGEXP_LIKE to DuckDB *(PR [#7030](https://github.com/tobymao/sqlglot/pull/7030) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation support for Snowflake REGEXP_LIKE to DuckDB (#7030) - due to [`b1f0542`](https://github.com/tobymao/sqlglot/commit/b1f05428d8a4d441398c0f3d4a65b49b0eda2729) - tokenizer optimizations *(PR [#7038](https://github.com/tobymao/sqlglot/pull/7038) by [@geooo109](https://github.com/geooo109))*: tokenizer optimizations (#7038) - due to [`70d6f2b`](https://github.com/tobymao/sqlglot/commit/70d6f2b5d8c0f37550ba7a288c5f7f7021c66bd7) - annotate `ARRAY_INSERT(expr)` for Spark/DBX *(PR [#7044](https://github.com/tobymao/sqlglot/pull/7044) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate `ARRAY_INSERT(expr)` for Spark/DBX (#7044) - due to [`3019d0a`](https://github.com/tobymao/sqlglot/commit/3019d0a0110a503b68b2a3cf7f93be1000f20a40) - Map BIT_GET to GETBIT for Spark/DBX *(PR [#7041](https://github.com/tobymao/sqlglot/pull/7041) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Map BIT_GET to GETBIT for Spark/DBX (#7041) - due to [`27ae429`](https://github.com/tobymao/sqlglot/commit/27ae42987864949f74b784d3dbb063bd3450e0dc) - transpile BIT_COUNT to DuckDB *(PR [#7039](https://github.com/tobymao/sqlglot/pull/7039) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: transpile BIT_COUNT to DuckDB (#7039) - due to [`f6cbd27`](https://github.com/tobymao/sqlglot/commit/f6cbd27fd61937efba6879f20f5ff0239e678469) - Add support for trigger DDL statements *(PR [#6978](https://github.com/tobymao/sqlglot/pull/6978) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support for trigger DDL statements (#6978) - due to 
[`749cf18`](https://github.com/tobymao/sqlglot/commit/749cf18dbcb8d1d0c7d144e6481ecc3a443d4a0e) - require original SQL in `Parser.parse` *(PR [#7045](https://github.com/tobymao/sqlglot/pull/7045) by [@georgesittas](https://github.com/georgesittas))*: require original SQL in `Parser.parse` (#7045) - due to [`f9d1f73`](https://github.com/tobymao/sqlglot/commit/f9d1f73b490e6694da0e800d9a5a70e1ba7f38d5) - refactor colon (extract) parsing precedence *(PR [#7046](https://github.com/tobymao/sqlglot/pull/7046) by [@georgesittas](https://github.com/georgesittas))*: refactor colon (extract) parsing precedence (#7046) - due to [`c5939c1`](https://github.com/tobymao/sqlglot/commit/c5939c12c6816437f5abda3322f99cc597b1616c) - Map curdate to current_date for Spark/DBX *(PR [#7048](https://github.com/tobymao/sqlglot/pull/7048) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Map curdate to current_date for Spark/DBX (#7048) - due to [`58419e1`](https://github.com/tobymao/sqlglot/commit/58419e1f47119a135276a722b85ccfa92ae3d1f1) - move `SessionUser` to base *(PR [#7049](https://github.com/tobymao/sqlglot/pull/7049) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: move `SessionUser` to base (#7049) - due to [`13ee312`](https://github.com/tobymao/sqlglot/commit/13ee31281fe21c670f8ce1656520c18762e8402f) - use `IntEnum` instead of auto/string enums *(PR [#7050](https://github.com/tobymao/sqlglot/pull/7050) by [@georgesittas](https://github.com/georgesittas))*: use `IntEnum` instead of auto/string enums (#7050) - due to [`f98cba1`](https://github.com/tobymao/sqlglot/commit/f98cba17caa8f3c2dcd5669f3525094c5dd58781) - move exp.Rand to base *(PR [#7065](https://github.com/tobymao/sqlglot/pull/7065) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: move exp.Rand to base (#7065) - due to [`4f8a49c`](https://github.com/tobymao/sqlglot/commit/4f8a49cd42ef37178cd0626554bfd263a140046e) - Transpilation support for Snowflake REGEXP_COUNT to DuckDB *(PR 
[#7054](https://github.com/tobymao/sqlglot/pull/7054) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation support for Snowflake REGEXP_COUNT to DuckDB (#7054) - due to [`eaba8dc`](https://github.com/tobymao/sqlglot/commit/eaba8dc26b2ec28b8074eb0cf2b3db086cf7ccc3) - move DEGREES(expr) to base *(PR [#7074](https://github.com/tobymao/sqlglot/pull/7074) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: move DEGREES(expr) to base (#7074) - due to [`a87c1b4`](https://github.com/tobymao/sqlglot/commit/a87c1b46b54ef7f6895c68666c05041924691576) - annotate DEGREES(expr) for T-SQL *(PR [#7077](https://github.com/tobymao/sqlglot/pull/7077) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate DEGREES(expr) for T-SQL (#7077) - due to [`f057a1b`](https://github.com/tobymao/sqlglot/commit/f057a1b912d473a77f111d33f40ecce2f6d54cf8) - move MONTHNAME to base *(PR [#7083](https://github.com/tobymao/sqlglot/pull/7083) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: move MONTHNAME to base (#7083) - due to [`dd2cf85`](https://github.com/tobymao/sqlglot/commit/dd2cf850134a2833d0da5eb58206accc298c9e0a) - support `UTCTimestamp` *(PR [#7082](https://github.com/tobymao/sqlglot/pull/7082) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support `UTCTimestamp` (#7082) - due to [`7ff13c6`](https://github.com/tobymao/sqlglot/commit/7ff13c690dddd9e1024b1a4ab6c9532f67bdece8) - transpilation support map_contains_key *(PR [#7070](https://github.com/tobymao/sqlglot/pull/7070) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support map_contains_key (#7070) - due to [`6fa494d`](https://github.com/tobymao/sqlglot/commit/6fa494da191753da8b85fb6a706b99f7f4950f43) - support `arrayCompact(expr)` function *(PR [#7084](https://github.com/tobymao/sqlglot/pull/7084) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support `arrayCompact(expr)` function (#7084) - due to 
[`a58c37d`](https://github.com/tobymao/sqlglot/commit/a58c37d24441f12af92df329269910bc7b5a0c8f) - transpilation of Snowflake REGEXP_REPLACE to DuckDB *(PR [#7078](https://github.com/tobymao/sqlglot/pull/7078) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpilation of Snowflake REGEXP_REPLACE to DuckDB (#7078) - due to [`dfcd5d7`](https://github.com/tobymao/sqlglot/commit/dfcd5d75f1caf3a32d5930a91f4fb1de598aa914) - Add Exasol reserved keywords to exasol dialect *(PR [#7086](https://github.com/tobymao/sqlglot/pull/7086) by [@nnamdi16](https://github.com/nnamdi16))*: Add Exasol reserved keywords to exasol dialect (#7086) - due to [`7664358`](https://github.com/tobymao/sqlglot/commit/7664358b27599936dacb2b7d8e5329fe32425e62) - fix parsing error in json_value for exasol dialect *(PR [#7088](https://github.com/tobymao/sqlglot/pull/7088) by [@nnamdi16](https://github.com/nnamdi16))*: fix parsing error in json_value for exasol dialect (#7088) - due to [`217e960`](https://github.com/tobymao/sqlglot/commit/217e960f57675cc5f5cb9ff9996c048a31d8004c) - annotate ARRAY_CONTAINS *(PR [#7099](https://github.com/tobymao/sqlglot/pull/7099) by [@geooo109](https://github.com/geooo109))*: annotate ARRAY_CONTAINS (#7099) - due to [`1074d66`](https://github.com/tobymao/sqlglot/commit/1074d66231d1de64b6b9aa43de6afbdc6717da5f) - transpilation of Snowflake REGEXP_INSTR to DuckDB *(PR [#7097](https://github.com/tobymao/sqlglot/pull/7097) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpilation of Snowflake REGEXP_INSTR to DuckDB (#7097) - due to [`ab1c2ab`](https://github.com/tobymao/sqlglot/commit/ab1c2ab44556a8f7ffe7dad09e4b50e75b122b5d) - annotate PERCENTILE/APPROX_PERCENTILE for hive, spark2, spark, dbx *(PR [#7100](https://github.com/tobymao/sqlglot/pull/7100) by [@geooo109](https://github.com/geooo109))*: annotate PERCENTILE/APPROX_PERCENTILE for hive, spark2, spark, dbx (#7100) - due to 
[`0f8287d`](https://github.com/tobymao/sqlglot/commit/0f8287d8e5ff1eee2aea29001443ee00a4b2ae47) - annotate BIT_OR(expr) for Spark/DBX *(PR [#7101](https://github.com/tobymao/sqlglot/pull/7101) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate BIT_OR(expr) for Spark/DBX (#7101) - due to [`e19abfd`](https://github.com/tobymao/sqlglot/commit/e19abfded7c159c30f53063d10ae57406553f75d) - annotate BIT_AND(expr) for Spark/DBX *(PR [#7103](https://github.com/tobymao/sqlglot/pull/7103) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate BIT_AND(expr) for Spark/DBX (#7103) - due to [`0c4f74f`](https://github.com/tobymao/sqlglot/commit/0c4f74f6574d3acf06935626615e2e1f14ae9c04) - parse and annotate ELEMENT_AT for spark2, spark, dbx *(PR [#7104](https://github.com/tobymao/sqlglot/pull/7104) by [@geooo109](https://github.com/geooo109))*: parse and annotate ELEMENT_AT for spark2, spark, dbx (#7104) - due to [`8b8aef0`](https://github.com/tobymao/sqlglot/commit/8b8aef01197b670a727a59e46727b5a57f106a5d) - annotate BIT_XOR(expr) for Spark/DBX *(PR [#7106](https://github.com/tobymao/sqlglot/pull/7106) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate BIT_XOR(expr) for Spark/DBX (#7106) - due to [`1b1d57a`](https://github.com/tobymao/sqlglot/commit/1b1d57a3541f48291a1534a0b1a28948f8b8207e) - transpilation of Snowflake REGEXP_SUBSTR and REGEXP_SUBSTR_ALL to DuckDB *(PR [#7095](https://github.com/tobymao/sqlglot/pull/7095) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpilation of Snowflake REGEXP_SUBSTR and REGEXP_SUBSTR_ALL to DuckDB (#7095) - due to [`4f964db`](https://github.com/tobymao/sqlglot/commit/4f964db9bfbe42b31dc9f191e687cb2f5f7db0d8) - remove prefix from data type and cleanup *(commit by [@tobymao](https://github.com/tobymao))*: remove prefix from data type and cleanup - due to [`0858599`](https://github.com/tobymao/sqlglot/commit/0858599e274567f7a8c9361afab1526cd2d58eea) - support arrayConcat to clickhouse 
*(PR [#7108](https://github.com/tobymao/sqlglot/pull/7108) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support arrayConcat to clickhouse (#7108) - due to [`73ab9af`](https://github.com/tobymao/sqlglot/commit/73ab9af9c95274b3feac9278145432c27695054e) - support arrayDistinct(expr) for clickhouse *(PR [#7114](https://github.com/tobymao/sqlglot/pull/7114) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support arrayDistinct(expr) for clickhouse (#7114) - due to [`37fa8c7`](https://github.com/tobymao/sqlglot/commit/37fa8c7219a2f17a351f948d6eb314a543f8784d) - Split core functionality to TokenizerCore *(PR [#7116](https://github.com/tobymao/sqlglot/pull/7116) by [@VaggelisD](https://github.com/VaggelisD))*: Split core functionality to TokenizerCore (#7116) - due to [`1150e39`](https://github.com/tobymao/sqlglot/commit/1150e3911b0b82a9683f765de11605b14fd66ddb) - expression_core which is now compilable by mypyc *(PR [#7117](https://github.com/tobymao/sqlglot/pull/7117) by [@tobymao](https://github.com/tobymao))*: expression_core which is now compilable by mypyc (#7117) - due to [`f796956`](https://github.com/tobymao/sqlglot/commit/f796956d3cc857995572d715b8db530b2d76b8d6) - annotate `ArrayDistinct` for Hive/Spark/DBX *(PR [#7119](https://github.com/tobymao/sqlglot/pull/7119) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate `ArrayDistinct` for Hive/Spark/DBX (#7119) - due to [`2069b06`](https://github.com/tobymao/sqlglot/commit/2069b06284d6998d94c43c16d4b46ac50ea0d84a) - annotate ARRAY_EXCEPT for Hive/Spark/DBX *(PR [#7123](https://github.com/tobymao/sqlglot/pull/7123) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate ARRAY_EXCEPT for Hive/Spark/DBX (#7123) - due to [`5a30754`](https://github.com/tobymao/sqlglot/commit/5a30754df09ddb1260b394c812596adb03c2710d) - support `current_localtimestamp()` for DuckDB *(PR [#7128](https://github.com/tobymao/sqlglot/pull/7128) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support 
`current_localtimestamp()` for DuckDB (#7128) - due to [`93bf337`](https://github.com/tobymao/sqlglot/commit/93bf337ca2af1b5d9b06d6bf3c50c5bcce680077) - annotate date_diff(expr) for DuckDB *(PR [#7125](https://github.com/tobymao/sqlglot/pull/7125) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate date_diff(expr) for DuckDB (#7125) ### :sparkles: New Features - [`c8ddcc3`](https://github.com/tobymao/sqlglot/commit/c8ddcc383bab07b807ed1d6b6f9bef91417e43c1) - **optimizer**: Annotate COLLATION(expr) for Spark/DBX *(PR [#6957](https://github.com/tobymao/sqlglot/pull/6957) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`fc4b332`](https://github.com/tobymao/sqlglot/commit/fc4b3326a14a1b42bc954914ce43b8dad7ef23b2) - **optimizer**: Annotate BITMAP_COUNT(expr) for Spark/DBX *(PR [#6956](https://github.com/tobymao/sqlglot/pull/6956) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8725010`](https://github.com/tobymao/sqlglot/commit/87250100eb2a1d2c206a26cce276f7babec0e409) - add exp.Trunc for numeric truncation *(PR [#6923](https://github.com/tobymao/sqlglot/pull/6923) by [@doripo](https://github.com/doripo))* - [`1418494`](https://github.com/tobymao/sqlglot/commit/1418494f777358f4b6bd1e05ee5cb02591d92c74) - **optimizer**: Annotate FORMAT_STRING(expr) for Spark/DBX *(PR [#6962](https://github.com/tobymao/sqlglot/pull/6962) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`ff1fd52`](https://github.com/tobymao/sqlglot/commit/ff1fd521147cb66acc36f2da7b1590d9e7f8140f) - **generator**: Add numeric TRUNC output for additional dialects *(PR [#6961](https://github.com/tobymao/sqlglot/pull/6961) by [@doripo](https://github.com/doripo))* - [`37fa84e`](https://github.com/tobymao/sqlglot/commit/37fa84e389b6bcbc94326d3defb4664d0826fb3f) - **snowflake**: support `CURRENT_VERSION()` transpilation for Spark *(PR [#6964](https://github.com/tobymao/sqlglot/pull/6964) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`51d3ebd`](https://github.com/tobymao/sqlglot/commit/51d3ebdca83e114449590d9f337ae6902659a8b4) - **snowflake**: transpile `CURRENT_VERSION()` to MySQL *(PR [#6965](https://github.com/tobymao/sqlglot/pull/6965) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`9008553`](https://github.com/tobymao/sqlglot/commit/90085534eb8863f588003bdf65d96771729889aa) - **snowflake**: transpile CURRENT_VERSION() to ClickHouse, Postgres, Trino, Redshift *(PR [#6966](https://github.com/tobymao/sqlglot/pull/6966) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e8b379e`](https://github.com/tobymao/sqlglot/commit/e8b379eb67d034f829d2fd50daefea2a98b83976) - **sqlite**: Map SQLITE_VERSION() to exp.CurrentVersion expression *(PR [#6967](https://github.com/tobymao/sqlglot/pull/6967) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`dfd299f`](https://github.com/tobymao/sqlglot/commit/dfd299fcfaf7a61d13b073e7b59d6bdd0748c7b8) - **optimizer**: Annotate `RANDSTR(expr)` for Spark/DBX *(PR [#6971](https://github.com/tobymao/sqlglot/pull/6971) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`969e45d`](https://github.com/tobymao/sqlglot/commit/969e45d3ba1db25f4561b122b9401b5608356f58) - **optimizer**: Annotate REPEAT(expr) for Hive, Spark, DBX *(PR [#6974](https://github.com/tobymao/sqlglot/pull/6974) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`ade3639`](https://github.com/tobymao/sqlglot/commit/ade3639b337d0222a00feec7ac9762571586f7ab) - **snowflake**: transpilation support current_database *(PR [#6973](https://github.com/tobymao/sqlglot/pull/6973) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`57093d1`](https://github.com/tobymao/sqlglot/commit/57093d15d5bbc2217366ace42db109e215dca79f) - **optimizer**: Annotate `OVERLAY(expr)` for Spark/DBX *(PR [#6970](https://github.com/tobymao/sqlglot/pull/6970) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`f5b2328`](https://github.com/tobymao/sqlglot/commit/f5b23281b6829bace426808f0a55e73590b70bbd) - **optimizer**: Annotate RIGHT(expr) for Spark/DBX *(PR [#6980](https://github.com/tobymao/sqlglot/pull/6980) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`61a0d3f`](https://github.com/tobymao/sqlglot/commit/61a0d3f05be478dd4552e6559b6781891d4a3447) - **snowflake**: add support for JAROWINKLER_SIMILARITY *(PR [#6977](https://github.com/tobymao/sqlglot/pull/6977) by [@kyle-cheung](https://github.com/kyle-cheung))* - [`9d1f4e0`](https://github.com/tobymao/sqlglot/commit/9d1f4e0ea6f8b66b022a5263320275ed43efb5f3) - **snowflake**: transpilation support current_schema *(PR [#6976](https://github.com/tobymao/sqlglot/pull/6976) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`0d345aa`](https://github.com/tobymao/sqlglot/commit/0d345aafd037b047808716dfdb60cc554d47941d) - **optimizer**: Annotate REPLACE(expr) for Hive, Spark and DBX *(PR [#6975](https://github.com/tobymao/sqlglot/pull/6975) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`19f9000`](https://github.com/tobymao/sqlglot/commit/19f900031c9abe26bebb541e8907ca263454055c) - **snowflake**: transpilation support current_version *(PR [#6960](https://github.com/tobymao/sqlglot/pull/6960) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`f103a16`](https://github.com/tobymao/sqlglot/commit/f103a166aca95da726ac9281816181e53b916dc3) - **optimizer**: support parsing `VERSION()` for ClickHouse *(PR [#6986](https://github.com/tobymao/sqlglot/pull/6986) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2751c8f`](https://github.com/tobymao/sqlglot/commit/2751c8ff1d6c4acc1a0d407e601d572886ceffc3) - **mysql**: parse support for `VERSION()` *(PR [#6985](https://github.com/tobymao/sqlglot/pull/6985) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`37db91c`](https://github.com/tobymao/sqlglot/commit/37db91c5cea14488654a2b69aceab13b6c6a98b7) - 
**optimizer**: Annotate `REVERSE(expr)` for Hive, Spark and DBX *(PR [#6979](https://github.com/tobymao/sqlglot/pull/6979) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`f4d53a2`](https://github.com/tobymao/sqlglot/commit/f4d53a2d0f9aadf7fc63e484d64821cddf5d1f17) - **postgres**: support parsing VERSION() for Postgres/Redshift *(PR [#6987](https://github.com/tobymao/sqlglot/pull/6987) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`71bb0c3`](https://github.com/tobymao/sqlglot/commit/71bb0c3f7947c1959160c8b401354ceddee2e8ce) - **trino**: support for version() for trino *(PR [#6988](https://github.com/tobymao/sqlglot/pull/6988) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`fbca704`](https://github.com/tobymao/sqlglot/commit/fbca7040cd3ae9eb0bc599b5ce656724fccafab1) - **optimizer**: Annotate SPLIT(expr) for Hive/Spark/DBX *(PR [#6990](https://github.com/tobymao/sqlglot/pull/6990) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`264e95f`](https://github.com/tobymao/sqlglot/commit/264e95f04d95f2cd7bcf255ee7ae160db36882a7) - **optimizer**: Move TRANSLATE(expr) annotator to base *(PR [#6992](https://github.com/tobymao/sqlglot/pull/6992) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`eb8ad51`](https://github.com/tobymao/sqlglot/commit/eb8ad518142dc91e25d37310cb9cbfa33c44fe34) - **optimizer**: Annotate FILTER(expr, func) for Spark/DBX *(PR [#6995](https://github.com/tobymao/sqlglot/pull/6995) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`12e0869`](https://github.com/tobymao/sqlglot/commit/12e0869ff6820b35884e189b2b4f29aef56c3a51) - **optimizer**: annotate CURRENT_TIMESTAMP for MySQL *(PR [#7004](https://github.com/tobymao/sqlglot/pull/7004) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`cbdad37`](https://github.com/tobymao/sqlglot/commit/cbdad3762dd6935d75405a0c33a1656cab8c2d1e) - **mysql**: support CURTIME() for MySQL/SingleStore *(PR [#7005](https://github.com/tobymao/sqlglot/pull/7005) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`d16cc62`](https://github.com/tobymao/sqlglot/commit/d16cc62ea8342bec91092f8c7cf2504364581a7e) - **optimizer**: annotate ADD_MONTH(expr) for Hive/Spark/DBX *(PR [#7003](https://github.com/tobymao/sqlglot/pull/7003) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6381c48`](https://github.com/tobymao/sqlglot/commit/6381c4825c1929da56363035be2c4ae7a90336dd) - **exasol**: support NOW() for exasol *(PR [#7006](https://github.com/tobymao/sqlglot/pull/7006) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`d9ea168`](https://github.com/tobymao/sqlglot/commit/d9ea1683a98252ad43948be32fbd7cf77d17b67c) - **optimizer**: annotate CURRENT_USER to base *(PR [#7007](https://github.com/tobymao/sqlglot/pull/7007) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7a2a777`](https://github.com/tobymao/sqlglot/commit/7a2a777fb8c215b51436942645965792257b8dc9) - **optimizer**: annotate FROM_UTC_TIMESTAMP(expr) for Spark/DBX *(PR [#7008](https://github.com/tobymao/sqlglot/pull/7008) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8be32fd`](https://github.com/tobymao/sqlglot/commit/8be32fde55c8d256e73fd504246f695bf550f4cb) - **spark**: support MAKE_TIMESTAMP(expr) for Spark/DBX *(PR [#7009](https://github.com/tobymao/sqlglot/pull/7009) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b951d74`](https://github.com/tobymao/sqlglot/commit/b951d740a934a8f46ce2c96caf7d8ae80b61604c) - **optimizer**: annotate NEXT_DAY(expr) for Hive/Spark/DBX *(PR [#7010](https://github.com/tobymao/sqlglot/pull/7010) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`404797a`](https://github.com/tobymao/sqlglot/commit/404797acfb1a9f860bd87880fecacd79cb1b2161) - **optimizer**: Move `CURRENT_SCHEMA()` to Base Annotator *(PR [#7021](https://github.com/tobymao/sqlglot/pull/7021) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`994d3a3`](https://github.com/tobymao/sqlglot/commit/994d3a37983791d4e8484d6d39b819a1cff2f774) - **parser**: robust STAR with EXCLUDE (redshift) *(PR [#6972](https://github.com/tobymao/sqlglot/pull/6972) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#6963](https://github.com/tobymao/sqlglot/issues/6963) opened by [@iboland](https://github.com/iboland)* - [`3ea80fb`](https://github.com/tobymao/sqlglot/commit/3ea80fb86482e257c4565ab7876dd6cdd60a7be2) - **optimizer**: annotate REVERSE(str) for DuckDB *(PR [#7018](https://github.com/tobymao/sqlglot/pull/7018) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`352fb94`](https://github.com/tobymao/sqlglot/commit/352fb94c46e5dd0dfd824b4472b03cccf21d3f56) - **optimizer**: annotate ISODOW(expr) for DuckDB *(PR [#7016](https://github.com/tobymao/sqlglot/pull/7016) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`c2e5954`](https://github.com/tobymao/sqlglot/commit/c2e59545d8030b1d2e7859631c9d75ea0f6df883) - **optimizer**: annotate COUNTIF(expr) for DuckDB *(PR [#7012](https://github.com/tobymao/sqlglot/pull/7012) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e35ee14`](https://github.com/tobymao/sqlglot/commit/e35ee143b08f671175e730e602e9a5dcd9155fde) - **optimizer**: annotate CountIf(expr) for ClickHouse *(PR [#7013](https://github.com/tobymao/sqlglot/pull/7013) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6971c17`](https://github.com/tobymao/sqlglot/commit/6971c1730b14b0516ff3aca3780f6d84203e6993) - **snowflake**: support aliases in semantic view dimensions *(PR [#6994](https://github.com/tobymao/sqlglot/pull/6994) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6993](https://github.com/tobymao/sqlglot/issues/6993) opened by [@sgomezvillamor](https://github.com/sgomezvillamor)* - [`b73be3e`](https://github.com/tobymao/sqlglot/commit/b73be3e8c95b44f2cd71498592bd4b5b63ba02d9) - **duckdb**: support 
`today()` for duckdb *(PR [#7015](https://github.com/tobymao/sqlglot/pull/7015) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`36e3310`](https://github.com/tobymao/sqlglot/commit/36e3310959260a7b1124a60589cdc90a3e631624) - **exasol**: support current_schema as no_param *(PR [#7000](https://github.com/tobymao/sqlglot/pull/7000) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`fd8860b`](https://github.com/tobymao/sqlglot/commit/fd8860b8b8d5e0c29c53597485c656923375e1d9) - **optimizer**: annotate FORMAT(expr) for DuckDB *(PR [#7017](https://github.com/tobymao/sqlglot/pull/7017) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`eecdfa1`](https://github.com/tobymao/sqlglot/commit/eecdfa1b15ac1808f93107d2ad6a51f52ffaf7cc) - **optimizer**: Annotate `DAYOFWEEK(expr)`, `DAYOFMONTH(expr)` for Hive/Spark/DBX *(PR [#6996](https://github.com/tobymao/sqlglot/pull/6996) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`4fc26f0`](https://github.com/tobymao/sqlglot/commit/4fc26f086701cebb3d3974b762d12e1435f4a195) - **optimizer**: annotate `TIME_BUCKET(expr)` for DuckDB *(PR [#7014](https://github.com/tobymao/sqlglot/pull/7014) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`00e6d9a`](https://github.com/tobymao/sqlglot/commit/00e6d9af02971aad6e7102cae3af2a7192fa7070) - **optimizer**: annotate `UNIX_DATE(expr)` for Spark/DBX *(PR [#7011](https://github.com/tobymao/sqlglot/pull/7011) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`86a5509`](https://github.com/tobymao/sqlglot/commit/86a5509bfcb8df6a8cf8b0971d9d12ae3204f2af) - **exasol**: support user for exasol *(PR [#7001](https://github.com/tobymao/sqlglot/pull/7001) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b27c163`](https://github.com/tobymao/sqlglot/commit/b27c163fcce0a4b0a4f75d131cdc105353e95464) - **optimizer**: support `CURDATE` for Exasol *(PR [#6999](https://github.com/tobymao/sqlglot/pull/6999) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`47dc558`](https://github.com/tobymao/sqlglot/commit/47dc5589f8b165d9f0296e6aa48de337f556f1a4) - **optimizer**: annotate ARRAY_COMPACT(expr) for Spark/DBX *(PR [#7034](https://github.com/tobymao/sqlglot/pull/7034) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`235b8ac`](https://github.com/tobymao/sqlglot/commit/235b8ac24d41324239d6581b5636ad19ec7b9376) - **optimizer**: annotate `ARRAY_INTERSECT(expr)` for Hive/Spark/DBX *(PR [#7037](https://github.com/tobymao/sqlglot/pull/7037) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`643a6c7`](https://github.com/tobymao/sqlglot/commit/643a6c7d97292eb29ba1bac6523747e161544a1a) - **snowflake**: Transpilation support for Snowflake REGEXP_LIKE to DuckDB *(PR [#7030](https://github.com/tobymao/sqlglot/pull/7030) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`70d6f2b`](https://github.com/tobymao/sqlglot/commit/70d6f2b5d8c0f37550ba7a288c5f7f7021c66bd7) - **optimizer**: annotate `ARRAY_INSERT(expr)` for Spark/DBX *(PR [#7044](https://github.com/tobymao/sqlglot/pull/7044) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`1011fca`](https://github.com/tobymao/sqlglot/commit/1011fca568e29db2b13d88da29dfbf6df3f41af4) - **duckdb**: transpile Snowflake's current_schemas *(PR [#7042](https://github.com/tobymao/sqlglot/pull/7042) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`3019d0a`](https://github.com/tobymao/sqlglot/commit/3019d0a0110a503b68b2a3cf7f93be1000f20a40) - **spark**: Map BIT_GET to GETBIT for Spark/DBX *(PR [#7041](https://github.com/tobymao/sqlglot/pull/7041) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`27ae429`](https://github.com/tobymao/sqlglot/commit/27ae42987864949f74b784d3dbb063bd3450e0dc) - **spark**: transpile BIT_COUNT to DuckDB *(PR [#7039](https://github.com/tobymao/sqlglot/pull/7039) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`f6cbd27`](https://github.com/tobymao/sqlglot/commit/f6cbd27fd61937efba6879f20f5ff0239e678469) - **postgres**: Add support for trigger DDL statements *(PR [#6978](https://github.com/tobymao/sqlglot/pull/6978) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - :arrow_lower_right: *addresses issue [#6926](https://github.com/tobymao/sqlglot/issues/6926) opened by [@Badg](https://github.com/Badg)* - [`c5939c1`](https://github.com/tobymao/sqlglot/commit/c5939c12c6816437f5abda3322f99cc597b1616c) - **spark**: Map curdate to current_date for Spark/DBX *(PR [#7048](https://github.com/tobymao/sqlglot/pull/7048) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`58419e1`](https://github.com/tobymao/sqlglot/commit/58419e1f47119a135276a722b85ccfa92ae3d1f1) - **optimizer**: move `SessionUser` to base *(PR [#7049](https://github.com/tobymao/sqlglot/pull/7049) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`3c67ec0`](https://github.com/tobymao/sqlglot/commit/3c67ec0007b509ec58297bf2c61f17d095b694b6) - **duckdb**: Add transpilation support for NULL values in ARRAY_CONTAINS function *(PR [#7055](https://github.com/tobymao/sqlglot/pull/7055) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`f98cba1`](https://github.com/tobymao/sqlglot/commit/f98cba17caa8f3c2dcd5669f3525094c5dd58781) - **optimizer**: move exp.Rand to base *(PR [#7065](https://github.com/tobymao/sqlglot/pull/7065) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`4f8a49c`](https://github.com/tobymao/sqlglot/commit/4f8a49cd42ef37178cd0626554bfd263a140046e) - **snowflake**: Transpilation support for Snowflake REGEXP_COUNT to DuckDB *(PR [#7054](https://github.com/tobymao/sqlglot/pull/7054) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`eaba8dc`](https://github.com/tobymao/sqlglot/commit/eaba8dc26b2ec28b8074eb0cf2b3db086cf7ccc3) - **optimizer**: move DEGREES(expr) to base *(PR 
[#7074](https://github.com/tobymao/sqlglot/pull/7074) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a87c1b4`](https://github.com/tobymao/sqlglot/commit/a87c1b46b54ef7f6895c68666c05041924691576) - **optimizer**: annotate DEGREES(expr) for T-SQL *(PR [#7077](https://github.com/tobymao/sqlglot/pull/7077) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`f057a1b`](https://github.com/tobymao/sqlglot/commit/f057a1b912d473a77f111d33f40ecce2f6d54cf8) - **optimizer**: move MONTHNAME to base *(PR [#7083](https://github.com/tobymao/sqlglot/pull/7083) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`dd2cf85`](https://github.com/tobymao/sqlglot/commit/dd2cf850134a2833d0da5eb58206accc298c9e0a) - **clickhouse**: support `UTCTimestamp` *(PR [#7082](https://github.com/tobymao/sqlglot/pull/7082) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7ff13c6`](https://github.com/tobymao/sqlglot/commit/7ff13c690dddd9e1024b1a4ab6c9532f67bdece8) - **snowflake**: transpilation support map_contains_key *(PR [#7070](https://github.com/tobymao/sqlglot/pull/7070) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`6fa494d`](https://github.com/tobymao/sqlglot/commit/6fa494da191753da8b85fb6a706b99f7f4950f43) - **clickhouse**: support `arrayCompact(expr)` function *(PR [#7084](https://github.com/tobymao/sqlglot/pull/7084) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a58c37d`](https://github.com/tobymao/sqlglot/commit/a58c37d24441f12af92df329269910bc7b5a0c8f) - **snowflake**: transpilation of Snowflake REGEXP_REPLACE to DuckDB *(PR [#7078](https://github.com/tobymao/sqlglot/pull/7078) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`3a453d0`](https://github.com/tobymao/sqlglot/commit/3a453d045a261b82ed6a84b661da0c9a86e3161d) - **duckdb**: Add transpilation support for NULLs in ARRAY_DISTINCT *(PR [#7069](https://github.com/tobymao/sqlglot/pull/7069) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`dfcd5d7`](https://github.com/tobymao/sqlglot/commit/dfcd5d75f1caf3a32d5930a91f4fb1de598aa914) - **exasol**: Add Exasol reserved keywords to exasol dialect *(PR [#7086](https://github.com/tobymao/sqlglot/pull/7086) by [@nnamdi16](https://github.com/nnamdi16))* - [`b569a3d`](https://github.com/tobymao/sqlglot/commit/b569a3d7628600608144d1d93e17eab4cafe4217) - **DuckDb**: Add transpilation support for ARRAY_MAX and ARRAY_MIN functions *(PR [#7080](https://github.com/tobymao/sqlglot/pull/7080) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1074d66`](https://github.com/tobymao/sqlglot/commit/1074d66231d1de64b6b9aa43de6afbdc6717da5f) - **snowflake**: transpilation of Snowflake REGEXP_INSTR to DuckDB *(PR [#7097](https://github.com/tobymao/sqlglot/pull/7097) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ab1c2ab`](https://github.com/tobymao/sqlglot/commit/ab1c2ab44556a8f7ffe7dad09e4b50e75b122b5d) - **optimizer**: annotate PERCENTILE/APPROX_PERCENTILE for hive, spark2, spark, dbx *(PR [#7100](https://github.com/tobymao/sqlglot/pull/7100) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7096](https://github.com/tobymao/sqlglot/issues/7096) opened by [@sunilmishra-amp](https://github.com/sunilmishra-amp)* - [`0f8287d`](https://github.com/tobymao/sqlglot/commit/0f8287d8e5ff1eee2aea29001443ee00a4b2ae47) - **optimizer**: annotate BIT_OR(expr) for Spark/DBX *(PR [#7101](https://github.com/tobymao/sqlglot/pull/7101) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e19abfd`](https://github.com/tobymao/sqlglot/commit/e19abfded7c159c30f53063d10ae57406553f75d) - **optimizer**: annotate BIT_AND(expr) for Spark/DBX *(PR [#7103](https://github.com/tobymao/sqlglot/pull/7103) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`0c4f74f`](https://github.com/tobymao/sqlglot/commit/0c4f74f6574d3acf06935626615e2e1f14ae9c04) - **optimizer**: parse and annotate ELEMENT_AT for spark2, spark, dbx *(PR [#7104](https://github.com/tobymao/sqlglot/pull/7104) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7096](https://github.com/tobymao/sqlglot/issues/7096) opened by [@sunilmishra-amp](https://github.com/sunilmishra-amp)* - [`8b8aef0`](https://github.com/tobymao/sqlglot/commit/8b8aef01197b670a727a59e46727b5a57f106a5d) - **optimizer**: annotate BIT_XOR(expr) for Spark/DBX *(PR [#7106](https://github.com/tobymao/sqlglot/pull/7106) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`ce09e81`](https://github.com/tobymao/sqlglot/commit/ce09e81bcda5c439825af54ac4e6eef124694833) - **DuckDB**: Enable transpilation for ARRAY_EXCEPT function *(PR [#7094](https://github.com/tobymao/sqlglot/pull/7094) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1b1d57a`](https://github.com/tobymao/sqlglot/commit/1b1d57a3541f48291a1534a0b1a28948f8b8207e) - **snowflake**: transpilation of Snowflake REGEXP_SUBSTR and REGEXP_SUBSTR_ALL to DuckDB *(PR [#7095](https://github.com/tobymao/sqlglot/pull/7095) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`0858599`](https://github.com/tobymao/sqlglot/commit/0858599e274567f7a8c9361afab1526cd2d58eea) - **clickhouse**: support arrayConcat to clickhouse *(PR [#7108](https://github.com/tobymao/sqlglot/pull/7108) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`9100ea7`](https://github.com/tobymao/sqlglot/commit/9100ea742cd83fd70aaf3192ceeba8abeda61b47) - **parser**: support DECLARE spark, dbx *(PR [#7113](https://github.com/tobymao/sqlglot/pull/7113) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#7112](https://github.com/tobymao/sqlglot/issues/7112) opened by [@aersam](https://github.com/aersam)* - 
[`73ab9af`](https://github.com/tobymao/sqlglot/commit/73ab9af9c95274b3feac9278145432c27695054e) - **clickhouse**: support arrayDistinct(expr) for clickhouse *(PR [#7114](https://github.com/tobymao/sqlglot/pull/7114) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`f796956`](https://github.com/tobymao/sqlglot/commit/f796956d3cc857995572d715b8db530b2d76b8d6) - **optimizer**: annotate `ArrayDistinct` for Hive/Spark/DBX *(PR [#7119](https://github.com/tobymao/sqlglot/pull/7119) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2069b06`](https://github.com/tobymao/sqlglot/commit/2069b06284d6998d94c43c16d4b46ac50ea0d84a) - **optimizer**: annotate ARRAY_EXCEPT for Hive/Spark/DBX *(PR [#7123](https://github.com/tobymao/sqlglot/pull/7123) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5a30754`](https://github.com/tobymao/sqlglot/commit/5a30754df09ddb1260b394c812596adb03c2710d) - **duckdb**: support `current_localtimestamp()` for DuckDB *(PR [#7128](https://github.com/tobymao/sqlglot/pull/7128) by [@AbhishekASLK](https://github.com/AbhishekASLK))* ### :bug: Bug Fixes - [`836cc60`](https://github.com/tobymao/sqlglot/commit/836cc60c8177339e15bfef3bd3b7d98569400385) - **parser**: Limit named PK parsing to MySQL only *(PR [#6991](https://github.com/tobymao/sqlglot/pull/6991) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6989](https://github.com/tobymao/sqlglot/issues/6989) opened by [@sgomezvillamor](https://github.com/sgomezvillamor)* - [`495920e`](https://github.com/tobymao/sqlglot/commit/495920eb3c447949e0c787adf347c8bcd7035764) - **snowflake**: Parse MODIFY as ALTER in ALTER TABLE *(PR [#7024](https://github.com/tobymao/sqlglot/pull/7024) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#7023](https://github.com/tobymao/sqlglot/issues/7023) opened by [@abhaysharma419](https://github.com/abhaysharma419)* - 
[`1e753a3`](https://github.com/tobymao/sqlglot/commit/1e753a35405edaf9b6a1b90fccd74eaef23a028b) - **parser**: parse cast argument using _parse_assignment closes [#7027](https://github.com/tobymao/sqlglot/pull/7027) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`67e47a4`](https://github.com/tobymao/sqlglot/commit/67e47a47582cf0970ee6a9e40c9014ba04a0c065) - Support missing meta when updating position metadata for an expression *(PR [#7032](https://github.com/tobymao/sqlglot/pull/7032) by [@izeigerman](https://github.com/izeigerman))* - [`9bffc99`](https://github.com/tobymao/sqlglot/commit/9bffc99efec77605292e3332dfd626754441f9d8) - **snowflake**: wrap jarowinkler similarity in UPPER for case-insensitivity *(PR [#7022](https://github.com/tobymao/sqlglot/pull/7022) by [@kyle-cheung](https://github.com/kyle-cheung))* - [`9d7db06`](https://github.com/tobymao/sqlglot/commit/9d7db06cf8ef66583f11b6d54af573bb28f4434b) - **spark**: Generator for ARRAY_INSERT(expr) *(PR [#7036](https://github.com/tobymao/sqlglot/pull/7036) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - :arrow_lower_right: *fixes issue [#7035](https://github.com/tobymao/sqlglot/issues/7035) opened by [@AbhishekASLK](https://github.com/AbhishekASLK)* - [`f476d07`](https://github.com/tobymao/sqlglot/commit/f476d071a1412fb2d9cd6f39067380252ab4c15a) - **duckdb**: update transpilation of SEQ functions and GENERATOR for DuckDB *(PR [#7029](https://github.com/tobymao/sqlglot/pull/7029) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - :arrow_lower_right: *fixes issue [#6998](https://github.com/tobymao/sqlglot/issues/6998) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`aa14297`](https://github.com/tobymao/sqlglot/commit/aa142974382fa4115234834e082ca594e00eefa4) - **duckdb**: transpile LATERAL VIEW INLINE from spark to DuckDB *(PR [#7033](https://github.com/tobymao/sqlglot/pull/7033) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* -
[`f9d1f73`](https://github.com/tobymao/sqlglot/commit/f9d1f73b490e6694da0e800d9a5a70e1ba7f38d5) - **snowflake**: refactor colon (extract) parsing precedence *(PR [#7046](https://github.com/tobymao/sqlglot/pull/7046) by [@georgesittas](https://github.com/georgesittas))* - [`5ef8bef`](https://github.com/tobymao/sqlglot/commit/5ef8befd435fa644ae7e8ca80c8564207f7ad014) - **bigquery**: Parse & generate table alias before its version *(PR [#7075](https://github.com/tobymao/sqlglot/pull/7075) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#7073](https://github.com/tobymao/sqlglot/issues/7073) opened by [@chelsea-lin](https://github.com/chelsea-lin)* - [`bd4db6b`](https://github.com/tobymao/sqlglot/commit/bd4db6bef103c2d810e661948d2e3ddda0bf2c67) - **parser**: INTERVAL units with DCOLON *(PR [#7076](https://github.com/tobymao/sqlglot/pull/7076) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#7072](https://github.com/tobymao/sqlglot/issues/7072) opened by [@ligfx](https://github.com/ligfx)* - [`7664358`](https://github.com/tobymao/sqlglot/commit/7664358b27599936dacb2b7d8e5329fe32425e62) - **exasol**: fix parsing error in json_value for exasol dialect *(PR [#7088](https://github.com/tobymao/sqlglot/pull/7088) by [@nnamdi16](https://github.com/nnamdi16))* - [`af8c3e9`](https://github.com/tobymao/sqlglot/commit/af8c3e9222224ee4878bc81dac0dee032e4863a7) - missing comments in subquery *(commit by [@tobymao](https://github.com/tobymao))* - [`217e960`](https://github.com/tobymao/sqlglot/commit/217e960f57675cc5f5cb9ff9996c048a31d8004c) - **optimizer**: annotate ARRAY_CONTAINS *(PR [#7099](https://github.com/tobymao/sqlglot/pull/7099) by [@geooo109](https://github.com/geooo109))* - [`3ff4e0d`](https://github.com/tobymao/sqlglot/commit/3ff4e0d0b97041f72b759f1fd2bd0ec561c96423) - **deploy**: Fix _version not exists *(PR [#7129](https://github.com/tobymao/sqlglot/pull/7129) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`93bf337`](https://github.com/tobymao/sqlglot/commit/93bf337ca2af1b5d9b06d6bf3c50c5bcce680077) - **optimizer**: annotate date_diff(expr) for DuckDB *(PR [#7125](https://github.com/tobymao/sqlglot/pull/7125) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`4a38462`](https://github.com/tobymao/sqlglot/commit/4a3846280d00b94cc677baff63d90e3f1361cd8a) - **deploy**: Use GA ARM machine *(PR [#7131](https://github.com/tobymao/sqlglot/pull/7131) by [@VaggelisD](https://github.com/VaggelisD))* ### :zap: Performance Improvements - [`a8aa371`](https://github.com/tobymao/sqlglot/commit/a8aa371f3f8d5964b8625a2f92f302ae1be297ea) - improve `DataType.build` perf by removing unnecessary constructor *(PR [#7092](https://github.com/tobymao/sqlglot/pull/7092) by [@georgesittas](https://github.com/georgesittas))* - [`f12a58f`](https://github.com/tobymao/sqlglot/commit/f12a58f86501ef726335f147b7171d01b162ea79) - extract Column instance check out of _set_type in annotate_types *(PR [#7091](https://github.com/tobymao/sqlglot/pull/7091) by [@georgesittas](https://github.com/georgesittas))* - [`ddbf64c`](https://github.com/tobymao/sqlglot/commit/ddbf64ceef63150ffef5e60b5601af0e84a9f54b) - optimize large query performance in optimizer pipeline *(PR [#7090](https://github.com/tobymao/sqlglot/pull/7090) by [@sabrikaragonen](https://github.com/sabrikaragonen))* - :arrow_lower_right: *addresses issue [#5112](https://github.com/tobymao/sqlglot/issues/5112) opened by [@karakanb](https://github.com/karakanb)* ### :recycle: Refactors - [`749cf18`](https://github.com/tobymao/sqlglot/commit/749cf18dbcb8d1d0c7d144e6481ecc3a443d4a0e) - require original SQL in `Parser.parse` *(PR [#7045](https://github.com/tobymao/sqlglot/pull/7045) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#7031](https://github.com/tobymao/sqlglot/issues/7031) opened by [@ultrabear](https://github.com/ultrabear)* - 
[`13ee312`](https://github.com/tobymao/sqlglot/commit/13ee31281fe21c670f8ce1656520c18762e8402f) - use `IntEnum` instead of auto/string enums *(PR [#7050](https://github.com/tobymao/sqlglot/pull/7050) by [@georgesittas](https://github.com/georgesittas))* - [`4f964db`](https://github.com/tobymao/sqlglot/commit/4f964db9bfbe42b31dc9f191e687cb2f5f7db0d8) - remove prefix from data type and cleanup *(commit by [@tobymao](https://github.com/tobymao))* - [`1150e39`](https://github.com/tobymao/sqlglot/commit/1150e3911b0b82a9683f765de11605b14fd66ddb) - expression_core which is now compilable by mypyc *(PR [#7117](https://github.com/tobymao/sqlglot/pull/7117) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`1c5e168`](https://github.com/tobymao/sqlglot/commit/1c5e1684dbb6f508f08b30e9ea96393b25ef0185) - generate CNAME file at docs build time *(PR [#6958](https://github.com/tobymao/sqlglot/pull/6958) by [@georgesittas](https://github.com/georgesittas))* - [`fd80f4b`](https://github.com/tobymao/sqlglot/commit/fd80f4b9dbbb2a3709e4ea09118b826677b0d3e8) - transpilation test for LEAD from Snowflake to DuckDB *(PR [#6968](https://github.com/tobymao/sqlglot/pull/6968) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`4f82c90`](https://github.com/tobymao/sqlglot/commit/4f82c901bcc829d142248306202c89d440cbed86) - duckdb version() tests *(commit by [@geooo109](https://github.com/geooo109))* - [`eabd68f`](https://github.com/tobymao/sqlglot/commit/eabd68fa952b0b7ce86927a22e5fb03f94915a21) - used dictionary comprehension in annotators *(PR [#7025](https://github.com/tobymao/sqlglot/pull/7025) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`11da02f`](https://github.com/tobymao/sqlglot/commit/11da02f2bf7128c191576f68ffb0b5d881a1e8f2) - **optimizer**: move `CurrentVersion` annotator to base *(PR [#6997](https://github.com/tobymao/sqlglot/pull/6997) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`b1f0542`](https://github.com/tobymao/sqlglot/commit/b1f05428d8a4d441398c0f3d4a65b49b0eda2729) - tokenizer optimizations *(PR [#7038](https://github.com/tobymao/sqlglot/pull/7038) by [@geooo109](https://github.com/geooo109))* - [`214043c`](https://github.com/tobymao/sqlglot/commit/214043c5a9f3215d2811603c62701b59b0e61f05) - update license version *(PR [#7047](https://github.com/tobymao/sqlglot/pull/7047) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5172f1a`](https://github.com/tobymao/sqlglot/commit/5172f1a5ff62ebc0bcca4f27ab5b79c0aefbdf6a) - refactor _scan *(PR [#7051](https://github.com/tobymao/sqlglot/pull/7051) by [@geooo109](https://github.com/geooo109))* - [`6c08a77`](https://github.com/tobymao/sqlglot/commit/6c08a77598dd672ae0f05eb4b36edfba520f4002) - improve scope module perf by replacing instance checks with type checks *(PR [#7066](https://github.com/tobymao/sqlglot/pull/7066) by [@georgesittas](https://github.com/georgesittas))* - [`c2f7761`](https://github.com/tobymao/sqlglot/commit/c2f7761f4f75ceabf7df781e5431b19b6eade3d6) - improve schema module perf by caching normalized tables/names *(PR [#7068](https://github.com/tobymao/sqlglot/pull/7068) by [@georgesittas](https://github.com/georgesittas))* - [`13f1b06`](https://github.com/tobymao/sqlglot/commit/13f1b06570a20d2f8e6a9cc22b5a20e2d1af8674) - **snowflake**: add transpilation test for LAST_VALUE from snowflake to duckdb *(PR [#7079](https://github.com/tobymao/sqlglot/pull/7079) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`92c364a`](https://github.com/tobymao/sqlglot/commit/92c364ae64cf68fc60d9418cfea17a8414e37333) - refactor map_contains_key from sf to duckdb *(commit by [@geooo109](https://github.com/geooo109))* - [`9b4524a`](https://github.com/tobymao/sqlglot/commit/9b4524aa4c2deac4dcc46a628370101e9677a4c2) - small opts for generator sql method *(PR [#7085](https://github.com/tobymao/sqlglot/pull/7085) by [@geooo109](https://github.com/geooo109))* - 
[`aba0db1`](https://github.com/tobymao/sqlglot/commit/aba0db107a0cf02471234c54ffb982f8dca9cfcd) - remove `classproperty` as it is dead code *(PR [#7087](https://github.com/tobymao/sqlglot/pull/7087) by [@georgesittas](https://github.com/georgesittas))* - [`f927329`](https://github.com/tobymao/sqlglot/commit/f927329f3f47d2ed7835715103015d5316d2943a) - add some type hints *(PR [#7089](https://github.com/tobymao/sqlglot/pull/7089) by [@georgesittas](https://github.com/georgesittas))* - [`2935e95`](https://github.com/tobymao/sqlglot/commit/2935e957116e584287572a770a1ecd48a2407a9d) - refactor RANK/DENSE_RANK for exasol *(commit by [@geooo109](https://github.com/geooo109))* - [`37fa8c7`](https://github.com/tobymao/sqlglot/commit/37fa8c7219a2f17a351f948d6eb314a543f8784d) - **tokenizer**: Split core functionality to TokenizerCore *(PR [#7116](https://github.com/tobymao/sqlglot/pull/7116) by [@VaggelisD](https://github.com/VaggelisD))* - [`3e8f551`](https://github.com/tobymao/sqlglot/commit/3e8f551addd2240f8c79d88692180c4ca27a4149) - update claude.md *(commit by [@tobymao](https://github.com/tobymao))* ## [v28.10.0] - 2026-02-04 ### :boom: BREAKING CHANGES - due to [`55698db`](https://github.com/tobymao/sqlglot/commit/55698dbca84078160248e412cf595dd26aababef) - Annotate MAKE_TIME(expr) for DuckDB *(PR [#6931](https://github.com/tobymao/sqlglot/pull/6931) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MAKE_TIME(expr) for DuckDB (#6931) - due to [`e750ce7`](https://github.com/tobymao/sqlglot/commit/e750ce7c4ac8235e395fe077c6c9b6d5572affaf) - Transpilation for SHA2 and SHA2_BINARY from Snowflake to DuckDB *(PR [#6929](https://github.com/tobymao/sqlglot/pull/6929) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation for SHA2 and SHA2_BINARY from Snowflake to DuckDB (#6929) - due to [`9b05968`](https://github.com/tobymao/sqlglot/commit/9b05968e23fe94f804d22d77bf91ab44071aea73) - Annotate BIT_LENGTH(expr) for DuckDB *(PR 
[#6932](https://github.com/tobymao/sqlglot/pull/6932) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate BIT_LENGTH(expr) for DuckDB (#6932) - due to [`2050362`](https://github.com/tobymao/sqlglot/commit/20503623debdc11d739746461e8bfb8c13514a58) - Annotate LENGTH(expr) for DuckDB *(PR [#6937](https://github.com/tobymao/sqlglot/pull/6937) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate LENGTH(expr) for DuckDB (#6937) - due to [`e803c7f`](https://github.com/tobymao/sqlglot/commit/e803c7f86e518dccfc19c2543394cd9758c59899) - Moved SIN, COS, TAN, COT to Base *(PR [#6936](https://github.com/tobymao/sqlglot/pull/6936) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Moved SIN, COS, TAN, COT to Base (#6936) - due to [`973d25d`](https://github.com/tobymao/sqlglot/commit/973d25dac469934394af4b1a6e0a11e04ad8524f) - support transpilation of ARRAY_REMOVE_AT *(PR [#6930](https://github.com/tobymao/sqlglot/pull/6930) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: support transpilation of ARRAY_REMOVE_AT (#6930) - due to [`2e2ff03`](https://github.com/tobymao/sqlglot/commit/2e2ff0363d72cc7cc80b308c6c62496969199b50) - Refactor RPAD/LPAD *(PR [#6869](https://github.com/tobymao/sqlglot/pull/6869) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Refactor RPAD/LPAD (#6869) - due to [`62aeff8`](https://github.com/tobymao/sqlglot/commit/62aeff8b978f372615a113cbd2ea86e26dd3ba55) - Annotate CURRENT_CATALOG to Base *(PR [#6940](https://github.com/tobymao/sqlglot/pull/6940) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CURRENT_CATALOG to Base (#6940) - due to [`04002ae`](https://github.com/tobymao/sqlglot/commit/04002aedb48ee1d11f077b66a63722127f027243) - transpile NTH_VALUE from Snowflake to DuckDB *(PR [#6882](https://github.com/tobymao/sqlglot/pull/6882) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: transpile NTH_VALUE from Snowflake to DuckDB (#6882) - due to 
[`bdce7c7`](https://github.com/tobymao/sqlglot/commit/bdce7c722efa37d44ee1ba85aa4c77f958e0b19f) - Annotate DAYOFMONTH(expr), DAYOFYEAR(expr) for MySQL *(PR [#6941](https://github.com/tobymao/sqlglot/pull/6941) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DAYOFMONTH(expr), DAYOFYEAR(expr) for MySQL (#6941) - due to [`edbbb59`](https://github.com/tobymao/sqlglot/commit/edbbb597998dbdb77fa89e9a98d6ae56f0915b00) - Annotate WEEK(expr) for MySQL *(PR [#6942](https://github.com/tobymao/sqlglot/pull/6942) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate WEEK(expr) for MySQL (#6942) - due to [`ccb484b`](https://github.com/tobymao/sqlglot/commit/ccb484b82bc665b39e6a0700a885567d19882623) - Annotate HOUR(expr) for MySQL, Hive, Spark, DBX *(PR [#6943](https://github.com/tobymao/sqlglot/pull/6943) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate HOUR(expr) for MySQL, Hive, Spark, DBX (#6943) - due to [`852adec`](https://github.com/tobymao/sqlglot/commit/852adeca09c7776810eb691a04a570e0cf673aa3) - Move `MD5(expr)` to Base *(PR [#6944](https://github.com/tobymao/sqlglot/pull/6944) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move `MD5(expr)` to Base (#6944) - due to [`2326eae`](https://github.com/tobymao/sqlglot/commit/2326eae50e6ecd47d4a8b2c848a93b720538187a) - Move ASIN, ACOS, ATAN to Base *(PR [#6945](https://github.com/tobymao/sqlglot/pull/6945) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move ASIN, ACOS, ATAN to Base (#6945) - due to [`7989906`](https://github.com/tobymao/sqlglot/commit/79899060ed3331a55ca1c935e00376a1c137840c) - Move ASINH, ACOSH, ATANH to Base *(PR [#6946](https://github.com/tobymao/sqlglot/pull/6946) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move ASINH, ACOSH, ATANH to Base (#6946) - due to [`713d22f`](https://github.com/tobymao/sqlglot/commit/713d22f0d44790f8dc7d80ba12ae920815a28c51) - Annotate LENGTH, LEVENSHTEIN_DISTANCE for Presto/Trino *(PR 
[#6947](https://github.com/tobymao/sqlglot/pull/6947) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate LENGTH, LEVENSHTEIN_DISTANCE for Presto/Trino (#6947) - due to [`0e872dc`](https://github.com/tobymao/sqlglot/commit/0e872dcb5337d13befe981261466ca14a788ef1e) - Annotate POSITION and STRPOS for Trino/Presto *(PR [#6948](https://github.com/tobymao/sqlglot/pull/6948) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate POSITION and STRPOS for Trino/Presto (#6948) - due to [`d01657a`](https://github.com/tobymao/sqlglot/commit/d01657a581a42f1588436882d190b20f4ea004a0) - Annotate WIDTH_BUCKET(expr) for Presto/Trino *(PR [#6950](https://github.com/tobymao/sqlglot/pull/6950) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate WIDTH_BUCKET(expr) for Presto/Trino (#6950) - due to [`cec0f27`](https://github.com/tobymao/sqlglot/commit/cec0f27f17f01c7b1355f1c1306a0f08af639331) - Annotate BITWISE OPERATORS for Presto/Trino *(PR [#6951](https://github.com/tobymao/sqlglot/pull/6951) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate BITWISE OPERATORS for Presto/Trino (#6951) - due to [`2292d0d`](https://github.com/tobymao/sqlglot/commit/2292d0d477ea9aaa7016539deb1435164ee749da) - Move SINH, COSH, TANH to Base *(PR [#6954](https://github.com/tobymao/sqlglot/pull/6954) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Move SINH, COSH, TANH to Base (#6954) - due to [`686ab6a`](https://github.com/tobymao/sqlglot/commit/686ab6ab6da7a1bc7652846989aedd014a9a6d41) - Use replace instead of set for JSON dot access identifiers *(PR [#6953](https://github.com/tobymao/sqlglot/pull/6953) by [@georgesittas](https://github.com/georgesittas))*: Use replace instead of set for JSON dot access identifiers (#6953) ### :sparkles: New Features - [`55698db`](https://github.com/tobymao/sqlglot/commit/55698dbca84078160248e412cf595dd26aababef) - **optimizer**: Annotate MAKE_TIME(expr) for DuckDB *(PR 
[#6931](https://github.com/tobymao/sqlglot/pull/6931) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e750ce7`](https://github.com/tobymao/sqlglot/commit/e750ce7c4ac8235e395fe077c6c9b6d5572affaf) - **snowflake**: Transpilation for SHA2 and SHA2_BINARY from Snowflake to DuckDB *(PR [#6929](https://github.com/tobymao/sqlglot/pull/6929) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`9b05968`](https://github.com/tobymao/sqlglot/commit/9b05968e23fe94f804d22d77bf91ab44071aea73) - **optimizer**: Annotate BIT_LENGTH(expr) for DuckDB *(PR [#6932](https://github.com/tobymao/sqlglot/pull/6932) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2050362`](https://github.com/tobymao/sqlglot/commit/20503623debdc11d739746461e8bfb8c13514a58) - **optimizer**: Annotate LENGTH(expr) for DuckDB *(PR [#6937](https://github.com/tobymao/sqlglot/pull/6937) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e803c7f`](https://github.com/tobymao/sqlglot/commit/e803c7f86e518dccfc19c2543394cd9758c59899) - **optimizer**: Moved SIN, COS, TAN, COT to Base *(PR [#6936](https://github.com/tobymao/sqlglot/pull/6936) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`973d25d`](https://github.com/tobymao/sqlglot/commit/973d25dac469934394af4b1a6e0a11e04ad8524f) - **duckdb**: support transpilation of ARRAY_REMOVE_AT *(PR [#6930](https://github.com/tobymao/sqlglot/pull/6930) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2e2ff03`](https://github.com/tobymao/sqlglot/commit/2e2ff0363d72cc7cc80b308c6c62496969199b50) - **snowflake**: Refactor RPAD/LPAD *(PR [#6869](https://github.com/tobymao/sqlglot/pull/6869) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`62aeff8`](https://github.com/tobymao/sqlglot/commit/62aeff8b978f372615a113cbd2ea86e26dd3ba55) - **optimizer**: Annotate CURRENT_CATALOG to Base *(PR [#6940](https://github.com/tobymao/sqlglot/pull/6940) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`04002ae`](https://github.com/tobymao/sqlglot/commit/04002aedb48ee1d11f077b66a63722127f027243) - **snowflake**: transpile NTH_VALUE from Snowflake to DuckDB *(PR [#6882](https://github.com/tobymao/sqlglot/pull/6882) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`ccb484b`](https://github.com/tobymao/sqlglot/commit/ccb484b82bc665b39e6a0700a885567d19882623) - **optimizer**: Annotate HOUR(expr) for MySQL, Hive, Spark, DBX *(PR [#6943](https://github.com/tobymao/sqlglot/pull/6943) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`852adec`](https://github.com/tobymao/sqlglot/commit/852adeca09c7776810eb691a04a570e0cf673aa3) - **optimizer**: Move `MD5(expr)` to Base *(PR [#6944](https://github.com/tobymao/sqlglot/pull/6944) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2326eae`](https://github.com/tobymao/sqlglot/commit/2326eae50e6ecd47d4a8b2c848a93b720538187a) - **optimizer**: Move ASIN, ACOS, ATAN to Base *(PR [#6945](https://github.com/tobymao/sqlglot/pull/6945) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7989906`](https://github.com/tobymao/sqlglot/commit/79899060ed3331a55ca1c935e00376a1c137840c) - **optimizer**: Move ASINH, ACOSH, ATANH to Base *(PR [#6946](https://github.com/tobymao/sqlglot/pull/6946) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`713d22f`](https://github.com/tobymao/sqlglot/commit/713d22f0d44790f8dc7d80ba12ae920815a28c51) - **optimizer**: Annotate LENGTH, LEVENSHTEIN_DISTANCE for Presto/Trino *(PR [#6947](https://github.com/tobymao/sqlglot/pull/6947) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`0e872dc`](https://github.com/tobymao/sqlglot/commit/0e872dcb5337d13befe981261466ca14a788ef1e) - **optimizer**: Annotate POSITION and STRPOS for Trino/Presto *(PR [#6948](https://github.com/tobymao/sqlglot/pull/6948) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`d01657a`](https://github.com/tobymao/sqlglot/commit/d01657a581a42f1588436882d190b20f4ea004a0) - **optimizer**: Annotate WIDTH_BUCKET(expr) for Presto/Trino *(PR [#6950](https://github.com/tobymao/sqlglot/pull/6950) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`cec0f27`](https://github.com/tobymao/sqlglot/commit/cec0f27f17f01c7b1355f1c1306a0f08af639331) - **optimizer**: Annotate BITWISE OPERATORS for Presto/Trino *(PR [#6951](https://github.com/tobymao/sqlglot/pull/6951) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2292d0d`](https://github.com/tobymao/sqlglot/commit/2292d0d477ea9aaa7016539deb1435164ee749da) - **optimizer**: Move SINH, COSH, TANH to Base *(PR [#6954](https://github.com/tobymao/sqlglot/pull/6954) by [@AbhishekASLK](https://github.com/AbhishekASLK))* ### :bug: Bug Fixes - [`12c7cf4`](https://github.com/tobymao/sqlglot/commit/12c7cf46e47d7be0a54881db171fb07e6793507a) - **dremio**: Generate exp.TryCast as CAST *(PR [#6928](https://github.com/tobymao/sqlglot/pull/6928) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6927](https://github.com/tobymao/sqlglot/issues/6927) opened by [@AyushPatel101](https://github.com/AyushPatel101)* - [`087f70b`](https://github.com/tobymao/sqlglot/commit/087f70b8f7a0c7a7858580a20b0d7542a0b53c6b) - **tsql**: datepart when part is quoted *(PR [#6934](https://github.com/tobymao/sqlglot/pull/6934) by [@flow3d](https://github.com/flow3d))* - [`bdce7c7`](https://github.com/tobymao/sqlglot/commit/bdce7c722efa37d44ee1ba85aa4c77f958e0b19f) - **optimizer**: Annotate DAYOFMONTH(expr), DAYOFYEAR(expr) for MySQL *(PR [#6941](https://github.com/tobymao/sqlglot/pull/6941) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`edbbb59`](https://github.com/tobymao/sqlglot/commit/edbbb597998dbdb77fa89e9a98d6ae56f0915b00) - **optimizer**: Annotate WEEK(expr) for MySQL *(PR [#6942](https://github.com/tobymao/sqlglot/pull/6942) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`686ab6a`](https://github.com/tobymao/sqlglot/commit/686ab6ab6da7a1bc7652846989aedd014a9a6d41) - **optimizer**: Use replace instead of set for JSON dot access identifiers *(PR [#6953](https://github.com/tobymao/sqlglot/pull/6953) by [@georgesittas](https://github.com/georgesittas))* ## [v28.9.0] - 2026-02-02 ### :boom: BREAKING CHANGES - due to [`e9ff474`](https://github.com/tobymao/sqlglot/commit/e9ff4743e63c332ae8a4a101f976d4909918992a) - Annotate MINUTE, MONTH for DuckDB *(PR [#6919](https://github.com/tobymao/sqlglot/pull/6919) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MINUTE, MONTH for DuckDB (#6919) - due to [`96dc339`](https://github.com/tobymao/sqlglot/commit/96dc339e0811c70dd12f92c297d2ff25456c71b7) - Annotate DAYOFWEEK, DAYOFYEAR for DuckDB *(PR [#6920](https://github.com/tobymao/sqlglot/pull/6920) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DAYOFWEEK, DAYOFYEAR for DuckDB (#6920) - due to [`b56f685`](https://github.com/tobymao/sqlglot/commit/b56f685193982590ea03b681cf542c0157e751d4) - Annotate DAY, HOUR, SECOND and DAYOFMONTH *(PR [#6922](https://github.com/tobymao/sqlglot/pull/6922) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DAY, HOUR, SECOND and DAYOFMONTH (#6922) - due to [`6e418ec`](https://github.com/tobymao/sqlglot/commit/6e418ecc95085e41a4fe4fed856bc3a08f4c46f8) - Annotate EPOCH(expr) for DuckDB *(PR [#6924](https://github.com/tobymao/sqlglot/pull/6924) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate EPOCH(expr) for DuckDB (#6924) - due to [`abd8d1b`](https://github.com/tobymao/sqlglot/commit/abd8d1bc5d892299dbe46a8208e23a4b2c1c833b) - Transpilation of SHA1 from Snowflake to DuckDB *(PR [#6888](https://github.com/tobymao/sqlglot/pull/6888) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation of SHA1 from Snowflake to DuckDB (#6888) - due to 
[`b2f5430`](https://github.com/tobymao/sqlglot/commit/b2f543030789ccf889fc6e065985fbeb821c26b7) - Annotate TO_DAYS(expr) for DuckDB *(PR [#6925](https://github.com/tobymao/sqlglot/pull/6925) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate TO_DAYS(expr) for DuckDB (#6925) ### :sparkles: New Features - [`e9ff474`](https://github.com/tobymao/sqlglot/commit/e9ff4743e63c332ae8a4a101f976d4909918992a) - **optimizer**: Annotate MINUTE, MONTH for DuckDB *(PR [#6919](https://github.com/tobymao/sqlglot/pull/6919) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`96dc339`](https://github.com/tobymao/sqlglot/commit/96dc339e0811c70dd12f92c297d2ff25456c71b7) - **optimizer**: Annotate DAYOFWEEK, DAYOFYEAR for DuckDB *(PR [#6920](https://github.com/tobymao/sqlglot/pull/6920) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b56f685`](https://github.com/tobymao/sqlglot/commit/b56f685193982590ea03b681cf542c0157e751d4) - **optimizer**: Annotate DAY, HOUR, SECOND and DAYOFMONTH *(PR [#6922](https://github.com/tobymao/sqlglot/pull/6922) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6e418ec`](https://github.com/tobymao/sqlglot/commit/6e418ecc95085e41a4fe4fed856bc3a08f4c46f8) - **optimizer**: Annotate EPOCH(expr) for DuckDB *(PR [#6924](https://github.com/tobymao/sqlglot/pull/6924) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`abd8d1b`](https://github.com/tobymao/sqlglot/commit/abd8d1bc5d892299dbe46a8208e23a4b2c1c833b) - **snowflake**: Transpilation of SHA1 from Snowflake to DuckDB *(PR [#6888](https://github.com/tobymao/sqlglot/pull/6888) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`b2f5430`](https://github.com/tobymao/sqlglot/commit/b2f543030789ccf889fc6e065985fbeb821c26b7) - **optimizer**: Annotate TO_DAYS(expr) for DuckDB *(PR [#6925](https://github.com/tobymao/sqlglot/pull/6925) by [@AbhishekASLK](https://github.com/AbhishekASLK))* ### :wrench: Chores - 
[`dfb2d3b`](https://github.com/tobymao/sqlglot/commit/dfb2d3b17d20f69536620976676a2b7248fdb699) - generate API docs before checking out api-docs branch *(PR [#6921](https://github.com/tobymao/sqlglot/pull/6921) by [@georgesittas](https://github.com/georgesittas))* ## [v28.8.0] - 2026-02-02 ### :boom: BREAKING CHANGES - due to [`9d2a12a`](https://github.com/tobymao/sqlglot/commit/9d2a12a650afcdaffe780144af26a0f21a6ec4e6) - Annotate SIN for DuckDB *(PR [#6892](https://github.com/tobymao/sqlglot/pull/6892) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SIN for DuckDB (#6892) - due to [`d8e13ae`](https://github.com/tobymao/sqlglot/commit/d8e13ae8c3f14495fd7ea356bf53e338e6a5347e) - Annotate COS for DuckDB *(PR [#6893](https://github.com/tobymao/sqlglot/pull/6893) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate COS for DuckDB (#6893) - due to [`25f718c`](https://github.com/tobymao/sqlglot/commit/25f718cea3a62034d6a5c263e80e5b0363e3f394) - Annotate STUFF for TSQL *(PR [#6890](https://github.com/tobymao/sqlglot/pull/6890) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate STUFF for TSQL (#6890) - due to [`bec45a5`](https://github.com/tobymao/sqlglot/commit/bec45a55377e9802fe5c572371834e12d760f180) - Annotate `ISINF(expr)` for DuckDB *(PR [#6894](https://github.com/tobymao/sqlglot/pull/6894) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `ISINF(expr)` for DuckDB (#6894) - due to [`aab8243`](https://github.com/tobymao/sqlglot/commit/aab8243a19d776c65473e67a2dcb1fb71af19175) - Annotate ISNAN(expr) for Base *(PR [#6895](https://github.com/tobymao/sqlglot/pull/6895) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ISNAN(expr) for Base (#6895) - due to [`482128e`](https://github.com/tobymao/sqlglot/commit/482128e30aa0d607b7e5fcd2bde142eefcf02c4a) - Annotate TAN for DuckDB *(PR [#6896](https://github.com/tobymao/sqlglot/pull/6896) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate TAN for 
DuckDB (#6896) - due to [`a6d7f6e`](https://github.com/tobymao/sqlglot/commit/a6d7f6e1ef9cd5d22598a3e21cc69162b07c28a1) - Annotate `COT` for DuckDB *(PR [#6897](https://github.com/tobymao/sqlglot/pull/6897) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `COT` for DuckDB (#6897) - due to [`2ec7c2b`](https://github.com/tobymao/sqlglot/commit/2ec7c2b4a58bad3e736d021e7e414d00e7c16187) - Annotate RANDOM() for DuckDB *(PR [#6898](https://github.com/tobymao/sqlglot/pull/6898) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate RANDOM() for DuckDB (#6898) - due to [`a0b053d`](https://github.com/tobymao/sqlglot/commit/a0b053d10c5d7303f0f335be8ffe235f5a8727d9) - Annotate ATAN(expr) for DuckDB *(PR [#6900](https://github.com/tobymao/sqlglot/pull/6900) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ATAN(expr) for DuckDB (#6900) - due to [`f39b514`](https://github.com/tobymao/sqlglot/commit/f39b514936e6188799bf1c392937050d8aef6ac8) - Annotate ASIN(expr) for DuckDB *(PR [#6901](https://github.com/tobymao/sqlglot/pull/6901) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ASIN(expr) for DuckDB (#6901) - due to [`5fb98a1`](https://github.com/tobymao/sqlglot/commit/5fb98a1a0106b2e4740f8ae72fabeb424dacd07e) - Annotate ACOS(expr) for DuckDB *(PR [#6902](https://github.com/tobymao/sqlglot/pull/6902) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ACOS(expr) for DuckDB (#6902) - due to [`9e95d95`](https://github.com/tobymao/sqlglot/commit/9e95d95578ac8cb07076322c9f099467f17efb3f) - Annotate ASINH(expr), ACOSH(expr), ATANH(expr) for DuckDB *(PR [#6903](https://github.com/tobymao/sqlglot/pull/6903) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ASINH(expr), ACOSH(expr), ATANH(expr) for DuckDB (#6903) - due to [`a8fef30`](https://github.com/tobymao/sqlglot/commit/a8fef30ed6760bd095bd2c6b156ea7cd80c322d0) - Annotate DEGREES(expr) for MySQL *(PR [#6913](https://github.com/tobymao/sqlglot/pull/6913) 
by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DEGREES(expr) for MySQL (#6913) - due to [`887d03a`](https://github.com/tobymao/sqlglot/commit/887d03af0fa10aef492cb54d8b48e5fc3a1ee6d1) - Annotate arc trigonometric func for MySQL *(PR [#6912](https://github.com/tobymao/sqlglot/pull/6912) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate arc trigonometric func for MySQL (#6912) - due to [`dc6bc5a`](https://github.com/tobymao/sqlglot/commit/dc6bc5af83dc6c2d9dee3ae82b7792fdc285450e) - Annotate SIN, COS, TAN, COT for MySQL *(PR [#6911](https://github.com/tobymao/sqlglot/pull/6911) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SIN, COS, TAN, COT for MySQL (#6911) - due to [`6cae76f`](https://github.com/tobymao/sqlglot/commit/6cae76fb6de11b2b49db0bb5409495ddb676da05) - Annotate `SECOND(expr)` to `INT` *(PR [#6910](https://github.com/tobymao/sqlglot/pull/6910) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `SECOND(expr)` to `INT` (#6910) - due to [`bd595a6`](https://github.com/tobymao/sqlglot/commit/bd595a6afb724ed1e5ca64122bf1dd69a3adc473) - Annotate QUARTER(expr) for DuckDB *(PR [#6905](https://github.com/tobymao/sqlglot/pull/6905) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate QUARTER(expr) for DuckDB (#6905) - due to [`8e625b5`](https://github.com/tobymao/sqlglot/commit/8e625b5fae51659c2317a9c7a732114e047da9e0) - Annotate ATAN2 for DuckDB *(PR [#6904](https://github.com/tobymao/sqlglot/pull/6904) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ATAN2 for DuckDB (#6904) - due to [`ea678d2`](https://github.com/tobymao/sqlglot/commit/ea678d26dee0bfb223660b587744c6635c036f2f) - support transpilation of CURRENT_TIME from snowflake to duckdb *(PR [#6909](https://github.com/tobymao/sqlglot/pull/6909) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of CURRENT_TIME from snowflake to duckdb (#6909) - due to 
[`8e4f4b3`](https://github.com/tobymao/sqlglot/commit/8e4f4b386cd5f7484bbe32c9d8921e2fef4b02c1) - Annotate QUARTER(expr) to INT instead of TINYINT *(PR [#6906](https://github.com/tobymao/sqlglot/pull/6906) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate QUARTER(expr) to INT instead of TINYINT (#6906) - due to [`a94e45a`](https://github.com/tobymao/sqlglot/commit/a94e45a3c55f744b20c35c2a5cc61bab0a3678d7) - Annotate MONTH(expr) to INT instead of TINYINT *(PR [#6907](https://github.com/tobymao/sqlglot/pull/6907) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MONTH(expr) to INT instead of TINYINT (#6907) - due to [`db51b75`](https://github.com/tobymao/sqlglot/commit/db51b7517229df2c6cf446962a9732e548a168f5) - Moved `YEAR`, `QUARTER`, `WEEK` to snowflake *(PR [#6918](https://github.com/tobymao/sqlglot/pull/6918) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Moved `YEAR`, `QUARTER`, `WEEK` to snowflake (#6918) ### :sparkles: New Features - [`9d2a12a`](https://github.com/tobymao/sqlglot/commit/9d2a12a650afcdaffe780144af26a0f21a6ec4e6) - **duckdb**: Annotate SIN for DuckDB *(PR [#6892](https://github.com/tobymao/sqlglot/pull/6892) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`d8e13ae`](https://github.com/tobymao/sqlglot/commit/d8e13ae8c3f14495fd7ea356bf53e338e6a5347e) - **optimizer**: Annotate COS for DuckDB *(PR [#6893](https://github.com/tobymao/sqlglot/pull/6893) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`25f718c`](https://github.com/tobymao/sqlglot/commit/25f718cea3a62034d6a5c263e80e5b0363e3f394) - **tsql**: Annotate STUFF for TSQL *(PR [#6890](https://github.com/tobymao/sqlglot/pull/6890) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`bec45a5`](https://github.com/tobymao/sqlglot/commit/bec45a55377e9802fe5c572371834e12d760f180) - **optimizer**: Annotate `ISINF(expr)` for DuckDB *(PR [#6894](https://github.com/tobymao/sqlglot/pull/6894) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`aab8243`](https://github.com/tobymao/sqlglot/commit/aab8243a19d776c65473e67a2dcb1fb71af19175) - **optimizer**: Annotate ISNAN(expr) for Base *(PR [#6895](https://github.com/tobymao/sqlglot/pull/6895) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`482128e`](https://github.com/tobymao/sqlglot/commit/482128e30aa0d607b7e5fcd2bde142eefcf02c4a) - **optimizer**: Annotate TAN for DuckDB *(PR [#6896](https://github.com/tobymao/sqlglot/pull/6896) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a6d7f6e`](https://github.com/tobymao/sqlglot/commit/a6d7f6e1ef9cd5d22598a3e21cc69162b07c28a1) - **optimizer**: Annotate `COT` for DuckDB *(PR [#6897](https://github.com/tobymao/sqlglot/pull/6897) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2ec7c2b`](https://github.com/tobymao/sqlglot/commit/2ec7c2b4a58bad3e736d021e7e414d00e7c16187) - **optimizer**: Annotate RANDOM() for DuckDB *(PR [#6898](https://github.com/tobymao/sqlglot/pull/6898) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a0b053d`](https://github.com/tobymao/sqlglot/commit/a0b053d10c5d7303f0f335be8ffe235f5a8727d9) - **optimizer**: Annotate ATAN(expr) for DuckDB *(PR [#6900](https://github.com/tobymao/sqlglot/pull/6900) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`f39b514`](https://github.com/tobymao/sqlglot/commit/f39b514936e6188799bf1c392937050d8aef6ac8) - **optimizer**: Annotate ASIN(expr) for DuckDB *(PR [#6901](https://github.com/tobymao/sqlglot/pull/6901) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5fb98a1`](https://github.com/tobymao/sqlglot/commit/5fb98a1a0106b2e4740f8ae72fabeb424dacd07e) - **optimizer**: Annotate ACOS(expr) for DuckDB *(PR [#6902](https://github.com/tobymao/sqlglot/pull/6902) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`9e95d95`](https://github.com/tobymao/sqlglot/commit/9e95d95578ac8cb07076322c9f099467f17efb3f) - **optimizer**: Annotate ASINH(expr), ACOSH(expr), ATANH(expr) for DuckDB *(PR 
[#6903](https://github.com/tobymao/sqlglot/pull/6903) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a8fef30`](https://github.com/tobymao/sqlglot/commit/a8fef30ed6760bd095bd2c6b156ea7cd80c322d0) - **optimizer**: Annotate DEGREES(expr) for MySQL *(PR [#6913](https://github.com/tobymao/sqlglot/pull/6913) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`887d03a`](https://github.com/tobymao/sqlglot/commit/887d03af0fa10aef492cb54d8b48e5fc3a1ee6d1) - **optimizer**: Annotate arc trigonometric func for MySQL *(PR [#6912](https://github.com/tobymao/sqlglot/pull/6912) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`dc6bc5a`](https://github.com/tobymao/sqlglot/commit/dc6bc5af83dc6c2d9dee3ae82b7792fdc285450e) - **optimizer**: Annotate SIN, COS, TAN, COT for MySQL *(PR [#6911](https://github.com/tobymao/sqlglot/pull/6911) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6cae76f`](https://github.com/tobymao/sqlglot/commit/6cae76fb6de11b2b49db0bb5409495ddb676da05) - **mysql**: Annotate `SECOND(expr)` to `INT` *(PR [#6910](https://github.com/tobymao/sqlglot/pull/6910) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`bd595a6`](https://github.com/tobymao/sqlglot/commit/bd595a6afb724ed1e5ca64122bf1dd69a3adc473) - **optimizer**: Annotate QUARTER(expr) for DuckDB *(PR [#6905](https://github.com/tobymao/sqlglot/pull/6905) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8e625b5`](https://github.com/tobymao/sqlglot/commit/8e625b5fae51659c2317a9c7a732114e047da9e0) - **optimizer**: Annotate ATAN2 for DuckDB *(PR [#6904](https://github.com/tobymao/sqlglot/pull/6904) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`ea678d2`](https://github.com/tobymao/sqlglot/commit/ea678d26dee0bfb223660b587744c6635c036f2f) - **snowflake**: support transpilation of CURRENT_TIME from snowflake to duckdb *(PR [#6909](https://github.com/tobymao/sqlglot/pull/6909) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - 
[`db51b75`](https://github.com/tobymao/sqlglot/commit/db51b7517229df2c6cf446962a9732e548a168f5) - **optimizer**: Moved `YEAR`, `QUARTER`, `WEEK` to snowflake *(PR [#6918](https://github.com/tobymao/sqlglot/pull/6918) by [@AbhishekASLK](https://github.com/AbhishekASLK))* ### :bug: Bug Fixes - [`8e4f4b3`](https://github.com/tobymao/sqlglot/commit/8e4f4b386cd5f7484bbe32c9d8921e2fef4b02c1) - **optimizer**: Annotate QUARTER(expr) to INT instead of TINYINT *(PR [#6906](https://github.com/tobymao/sqlglot/pull/6906) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a94e45a`](https://github.com/tobymao/sqlglot/commit/a94e45a3c55f744b20c35c2a5cc61bab0a3678d7) - **mysql**: Annotate MONTH(expr) to INT instead of TINYINT *(PR [#6907](https://github.com/tobymao/sqlglot/pull/6907) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`1fd5914`](https://github.com/tobymao/sqlglot/commit/1fd591403ad306912ac448a761540662c7a7f487) - **parser**: Literal number strings *(PR [#6916](https://github.com/tobymao/sqlglot/pull/6916) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6908](https://github.com/tobymao/sqlglot/issues/6908) opened by [@Matt711](https://github.com/Matt711)* - [`0a065be`](https://github.com/tobymao/sqlglot/commit/0a065be1e00739f47f52166b7cbc890f1a4aea41) - **postgres**: Allow reserved tokens too in EXCLUDE WITH constraint *(PR [#6917](https://github.com/tobymao/sqlglot/pull/6917) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6914](https://github.com/tobymao/sqlglot/issues/6914) opened by [@Badg](https://github.com/Badg)* ### :wrench: Chores - [`a65c870`](https://github.com/tobymao/sqlglot/commit/a65c8701a30652bfadd4d39cf729a9e13c1fa769) - add CLAUDE.md to document guidelines for SQLGlot coding *(PR [#6899](https://github.com/tobymao/sqlglot/pull/6899) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* ## [v28.7.0] - 2026-01-30 ### :boom: BREAKING CHANGES - due to 
[`ed4ba08`](https://github.com/tobymao/sqlglot/commit/ed4ba08940212f7ed9b67ea01b51f8df38fe85d2) - add support for Bitwise NOT *(PR [#6740](https://github.com/tobymao/sqlglot/pull/6740) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add support for Bitwise NOT (#6740) - due to [`894c581`](https://github.com/tobymao/sqlglot/commit/894c5817fea304b16589710f266b3176f768aab6) - annotate cot for spark and dbx *(PR [#6739](https://github.com/tobymao/sqlglot/pull/6739) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate cot for spark and dbx (#6739) - due to [`cc18c55`](https://github.com/tobymao/sqlglot/commit/cc18c55c0acf0546607187e8910cdd2a9559f15f) - add COSH function annotation for Hive and related dialects *(PR [#6738](https://github.com/tobymao/sqlglot/pull/6738) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add COSH function annotation for Hive and related dialects (#6738) - due to [`03dd8bd`](https://github.com/tobymao/sqlglot/commit/03dd8bd6ec9bdf1a8dfe77130bb1eb968d3cf3d8) - add SINH function annotation for Hive and related dialects *(PR [#6736](https://github.com/tobymao/sqlglot/pull/6736) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add SINH function annotation for Hive and related dialects (#6736) - due to [`1c9cf7b`](https://github.com/tobymao/sqlglot/commit/1c9cf7bfed3f819957110964f5c44794a3e9a8bb) - annotate snowflake ARRAY_COMPACT *(PR [#6735](https://github.com/tobymao/sqlglot/pull/6735) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate snowflake ARRAY_COMPACT (#6735) - due to [`9b1a634`](https://github.com/tobymao/sqlglot/commit/9b1a6343e2ed862241d5e1a7aee8e766e74c83eb) - cast APPROX_QUANTILE results to DOUBLE to respect Snowflake's typing during transpilation *(PR [#6734](https://github.com/tobymao/sqlglot/pull/6734) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: cast APPROX_QUANTILE results to DOUBLE to respect Snowflake's typing during transpilation 
(#6734) - due to [`f644541`](https://github.com/tobymao/sqlglot/commit/f644541b2b27896f370e253ac4b5751ac5892f28) - add TO_BINARY function annotation for Spark and DBX dialect *(PR [#6743](https://github.com/tobymao/sqlglot/pull/6743) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add TO_BINARY function annotation for Spark and DBX dialect (#6743) - due to [`aeaa43d`](https://github.com/tobymao/sqlglot/commit/aeaa43d16fb3fc01f3d4297badf3953c6d18ae9c) - Preserve key name in STRUCT for all identifiers *(PR [#6744](https://github.com/tobymao/sqlglot/pull/6744) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve key name in STRUCT for all identifiers (#6744) - due to [`27a7b68`](https://github.com/tobymao/sqlglot/commit/27a7b6838d7a06d3ba335a937f7b158415c28b40) - add annotation for ACOS function *(PR [#6747](https://github.com/tobymao/sqlglot/pull/6747) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add annotation for ACOS function (#6747) - due to [`7b5279c`](https://github.com/tobymao/sqlglot/commit/7b5279ccda0bd8947b9b244c221f03883e8865cf) - Fix optimizer for generate series *(PR [#6679](https://github.com/tobymao/sqlglot/pull/6679) by [@chrisqu777](https://github.com/chrisqu777))*: Fix optimizer for generate series (#6679) - due to [`a5ccfbb`](https://github.com/tobymao/sqlglot/commit/a5ccfbb1dd2fe7a1738e36fbc15dafdd00d25036) - add SHA function annotations for Hive *(PR [#6750](https://github.com/tobymao/sqlglot/pull/6750) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add SHA function annotations for Hive (#6750) - due to [`2191273`](https://github.com/tobymao/sqlglot/commit/219127309652ecd5a32940b09a29e10a00171866) - Transpilation support for Snowflake's BITMAP_CONSTRUCT_AGG function to DuckDB *(PR [#6745](https://github.com/tobymao/sqlglot/pull/6745) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation support for Snowflake's BITMAP_CONSTRUCT_AGG function to DuckDB (#6745) - due to 
[`ee0b213`](https://github.com/tobymao/sqlglot/commit/ee0b21355106861c74c3f67de5c1e6b0bb2a7f15) - Annotate RANDN function for Spark and DBX *(PR [#6751](https://github.com/tobymao/sqlglot/pull/6751) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate RANDN function for Spark and DBX (#6751) - due to [`e643817`](https://github.com/tobymao/sqlglot/commit/e6438170298e8dd90ccc3debe5065af7e0bcaa5e) - Annotate `SPACE` function to Hive *(PR [#6752](https://github.com/tobymao/sqlglot/pull/6752) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `SPACE` function to Hive (#6752) - due to [`48336d0`](https://github.com/tobymao/sqlglot/commit/48336d00d2ad15ba1056868aab99d7e8f9ddb496) - Exclude table-valued functions from unnest_subqueries *(PR [#6755](https://github.com/tobymao/sqlglot/pull/6755) by [@VaggelisD](https://github.com/VaggelisD))*: Exclude table-valued functions from unnest_subqueries (#6755) - due to [`6befad0`](https://github.com/tobymao/sqlglot/commit/6befad02d724a46feda8145d4ce092a534c18d99) - Annotate `BIT_LENGTH` for Spark and DBX *(PR [#6754](https://github.com/tobymao/sqlglot/pull/6754) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `BIT_LENGTH` for Spark and DBX (#6754) - due to [`076058d`](https://github.com/tobymao/sqlglot/commit/076058d9d808cda6b6ca08138afa7f26ee9f6a7c) - Annotate `SHA1` and `SHA256` function for DuckDB *(PR [#6753](https://github.com/tobymao/sqlglot/pull/6753) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `SHA1` and `SHA256` function for DuckDB (#6753) - due to [`ff1b5da`](https://github.com/tobymao/sqlglot/commit/ff1b5da9a0f69b664064155bc51ff41d1c928204) - Annotate KURTOSIS function *(PR [#6757](https://github.com/tobymao/sqlglot/pull/6757) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate KURTOSIS function (#6757) - due to [`af008bd`](https://github.com/tobymao/sqlglot/commit/af008bd51482c69b2c0c9ef01008ec0e657d6c9b) - Annotate SIN, COS, TAN for Hive and 
inherited dialects *(PR [#6759](https://github.com/tobymao/sqlglot/pull/6759) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SIN, COS, TAN for Hive and inherited dialects (#6759) - due to [`700fbe9`](https://github.com/tobymao/sqlglot/commit/700fbe9b648339342ef60e1ed2ec729de24b6229) - Annotate CORR for Hive and inherited dialects *(PR [#6769](https://github.com/tobymao/sqlglot/pull/6769) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CORR for Hive and inherited dialects (#6769) - due to [`b87be58`](https://github.com/tobymao/sqlglot/commit/b87be5878524bf82df804926a59c2ffd94fa5adc) - Annotate `SEC` for Spark and DBX *(PR [#6768](https://github.com/tobymao/sqlglot/pull/6768) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `SEC` for Spark and DBX (#6768) - due to [`5a594ed`](https://github.com/tobymao/sqlglot/commit/5a594edd0bc079ef8e2e27ee07033b1bb5bcbd3e) - Annotate ATANH for Spark and DBX *(PR [#6767](https://github.com/tobymao/sqlglot/pull/6767) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ATANH for Spark and DBX (#6767) - due to [`1f9672f`](https://github.com/tobymao/sqlglot/commit/1f9672f390d05754260797beed0a5b1e0ea76358) - Annotate `ATAN` for Hive and inherited dialects *(PR [#6766](https://github.com/tobymao/sqlglot/pull/6766) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `ATAN` for Hive and inherited dialects (#6766) - due to [`38d6816`](https://github.com/tobymao/sqlglot/commit/38d6816e39c07c50f6e5c0b9f4763091b54f9e19) - Support type inference for BQ SAFE functions *(PR [#6765](https://github.com/tobymao/sqlglot/pull/6765) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: Support type inference for BQ SAFE functions (#6765) - due to [`c89a127`](https://github.com/tobymao/sqlglot/commit/c89a127008f581a2ca49132a171e2b51d6e1e7b2) - Implements transpilation for IS_NULL_VALUE *(PR [#6756](https://github.com/tobymao/sqlglot/pull/6756) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Implements transpilation for IS_NULL_VALUE (#6756) - due to [`428d676`](https://github.com/tobymao/sqlglot/commit/428d6766bc752ed2beb363936584766964da6bcc) - avoid redundant cast when transpiling trunc from snowflake *(PR [#6771](https://github.com/tobymao/sqlglot/pull/6771) by [@georgesittas](https://github.com/georgesittas))*: avoid redundant cast when transpiling trunc from snowflake (#6771) - due to [`8163ffa`](https://github.com/tobymao/sqlglot/commit/8163ffa2438e98567be67610ea33918489d36d18) - Implements transpilation for Snowflake's EQUAL_NULL *(PR [#6763](https://github.com/tobymao/sqlglot/pull/6763) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Implements transpilation for Snowflake's EQUAL_NULL (#6763) - due to [`4c01cbe`](https://github.com/tobymao/sqlglot/commit/4c01cbe8ff020d7d52e399de56874a99797e2484) - Annotate CBRT for Hive and inherited dialects *(PR [#6772](https://github.com/tobymao/sqlglot/pull/6772) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CBRT for Hive and inherited dialects (#6772) - due to [`237aec0`](https://github.com/tobymao/sqlglot/commit/237aec0c53c8417e628d4b4ecd7ad4436843b55d) - Annotate CURRENT_CATALOG() for Hive, Spark, and DBX *(PR [#6773](https://github.com/tobymao/sqlglot/pull/6773) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CURRENT_CATALOG() for Hive, Spark, and DBX (#6773) - due to [`82d533e`](https://github.com/tobymao/sqlglot/commit/82d533ea5bde8637c71520334174a6fa04ad021d) - Annotate CURRENT_DATABASE() for Hive, Spark and DBX *(PR [#6774](https://github.com/tobymao/sqlglot/pull/6774) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CURRENT_DATABASE() for Hive, Spark and DBX (#6774) - due to [`69c874f`](https://github.com/tobymao/sqlglot/commit/69c874f317621c572c9b5f91e563f50afaa38bba) - properly support safe functions *(PR [#6775](https://github.com/tobymao/sqlglot/pull/6775) 
by [@georgesittas](https://github.com/georgesittas))*: properly support safe functions (#6775) - due to [`b5674f6`](https://github.com/tobymao/sqlglot/commit/b5674f6371aeac02716085095d9edb22e559aaa8) - robust correlated subqueries annotation *(PR [#6764](https://github.com/tobymao/sqlglot/pull/6764) by [@geooo109](https://github.com/geooo109))*: robust correlated subqueries annotation (#6764) - due to [`8634a8a`](https://github.com/tobymao/sqlglot/commit/8634a8a737d5ee6c40b4d33545e9f928e1e07df4) - Added Snowflake to DuckDB transpilation for EXTRACT *(PR [#6706](https://github.com/tobymao/sqlglot/pull/6706) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Added Snowflake to DuckDB transpilation for EXTRACT (#6706) - due to [`8cda928`](https://github.com/tobymao/sqlglot/commit/8cda928b3807685f6de5e89eac12522433cfddd6) - ARRAY_APPEND null propagation *(PR [#6762](https://github.com/tobymao/sqlglot/pull/6762) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: ARRAY_APPEND null propagation (#6762) - due to [`2ab4376`](https://github.com/tobymao/sqlglot/commit/2ab43769092840da802ece227d4c13cc95a2108a) - Annotate CURRENT_USER() for Hive, Spark and DBX *(PR [#6790](https://github.com/tobymao/sqlglot/pull/6790) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate CURRENT_USER() for Hive, Spark and DBX (#6790) - due to [`ee08d77`](https://github.com/tobymao/sqlglot/commit/ee08d777eb79e8632d3a23e40095fd1f760a77a6) - resolve parsing issue in substr with FROM/FOR syntax *(PR [#6791](https://github.com/tobymao/sqlglot/pull/6791) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: resolve parsing issue in substr with FROM/FOR syntax (#6791) - due to [`fca6a94`](https://github.com/tobymao/sqlglot/commit/fca6a947c27959d4b8f6f3453a941d31ceccf5a4) - Annotate CURRENT_SCHEMA() for Hive, Spark and DBX *(PR [#6792](https://github.com/tobymao/sqlglot/pull/6792) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate 
CURRENT_SCHEMA() for Hive, Spark and DBX (#6792) - due to [`d6ecc73`](https://github.com/tobymao/sqlglot/commit/d6ecc7367783d81aab7b6341cd3400ff0edf4794) - add support for grouping_id() *(PR [#6793](https://github.com/tobymao/sqlglot/pull/6793) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add support for grouping_id() (#6793) - due to [`7f4a150`](https://github.com/tobymao/sqlglot/commit/7f4a1502dd6e039677979b67958e618a15867ed5) - parse and annotate bq NET.REG_DOMAIN *(PR [#6777](https://github.com/tobymao/sqlglot/pull/6777) by [@geooo109](https://github.com/geooo109))*: parse and annotate bq NET.REG_DOMAIN (#6777) - due to [`ce0bbcf`](https://github.com/tobymao/sqlglot/commit/ce0bbcf0d6d85c59827438bd711e9aa59ac1d9ef) - Annotate MONTHNAME for Spark and DBX *(PR [#6794](https://github.com/tobymao/sqlglot/pull/6794) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MONTHNAME for Spark and DBX (#6794) - due to [`bc0a43c`](https://github.com/tobymao/sqlglot/commit/bc0a43cc83e21763d12ca671b03392ce555ce14b) - Annotate MONTH for Hive, Spark and DBX *(PR [#6795](https://github.com/tobymao/sqlglot/pull/6795) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MONTH for Hive, Spark and DBX (#6795) - due to [`86ca0b6`](https://github.com/tobymao/sqlglot/commit/86ca0b6bf757e77ded99ffaaed641f8a092d6354) - Annotate MONTHS_BETWEEN for Hive, Spark and DBX *(PR [#6796](https://github.com/tobymao/sqlglot/pull/6796) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MONTHS_BETWEEN for Hive, Spark and DBX (#6796) - due to [`590bcf1`](https://github.com/tobymao/sqlglot/commit/590bcf1de5e6ad15188224f5e2c1dce0398a9ecd) - Annotate DATE_FROM_UNIX_DATE for Spark and DBX *(PR [#6797](https://github.com/tobymao/sqlglot/pull/6797) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DATE_FROM_UNIX_DATE for Spark and DBX (#6797) - due to [`7e3df62`](https://github.com/tobymao/sqlglot/commit/7e3df62f3e480f9e42c055a00b1113f219348561) - 
parse DISTINCT as separate arg from quantile for PERCENTILE func *(PR [#6799](https://github.com/tobymao/sqlglot/pull/6799) by [@geooo109](https://github.com/geooo109))*: parse DISTINCT as separate arg from quantile for PERCENTILE func (#6799) - due to [`a2c4b08`](https://github.com/tobymao/sqlglot/commit/a2c4b08468729cbb1bd39545630a8feca9643b10) - Annotate UNHEX for Hive, Spark and DBX *(PR [#6800](https://github.com/tobymao/sqlglot/pull/6800) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate UNHEX for Hive, Spark and DBX (#6800) - due to [`3ab49b6`](https://github.com/tobymao/sqlglot/commit/3ab49b64d97b4212696e207e38ecd647421ada2b) - Annotate ASIN for Hive, Spark and DBX *(PR [#6807](https://github.com/tobymao/sqlglot/pull/6807) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ASIN for Hive, Spark and DBX (#6807) - due to [`d86e28a`](https://github.com/tobymao/sqlglot/commit/d86e28a05ea068f4a254883714f352ebee89ea55) - Annotate ASINH for Spark and DBX *(PR [#6808](https://github.com/tobymao/sqlglot/pull/6808) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ASINH for Spark and DBX (#6808) - due to [`8fe8d64`](https://github.com/tobymao/sqlglot/commit/8fe8d64373ec3cad3b4e519c728e1674daa64dac) - ARRAY_PREPEND null propagation *(PR [#6809](https://github.com/tobymao/sqlglot/pull/6809) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: ARRAY_PREPEND null propagation (#6809) - due to [`99b2b6a`](https://github.com/tobymao/sqlglot/commit/99b2b6ae0c6746ae8456968c98d96a31ac51d26a) - robust parsing of ALL/DISTINCT for PERCENTILE_APPROX func *(PR [#6812](https://github.com/tobymao/sqlglot/pull/6812) by [@geooo109](https://github.com/geooo109))*: robust parsing of ALL/DISTINCT for PERCENTILE_APPROX func (#6812) - due to [`e6eff62`](https://github.com/tobymao/sqlglot/commit/e6eff62309b51192e732e0ae18eaa5cda5a7257a) - Annotate `GET_BIT` for DuckDB *(PR [#6816](https://github.com/tobymao/sqlglot/pull/6816) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `GET_BIT` for DuckDB (#6816) - due to [`70c2097`](https://github.com/tobymao/sqlglot/commit/70c2097ba2a26e5bad04347c8cc974b5056f2c19) - Annotate DAYNAME for Base Dialect *(PR [#6817](https://github.com/tobymao/sqlglot/pull/6817) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DAYNAME for Base Dialect (#6817) - due to [`2d5c3aa`](https://github.com/tobymao/sqlglot/commit/2d5c3aabdec756c3fe43392cb26181856acea7d6) - Annotate `CBRT` for Base Dialect *(PR [#6819](https://github.com/tobymao/sqlglot/pull/6819) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `CBRT` for Base Dialect (#6819) - due to [`0a478ad`](https://github.com/tobymao/sqlglot/commit/0a478adf096f4890f03991e2bd33257d0c2d3ad4) - introduce `BYTE_STRING_ESCAPES` concept for postgres/duckdb e-strings *(PR [#6818](https://github.com/tobymao/sqlglot/pull/6818) by [@georgesittas](https://github.com/georgesittas))*: introduce `BYTE_STRING_ESCAPES` concept for postgres/duckdb e-strings (#6818) - due to [`5673b09`](https://github.com/tobymao/sqlglot/commit/5673b09dd899289f866df3c30bc9b435ba30d34f) - support transpilation of try_to_date from snowflake to duckdb *(PR [#6806](https://github.com/tobymao/sqlglot/pull/6806) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of try_to_date from snowflake to duckdb (#6806) - due to [`c229079`](https://github.com/tobymao/sqlglot/commit/c229079299f3d79fdc8f5bbd9506f6594bbdbe12) - support transpilation try_to_double snowflake to duck db *(PR [#6821](https://github.com/tobymao/sqlglot/pull/6821) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation try_to_double snowflake to duck db (#6821) - due to [`dcf2cb9`](https://github.com/tobymao/sqlglot/commit/dcf2cb98ecef098368d1c4aa6d12164c341ea227) - annotate fields of UNNEST(STRUCT) with ALIAS for bq *(PR [#6830](https://github.com/tobymao/sqlglot/pull/6830) by 
[@geooo109](https://github.com/geooo109))*: annotate fields of UNNEST(STRUCT) with ALIAS for bq (#6830) - due to [`378349d`](https://github.com/tobymao/sqlglot/commit/378349ddbe738438e5ad565f6f7d243ee779815a) - Annotate SOUNDEX for Hive, Spark and DBX *(PR [#6832](https://github.com/tobymao/sqlglot/pull/6832) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SOUNDEX for Hive, Spark and DBX (#6832) - due to [`3972a6c`](https://github.com/tobymao/sqlglot/commit/3972a6cc9c6a0b7fcb13948cbc56d5e48502552b) - Transpile BASE64_ENCODE from Snowflake to DuckDB *(PR [#6826](https://github.com/tobymao/sqlglot/pull/6826) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpile BASE64_ENCODE from Snowflake to DuckDB (#6826) - due to [`a9daff7`](https://github.com/tobymao/sqlglot/commit/a9daff7028112aba5a4023c11d9dd96a4dba3d92) - Transpilation of Snowflake SEQ1/2/4/8 and GENERATOR to DuckDB *(PR [#6810](https://github.com/tobymao/sqlglot/pull/6810) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation of Snowflake SEQ1/2/4/8 and GENERATOR to DuckDB (#6810) - due to [`317d496`](https://github.com/tobymao/sqlglot/commit/317d4968938b8df301d4e55cfdd96af1a304f88d) - Annotate SESSION_USER() for Spark and DBX *(PR [#6834](https://github.com/tobymao/sqlglot/pull/6834) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SESSION_USER() for Spark and DBX (#6834) - due to [`98cc685`](https://github.com/tobymao/sqlglot/commit/98cc685253e8011b9d4e2e78137a8b505192724f) - Annotate FACTORIAL(expr) for Hive, Spark and DBX *(PR [#6835](https://github.com/tobymao/sqlglot/pull/6835) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate FACTORIAL(expr) for Hive, Spark and DBX (#6835) - due to [`f8024e0`](https://github.com/tobymao/sqlglot/commit/f8024e0ae3fe66076e78295868934203b03a7d49) - Annotate QUARTER for Hive, Spark and DBX *(PR [#6840](https://github.com/tobymao/sqlglot/pull/6840) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate QUARTER for Hive, Spark and DBX (#6840) - due to [`b3b36ba`](https://github.com/tobymao/sqlglot/commit/b3b36baf86f705901cf1b3509203203772907879) - robust representation of negative numbers *(PR [#6833](https://github.com/tobymao/sqlglot/pull/6833) by [@geooo109](https://github.com/geooo109))*: robust representation of negative numbers (#6833) - due to [`6ebe5cc`](https://github.com/tobymao/sqlglot/commit/6ebe5cc397c598e865c360f89097271c11173af1) - ARRAY_CAT null propagation *(PR [#6829](https://github.com/tobymao/sqlglot/pull/6829) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: ARRAY_CAT null propagation (#6829) - due to [`36211c2`](https://github.com/tobymao/sqlglot/commit/36211c223f062519a1b561dfb23df56fb11a39fc) - transpile BASE64_DECODE_STRING/BINARY to DuckDB *(PR [#6837](https://github.com/tobymao/sqlglot/pull/6837) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpile BASE64_DECODE_STRING/BINARY to DuckDB (#6837) - due to [`541abfe`](https://github.com/tobymao/sqlglot/commit/541abfe0afc8f3e080746bfa87fea99abf07fb1c) - annotate type for bq DATE_ADD *(PR [#6842](https://github.com/tobymao/sqlglot/pull/6842) by [@geooo109](https://github.com/geooo109))*: annotate type for bq DATE_ADD (#6842) - due to [`8797e12`](https://github.com/tobymao/sqlglot/commit/8797e124900a31a4701ba425ae56773acf503471) - add support for transpiling ARRAY_COMPACT *(PR [#6839](https://github.com/tobymao/sqlglot/pull/6839) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: add support for transpiling ARRAY_COMPACT (#6839) - due to [`89f583a`](https://github.com/tobymao/sqlglot/commit/89f583a35f36ff9a1caab760273576e05b926572) - Annotate SECOND for Hive, Spark and DBX *(PR [#6853](https://github.com/tobymao/sqlglot/pull/6853) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SECOND for Hive, Spark and DBX (#6853) - due to 
[`3852e1d`](https://github.com/tobymao/sqlglot/commit/3852e1d6e9bab29b4a9678d06e0583d38232165f) - Annotate ARRAY_SIZE(array) correctly for Spark and DBX *(PR [#6852](https://github.com/tobymao/sqlglot/pull/6852) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ARRAY_SIZE(array) correctly for Spark and DBX (#6852) - due to [`4a7e5a1`](https://github.com/tobymao/sqlglot/commit/4a7e5a1050b3704295dab11aaec22b238be1659d) - Transpilation for Snowflake EDITDISTANCE to Duckdb *(PR [#6846](https://github.com/tobymao/sqlglot/pull/6846) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation for Snowflake EDITDISTANCE to Duckdb (#6846) - due to [`6f3b76b`](https://github.com/tobymao/sqlglot/commit/6f3b76b94ea1fe76ef85291634cde53cc2408d02) - Annotate SIN, COS, TAN, COT for T-SQL *(PR [#6851](https://github.com/tobymao/sqlglot/pull/6851) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SIN, COS, TAN, COT for T-SQL (#6851) - due to [`f1c4fa0`](https://github.com/tobymao/sqlglot/commit/f1c4fa0c62dc33d850eac4db190320a651717f77) - input rounding issue when transpiling boolean logic functions to DuckDB *(PR [#6849](https://github.com/tobymao/sqlglot/pull/6849) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: input rounding issue when transpiling boolean logic functions to DuckDB (#6849) - due to [`eb9887f`](https://github.com/tobymao/sqlglot/commit/eb9887f4fcc7e8c9d48cee7bd78b4804fb215ae4) - support ATN2 function AST *(PR [#6862](https://github.com/tobymao/sqlglot/pull/6862) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support ATN2 function AST (#6862) - due to [`5922ba6`](https://github.com/tobymao/sqlglot/commit/5922ba69be7cc82f45439c818f2a1f2901fe6310) - Annotate inverse trigonometric functions for `TSQL` *(PR [#6865](https://github.com/tobymao/sqlglot/pull/6865) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate inverse trigonometric functions for `TSQL` (#6865) - due to 
[`fc55b98`](https://github.com/tobymao/sqlglot/commit/fc55b9889bcb1e0dad404dc15d357d8c755d85e6) - Transpilation of MINHASH functions from Snowflake to DuckDB *(PR [#6859](https://github.com/tobymao/sqlglot/pull/6859) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation of MINHASH functions from Snowflake to DuckDB (#6859) - due to [`3728646`](https://github.com/tobymao/sqlglot/commit/372864672b1f576d7e80d5b4df368742a79f8222) - Annotate `CURRENT_TIMEZONE()` for TSQL *(PR [#6871](https://github.com/tobymao/sqlglot/pull/6871) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate `CURRENT_TIMEZONE()` for TSQL (#6871) - due to [`c6bfe61`](https://github.com/tobymao/sqlglot/commit/c6bfe61c59f06c6ce7cdb93a65082cd0a81018ef) - improve some starrocks properties generation *(PR [#6827](https://github.com/tobymao/sqlglot/pull/6827) by [@jaogoy](https://github.com/jaogoy))*: improve some starrocks properties generation (#6827) - due to [`2103d1c`](https://github.com/tobymao/sqlglot/commit/2103d1c08dc36a7a6eb050149d730dcf2ea77dba) - Annotate MD5 for Hive, Spark and DBX *(PR [#6878](https://github.com/tobymao/sqlglot/pull/6878) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate MD5 for Hive, Spark and DBX (#6878) - due to [`6f49775`](https://github.com/tobymao/sqlglot/commit/6f49775edbae748fc3692e20562e5dad9d77b631) - Transpilation of ARRAY_CONSTRUCT_COMPACT to duckdb *(PR [#6875](https://github.com/tobymao/sqlglot/pull/6875) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: Transpilation of ARRAY_CONSTRUCT_COMPACT to duckdb (#6875) - due to [`37dc9c7`](https://github.com/tobymao/sqlglot/commit/37dc9c7b08169149af7fa7baa5cbf567a2688008) - support transpilation of ARRAY_INSERT *(PR [#6863](https://github.com/tobymao/sqlglot/pull/6863) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: support transpilation of ARRAY_INSERT (#6863) - due to 
[`3a769d4`](https://github.com/tobymao/sqlglot/commit/3a769d404ba35f4a9b26766e0b614d0e24763efc) - Added transpilation of Snowflake ARRAYS_ZIP to DuckDB *(PR [#6874](https://github.com/tobymao/sqlglot/pull/6874) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Added transpilation of Snowflake ARRAYS_ZIP to DuckDB (#6874) - due to [`9c39f08`](https://github.com/tobymao/sqlglot/commit/9c39f085924a9a59cd4a32322f3e45c710467563) - Annotate DAYOFWEEK for MySQL *(PR [#6885](https://github.com/tobymao/sqlglot/pull/6885) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate DAYOFWEEK for MySQL (#6885) - due to [`9a0aaab`](https://github.com/tobymao/sqlglot/commit/9a0aaab6277590966ce4258444649f573d88bd9e) - Annotate SOUNDEX(expr) for TSQL *(PR [#6887](https://github.com/tobymao/sqlglot/pull/6887) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate SOUNDEX(expr) for TSQL (#6887) - due to [`3e94c60`](https://github.com/tobymao/sqlglot/commit/3e94c603923ce27666ce23b0f5c985c93031b13e) - support transpilation of ARRAY_REMOVE *(PR [#6886](https://github.com/tobymao/sqlglot/pull/6886) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: support transpilation of ARRAY_REMOVE (#6886) - due to [`bce1b1f`](https://github.com/tobymao/sqlglot/commit/bce1b1f9a75db83b71eebc097065e8c8d5ee6051) - Transpilation support for Snowflake MAP_CAT to DuckDB *(PR [#6881](https://github.com/tobymao/sqlglot/pull/6881) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpilation support for Snowflake MAP_CAT to DuckDB (#6881) - due to [`b49a656`](https://github.com/tobymao/sqlglot/commit/b49a65696637937530eb0efe9b0de46c41a3436f) - Annotate FACTORIAL(expr) for DuckDB *(PR [#6891](https://github.com/tobymao/sqlglot/pull/6891) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate FACTORIAL(expr) for DuckDB (#6891) - due to [`6ce073b`](https://github.com/tobymao/sqlglot/commit/6ce073bec5864c562854cd5a9848dba56c79bdcc) - 
transpilation support for IS_ARRAY *(PR [#6877](https://github.com/tobymao/sqlglot/pull/6877) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpilation support for IS_ARRAY (#6877) - due to [`5f67a14`](https://github.com/tobymao/sqlglot/commit/5f67a149635cd000249eb1fc26b18493f29c4974) - bump sqlglotrs to 0.12.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.12.0 ### :sparkles: New Features - [`ed4ba08`](https://github.com/tobymao/sqlglot/commit/ed4ba08940212f7ed9b67ea01b51f8df38fe85d2) - **postgres**: add support for Bitwise NOT *(PR [#6740](https://github.com/tobymao/sqlglot/pull/6740) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - :arrow_lower_right: *addresses issue [#6730](https://github.com/tobymao/sqlglot/issues/6730) opened by [@Xynonners](https://github.com/Xynonners)* - [`894c581`](https://github.com/tobymao/sqlglot/commit/894c5817fea304b16589710f266b3176f768aab6) - **optimizer**: annotate cot for spark and dbx *(PR [#6739](https://github.com/tobymao/sqlglot/pull/6739) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`cc18c55`](https://github.com/tobymao/sqlglot/commit/cc18c55c0acf0546607187e8910cdd2a9559f15f) - **optimizer**: add COSH function annotation for Hive and related dialects *(PR [#6738](https://github.com/tobymao/sqlglot/pull/6738) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`03dd8bd`](https://github.com/tobymao/sqlglot/commit/03dd8bd6ec9bdf1a8dfe77130bb1eb968d3cf3d8) - **optimizer**: add SINH function annotation for Hive and related dialects *(PR [#6736](https://github.com/tobymao/sqlglot/pull/6736) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`9b1a634`](https://github.com/tobymao/sqlglot/commit/9b1a6343e2ed862241d5e1a7aee8e766e74c83eb) - **duckdb**: cast APPROX_QUANTILE results to DOUBLE to respect Snowflake's typing during transpilation *(PR [#6734](https://github.com/tobymao/sqlglot/pull/6734) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f644541`](https://github.com/tobymao/sqlglot/commit/f644541b2b27896f370e253ac4b5751ac5892f28) - **optimizer**: add TO_BINARY function annotation for Spark and DBX dialect *(PR [#6743](https://github.com/tobymao/sqlglot/pull/6743) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5a50d05`](https://github.com/tobymao/sqlglot/commit/5a50d05b363802e785c8f9d3b8d73a68c8b046b0) - **duckdb**: Add transpilation support for NEXT_DAY function *(PR [#6728](https://github.com/tobymao/sqlglot/pull/6728) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`27a7b68`](https://github.com/tobymao/sqlglot/commit/27a7b6838d7a06d3ba335a937f7b158415c28b40) - **optimizer**: add annotation for ACOS function *(PR [#6747](https://github.com/tobymao/sqlglot/pull/6747) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`c1995b2`](https://github.com/tobymao/sqlglot/commit/c1995b20b22bae4d1633050cdbc19be3f960b223) - **spark**: add ACOSH function annotation *(PR [#6748](https://github.com/tobymao/sqlglot/pull/6748) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a5ccfbb`](https://github.com/tobymao/sqlglot/commit/a5ccfbb1dd2fe7a1738e36fbc15dafdd00d25036) - **optimizer**: add SHA function annotations for Hive *(PR [#6750](https://github.com/tobymao/sqlglot/pull/6750) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2191273`](https://github.com/tobymao/sqlglot/commit/219127309652ecd5a32940b09a29e10a00171866) - **snowflake**: Transpilation support for Snowflake's BITMAP_CONSTRUCT_AGG function to DuckDB *(PR [#6745](https://github.com/tobymao/sqlglot/pull/6745) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ee0b213`](https://github.com/tobymao/sqlglot/commit/ee0b21355106861c74c3f67de5c1e6b0bb2a7f15) - **optimizer**: Annotate RANDN function for Spark and DBX *(PR [#6751](https://github.com/tobymao/sqlglot/pull/6751) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e643817`](https://github.com/tobymao/sqlglot/commit/e6438170298e8dd90ccc3debe5065af7e0bcaa5e) - **optimizer**: Annotate `SPACE` function to Hive *(PR [#6752](https://github.com/tobymao/sqlglot/pull/6752) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6befad0`](https://github.com/tobymao/sqlglot/commit/6befad02d724a46feda8145d4ce092a534c18d99) - **optimizer**: Annotate `BIT_LENGTH` for Spark and DBX *(PR [#6754](https://github.com/tobymao/sqlglot/pull/6754) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`076058d`](https://github.com/tobymao/sqlglot/commit/076058d9d808cda6b6ca08138afa7f26ee9f6a7c) - **optimizer**: Annotate `SHA1` and `SHA256` function for DuckDB *(PR [#6753](https://github.com/tobymao/sqlglot/pull/6753) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`ff1b5da`](https://github.com/tobymao/sqlglot/commit/ff1b5da9a0f69b664064155bc51ff41d1c928204) - **optimizer**: Annotate KURTOSIS function *(PR [#6757](https://github.com/tobymao/sqlglot/pull/6757) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`af008bd`](https://github.com/tobymao/sqlglot/commit/af008bd51482c69b2c0c9ef01008ec0e657d6c9b) - **optimizer**: Annotate SIN, COS, TAN for Hive and inherited dialects *(PR [#6759](https://github.com/tobymao/sqlglot/pull/6759) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b87be58`](https://github.com/tobymao/sqlglot/commit/b87be5878524bf82df804926a59c2ffd94fa5adc) - **optimizer**: Annotate `SEC` for Spark and DBX *(PR [#6768](https://github.com/tobymao/sqlglot/pull/6768) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5a594ed`](https://github.com/tobymao/sqlglot/commit/5a594edd0bc079ef8e2e27ee07033b1bb5bcbd3e) - **optimizer**: Annotate ATANH for Spark and DBX *(PR [#6767](https://github.com/tobymao/sqlglot/pull/6767) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`1f9672f`](https://github.com/tobymao/sqlglot/commit/1f9672f390d05754260797beed0a5b1e0ea76358) - **optimizer**: Annotate `ATAN` for Hive and inherited dialects *(PR [#6766](https://github.com/tobymao/sqlglot/pull/6766) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`38d6816`](https://github.com/tobymao/sqlglot/commit/38d6816e39c07c50f6e5c0b9f4763091b54f9e19) - **bigquery**: Support type inference for BQ SAFE functions *(PR [#6765](https://github.com/tobymao/sqlglot/pull/6765) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`c89a127`](https://github.com/tobymao/sqlglot/commit/c89a127008f581a2ca49132a171e2b51d6e1e7b2) - **snowflake**: Implements transpilation for IS_NULL_VALUE *(PR [#6756](https://github.com/tobymao/sqlglot/pull/6756) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`428d676`](https://github.com/tobymao/sqlglot/commit/428d6766bc752ed2beb363936584766964da6bcc) - **duckdb**: avoid redundant cast when transpiling trunc from snowflake *(PR [#6771](https://github.com/tobymao/sqlglot/pull/6771) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6770](https://github.com/tobymao/sqlglot/issues/6770) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`8163ffa`](https://github.com/tobymao/sqlglot/commit/8163ffa2438e98567be67610ea33918489d36d18) - **snowflake**: Implements transpilation for Snowflake's EQUAL_NULL *(PR [#6763](https://github.com/tobymao/sqlglot/pull/6763) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4245205`](https://github.com/tobymao/sqlglot/commit/42452050bfdeb77fe197ffddbec901f9a3ea2d8a) - **duckdb**: Add transpilation support for TIME_FROM_PARTS function for overflow case *(PR [#6761](https://github.com/tobymao/sqlglot/pull/6761) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - 
[`4c01cbe`](https://github.com/tobymao/sqlglot/commit/4c01cbe8ff020d7d52e399de56874a99797e2484) - **optimizer**: Annotate CBRT for Hive and inherited dialects *(PR [#6772](https://github.com/tobymao/sqlglot/pull/6772) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`237aec0`](https://github.com/tobymao/sqlglot/commit/237aec0c53c8417e628d4b4ecd7ad4436843b55d) - **optimizer**: Annotate CURRENT_CATALOG() for Hive, Spark, and DBX *(PR [#6773](https://github.com/tobymao/sqlglot/pull/6773) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`82d533e`](https://github.com/tobymao/sqlglot/commit/82d533ea5bde8637c71520334174a6fa04ad021d) - **optimizer**: Annotate CURRENT_DATABASE() for Hive, Spark and DBX *(PR [#6774](https://github.com/tobymao/sqlglot/pull/6774) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b5674f6`](https://github.com/tobymao/sqlglot/commit/b5674f6371aeac02716085095d9edb22e559aaa8) - **optimizer**: robust correlated subqueries annotation *(PR [#6764](https://github.com/tobymao/sqlglot/pull/6764) by [@geooo109](https://github.com/geooo109))* - [`8634a8a`](https://github.com/tobymao/sqlglot/commit/8634a8a737d5ee6c40b4d33545e9f928e1e07df4) - **snowflake**: Added Snowflake to DuckDB transpilation for EXTRACT *(PR [#6706](https://github.com/tobymao/sqlglot/pull/6706) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`8cda928`](https://github.com/tobymao/sqlglot/commit/8cda928b3807685f6de5e89eac12522433cfddd6) - **databricks,duckdb,postgres,spark,snowflake**: ARRAY_APPEND null propagation *(PR [#6762](https://github.com/tobymao/sqlglot/pull/6762) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2ab4376`](https://github.com/tobymao/sqlglot/commit/2ab43769092840da802ece227d4c13cc95a2108a) - **optimizer**: Annotate CURRENT_USER() for Hive, Spark and DBX *(PR [#6790](https://github.com/tobymao/sqlglot/pull/6790) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`fca6a94`](https://github.com/tobymao/sqlglot/commit/fca6a947c27959d4b8f6f3453a941d31ceccf5a4) - **optimizer**: Annotate CURRENT_SCHEMA() for Hive, Spark and DBX *(PR [#6792](https://github.com/tobymao/sqlglot/pull/6792) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7f4a150`](https://github.com/tobymao/sqlglot/commit/7f4a1502dd6e039677979b67958e618a15867ed5) - **optimizer**: parse and annotate bq NET.REG_DOMAIN *(PR [#6777](https://github.com/tobymao/sqlglot/pull/6777) by [@geooo109](https://github.com/geooo109))* - [`ce0bbcf`](https://github.com/tobymao/sqlglot/commit/ce0bbcf0d6d85c59827438bd711e9aa59ac1d9ef) - **optimizer**: Annotate MONTHNAME for Spark and DBX *(PR [#6794](https://github.com/tobymao/sqlglot/pull/6794) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`bc0a43c`](https://github.com/tobymao/sqlglot/commit/bc0a43cc83e21763d12ca671b03392ce555ce14b) - **optimizer**: Annotate MONTH for Hive, Spark and DBX *(PR [#6795](https://github.com/tobymao/sqlglot/pull/6795) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`86ca0b6`](https://github.com/tobymao/sqlglot/commit/86ca0b6bf757e77ded99ffaaed641f8a092d6354) - **optimizer**: Annotate MONTHS_BETWEEN for Hive, Spark and DBX *(PR [#6796](https://github.com/tobymao/sqlglot/pull/6796) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`590bcf1`](https://github.com/tobymao/sqlglot/commit/590bcf1de5e6ad15188224f5e2c1dce0398a9ecd) - **optimizer**: Annotate DATE_FROM_UNIX_DATE for Spark and DBX *(PR [#6797](https://github.com/tobymao/sqlglot/pull/6797) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a2c4b08`](https://github.com/tobymao/sqlglot/commit/a2c4b08468729cbb1bd39545630a8feca9643b10) - **optimizer**: Annotate UNHEX for Hive, Spark and DBX *(PR [#6800](https://github.com/tobymao/sqlglot/pull/6800) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`39db28e`](https://github.com/tobymao/sqlglot/commit/39db28e28290acfd2e3f416a8e057670194e9e20) - **duckdb**: Add 
transpilation support for TIMESTAMP_FROM_PARTS function *(PR [#6801](https://github.com/tobymao/sqlglot/pull/6801) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`3ab49b6`](https://github.com/tobymao/sqlglot/commit/3ab49b64d97b4212696e207e38ecd647421ada2b) - **optimizer**: Annotate ASIN for Hive, Spark and DBX *(PR [#6807](https://github.com/tobymao/sqlglot/pull/6807) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`d86e28a`](https://github.com/tobymao/sqlglot/commit/d86e28a05ea068f4a254883714f352ebee89ea55) - **optimizer**: Annotate ASINH for Spark and DBX *(PR [#6808](https://github.com/tobymao/sqlglot/pull/6808) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8fe8d64`](https://github.com/tobymao/sqlglot/commit/8fe8d64373ec3cad3b4e519c728e1674daa64dac) - **databricks,duckdb,postgres,spark,snowflake**: ARRAY_PREPEND null propagation *(PR [#6809](https://github.com/tobymao/sqlglot/pull/6809) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`1c31ae3`](https://github.com/tobymao/sqlglot/commit/1c31ae3883c8755fc6046b661bcf87c1aa76cd5c) - **duckdb**: Add transpilation support for TIME_SLICE function *(PR [#6805](https://github.com/tobymao/sqlglot/pull/6805) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`88412cb`](https://github.com/tobymao/sqlglot/commit/88412cb431c9cc64953274fd78df80c3aa70f081) - **starrocks**: add support for ROLLUP index property *(PR [#6814](https://github.com/tobymao/sqlglot/pull/6814) by [@petrikoro](https://github.com/petrikoro))* - [`e6eff62`](https://github.com/tobymao/sqlglot/commit/e6eff62309b51192e732e0ae18eaa5cda5a7257a) - **optimizer**: Annotate `GET_BIT` for DuckDB *(PR [#6816](https://github.com/tobymao/sqlglot/pull/6816) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`70c2097`](https://github.com/tobymao/sqlglot/commit/70c2097ba2a26e5bad04347c8cc974b5056f2c19) - **optimizer**: Annotate DAYNAME for 
Base Dialect *(PR [#6817](https://github.com/tobymao/sqlglot/pull/6817) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2d5c3aa`](https://github.com/tobymao/sqlglot/commit/2d5c3aabdec756c3fe43392cb26181856acea7d6) - **optimizer**: Annotate `CBRT` for Base Dialect *(PR [#6819](https://github.com/tobymao/sqlglot/pull/6819) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5673b09`](https://github.com/tobymao/sqlglot/commit/5673b09dd899289f866df3c30bc9b435ba30d34f) - **snowflake**: support transpilation of try_to_date from snowflake to duckdb *(PR [#6806](https://github.com/tobymao/sqlglot/pull/6806) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`c229079`](https://github.com/tobymao/sqlglot/commit/c229079299f3d79fdc8f5bbd9506f6594bbdbe12) - **snowflake**: support transpilation try_to_double snowflake to duck db *(PR [#6821](https://github.com/tobymao/sqlglot/pull/6821) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`378349d`](https://github.com/tobymao/sqlglot/commit/378349ddbe738438e5ad565f6f7d243ee779815a) - **optimizer**: Annotate SOUNDEX for Hive, Spark and DBX *(PR [#6832](https://github.com/tobymao/sqlglot/pull/6832) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`3972a6c`](https://github.com/tobymao/sqlglot/commit/3972a6cc9c6a0b7fcb13948cbc56d5e48502552b) - **snowflake**: Transpile BASE64_ENCODE from Snowflake to DuckDB *(PR [#6826](https://github.com/tobymao/sqlglot/pull/6826) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`a9daff7`](https://github.com/tobymao/sqlglot/commit/a9daff7028112aba5a4023c11d9dd96a4dba3d92) - **snowflake**: Transpilation of Snowflake SEQ1/2/4/8 and GENERATOR to DuckDB *(PR [#6810](https://github.com/tobymao/sqlglot/pull/6810) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`317d496`](https://github.com/tobymao/sqlglot/commit/317d4968938b8df301d4e55cfdd96af1a304f88d) - **optimizer**: Annotate SESSION_USER() for Spark 
and DBX *(PR [#6834](https://github.com/tobymao/sqlglot/pull/6834) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`98cc685`](https://github.com/tobymao/sqlglot/commit/98cc685253e8011b9d4e2e78137a8b505192724f) - **optimizer**: Annotate FACTORIAL(expr) for Hive, Spark and DBX *(PR [#6835](https://github.com/tobymao/sqlglot/pull/6835) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`db057bb`](https://github.com/tobymao/sqlglot/commit/db057bb088183ff09da72f0fbe13515f057d4e3b) - **postgres**: support `VARIADIC` *(PR [#6841](https://github.com/tobymao/sqlglot/pull/6841) by [@syubogdanov](https://github.com/syubogdanov))* - [`7dade98`](https://github.com/tobymao/sqlglot/commit/7dade9843e9a7cc5887a83f51b4d7ff1650de6de) - **duckdb**: Add transpilation support for REVERSE function *(PR [#6838](https://github.com/tobymao/sqlglot/pull/6838) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`36211c2`](https://github.com/tobymao/sqlglot/commit/36211c223f062519a1b561dfb23df56fb11a39fc) - **snowflake**: transpile BASE64_DECODE_STRING/BINARY to DuckDB *(PR [#6837](https://github.com/tobymao/sqlglot/pull/6837) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`541abfe`](https://github.com/tobymao/sqlglot/commit/541abfe0afc8f3e080746bfa87fea99abf07fb1c) - **optimizer**: annotate type for bq DATE_ADD *(PR [#6842](https://github.com/tobymao/sqlglot/pull/6842) by [@geooo109](https://github.com/geooo109))* - [`8797e12`](https://github.com/tobymao/sqlglot/commit/8797e124900a31a4701ba425ae56773acf503471) - **duckdb**: add support for transpiling ARRAY_COMPACT *(PR [#6839](https://github.com/tobymao/sqlglot/pull/6839) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`89f583a`](https://github.com/tobymao/sqlglot/commit/89f583a35f36ff9a1caab760273576e05b926572) - **optimizer**: Annotate SECOND for Hive, Spark and DBX *(PR [#6853](https://github.com/tobymao/sqlglot/pull/6853) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`4a7e5a1`](https://github.com/tobymao/sqlglot/commit/4a7e5a1050b3704295dab11aaec22b238be1659d) - **snowflake**: Transpilation for Snowflake EDITDISTANCE to Duckdb *(PR [#6846](https://github.com/tobymao/sqlglot/pull/6846) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`6f3b76b`](https://github.com/tobymao/sqlglot/commit/6f3b76b94ea1fe76ef85291634cde53cc2408d02) - **optimizer**: Annotate SIN, COS, TAN, COT for T-SQL *(PR [#6851](https://github.com/tobymao/sqlglot/pull/6851) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`2a455ca`](https://github.com/tobymao/sqlglot/commit/2a455caf6426f4d4ba2e6801e131804b7eba8a01) - **duckdb**: Add transpilation support for FLATTEN (ARRAY_FLATTEN) *(PR [#6848](https://github.com/tobymao/sqlglot/pull/6848) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`e1b6529`](https://github.com/tobymao/sqlglot/commit/e1b6529c0d413f515c3a8d0902ea6a2995e7d92d) - **snowflake**: support directed joins closes [#6850](https://github.com/tobymao/sqlglot/pull/6850) *(PR [#6856](https://github.com/tobymao/sqlglot/pull/6856) by [@georgesittas](https://github.com/georgesittas))* - [`c94284f`](https://github.com/tobymao/sqlglot/commit/c94284f19961645212c9b38169bc2341988debf5) - **optimizer**: UDF annotation *(PR [#6843](https://github.com/tobymao/sqlglot/pull/6843) by [@georgesittas](https://github.com/georgesittas))* - [`394bba4`](https://github.com/tobymao/sqlglot/commit/394bba402f99ffacee999f236602b27470e8c95d) - **starrocks**: add full support for partitions *(PR [#6804](https://github.com/tobymao/sqlglot/pull/6804) by [@petrikoro](https://github.com/petrikoro))* - :arrow_lower_right: *addresses issue [#6803](https://github.com/tobymao/sqlglot/issues/6803) opened by [@petrikoro](https://github.com/petrikoro)* - [`eb9887f`](https://github.com/tobymao/sqlglot/commit/eb9887f4fcc7e8c9d48cee7bd78b4804fb215ae4) - **tsql**: support 
ATN2 function AST *(PR [#6862](https://github.com/tobymao/sqlglot/pull/6862) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`5922ba6`](https://github.com/tobymao/sqlglot/commit/5922ba69be7cc82f45439c818f2a1f2901fe6310) - **tsql**: Annotate inverse trigonometric functions for `TSQL` *(PR [#6865](https://github.com/tobymao/sqlglot/pull/6865) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`f0618b9`](https://github.com/tobymao/sqlglot/commit/f0618b97ae09e51e81e1d7c23a34afdc497f5419) - **spark**: support AS JSON suffix in describe statement closes [#6866](https://github.com/tobymao/sqlglot/pull/6866) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bf90b5d`](https://github.com/tobymao/sqlglot/commit/bf90b5dd5b4b48359cfa18b992bbb7a307169cb4) - **duckdb**: Add transpilation support for SPACE function *(PR [#6867](https://github.com/tobymao/sqlglot/pull/6867) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fc55b98`](https://github.com/tobymao/sqlglot/commit/fc55b9889bcb1e0dad404dc15d357d8c755d85e6) - **snowflake**: Transpilation of MINHASH functions from Snowflake to DuckDB *(PR [#6859](https://github.com/tobymao/sqlglot/pull/6859) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`3728646`](https://github.com/tobymao/sqlglot/commit/372864672b1f576d7e80d5b4df368742a79f8222) - **tsql**: Annotate `CURRENT_TIMEZONE()` for TSQL *(PR [#6871](https://github.com/tobymao/sqlglot/pull/6871) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`c6bfe61`](https://github.com/tobymao/sqlglot/commit/c6bfe61c59f06c6ce7cdb93a65082cd0a81018ef) - **starrocks**: improve some starrocks properties generation *(PR [#6827](https://github.com/tobymao/sqlglot/pull/6827) by [@jaogoy](https://github.com/jaogoy))* - [`2103d1c`](https://github.com/tobymao/sqlglot/commit/2103d1c08dc36a7a6eb050149d730dcf2ea77dba) - **optimizer**: Annotate MD5 for Hive, Spark and DBX *(PR 
[#6878](https://github.com/tobymao/sqlglot/pull/6878) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6f49775`](https://github.com/tobymao/sqlglot/commit/6f49775edbae748fc3692e20562e5dad9d77b631) - **duckdb**: Transpilation of ARRAY_CONSTRUCT_COMPACT to duckdb *(PR [#6875](https://github.com/tobymao/sqlglot/pull/6875) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`37dc9c7`](https://github.com/tobymao/sqlglot/commit/37dc9c7b08169149af7fa7baa5cbf567a2688008) - **duckdb**: support transpilation of ARRAY_INSERT *(PR [#6863](https://github.com/tobymao/sqlglot/pull/6863) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`3a769d4`](https://github.com/tobymao/sqlglot/commit/3a769d404ba35f4a9b26766e0b614d0e24763efc) - **snowflake**: Added transpilation of Snowflake ARRAYS_ZIP to DuckDB *(PR [#6874](https://github.com/tobymao/sqlglot/pull/6874) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`9a0aaab`](https://github.com/tobymao/sqlglot/commit/9a0aaab6277590966ce4258444649f573d88bd9e) - **optimizer**: Annotate SOUNDEX(expr) for TSQL *(PR [#6887](https://github.com/tobymao/sqlglot/pull/6887) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`3e94c60`](https://github.com/tobymao/sqlglot/commit/3e94c603923ce27666ce23b0f5c985c93031b13e) - **duckdb**: support transpilation of ARRAY_REMOVE *(PR [#6886](https://github.com/tobymao/sqlglot/pull/6886) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`bce1b1f`](https://github.com/tobymao/sqlglot/commit/bce1b1f9a75db83b71eebc097065e8c8d5ee6051) - **snowflake**: Transpilation support for Snowflake MAP_CAT to DuckDB *(PR [#6881](https://github.com/tobymao/sqlglot/pull/6881) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ad2e9d3`](https://github.com/tobymao/sqlglot/commit/ad2e9d31f00d370d93ae925134e89d15cfd701a1) - **postgres**: support function parameter mode (IN, OUT, INOUT, VARIADIC) *(PR 
[#6876](https://github.com/tobymao/sqlglot/pull/6876) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - :arrow_lower_right: *addresses issue [#6860](https://github.com/tobymao/sqlglot/issues/6860) opened by [@Badg](https://github.com/Badg)* - [`b49a656`](https://github.com/tobymao/sqlglot/commit/b49a65696637937530eb0efe9b0de46c41a3436f) - **optimizer**: Annotate FACTORIAL(expr) for DuckDB *(PR [#6891](https://github.com/tobymao/sqlglot/pull/6891) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6ce073b`](https://github.com/tobymao/sqlglot/commit/6ce073bec5864c562854cd5a9848dba56c79bdcc) - **snowflake**: transpilation support for IS_ARRAY *(PR [#6877](https://github.com/tobymao/sqlglot/pull/6877) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* ### :bug: Bug Fixes - [`aeaa43d`](https://github.com/tobymao/sqlglot/commit/aeaa43d16fb3fc01f3d4297badf3953c6d18ae9c) - **duckdb**: Preserve key name in STRUCT for all identifiers *(PR [#6744](https://github.com/tobymao/sqlglot/pull/6744) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6729](https://github.com/tobymao/sqlglot/issues/6729) opened by [@nikchha](https://github.com/nikchha)* - [`7b5279c`](https://github.com/tobymao/sqlglot/commit/7b5279ccda0bd8947b9b244c221f03883e8865cf) - **optimizer**: Fix optimizer for generate series *(PR [#6679](https://github.com/tobymao/sqlglot/pull/6679) by [@chrisqu777](https://github.com/chrisqu777))* - :arrow_lower_right: *fixes issue [#6657](https://github.com/tobymao/sqlglot/issues/6657) opened by [@metahexane](https://github.com/metahexane)* - [`48336d0`](https://github.com/tobymao/sqlglot/commit/48336d00d2ad15ba1056868aab99d7e8f9ddb496) - **optimizer**: Exclude table-valued functions from unnest_subqueries *(PR [#6755](https://github.com/tobymao/sqlglot/pull/6755) by [@VaggelisD](https://github.com/VaggelisD))* - 
[`700fbe9`](https://github.com/tobymao/sqlglot/commit/700fbe9b648339342ef60e1ed2ec729de24b6229) - **optimizer**: Annotate CORR for Hive and inherited dialects *(PR [#6769](https://github.com/tobymao/sqlglot/pull/6769) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`69c874f`](https://github.com/tobymao/sqlglot/commit/69c874f317621c572c9b5f91e563f50afaa38bba) - **bigquery**: properly support safe functions *(PR [#6775](https://github.com/tobymao/sqlglot/pull/6775) by [@georgesittas](https://github.com/georgesittas))* - [`ee08d77`](https://github.com/tobymao/sqlglot/commit/ee08d777eb79e8632d3a23e40095fd1f760a77a6) - **parser**: resolve parsing issue in substr with FROM/FOR syntax *(PR [#6791](https://github.com/tobymao/sqlglot/pull/6791) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - :arrow_lower_right: *fixes issue [#6787](https://github.com/tobymao/sqlglot/issues/6787) opened by [@AbhishekASLK](https://github.com/AbhishekASLK)* - [`d6ecc73`](https://github.com/tobymao/sqlglot/commit/d6ecc7367783d81aab7b6341cd3400ff0edf4794) - **parser**: add support for grouping_id() *(PR [#6793](https://github.com/tobymao/sqlglot/pull/6793) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - :arrow_lower_right: *fixes issue [#6784](https://github.com/tobymao/sqlglot/issues/6784) opened by [@AbhishekASLK](https://github.com/AbhishekASLK)* - [`7e3df62`](https://github.com/tobymao/sqlglot/commit/7e3df62f3e480f9e42c055a00b1113f219348561) - **hive, spark, databricks**: parse DISTINCT as separate arg from quantile for PERCENTILE func *(PR [#6799](https://github.com/tobymao/sqlglot/pull/6799) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6786](https://github.com/tobymao/sqlglot/issues/6786) opened by [@AbhishekASLK](https://github.com/AbhishekASLK)* - [`c2069b7`](https://github.com/tobymao/sqlglot/commit/c2069b77d7316877e69051b62802d02d862780ba) - **starrocks**: omit TABLE keyword for INSERT OVERWRITE fixes 
[#6803](https://github.com/tobymao/sqlglot/pull/6803) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99b2b6a`](https://github.com/tobymao/sqlglot/commit/99b2b6ae0c6746ae8456968c98d96a31ac51d26a) - **hive, spark2, spark, databricks**: robust parsing of ALL/DISTINCT for PERCENTILE_APPROX func *(PR [#6812](https://github.com/tobymao/sqlglot/pull/6812) by [@geooo109](https://github.com/geooo109))* - [`ba1d99f`](https://github.com/tobymao/sqlglot/commit/ba1d99fc5da8b075c24554d57ed8b8092d56d58a) - **starrocks**: rewrite BETWEEN as comparison operators for DELETE *(PR [#6815](https://github.com/tobymao/sqlglot/pull/6815) by [@petrikoro](https://github.com/petrikoro))* - [`7e46829`](https://github.com/tobymao/sqlglot/commit/7e468291eaac59cd4a150772b5e0cc56f0c0bcf8) - quote integration tests GHA heredoc delimiter *(PR [#6820](https://github.com/tobymao/sqlglot/pull/6820) by [@treysp](https://github.com/treysp))* - [`0a478ad`](https://github.com/tobymao/sqlglot/commit/0a478adf096f4890f03991e2bd33257d0c2d3ad4) - introduce `BYTE_STRING_ESCAPES` concept for postgres/duckdb e-strings *(PR [#6818](https://github.com/tobymao/sqlglot/pull/6818) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6813](https://github.com/tobymao/sqlglot/issues/6813) opened by [@trouver](https://github.com/trouver)* - [`d8f266b`](https://github.com/tobymao/sqlglot/commit/d8f266b45f98c1eafbf09f4090a13357aef3efa0) - **merge_subqueries**: Do not replace literals in GROUP BY *(PR [#6828](https://github.com/tobymao/sqlglot/pull/6828) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6823](https://github.com/tobymao/sqlglot/issues/6823) opened by [@karta0807913](https://github.com/karta0807913)* - [`dcf2cb9`](https://github.com/tobymao/sqlglot/commit/dcf2cb98ecef098368d1c4aa6d12164c341ea227) - **optimizer**: annotate fields of UNNEST(STRUCT) with ALIAS for bq *(PR 
[#6830](https://github.com/tobymao/sqlglot/pull/6830) by [@geooo109](https://github.com/geooo109))* - [`f8024e0`](https://github.com/tobymao/sqlglot/commit/f8024e0ae3fe66076e78295868934203b03a7d49) - **optimizer**: Annotate QUARTER for Hive, Spark and DBX *(PR [#6840](https://github.com/tobymao/sqlglot/pull/6840) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b3b36ba`](https://github.com/tobymao/sqlglot/commit/b3b36baf86f705901cf1b3509203203772907879) - **parser**: robust representation of negative numbers *(PR [#6833](https://github.com/tobymao/sqlglot/pull/6833) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6831](https://github.com/tobymao/sqlglot/issues/6831) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`6ebe5cc`](https://github.com/tobymao/sqlglot/commit/6ebe5cc397c598e865c360f89097271c11173af1) - **duckdb,postgres,redshift,snowflake**: ARRAY_CAT null propagation *(PR [#6829](https://github.com/tobymao/sqlglot/pull/6829) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`5a1028a`](https://github.com/tobymao/sqlglot/commit/5a1028ad937782ff54bcd6e7ee8029d24abb7eff) - **snowflake**: match_condition edge case when offset/limit are present *(PR [#6847](https://github.com/tobymao/sqlglot/pull/6847) by [@georgesittas](https://github.com/georgesittas))* - [`3852e1d`](https://github.com/tobymao/sqlglot/commit/3852e1d6e9bab29b4a9678d06e0583d38232165f) - **optimizer**: Annotate ARRAY_SIZE(array) correctly for Spark and DBX *(PR [#6852](https://github.com/tobymao/sqlglot/pull/6852) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7abc6b5`](https://github.com/tobymao/sqlglot/commit/7abc6b5645ee3f0324e77f8c853d55893083ab7c) - Fix LATERAL VIEW POSEXPLODE transpilation *(PR [#6844](https://github.com/tobymao/sqlglot/pull/6844) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6836](https://github.com/tobymao/sqlglot/issues/6836) opened by 
[@nickhand](https://github.com/nickhand)* - [`f1c4fa0`](https://github.com/tobymao/sqlglot/commit/f1c4fa0c62dc33d850eac4db190320a651717f77) - **snowflake**: input rounding issue when transpiling boolean logic functions to DuckDB *(PR [#6849](https://github.com/tobymao/sqlglot/pull/6849) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`ddec250`](https://github.com/tobymao/sqlglot/commit/ddec2500e994dc4b7cc4a80b501dc9bc1f2fb4d1) - **presto**: don't overwrite_types when annotating types in struct_sql() *(PR [#6870](https://github.com/tobymao/sqlglot/pull/6870) by [@NickCrews](https://github.com/NickCrews))* - [`acec48e`](https://github.com/tobymao/sqlglot/commit/acec48e0e10a2298f8e4d981feed883905150b76) - allow check to appear as an identifier in a ddl column def fixes [#6872](https://github.com/tobymao/sqlglot/pull/6872) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0e48e25`](https://github.com/tobymao/sqlglot/commit/0e48e255b28352ebed06253b25ef049215330624) - **optimizer**: qualify columns with circular dependency *(PR [#6873](https://github.com/tobymao/sqlglot/pull/6873) by [@geooo109](https://github.com/geooo109))* - [`9c39f08`](https://github.com/tobymao/sqlglot/commit/9c39f085924a9a59cd4a32322f3e45c710467563) - **optimizer**: Annotate DAYOFWEEK for MySQL *(PR [#6885](https://github.com/tobymao/sqlglot/pull/6885) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8ae8ef7`](https://github.com/tobymao/sqlglot/commit/8ae8ef74f2402d0c5c6f2c3bb704f9ea2311b720) - **postgres**: missing ON TRUE required when transpiling APPLY *(PR [#6884](https://github.com/tobymao/sqlglot/pull/6884) by [@c3us-dev](https://github.com/c3us-dev))* - :arrow_lower_right: *fixes issue [#6883](https://github.com/tobymao/sqlglot/issues/6883) opened by [@c3us-dev](https://github.com/c3us-dev)* ### :wrench: Chores - [`167b670`](https://github.com/tobymao/sqlglot/commit/167b6708ba7bb37bb826cb1a4ceec39f0719e773) - AB-sort typing dicts *(commit by 
[@VaggelisD](https://github.com/VaggelisD))* - [`1c9cf7b`](https://github.com/tobymao/sqlglot/commit/1c9cf7bfed3f819957110964f5c44794a3e9a8bb) - **optimizer**: annotate snowflake ARRAY_COMPACT *(PR [#6735](https://github.com/tobymao/sqlglot/pull/6735) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`aefd6c5`](https://github.com/tobymao/sqlglot/commit/aefd6c508664c39d7f8ab7e79f34151a285e1b04) - Add Spark & DBX tests for ARRAY_COMPACT for PR6735 *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`e56ff6b`](https://github.com/tobymao/sqlglot/commit/e56ff6bb52997e2c9cc12bd4985dd977d44c7507) - Rename TokenType.TILDA to TokenType.TILDE *(PR [#6742](https://github.com/tobymao/sqlglot/pull/6742) by [@VaggelisD](https://github.com/VaggelisD))* - [`931fbce`](https://github.com/tobymao/sqlglot/commit/931fbce6543e84fb938cff91215cfefaab048f3d) - add YDB plugin to dialects list *(PR [#6741](https://github.com/tobymao/sqlglot/pull/6741) by [@vgvoleg](https://github.com/vgvoleg))* - [`3b5fb43`](https://github.com/tobymao/sqlglot/commit/3b5fb43885277d1ffda9926e5a9b58312d8840de) - Refactor PR 6679 *(PR [#6749](https://github.com/tobymao/sqlglot/pull/6749) by [@VaggelisD](https://github.com/VaggelisD))* - [`8c6d01e`](https://github.com/tobymao/sqlglot/commit/8c6d01e8eb409e750070055f65ac52e0ec921103) - refactor LENGTH type annotation *(PR [#6758](https://github.com/tobymao/sqlglot/pull/6758) by [@geooo109](https://github.com/geooo109))* - [`33d9e63`](https://github.com/tobymao/sqlglot/commit/33d9e63af776b6b51816774f712402f6cde6498c) - clean up redundant annotators *(PR [#6776](https://github.com/tobymao/sqlglot/pull/6776) by [@georgesittas](https://github.com/georgesittas))* - [`8fa7198`](https://github.com/tobymao/sqlglot/commit/8fa71980c2282d346cbedef1a8c330b7e5722379) - annotation tests for HEX func *(commit by [@geooo109](https://github.com/geooo109))* - [`d3a0ffa`](https://github.com/tobymao/sqlglot/commit/d3a0ffa1e49cb69403cef331e1d5704cef5d63f8) 
- refactor boolean function generators for duckdb *(PR [#6857](https://github.com/tobymao/sqlglot/pull/6857) by [@georgesittas](https://github.com/georgesittas))* - [`6fc77ce`](https://github.com/tobymao/sqlglot/commit/6fc77ce335f0f2d5099285bf17ae81576aac89fb) - refactor mysql, doris, starrocks partition by syntax *(PR [#6858](https://github.com/tobymao/sqlglot/pull/6858) by [@geooo109](https://github.com/geooo109))* - [`16959de`](https://github.com/tobymao/sqlglot/commit/16959de3118302fef2ad228dc233da46fce73dca) - fix style for starrocks *(commit by [@geooo109](https://github.com/geooo109))* - [`fa3c944`](https://github.com/tobymao/sqlglot/commit/fa3c9448d2fb132dd837b0dd5d6d4eb02297ba1a) - refactor starrocks *(PR [#6880](https://github.com/tobymao/sqlglot/pull/6880) by [@geooo109](https://github.com/geooo109))* - [`1438115`](https://github.com/tobymao/sqlglot/commit/14381150e5b28a0134899d36935b8c99aca9058c) - clean up sf IS_ARRAY tests *(commit by [@geooo109](https://github.com/geooo109))* - [`5f67a14`](https://github.com/tobymao/sqlglot/commit/5f67a149635cd000249eb1fc26b18493f29c4974) - bump sqlglotrs to 0.12.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v28.6.0] - 2026-01-13 ### :boom: BREAKING CHANGES - due to [`06ce65a`](https://github.com/tobymao/sqlglot/commit/06ce65ab4235d180d30c59a74756bdd8026fddc7) - support transpilation of bitwise agg functions *(PR [#6580](https://github.com/tobymao/sqlglot/pull/6580) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: support transpilation of bitwise agg functions (#6580) - due to [`1497ee6`](https://github.com/tobymao/sqlglot/commit/1497ee6197dd1bd15926ab40bb09f03f72a4da34) - corrected handling of ToChar for Postgres *(commit by [@dhawkins1234](https://github.com/dhawkins1234))*: corrected handling of ToChar for Postgres - due to [`c52705d`](https://github.com/tobymao/sqlglot/commit/c52705dec16390e0651d8a68c3500d8fa11dff12) - annotate EXISTS, ALL, ANY as BOOLEAN *(PR 
[#6590](https://github.com/tobymao/sqlglot/pull/6590) by [@doripo](https://github.com/doripo))*: annotate EXISTS, ALL, ANY as BOOLEAN (#6590) - due to [`8ef9d7b`](https://github.com/tobymao/sqlglot/commit/8ef9d7b2183589217df10004798fd751d4d618d0) - support transpilation of GREATEST, GREATEST_IGNORE_NULLS, LEAST, LEAST_IGNORE_NULLS from snowflake to duckdb *(PR [#6579](https://github.com/tobymao/sqlglot/pull/6579) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of GREATEST, GREATEST_IGNORE_NULLS, LEAST, LEAST_IGNORE_NULLS from snowflake to duckdb (#6579) - due to [`b93291b`](https://github.com/tobymao/sqlglot/commit/b93291bee7ada32b4d686db919d8d3d683d95425) - support transpilation of TRY_TO_BOOLEAN from Snowflake to DuckDB *(PR [#6594](https://github.com/tobymao/sqlglot/pull/6594) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of TRY_TO_BOOLEAN from Snowflake to DuckDB (#6594) - due to [`00e596d`](https://github.com/tobymao/sqlglot/commit/00e596d1a7a6b806cd1c632afd6b09f4f5270086) - support transpilation of BOOLXOR_AGG, BOOLAND_AGG, and BOOLOR_AGG from Snowflake to DuckDB *(PR [#6592](https://github.com/tobymao/sqlglot/pull/6592) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: support transpilation of BOOLXOR_AGG, BOOLAND_AGG, and BOOLOR_AGG from Snowflake to DuckDB (#6592) - due to [`0ba33ad`](https://github.com/tobymao/sqlglot/commit/0ba33ad69abcb718f761090cdd867e45d9481b80) - Add transpilation support for DAYNAME and MONTHNAME functions. *(PR [#6603](https://github.com/tobymao/sqlglot/pull/6603) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add transpilation support for DAYNAME and MONTHNAME functions. 
(#6603) - due to [`62c0ef0`](https://github.com/tobymao/sqlglot/commit/62c0ef0ad4579a55d65498eddd4e649ae464b559) - Fix BQ's exp.Date transpilation *(PR [#6595](https://github.com/tobymao/sqlglot/pull/6595) by [@VaggelisD](https://github.com/VaggelisD))*: Fix BQ's exp.Date transpilation (#6595) - due to [`77ff9d0`](https://github.com/tobymao/sqlglot/commit/77ff9d0c5cec8fdba7e64fe02c0db0a63a64f1e0) - annotate types for MAP_* functions *(PR [#6605](https://github.com/tobymao/sqlglot/pull/6605) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate types for MAP_* functions (#6605) - due to [`e7c1574`](https://github.com/tobymao/sqlglot/commit/e7c1574f89314a304933a2b3b391528d530ea596) - Add transpilation support for DATE_DIFF function *(PR [#6609](https://github.com/tobymao/sqlglot/pull/6609) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add transpilation support for DATE_DIFF function (#6609) - due to [`d8f0bbd`](https://github.com/tobymao/sqlglot/commit/d8f0bbdbd208acc0eccb7e9be331d2661169ad97) - conditionally consume +/- in scientific literal notation *(PR [#6610](https://github.com/tobymao/sqlglot/pull/6610) by [@georgesittas](https://github.com/georgesittas))*: conditionally consume +/- in scientific literal notation (#6610) - due to [`13014e0`](https://github.com/tobymao/sqlglot/commit/13014e0f01d10f0a078ef8aed4569d3aa8bd741b) - move postgres range parsers to global level *(PR [#6591](https://github.com/tobymao/sqlglot/pull/6591) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: move postgres range parsers to global level (#6591) - due to [`d26ff44`](https://github.com/tobymao/sqlglot/commit/d26ff4444fba6697f25efa3dea5ab751fe560f6b) - support adjacent ranges operator *(PR [#6611](https://github.com/tobymao/sqlglot/pull/6611) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support adjacent ranges operator (#6611) - due to 
[`1f3436b`](https://github.com/tobymao/sqlglot/commit/1f3436bfe7ddbbf212d61616153f150d32824450) - bq robust literal/non-literal type annotation *(PR [#6600](https://github.com/tobymao/sqlglot/pull/6600) by [@geooo109](https://github.com/geooo109))*: bq robust literal/non-literal type annotation (#6600) - due to [`8f38887`](https://github.com/tobymao/sqlglot/commit/8f3888746a1f36446544483e920e1287ebf76b4f) - annotate snowflake array construct *(commit by [@georgesittas](https://github.com/georgesittas))*: annotate snowflake array construct - due to [`870dba4`](https://github.com/tobymao/sqlglot/commit/870dba41cc88dc9e4802f3695f3f715e0c35e0ed) - support transpilation of LAST_DAY from Snowflake to Duckdb *(PR [#6614](https://github.com/tobymao/sqlglot/pull/6614) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of LAST_DAY from Snowflake to Duckdb (#6614) - due to [`870d600`](https://github.com/tobymao/sqlglot/commit/870d600a93108b1a1d68936244564548dec5f683) - support: postgres point *(PR [#6615](https://github.com/tobymao/sqlglot/pull/6615) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support: postgres point (#6615) - due to [`302fda0`](https://github.com/tobymao/sqlglot/commit/302fda0151094bc074b98847390cd554414258bb) - support and, or *(PR [#6625](https://github.com/tobymao/sqlglot/pull/6625) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support and, or (#6625) - due to [`fc5800d`](https://github.com/tobymao/sqlglot/commit/fc5800d1359f9b0933622cb355fdb7a34f2f486a) - support Snowflake to DuckDB transpilation of ZIPF *(PR [#6618](https://github.com/tobymao/sqlglot/pull/6618) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: support Snowflake to DuckDB transpilation of ZIPF (#6618) - due to [`dea22ca`](https://github.com/tobymao/sqlglot/commit/dea22ca07076c1ef6a08f06f9fdc6070ca0fecb8) - support `RECORD` type *(PR [#6635](https://github.com/tobymao/sqlglot/pull/6635) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))*: support `RECORD` type (#6635) - due to [`77783da`](https://github.com/tobymao/sqlglot/commit/77783da1329ad5d681a7d37928e4dbedd68ab365) - support: var-args in `xor` *(PR [#6634](https://github.com/tobymao/sqlglot/pull/6634) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support: var-args in `xor` (#6634) - due to [`b75a3e3`](https://github.com/tobymao/sqlglot/commit/b75a3e3b5d948f55df5b0096d116edf3b898e5e1) - handle BINARY keyword *(PR [#6636](https://github.com/tobymao/sqlglot/pull/6636) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: handle BINARY keyword (#6636) - due to [`e7b5d6f`](https://github.com/tobymao/sqlglot/commit/e7b5d6f1b8031fbf25e1fd7c3b42309c4aae8810) - support transpilation of TRY_TO_BINARY from Snowflake to DuckDB *(PR [#6629](https://github.com/tobymao/sqlglot/pull/6629) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of TRY_TO_BINARY from Snowflake to DuckDB (#6629) - due to [`2bf9405`](https://github.com/tobymao/sqlglot/commit/2bf9405adf9c9c72c77f7aa8ab792779a3b9c5f3) - USING keyword in chr *(PR [#6637](https://github.com/tobymao/sqlglot/pull/6637) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: USING keyword in chr (#6637) - due to [`235fc14`](https://github.com/tobymao/sqlglot/commit/235fc14f40c422e6339da0a6b17252ab9eb18ec2) - support charset *(PR [#6633](https://github.com/tobymao/sqlglot/pull/6633) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support charset (#6633) - due to [`d46969d`](https://github.com/tobymao/sqlglot/commit/d46969db50ff52b8657d5b33f0a106b69dbd1e2a) - annotate snowflake ARRAY_APPEND and ARRAY_PREPEND *(PR [#6645](https://github.com/tobymao/sqlglot/pull/6645) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate snowflake ARRAY_APPEND and ARRAY_PREPEND (#6645) - due to [`9c54329`](https://github.com/tobymao/sqlglot/commit/9c543291a0e28ad044523747d19262243fed1f5d) - Type 
annotation for Snowflake ENCRYPT and DECRYPT functions *(PR [#6643](https://github.com/tobymao/sqlglot/pull/6643) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Type annotation for Snowflake ENCRYPT and DECRYPT functions (#6643) - due to [`7870bd0`](https://github.com/tobymao/sqlglot/commit/7870bd0dc7503d0a1863d7623be7fc12bb9412e4) - type annotation for COVAR_POP and COVAR_SAMP *(PR [#6656](https://github.com/tobymao/sqlglot/pull/6656) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: type annotation for COVAR_POP and COVAR_SAMP (#6656) - due to [`2fa29b1`](https://github.com/tobymao/sqlglot/commit/2fa29b1e0feb6d8f9290c105e5fa0f349a011e22) - Type annotation for ARRAY_REMOVE function *(PR [#6653](https://github.com/tobymao/sqlglot/pull/6653) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: Type annotation for ARRAY_REMOVE function (#6653) - due to [`3d59af5`](https://github.com/tobymao/sqlglot/commit/3d59af557caf5c0fd109ae687b8408264543f9ea) - Added UNIFORM transpilation for Snowflake to DuckDB *(PR [#6640](https://github.com/tobymao/sqlglot/pull/6640) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Added UNIFORM transpilation for Snowflake to DuckDB (#6640) - due to [`5552f12`](https://github.com/tobymao/sqlglot/commit/5552f121f9c9a8103fc3ccf58fd5eed076fe0575) - annotation support for LOCALTIME function *(PR [#6651](https://github.com/tobymao/sqlglot/pull/6651) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotation support for LOCALTIME function (#6651) - due to [`a669651`](https://github.com/tobymao/sqlglot/commit/a6696518efefabaf815d7c61f1ae923c39ce6107) - Type annotation for Snowflake LOCALTIMESTAMP *(PR [#6652](https://github.com/tobymao/sqlglot/pull/6652) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Type annotation for Snowflake LOCALTIMESTAMP (#6652) - due to 
[`40ccce4`](https://github.com/tobymao/sqlglot/commit/40ccce492746cc969467c234b9c7d345454da683) - Transpile Snowflake NORMAL to DuckDB using Box-Muller transform *(PR [#6654](https://github.com/tobymao/sqlglot/pull/6654) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpile Snowflake NORMAL to DuckDB using Box-Muller transform (#6654) - due to [`d6bf569`](https://github.com/tobymao/sqlglot/commit/d6bf569ef2442d5055ab788a2078adf485cc1120) - updates for Snowflake to DuckDB transpilation of TO_TIMESTAMP functions *(PR [#6622](https://github.com/tobymao/sqlglot/pull/6622) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: updates for Snowflake to DuckDB transpilation of TO_TIMESTAMP functions (#6622) - due to [`13251fd`](https://github.com/tobymao/sqlglot/commit/13251fd7f03d31af69b86a4ef5bb6e940cc9318b) - annotation support for ELT function *(PR [#6659](https://github.com/tobymao/sqlglot/pull/6659) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotation support for ELT function (#6659) - due to [`b287e4e`](https://github.com/tobymao/sqlglot/commit/b287e4ef9e05c0ba1fadc3638977e994eb911834) - Support transpilation for BITMAP_BUCKET_NUMBER *(PR [#6668](https://github.com/tobymao/sqlglot/pull/6668) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Support transpilation for BITMAP_BUCKET_NUMBER (#6668) - due to [`d5b84cb`](https://github.com/tobymao/sqlglot/commit/d5b84cbbd1406ebd9f5e2373e12b0d15dcb85d7f) - ignore comments in the semantic differ *(PR [#6675](https://github.com/tobymao/sqlglot/pull/6675) by [@georgesittas](https://github.com/georgesittas))*: ignore comments in the semantic differ (#6675) - due to [`399c80f`](https://github.com/tobymao/sqlglot/commit/399c80f2e66a1acdcd7d567e44c297db7071dc8f) - rename args for CovarSamp/Pop, stop inheriting from Binary *(commit by [@georgesittas](https://github.com/georgesittas))*: rename args for CovarSamp/Pop, stop inheriting from Binary - due to 
[`c9a70c0`](https://github.com/tobymao/sqlglot/commit/c9a70c004e0ec563e8a712668a30da9856cb0516) - update transpilation of DATE_TRUNC to duckdb *(PR [#6644](https://github.com/tobymao/sqlglot/pull/6644) by [@toriwei](https://github.com/toriwei))*: update transpilation of DATE_TRUNC to duckdb (#6644) - due to [`460b3a2`](https://github.com/tobymao/sqlglot/commit/460b3a2ae62d8294c57068453b5a11e4a7e12a91) - Allow varlen args in exp.MD5Digest *(PR [#6685](https://github.com/tobymao/sqlglot/pull/6685) by [@VaggelisD](https://github.com/VaggelisD))*: Allow varlen args in exp.MD5Digest (#6685) - due to [`dcdee68`](https://github.com/tobymao/sqlglot/commit/dcdee68cb1a77286232865de9df8d8a01898fcc7) - Allow non aggregation functions in PIVOT *(PR [#6687](https://github.com/tobymao/sqlglot/pull/6687) by [@VaggelisD](https://github.com/VaggelisD))*: Allow non aggregation functions in PIVOT (#6687) - due to [`a4be3fa`](https://github.com/tobymao/sqlglot/commit/a4be3faf63c981a2b10b1f8c709581acf59277ce) - support SYSTIMESTAMP as NO_PAREN_FUNCTION *(PR [#6677](https://github.com/tobymao/sqlglot/pull/6677) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support SYSTIMESTAMP as NO_PAREN_FUNCTION (#6677) - due to [`243448c`](https://github.com/tobymao/sqlglot/commit/243448ca5ccc6a26d680661dcb7d921a055dbfa9) - Transpile date extraction from Snowflake to DuckDB (YEAR*, WEEK*, DAY*, etc) *(PR [#6666](https://github.com/tobymao/sqlglot/pull/6666) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Transpile date extraction from Snowflake to DuckDB (YEAR*, WEEK*, DAY*, etc) (#6666) - due to [`29cff1f`](https://github.com/tobymao/sqlglot/commit/29cff1fbe0011476b22e1b1442af857499f871a6) - bq robust literal/non-literal binary annotation *(PR [#6688](https://github.com/tobymao/sqlglot/pull/6688) by [@geooo109](https://github.com/geooo109))*: bq robust literal/non-literal binary annotation (#6688) - due to 
[`65acd6c`](https://github.com/tobymao/sqlglot/commit/65acd6c00f023c3279947b23e04139bc9554db85) - transpile snowflake SYSDATE *(PR [#6693](https://github.com/tobymao/sqlglot/pull/6693) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpile snowflake SYSDATE (#6693) - due to [`36ff96c`](https://github.com/tobymao/sqlglot/commit/36ff96c727e40dbe467d6338128f87a74b99a980) - support transpilation of BITSHIFTLEFT and BITSHIFTRIGHT *(PR [#6586](https://github.com/tobymao/sqlglot/pull/6586) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: support transpilation of BITSHIFTLEFT and BITSHIFTRIGHT (#6586) - due to [`fb8f57a`](https://github.com/tobymao/sqlglot/commit/fb8f57ac54a4e0213941f3bd057ccb03c4e125b7) - Fix UPDATE statement for multi tables *(PR [#6700](https://github.com/tobymao/sqlglot/pull/6700) by [@VaggelisD](https://github.com/VaggelisD))*: Fix UPDATE statement for multi tables (#6700) - due to [`21f9d3c`](https://github.com/tobymao/sqlglot/commit/21f9d3ca7c6fa0d38f0f63000f904589b91c7d0a) - Normalize struct field names when annotating types *(PR [#6674](https://github.com/tobymao/sqlglot/pull/6674) by [@vchan](https://github.com/vchan))*: Normalize struct field names when annotating types (#6674) - due to [`7362c23`](https://github.com/tobymao/sqlglot/commit/7362c2357eba540a12eea079e9f4212f3545c8d1) - interval expressions *(PR [#6648](https://github.com/tobymao/sqlglot/pull/6648) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: interval expressions (#6648) - due to [`a503a7a`](https://github.com/tobymao/sqlglot/commit/a503a7adbfe3352924e94a21303a2ac23903833c) - annotate Encode for hive dialect hierarchy *(commit by [@georgesittas](https://github.com/georgesittas))*: annotate Encode for hive dialect hierarchy - due to [`c447df7`](https://github.com/tobymao/sqlglot/commit/c447df71a645f55e4798887bde4d42285f5dea4a) - annotate the LOCALTIMESTAMP *(PR [#6709](https://github.com/tobymao/sqlglot/pull/6709) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate the LOCALTIMESTAMP (#6709) - due to [`e91a14b`](https://github.com/tobymao/sqlglot/commit/e91a14b5b325d3d71fbdbe701e531588723bcd2e) - annotate the CURRENT_TIMEZONE *(PR [#6708](https://github.com/tobymao/sqlglot/pull/6708) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate the CURRENT_TIMEZONE (#6708) - due to [`b53c8d4`](https://github.com/tobymao/sqlglot/commit/b53c8d47481d735c74ae8704a90594f33263eee8) - annotate the UNIX_TIMESTAMP *(PR [#6717](https://github.com/tobymao/sqlglot/pull/6717) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: annotate the UNIX_TIMESTAMP (#6717) - due to [`cfb06ab`](https://github.com/tobymao/sqlglot/commit/cfb06ab58c606715e688d39e704b1587fcd256e8) - add support for transpiling ARRAY_AGG with ORDER BY *(PR [#6691](https://github.com/tobymao/sqlglot/pull/6691) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: add support for transpiling ARRAY_AGG with ORDER BY (#6691) - due to [`2965077`](https://github.com/tobymao/sqlglot/commit/296507773fc4476862cc1ee374f88c7d73cb147f) - add support for JSON_KEYS func *(PR [#6718](https://github.com/tobymao/sqlglot/pull/6718) by [@geooo109](https://github.com/geooo109))*: add support for JSON_KEYS func (#6718) - due to [`bbaba5f`](https://github.com/tobymao/sqlglot/commit/bbaba5fd7d0111947035534168593a5a3ee6839a) - annotate type for snowflake ARRAY_CAT *(PR [#6721](https://github.com/tobymao/sqlglot/pull/6721) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake ARRAY_CAT (#6721) - due to [`0a1c7ab`](https://github.com/tobymao/sqlglot/commit/0a1c7abddc0cbd2f853a431848c1ae45e6876ba2) - support transpilation of GETBIT from snowflake to duckdb *(PR [#6692](https://github.com/tobymao/sqlglot/pull/6692) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of GETBIT from snowflake to duckdb (#6692) - due to 
[`cb320c4`](https://github.com/tobymao/sqlglot/commit/cb320c41361f0bb7ad71522366ff4cb8691607bf) - bq annotate type for raw strings *(PR [#6723](https://github.com/tobymao/sqlglot/pull/6723) by [@geooo109](https://github.com/geooo109))*: bq annotate type for raw strings (#6723) - due to [`09fa467`](https://github.com/tobymao/sqlglot/commit/09fa467461656d5c4d4e57c7044c46ff4fcf3f7f) - Annotate ATAN2 for Spark & DBX *(PR [#6725](https://github.com/tobymao/sqlglot/pull/6725) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate ATAN2 for Spark & DBX (#6725) - due to [`b59b3bf`](https://github.com/tobymao/sqlglot/commit/b59b3bfd06c18120db2938748c561495dd885ab4) - Annotate TANH for Spark & DBX *(PR [#6726](https://github.com/tobymao/sqlglot/pull/6726) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: Annotate TANH for Spark & DBX (#6726) - due to [`917071b`](https://github.com/tobymao/sqlglot/commit/917071b42b91f5111d88d599a1caed83e6d7661c) - Support transpilation of TO_TIME and TRY_TO_TIME from snowflake to duckdb *(PR [#6690](https://github.com/tobymao/sqlglot/pull/6690) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Support transpilation of TO_TIME and TRY_TO_TIME from snowflake to duckdb (#6690) - due to [`e50a97e`](https://github.com/tobymao/sqlglot/commit/e50a97ed05d50be9fbea7720511a760c11f4a86e) - Type annotate for Snowflake Kurtosis *(PR [#6720](https://github.com/tobymao/sqlglot/pull/6720) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Type annotate for Snowflake Kurtosis (#6720) - due to [`33b8a5d`](https://github.com/tobymao/sqlglot/commit/33b8a5d25bf49791fb95ec20132ab6ff0bb885e0) - bump sqlglotrs to 0.11.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.11.0 ### :sparkles: New Features - [`06ce65a`](https://github.com/tobymao/sqlglot/commit/06ce65ab4235d180d30c59a74756bdd8026fddc7) - **snowflake**: support transpilation of bitwise agg functions *(PR 
[#6580](https://github.com/tobymao/sqlglot/pull/6580) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`82abf40`](https://github.com/tobymao/sqlglot/commit/82abf4085dd93d23b1612304704fcb89b2cb09e2) - **duckdb**: Add transpilation support for CEIL function *(PR [#6589](https://github.com/tobymao/sqlglot/pull/6589) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`9f9994d`](https://github.com/tobymao/sqlglot/commit/9f9994df1582a1b3e16dcf27c618dadfdffd8745) - **duckdb**: Add transpilation support for float/decimal numbers and preserve end-of-month logic & type *(PR [#6576](https://github.com/tobymao/sqlglot/pull/6576) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8ef9d7b`](https://github.com/tobymao/sqlglot/commit/8ef9d7b2183589217df10004798fd751d4d618d0) - **snowflake**: support transpilation of GREATEST, GREATEST_IGNORE_NULLS, LEAST, LEAST_IGNORE_NULLS from snowflake to duckdb *(PR [#6579](https://github.com/tobymao/sqlglot/pull/6579) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`b93291b`](https://github.com/tobymao/sqlglot/commit/b93291bee7ada32b4d686db919d8d3d683d95425) - **snowflake**: support transpilation of TRY_TO_BOOLEAN from Snowflake to DuckDB *(PR [#6594](https://github.com/tobymao/sqlglot/pull/6594) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`00e596d`](https://github.com/tobymao/sqlglot/commit/00e596d1a7a6b806cd1c632afd6b09f4f5270086) - **snowflake**: support transpilation of BOOLXOR_AGG, BOOLAND_AGG, and BOOLOR_AGG from Snowflake to DuckDB *(PR [#6592](https://github.com/tobymao/sqlglot/pull/6592) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`0ba33ad`](https://github.com/tobymao/sqlglot/commit/0ba33ad69abcb718f761090cdd867e45d9481b80) - **duckdb**: Add transpilation support for DAYNAME and MONTHNAME functions. 
*(PR [#6603](https://github.com/tobymao/sqlglot/pull/6603) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`77ff9d0`](https://github.com/tobymao/sqlglot/commit/77ff9d0c5cec8fdba7e64fe02c0db0a63a64f1e0) - **snowflake**: annotate types for MAP_* functions *(PR [#6605](https://github.com/tobymao/sqlglot/pull/6605) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`e7c1574`](https://github.com/tobymao/sqlglot/commit/e7c1574f89314a304933a2b3b391528d530ea596) - **duckdb**: Add transpilation support for DATE_DIFF function *(PR [#6609](https://github.com/tobymao/sqlglot/pull/6609) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`d26ff44`](https://github.com/tobymao/sqlglot/commit/d26ff4444fba6697f25efa3dea5ab751fe560f6b) - **postgres**: support adjacent ranges operator *(PR [#6611](https://github.com/tobymao/sqlglot/pull/6611) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8f38887`](https://github.com/tobymao/sqlglot/commit/8f3888746a1f36446544483e920e1287ebf76b4f) - **optimizer**: annotate snowflake array construct *(commit by [@georgesittas](https://github.com/georgesittas))* - [`870dba4`](https://github.com/tobymao/sqlglot/commit/870dba41cc88dc9e4802f3695f3f715e0c35e0ed) - **snowflake**: support transpilation of LAST_DAY from Snowflake to Duckdb *(PR [#6614](https://github.com/tobymao/sqlglot/pull/6614) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fc5800d`](https://github.com/tobymao/sqlglot/commit/fc5800d1359f9b0933622cb355fdb7a34f2f486a) - **snowflake**: support Snowflake to DuckDB transpilation of ZIPF *(PR [#6618](https://github.com/tobymao/sqlglot/pull/6618) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`4aea018`](https://github.com/tobymao/sqlglot/commit/4aea018c2d4b701ac1b895445ef390307666693f) - **duckdb**: Add transpilation support for nanoseconds used in date/time functions. 
*(PR [#6617](https://github.com/tobymao/sqlglot/pull/6617) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`dea22ca`](https://github.com/tobymao/sqlglot/commit/dea22ca07076c1ef6a08f06f9fdc6070ca0fecb8) - **singlestore**: support `RECORD` type *(PR [#6635](https://github.com/tobymao/sqlglot/pull/6635) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`77783da`](https://github.com/tobymao/sqlglot/commit/77783da1329ad5d681a7d37928e4dbedd68ab365) - **clickhouse**: support: var-args in `xor` *(PR [#6634](https://github.com/tobymao/sqlglot/pull/6634) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b75a3e3`](https://github.com/tobymao/sqlglot/commit/b75a3e3b5d948f55df5b0096d116edf3b898e5e1) - **mysql**: handle BINARY keyword *(PR [#6636](https://github.com/tobymao/sqlglot/pull/6636) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`c17878a`](https://github.com/tobymao/sqlglot/commit/c17878a35c761824b58b0e0743585e3a5717d1b4) - add ability to create dialect plugins *(PR [#6627](https://github.com/tobymao/sqlglot/pull/6627) by [@vgvoleg](https://github.com/vgvoleg))* - :arrow_lower_right: *addresses issue [#6626](https://github.com/tobymao/sqlglot/issues/6626) opened by [@vgvoleg](https://github.com/vgvoleg)* - [`e7b5d6f`](https://github.com/tobymao/sqlglot/commit/e7b5d6f1b8031fbf25e1fd7c3b42309c4aae8810) - **snowflake**: support transpilation of TRY_TO_BINARY from Snowflake to DuckDB *(PR [#6629](https://github.com/tobymao/sqlglot/pull/6629) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`9c54329`](https://github.com/tobymao/sqlglot/commit/9c543291a0e28ad044523747d19262243fed1f5d) - **snowflake**: Type annotation for Snowflake ENCRYPT and DECRYPT functions *(PR [#6643](https://github.com/tobymao/sqlglot/pull/6643) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`7870bd0`](https://github.com/tobymao/sqlglot/commit/7870bd0dc7503d0a1863d7623be7fc12bb9412e4) - 
**snowflake**: type annotation for COVAR_POP and COVAR_SAMP *(PR [#6656](https://github.com/tobymao/sqlglot/pull/6656) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`2fa29b1`](https://github.com/tobymao/sqlglot/commit/2fa29b1e0feb6d8f9290c105e5fa0f349a011e22) - **snowflake**: Type annotation for ARRAY_REMOVE function *(PR [#6653](https://github.com/tobymao/sqlglot/pull/6653) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`3d59af5`](https://github.com/tobymao/sqlglot/commit/3d59af557caf5c0fd109ae687b8408264543f9ea) - **snowflake**: Added UNIFORM transpilation for Snowflake to DuckDB *(PR [#6640](https://github.com/tobymao/sqlglot/pull/6640) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`5552f12`](https://github.com/tobymao/sqlglot/commit/5552f121f9c9a8103fc3ccf58fd5eed076fe0575) - annotation support for LOCALTIME function *(PR [#6651](https://github.com/tobymao/sqlglot/pull/6651) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a669651`](https://github.com/tobymao/sqlglot/commit/a6696518efefabaf815d7c61f1ae923c39ce6107) - **snowflake**: Type annotation for Snowflake LOCALTIMESTAMP *(PR [#6652](https://github.com/tobymao/sqlglot/pull/6652) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`40ccce4`](https://github.com/tobymao/sqlglot/commit/40ccce492746cc969467c234b9c7d345454da683) - **duckdb**: Transpile Snowflake NORMAL to DuckDB using Box-Muller transform *(PR [#6654](https://github.com/tobymao/sqlglot/pull/6654) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`d6bf569`](https://github.com/tobymao/sqlglot/commit/d6bf569ef2442d5055ab788a2078adf485cc1120) - **snowflake**: updates for Snowflake to DuckDB transpilation of TO_TIMESTAMP functions *(PR [#6622](https://github.com/tobymao/sqlglot/pull/6622) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - 
[`51af50e`](https://github.com/tobymao/sqlglot/commit/51af50e937dafb80d42cd639b7fb8e139d811ea0) - **snowflake**: support out of range values for DATE_FROM_PARTS when transpiling to DuckDB *(PR [#6671](https://github.com/tobymao/sqlglot/pull/6671) by [@toriwei](https://github.com/toriwei))* - [`13251fd`](https://github.com/tobymao/sqlglot/commit/13251fd7f03d31af69b86a4ef5bb6e940cc9318b) - **mysql**: annotation support for ELT function *(PR [#6659](https://github.com/tobymao/sqlglot/pull/6659) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b287e4e`](https://github.com/tobymao/sqlglot/commit/b287e4ef9e05c0ba1fadc3638977e994eb911834) - **snowflake**: Support transpilation for BITMAP_BUCKET_NUMBER *(PR [#6668](https://github.com/tobymao/sqlglot/pull/6668) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d5b84cb`](https://github.com/tobymao/sqlglot/commit/d5b84cbbd1406ebd9f5e2373e12b0d15dcb85d7f) - **diff**: ignore comments in the semantic differ *(PR [#6675](https://github.com/tobymao/sqlglot/pull/6675) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6673](https://github.com/tobymao/sqlglot/issues/6673) opened by [@GaryLiuGTA](https://github.com/GaryLiuGTA)* - [`26c16f2`](https://github.com/tobymao/sqlglot/commit/26c16f2f4bb9aaa91253a06c337b8320dc4609c9) - **snowflake**: transpile CORR with NaN-->NULL *(PR [#6619](https://github.com/tobymao/sqlglot/pull/6619) by [@treysp](https://github.com/treysp))* - [`c9a70c0`](https://github.com/tobymao/sqlglot/commit/c9a70c004e0ec563e8a712668a30da9856cb0516) - **snowflake**: update transpilation of DATE_TRUNC to duckdb *(PR [#6644](https://github.com/tobymao/sqlglot/pull/6644) by [@toriwei](https://github.com/toriwei))* - [`a4be3fa`](https://github.com/tobymao/sqlglot/commit/a4be3faf63c981a2b10b1f8c709581acf59277ce) - **oracle,exasol**: support SYSTIMESTAMP as NO_PAREN_FUNCTION *(PR [#6677](https://github.com/tobymao/sqlglot/pull/6677) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - :arrow_lower_right: *addresses issue [#6686](https://github.com/tobymao/sqlglot/issues/6686) opened by [@Hfuegl](https://github.com/Hfuegl)* - [`243448c`](https://github.com/tobymao/sqlglot/commit/243448ca5ccc6a26d680661dcb7d921a055dbfa9) - **duckdb**: Transpile date extraction from Snowflake to DuckDB (YEAR*, WEEK*, DAY*, etc) *(PR [#6666](https://github.com/tobymao/sqlglot/pull/6666) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`efa77df`](https://github.com/tobymao/sqlglot/commit/efa77df3734f2e8534ca911d39c9a5c6103a82fa) - **mysql**: translate UPDATE … FROM … syntax to UPDATE … JOIN … when generating MySQL *(PR [#6655](https://github.com/tobymao/sqlglot/pull/6655) by [@brdbry](https://github.com/brdbry))* - [`65acd6c`](https://github.com/tobymao/sqlglot/commit/65acd6c00f023c3279947b23e04139bc9554db85) - **duckdb**: transpile snowflake SYSDATE *(PR [#6693](https://github.com/tobymao/sqlglot/pull/6693) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`36ff96c`](https://github.com/tobymao/sqlglot/commit/36ff96c727e40dbe467d6338128f87a74b99a980) - **snowflake**: support transpilation of BITSHIFTLEFT and BITSHIFTRIGHT *(PR [#6586](https://github.com/tobymao/sqlglot/pull/6586) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`142acd2`](https://github.com/tobymao/sqlglot/commit/142acd21ae9e1b3b1495707b2b6811db830fa61f) - pretty formatting for nested data types *(PR [#6707](https://github.com/tobymao/sqlglot/pull/6707) by [@treysp](https://github.com/treysp))* - [`a503a7a`](https://github.com/tobymao/sqlglot/commit/a503a7adbfe3352924e94a21303a2ac23903833c) - **optimizer**: annotate Encode for hive dialect hierarchy *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c447df7`](https://github.com/tobymao/sqlglot/commit/c447df71a645f55e4798887bde4d42285f5dea4a) - **spark,databricks**: annotate the LOCALTIMESTAMP *(PR 
[#6709](https://github.com/tobymao/sqlglot/pull/6709) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`e91a14b`](https://github.com/tobymao/sqlglot/commit/e91a14b5b325d3d71fbdbe701e531588723bcd2e) - **spark,databricks**: annotate the CURRENT_TIMEZONE *(PR [#6708](https://github.com/tobymao/sqlglot/pull/6708) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b53c8d4`](https://github.com/tobymao/sqlglot/commit/b53c8d47481d735c74ae8704a90594f33263eee8) - **hive,spark,databricks**: annotate the UNIX_TIMESTAMP *(PR [#6717](https://github.com/tobymao/sqlglot/pull/6717) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`cfb06ab`](https://github.com/tobymao/sqlglot/commit/cfb06ab58c606715e688d39e704b1587fcd256e8) - **duckdb**: add support for transpiling ARRAY_AGG with ORDER BY *(PR [#6691](https://github.com/tobymao/sqlglot/pull/6691) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2965077`](https://github.com/tobymao/sqlglot/commit/296507773fc4476862cc1ee374f88c7d73cb147f) - **parser**: add support for JSON_KEYS func *(PR [#6718](https://github.com/tobymao/sqlglot/pull/6718) by [@geooo109](https://github.com/geooo109))* - [`52de0a5`](https://github.com/tobymao/sqlglot/commit/52de0a5c9ebc14c738041e47a385981f072291a9) - **duckdb**: Add transpilation support for float/decimal numbers for TIME functions *(PR [#6719](https://github.com/tobymao/sqlglot/pull/6719) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`bbeb881`](https://github.com/tobymao/sqlglot/commit/bbeb881016e4b84dc5cb6a29846224784591abba) - **oracle**: Added support for IN/OUT keywords in stored procedure parameters *(PR [#6710](https://github.com/tobymao/sqlglot/pull/6710) by [@rsanchez-xtillion](https://github.com/rsanchez-xtillion))* - [`0a1c7ab`](https://github.com/tobymao/sqlglot/commit/0a1c7abddc0cbd2f853a431848c1ae45e6876ba2) - **snowflake**: support transpilation of GETBIT from snowflake to duckdb *(PR 
[#6692](https://github.com/tobymao/sqlglot/pull/6692) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`cb320c4`](https://github.com/tobymao/sqlglot/commit/cb320c41361f0bb7ad71522366ff4cb8691607bf) - **optimizer**: bq annotate type for raw strings *(PR [#6723](https://github.com/tobymao/sqlglot/pull/6723) by [@geooo109](https://github.com/geooo109))* - [`09fa467`](https://github.com/tobymao/sqlglot/commit/09fa467461656d5c4d4e57c7044c46ff4fcf3f7f) - **optimizer**: Annotate ATAN2 for Spark & DBX *(PR [#6725](https://github.com/tobymao/sqlglot/pull/6725) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b59b3bf`](https://github.com/tobymao/sqlglot/commit/b59b3bfd06c18120db2938748c561495dd885ab4) - **optimizer**: Annotate TANH for Spark & DBX *(PR [#6726](https://github.com/tobymao/sqlglot/pull/6726) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`917071b`](https://github.com/tobymao/sqlglot/commit/917071b42b91f5111d88d599a1caed83e6d7661c) - **snowflake**: Support transpilation of TO_TIME and TRY_TO_TIME from snowflake to duckdb *(PR [#6690](https://github.com/tobymao/sqlglot/pull/6690) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`e50a97e`](https://github.com/tobymao/sqlglot/commit/e50a97ed05d50be9fbea7720511a760c11f4a86e) - **snowflake**: Type annotate for Snowflake Kurtosis *(PR [#6720](https://github.com/tobymao/sqlglot/pull/6720) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`197943f`](https://github.com/tobymao/sqlglot/commit/197943fb56e997cbb7e77a8b2d9f0c2453d052c7) - **postgres**: support index predicate in conflict `INSERT` clause closes [#6727](https://github.com/tobymao/sqlglot/pull/6727) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`8f16463`](https://github.com/tobymao/sqlglot/commit/8f16463ebdbb7f51ab778cf34ab89709e018691f) - **presto**: Add support for WEEK function *(PR [#6593](https://github.com/tobymao/sqlglot/pull/6593) by 
[@chrisqu777](https://github.com/chrisqu777))* - [`1497ee6`](https://github.com/tobymao/sqlglot/commit/1497ee6197dd1bd15926ab40bb09f03f72a4da34) - **postgres**: corrected handling of ToChar for Postgres *(commit by [@dhawkins1234](https://github.com/dhawkins1234))* - [`c52705d`](https://github.com/tobymao/sqlglot/commit/c52705dec16390e0651d8a68c3500d8fa11dff12) - **optimizer**: annotate EXISTS, ALL, ANY as BOOLEAN *(PR [#6590](https://github.com/tobymao/sqlglot/pull/6590) by [@doripo](https://github.com/doripo))* - [`eab6f72`](https://github.com/tobymao/sqlglot/commit/eab6f72a2829def3fd6958f47353ba60ed0e0334) - **parser**: only parse CREATE TABLE pk key ordering in tsql *(PR [#6604](https://github.com/tobymao/sqlglot/pull/6604) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6596](https://github.com/tobymao/sqlglot/issues/6596) opened by [@osmith42](https://github.com/osmith42)* - [`62c0ef0`](https://github.com/tobymao/sqlglot/commit/62c0ef0ad4579a55d65498eddd4e649ae464b559) - **duckdb**: Fix BQ's exp.Date transpilation *(PR [#6595](https://github.com/tobymao/sqlglot/pull/6595) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6581](https://github.com/tobymao/sqlglot/issues/6581) opened by [@nikchha](https://github.com/nikchha)* - [`d8f0bbd`](https://github.com/tobymao/sqlglot/commit/d8f0bbdbd208acc0eccb7e9be331d2661169ad97) - **tokenizer**: conditionally consume +/- in scientific literal notation *(PR [#6610](https://github.com/tobymao/sqlglot/pull/6610) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6608](https://github.com/tobymao/sqlglot/issues/6608) opened by [@pjpjean](https://github.com/pjpjean)* - [`c0b3e5c`](https://github.com/tobymao/sqlglot/commit/c0b3e5c06c7d0ec3335146eab1a86f202298c94a) - **spark**: make_interval week *(PR [#6612](https://github.com/tobymao/sqlglot/pull/6612) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`13014e0`](https://github.com/tobymao/sqlglot/commit/13014e0f01d10f0a078ef8aed4569d3aa8bd741b) - **postgres**: move postgres range parsers to global level *(PR [#6591](https://github.com/tobymao/sqlglot/pull/6591) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`1f3436b`](https://github.com/tobymao/sqlglot/commit/1f3436bfe7ddbbf212d61616153f150d32824450) - **optimizer**: bq robust literal/non-literal type annotation *(PR [#6600](https://github.com/tobymao/sqlglot/pull/6600) by [@geooo109](https://github.com/geooo109))* - [`870d600`](https://github.com/tobymao/sqlglot/commit/870d600a93108b1a1d68936244564548dec5f683) - **postgres**: support: postgres point *(PR [#6615](https://github.com/tobymao/sqlglot/pull/6615) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b8b22fa`](https://github.com/tobymao/sqlglot/commit/b8b22fae5cf17ebfe119f815c4cc0ef2dc8132bc) - **trino**: mark as supporting `...EXCEPT ALL` *(PR [#6616](https://github.com/tobymao/sqlglot/pull/6616) by [@NickCrews](https://github.com/NickCrews))* - [`9382ebd`](https://github.com/tobymao/sqlglot/commit/9382ebdd79c89e9c92ae98a29147c1523cec415f) - **postgres**: Fix exp.WidthBucket required args *(PR [#6621](https://github.com/tobymao/sqlglot/pull/6621) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6620](https://github.com/tobymao/sqlglot/issues/6620) opened by [@codetalker-ai](https://github.com/codetalker-ai)* - [`302fda0`](https://github.com/tobymao/sqlglot/commit/302fda0151094bc074b98847390cd554414258bb) - **clickhouse**: support and, or *(PR [#6625](https://github.com/tobymao/sqlglot/pull/6625) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`00c80b1`](https://github.com/tobymao/sqlglot/commit/00c80b12936811dde7e661720c1bd17c6ba9271a) - Parse joins with derived tables in UPDATE *(PR [#6632](https://github.com/tobymao/sqlglot/pull/6632) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue 
[#6628](https://github.com/tobymao/sqlglot/issues/6628) opened by [@marktdodds](https://github.com/marktdodds)* - [`2bf9405`](https://github.com/tobymao/sqlglot/commit/2bf9405adf9c9c72c77f7aa8ab792779a3b9c5f3) - **oracle**: USING keyword in chr *(PR [#6637](https://github.com/tobymao/sqlglot/pull/6637) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`235fc14`](https://github.com/tobymao/sqlglot/commit/235fc14f40c422e6339da0a6b17252ab9eb18ec2) - **mysql,singlestore**: support charset *(PR [#6633](https://github.com/tobymao/sqlglot/pull/6633) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`b0afbac`](https://github.com/tobymao/sqlglot/commit/b0afbaca100e73d094f247371fff13b5a4b5290a) - **exasol**: fix TO_CHAR parsing leaking canonical datetime format tokens *(PR [#6650](https://github.com/tobymao/sqlglot/pull/6650) by [@nnamdi16](https://github.com/nnamdi16))* - [`6ecbb01`](https://github.com/tobymao/sqlglot/commit/6ecbb01a37708347cc6595ed8f17eb1a623d37eb) - **druid**: array expression should use square brackets *(PR [#6664](https://github.com/tobymao/sqlglot/pull/6664) by [@its-felix](https://github.com/its-felix))* - [`af50c1c`](https://github.com/tobymao/sqlglot/commit/af50c1ce464d42d4df1fac8876ae25aea556912a) - **duckdb**: Fix NOT precedence for JSON extractions *(PR [#6670](https://github.com/tobymao/sqlglot/pull/6670) by [@kyle-cheung](https://github.com/kyle-cheung))* - [`460b3a2`](https://github.com/tobymao/sqlglot/commit/460b3a2ae62d8294c57068453b5a11e4a7e12a91) - **exasol**: Allow varlen args in exp.MD5Digest *(PR [#6685](https://github.com/tobymao/sqlglot/pull/6685) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6683](https://github.com/tobymao/sqlglot/issues/6683) opened by [@Hfuegl](https://github.com/Hfuegl)* - [`dcdee68`](https://github.com/tobymao/sqlglot/commit/dcdee68cb1a77286232865de9df8d8a01898fcc7) - **spark**: Allow non aggregation functions in PIVOT *(PR 
[#6687](https://github.com/tobymao/sqlglot/pull/6687) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6684](https://github.com/tobymao/sqlglot/issues/6684) opened by [@gauravdawar-e6](https://github.com/gauravdawar-e6)* - [`29cff1f`](https://github.com/tobymao/sqlglot/commit/29cff1fbe0011476b22e1b1442af857499f871a6) - **optimizer**: bq robust literal/non-literal binary annotation *(PR [#6688](https://github.com/tobymao/sqlglot/pull/6688) by [@geooo109](https://github.com/geooo109))* - [`f866c83`](https://github.com/tobymao/sqlglot/commit/f866c835b84e9503b2f335a0da4b89e3426aa9e7) - **oracle**: properly parse xmlelement *(commit by [@georgesittas](https://github.com/georgesittas))* - [`91b3678`](https://github.com/tobymao/sqlglot/commit/91b3678c4e2380b3b344cc37643d12528c3e142b) - **resolver**: correctly resolve unnest alias shadowing for BigQuery *(PR [#6665](https://github.com/tobymao/sqlglot/pull/6665) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2a3f1fe`](https://github.com/tobymao/sqlglot/commit/2a3f1fe854746141299303fdae36ce33688722e1) - **spark, databricks**: parse LTRIM/RTRIM *(PR [#6699](https://github.com/tobymao/sqlglot/pull/6699) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6696](https://github.com/tobymao/sqlglot/issues/6696) opened by [@avinmathew](https://github.com/avinmathew)* - [`fb8f57a`](https://github.com/tobymao/sqlglot/commit/fb8f57ac54a4e0213941f3bd057ccb03c4e125b7) - **doris, starrocks**: Fix UPDATE statement for multi tables *(PR [#6700](https://github.com/tobymao/sqlglot/pull/6700) by [@VaggelisD](https://github.com/VaggelisD))* - [`21f9d3c`](https://github.com/tobymao/sqlglot/commit/21f9d3ca7c6fa0d38f0f63000f904589b91c7d0a) - **optimizer**: Normalize struct field names when annotating types *(PR [#6674](https://github.com/tobymao/sqlglot/pull/6674) by [@vchan](https://github.com/vchan))* - 
[`7362c23`](https://github.com/tobymao/sqlglot/commit/7362c2357eba540a12eea079e9f4212f3545c8d1) - **oracle**: interval expressions *(PR [#6648](https://github.com/tobymao/sqlglot/pull/6648) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a84dbc2`](https://github.com/tobymao/sqlglot/commit/a84dbc2de051539ac4dfe10e0fa991c988840874) - **parser**: prevent INTERVAL from consuming GENERATED as interval unit *(PR [#6714](https://github.com/tobymao/sqlglot/pull/6714) by [@harshsinh](https://github.com/harshsinh))* - :arrow_lower_right: *fixes issue [#6713](https://github.com/tobymao/sqlglot/issues/6713) opened by [@harshsinh](https://github.com/harshsinh)* ### :recycle: Refactors - [`399c80f`](https://github.com/tobymao/sqlglot/commit/399c80f2e66a1acdcd7d567e44c297db7071dc8f) - rename args for CovarSamp/Pop, stop inheriting from Binary *(commit by [@georgesittas](https://github.com/georgesittas))* - [`14b03ae`](https://github.com/tobymao/sqlglot/commit/14b03ae492c9cac67ae1e78b67a83427e4fe6681) - flip Getbit lsb flag to msb since more dialects match lsb *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`e0193ea`](https://github.com/tobymao/sqlglot/commit/e0193eaeea0036bbe835c49a64a4120365525b89) - Fix make style of 6593 *(PR [#6597](https://github.com/tobymao/sqlglot/pull/6597) by [@VaggelisD](https://github.com/VaggelisD))* - [`0534db1`](https://github.com/tobymao/sqlglot/commit/0534db11022c9cbaf91015b38039da48945aac2a) - Fix integration test failing with empty PR description *(PR [#6598](https://github.com/tobymao/sqlglot/pull/6598) by [@VaggelisD](https://github.com/VaggelisD))* - [`c523e2a`](https://github.com/tobymao/sqlglot/commit/c523e2a4ee3b347eacf6f4f9a3629b649e902f47) - Follow up of 6551 *(PR [#6599](https://github.com/tobymao/sqlglot/pull/6599) by [@VaggelisD](https://github.com/VaggelisD))* - [`26c3c1f`](https://github.com/tobymao/sqlglot/commit/26c3c1fcea5f105432430de3d1e2b00627191706) - Fix integration test 
condition for skipping fork PRs *(PR [#6606](https://github.com/tobymao/sqlglot/pull/6606) by [@VaggelisD](https://github.com/VaggelisD))* - [`d0965ba`](https://github.com/tobymao/sqlglot/commit/d0965baa8224f72f01a04f2d9b72399f04b6103e) - Add test for PR 6616 *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`020fb05`](https://github.com/tobymao/sqlglot/commit/020fb05971f95ba3962d051c1ef795147ee9e19d) - refactor duckdb `ZIPF` transpilation logic *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a4e1dec`](https://github.com/tobymao/sqlglot/commit/a4e1dec87fc854ee15789672189753cf2e4450e1) - Refactor PR 6617 *(PR [#6630](https://github.com/tobymao/sqlglot/pull/6630) by [@VaggelisD](https://github.com/VaggelisD))* - [`7f2a74c`](https://github.com/tobymao/sqlglot/commit/7f2a74c1025ab2c9627889db174a7f6404c2d585) - refactor `RANDSTR` duckdb transpilation logic *(PR [#6631](https://github.com/tobymao/sqlglot/pull/6631) by [@georgesittas](https://github.com/georgesittas))* - [`9a41cfc`](https://github.com/tobymao/sqlglot/commit/9a41cfcc1b57488373d7a63d17b6a0d91f90c9e8) - Refactor PR 6637 *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`2f8ffcf`](https://github.com/tobymao/sqlglot/commit/2f8ffcfac03a09460a4603d66ee7b13affbc5ebf) - **snowflake**: Adding type annotation tests for Snowflake's STDDEV / STDDEV_SAMP, STDDEV_POP *(PR [#6641](https://github.com/tobymao/sqlglot/pull/6641) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`d46969d`](https://github.com/tobymao/sqlglot/commit/d46969db50ff52b8657d5b33f0a106b69dbd1e2a) - **optimizer**: annotate snowflake ARRAY_APPEND and ARRAY_PREPEND *(PR [#6645](https://github.com/tobymao/sqlglot/pull/6645) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`4deaabc`](https://github.com/tobymao/sqlglot/commit/4deaabcedb823ff29174674fa3d398cb7966e219) - array_append/array_prepend tests clean up *(commit by [@geooo109](https://github.com/geooo109))* - 
[`b3ea996`](https://github.com/tobymao/sqlglot/commit/b3ea99607b4947905bfb55994309c0ee3e646c62) - array_append/array_prepend databricks tests *(commit by [@geooo109](https://github.com/geooo109))* - [`7f6b9f2`](https://github.com/tobymao/sqlglot/commit/7f6b9f2eda20509b442d28f7fc470e7a0aee63b7) - snowflake remove aead arg gen for DECRYPT *(commit by [@geooo109](https://github.com/geooo109))* - [`1fdcd63`](https://github.com/tobymao/sqlglot/commit/1fdcd6389587da95dbda34227ec688181055dcf9) - Fix onboarding md paragraph *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`5d76a45`](https://github.com/tobymao/sqlglot/commit/5d76a45ff71feb19eee4b294f2c128e30976cb5b) - Follow up 6677 *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`8d49255`](https://github.com/tobymao/sqlglot/commit/8d492558ff18b6af53a1aaea47cc0a863fbfe482) - Remove incorrect test of PR6655 *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`94e6fe0`](https://github.com/tobymao/sqlglot/commit/94e6fe03752500ddeace317081dc758d36c78c1f) - Follow up of 6693 *(PR [#6698](https://github.com/tobymao/sqlglot/pull/6698) by [@VaggelisD](https://github.com/VaggelisD))* - [`dd4d55a`](https://github.com/tobymao/sqlglot/commit/dd4d55af58a7602a1924f6b1f9a93f58636f172c) - _annotate_by_args UNKNOWN *(PR [#6703](https://github.com/tobymao/sqlglot/pull/6703) by [@geooo109](https://github.com/geooo109))* - [`2ecfc27`](https://github.com/tobymao/sqlglot/commit/2ecfc27daf3570bca442797e0b4f4dc4e5363dd6) - Follow up 6648 *(PR [#6716](https://github.com/tobymao/sqlglot/pull/6716) by [@VaggelisD](https://github.com/VaggelisD))* - [`bbaba5f`](https://github.com/tobymao/sqlglot/commit/bbaba5fd7d0111947035534168593a5a3ee6839a) - **optimizer**: annotate type for snowflake ARRAY_CAT *(PR [#6721](https://github.com/tobymao/sqlglot/pull/6721) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`4968ccd`](https://github.com/tobymao/sqlglot/commit/4968ccd1d74d58dace3793d4dbf7a0bff22d3300) - 
add deployment instructions in README *(PR [#6722](https://github.com/tobymao/sqlglot/pull/6722) by [@georgesittas](https://github.com/georgesittas))* - [`2893ac3`](https://github.com/tobymao/sqlglot/commit/2893ac399d24846482b45fd79e99b90f5ef2cca6) - **optimizer**: Remove duplicate INITCAP annotation from Snowflake *(commit by [@VaggelisD](https://github.com/VaggelisD))* - [`33b8a5d`](https://github.com/tobymao/sqlglot/commit/33b8a5d25bf49791fb95ec20132ab6ff0bb885e0) - bump sqlglotrs to 0.11.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v28.5.0] - 2025-12-17 ### :boom: BREAKING CHANGES - due to [`4dfc810`](https://github.com/tobymao/sqlglot/commit/4dfc810f45d5a617ada2ba4ed57002549c8d1853) - support transpilation of BOOLNOT from snowflake to duckdb *(PR [#6577](https://github.com/tobymao/sqlglot/pull/6577) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of BOOLNOT from snowflake to duckdb (#6577) - due to [`b857185`](https://github.com/tobymao/sqlglot/commit/b8571850ca55802671484d118560a7b90e893c39) - remove Sysdate in favor of CurrentTimestamp with sysdate arg *(PR [#6584](https://github.com/tobymao/sqlglot/pull/6584) by [@georgesittas](https://github.com/georgesittas))*: remove Sysdate in favor of CurrentTimestamp with sysdate arg (#6584) - due to [`bf217d6`](https://github.com/tobymao/sqlglot/commit/bf217d69f92efcbce5b69d637976e915ca63998d) - make `JSONArrayAgg` an `AggFunc` *(PR [#6585](https://github.com/tobymao/sqlglot/pull/6585) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: make `JSONArrayAgg` an `AggFunc` (#6585) - due to [`604efe5`](https://github.com/tobymao/sqlglot/commit/604efe5cf5812d0b1dd9d625ed278907d0d7fb8f) - Type annotation fixes for TO_TIMESTAMP* *(PR [#6557](https://github.com/tobymao/sqlglot/pull/6557) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Type annotation fixes for TO_TIMESTAMP* (#6557) ### :sparkles: New Features - 
[`4dfc810`](https://github.com/tobymao/sqlglot/commit/4dfc810f45d5a617ada2ba4ed57002549c8d1853) - **snowflake**: support transpilation of BOOLNOT from snowflake to duckdb *(PR [#6577](https://github.com/tobymao/sqlglot/pull/6577) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7077981`](https://github.com/tobymao/sqlglot/commit/707798166c1b45e633bd0e8d02d1c0146598b03a) - **snowflake**: Transpilation of Snowflake MONTHS_BETWEEN to DuckDB *(PR [#6561](https://github.com/tobymao/sqlglot/pull/6561) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`604efe5`](https://github.com/tobymao/sqlglot/commit/604efe5cf5812d0b1dd9d625ed278907d0d7fb8f) - **snowflake**: Type annotation fixes for TO_TIMESTAMP* *(PR [#6557](https://github.com/tobymao/sqlglot/pull/6557) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`3567880`](https://github.com/tobymao/sqlglot/commit/35678808dafb37c5d37c806682e6af9b6351bced) - add tokens to functions *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`b857185`](https://github.com/tobymao/sqlglot/commit/b8571850ca55802671484d118560a7b90e893c39) - **snowflake**: remove Sysdate in favor of CurrentTimestamp with sysdate arg *(PR [#6584](https://github.com/tobymao/sqlglot/pull/6584) by [@georgesittas](https://github.com/georgesittas))* - [`bf217d6`](https://github.com/tobymao/sqlglot/commit/bf217d69f92efcbce5b69d637976e915ca63998d) - make `JSONArrayAgg` an `AggFunc` *(PR [#6585](https://github.com/tobymao/sqlglot/pull/6585) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`48f5e99`](https://github.com/tobymao/sqlglot/commit/48f5e999d3d3f6ad51c30e7a33a3a574d0e50d2b) - **duckdb**: preserve l/r-trim syntax *(PR [#6588](https://github.com/tobymao/sqlglot/pull/6588) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6587](https://github.com/tobymao/sqlglot/issues/6587) opened by 
[@baruchoxman](https://github.com/baruchoxman)* ### :wrench: Chores - [`ea0263a`](https://github.com/tobymao/sqlglot/commit/ea0263aa555591b03b06a4b6dee093fe42b545f9) - Skip integration tests GA for external contributors & fix `git diff` *(PR [#6582](https://github.com/tobymao/sqlglot/pull/6582) by [@VaggelisD](https://github.com/VaggelisD))* ## [v28.4.1] - 2025-12-16 ### :boom: BREAKING CHANGES - due to [`cfc9346`](https://github.com/tobymao/sqlglot/commit/cfc9346ba0477523d3de8f923d83fd09814b22ac) - bump sqlglotrs to 0.10.0 *(commit by [@tobymao](https://github.com/tobymao))*: bump sqlglotrs to 0.10.0 ### :wrench: Chores - [`cfc9346`](https://github.com/tobymao/sqlglot/commit/cfc9346ba0477523d3de8f923d83fd09814b22ac) - bump sqlglotrs to 0.10.0 *(commit by [@tobymao](https://github.com/tobymao))* ## [v28.4.0] - 2025-12-16 ### :boom: BREAKING CHANGES - due to [`938f4b6`](https://github.com/tobymao/sqlglot/commit/938f4b6ebc1c0d26bd3c1400883978c79a435189) - annotate type for LAST_DAY *(PR [#5528](https://github.com/tobymao/sqlglot/pull/5528) by [@geooo109](https://github.com/geooo109))*: annotate type for LAST_DAY (#5528) - due to [`7d12dac`](https://github.com/tobymao/sqlglot/commit/7d12dac613ba5119334408f2c52cb270067156d9) - annotate type for bigquery GENERATE_TIMESTAMP_ARRAY *(PR [#5529](https://github.com/tobymao/sqlglot/pull/5529) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery GENERATE_TIMESTAMP_ARRAY (#5529) - due to [`d50ebe2`](https://github.com/tobymao/sqlglot/commit/d50ebe286dd8e2836b9eb2a3406f15976db3aa05) - annotate type for bigquery TIME_TRUNC *(PR [#5530](https://github.com/tobymao/sqlglot/pull/5530) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIME_TRUNC (#5530) - due to [`29748be`](https://github.com/tobymao/sqlglot/commit/29748be7dfc10edc9f29665c98327883dd25c13d) - annotate type for bigquery TIME *(PR [#5531](https://github.com/tobymao/sqlglot/pull/5531) by 
[@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIME (#5531) - due to [`7003b3f`](https://github.com/tobymao/sqlglot/commit/7003b3fa39cd455e3643066364696708d1ac4f38) - parse and annotate type for bigquery DATE_FROM_UNIX_DATE *(PR [#5532](https://github.com/tobymao/sqlglot/pull/5532) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery DATE_FROM_UNIX_DATE (#5532) - due to [`a276ca6`](https://github.com/tobymao/sqlglot/commit/a276ca6fd5f9d47fa8c90fcfa19f9864e7a28f8f) - parse and annotate type for bigquery JUSTIFY funcs *(PR [#5534](https://github.com/tobymao/sqlglot/pull/5534) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery JUSTIFY funcs (#5534) - due to [`374178e`](https://github.com/tobymao/sqlglot/commit/374178e22fe8d2d2275b65fe08e27ef66c611220) - parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS *(PR [#5535](https://github.com/tobymao/sqlglot/pull/5535) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS (#5535) - due to [`1d8d1ab`](https://github.com/tobymao/sqlglot/commit/1d8d1abe459053a135a46525d0a13bb861220927) - annotate type for bigquery DATE_TRUNC *(PR [#5540](https://github.com/tobymao/sqlglot/pull/5540) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery DATE_TRUNC (#5540) - due to [`306ba65`](https://github.com/tobymao/sqlglot/commit/306ba6531839ea2823f5165de7bde01d17560845) - annotate type for bigquery TIMESTAMP_TRUNC *(PR [#5541](https://github.com/tobymao/sqlglot/pull/5541) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIMESTAMP_TRUNC (#5541) - due to [`d799c5a`](https://github.com/tobymao/sqlglot/commit/d799c5af23010a67c29edb6d45a40fb24903e1a3) - preserve projection names when merging subqueries *(commit by [@snovik75](https://github.com/snovik75))*: preserve projection names when merging subqueries - due to 
[`8130bd4`](https://github.com/tobymao/sqlglot/commit/8130bd40815803a6781ee8f20fccd30987516192) - WEEKDAY of WEEK as VAR *(PR [#5552](https://github.com/tobymao/sqlglot/pull/5552) by [@geooo109](https://github.com/geooo109))*: WEEKDAY of WEEK as VAR (#5552) - due to [`f3ffe19`](https://github.com/tobymao/sqlglot/commit/f3ffe19ec01533c5f27b9d3a7b6704b83c005118) - annotate type for bigquery format_time *(PR [#5559](https://github.com/tobymao/sqlglot/pull/5559) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery format_time (#5559) - due to [`6872b43`](https://github.com/tobymao/sqlglot/commit/6872b43ba17a39137172fd2fa9f0d059ce595ef9) - use dialect in DataType.build fixes [#5560](https://github.com/tobymao/sqlglot/pull/5560) *(commit by [@georgesittas](https://github.com/georgesittas))*: use dialect in DataType.build fixes #5560 - due to [`3ab3690`](https://github.com/tobymao/sqlglot/commit/3ab369096313b418699b7942b1c513c0c66a5331) - parse and annotate type for bigquery PARSE_DATETIME *(PR [#5558](https://github.com/tobymao/sqlglot/pull/5558) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery PARSE_DATETIME (#5558) - due to [`e5da951`](https://github.com/tobymao/sqlglot/commit/e5da951542eb55691bc43fbbfbec4a30100de038) - parse and annotate type for bigquery PARSE_TIME *(PR [#5561](https://github.com/tobymao/sqlglot/pull/5561) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery PARSE_TIME (#5561) - due to [`798e213`](https://github.com/tobymao/sqlglot/commit/798e213fd10c3b61afbd8cef621546de65fa6f26) - improve transpilability of ANY_VALUE closes [#5563](https://github.com/tobymao/sqlglot/pull/5563) *(commit by [@georgesittas](https://github.com/georgesittas))*: improve transpilability of ANY_VALUE closes #5563 - due to [`8c0cb76`](https://github.com/tobymao/sqlglot/commit/8c0cb764fd825062fb7334032b8eeffbc39627d5) - more robust CREATE SEQUENCE *(PR 
[#5566](https://github.com/tobymao/sqlglot/pull/5566) by [@geooo109](https://github.com/geooo109))*: more robust CREATE SEQUENCE (#5566) - due to [`c7041c7`](https://github.com/tobymao/sqlglot/commit/c7041c71250b17192c2f25fb8f33407324d332c2) - parse and annotate type for bigquery BYTE_LENGTH *(PR [#5568](https://github.com/tobymao/sqlglot/pull/5568) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery BYTE_LENGTH (#5568) - due to [`a6c61c3`](https://github.com/tobymao/sqlglot/commit/a6c61c34f1e168c97dd5c2b8ec071372ba593992) - parse and annotate type for bigquery CODE_POINTS_TO_STRING *(PR [#5569](https://github.com/tobymao/sqlglot/pull/5569) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery CODE_POINTS_TO_STRING (#5569) - due to [`51e0335`](https://github.com/tobymao/sqlglot/commit/51e0335377fe2bc2e2a94a623475791e9dd19fb9) - parse and annotate type for bigquery REVERSE *(PR [#5571](https://github.com/tobymao/sqlglot/pull/5571) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery REVERSE (#5571) - due to [`2a33339`](https://github.com/tobymao/sqlglot/commit/2a333395cde71936df911488afcff92cae735e11) - annotate type for bigquery REPLACE *(PR [#5572](https://github.com/tobymao/sqlglot/pull/5572) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery REPLACE (#5572) - due to [`1e6f813`](https://github.com/tobymao/sqlglot/commit/1e6f81343de641e588f1a05ce7dc01bed72bd849) - annotate type for bigquery REGEXP_EXTRACT_ALL *(PR [#5573](https://github.com/tobymao/sqlglot/pull/5573) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery REGEXP_EXTRACT_ALL (#5573) - due to [`d0d62ed`](https://github.com/tobymao/sqlglot/commit/d0d62ede6320b3fd0eee04b7073f5708676dc58c) - support `TO_CHAR` with numeric inputs *(PR [#5570](https://github.com/tobymao/sqlglot/pull/5570) by [@jasonthomassql](https://github.com/jasonthomassql))*: support `TO_CHAR` with
numeric inputs (#5570) - due to [`7928985`](https://github.com/tobymao/sqlglot/commit/7928985a655c3d0244bc9175a37f502b19a5c5f0) - allow dashes in JSONPath keys *(PR [#5574](https://github.com/tobymao/sqlglot/pull/5574) by [@georgesittas](https://github.com/georgesittas))*: allow dashes in JSONPath keys (#5574) - due to [`eb09e6e`](https://github.com/tobymao/sqlglot/commit/eb09e6e32491a05846488de7b72b1dca0e0a2669) - parse and annotate type for bigquery TRANSLATE *(PR [#5575](https://github.com/tobymao/sqlglot/pull/5575) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery TRANSLATE (#5575) - due to [`f9a522b`](https://github.com/tobymao/sqlglot/commit/f9a522b26cd5d643b8b18fa64d70f2a3f0ff2d2c) - parse and annotate type for bigquery SOUNDEX *(PR [#5576](https://github.com/tobymao/sqlglot/pull/5576) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery SOUNDEX (#5576) - due to [`51da41b`](https://github.com/tobymao/sqlglot/commit/51da41b90ce421b154e45add28353ac044640a1c) - annotate type for bigquery MD5 *(PR [#5577](https://github.com/tobymao/sqlglot/pull/5577) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery MD5 (#5577) - due to [`bcf302f`](https://github.com/tobymao/sqlglot/commit/bcf302ff6ad2d0adfc29f708a8b53b5c0e547619) - annotate type for bigquery MIN/MAX BY *(PR [#5579](https://github.com/tobymao/sqlglot/pull/5579) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery MIN/MAX BY (#5579) - due to [`c501d9e`](https://github.com/tobymao/sqlglot/commit/c501d9e6f58e4880e4d23f21f53f72dcb5fdaa8c) - parse and annotate type for bigquery GROUPING *(PR [#5581](https://github.com/tobymao/sqlglot/pull/5581) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery GROUPING (#5581) - due to [`7b180bd`](https://github.com/tobymao/sqlglot/commit/7b180bdc3da9e39946c22970bd2523f7d8beaf29) - raise if query modifier is specified multiple times *(PR 
[#5608](https://github.com/tobymao/sqlglot/pull/5608) by [@georgesittas](https://github.com/georgesittas))*: raise if query modifier is specified multiple times (#5608) - due to [`36602a2`](https://github.com/tobymao/sqlglot/commit/36602a2ecc9ffca98e89044d23e40f33c6ed71e4) - parse LIST_FILTER into ArrayFilter closes [#5633](https://github.com/tobymao/sqlglot/pull/5633) *(commit by [@georgesittas](https://github.com/georgesittas))*: parse LIST_FILTER into ArrayFilter closes #5633 - due to [`0188d21`](https://github.com/tobymao/sqlglot/commit/0188d21d443c991a528eb9d220459890b7dca477) - parse LIST_TRANSFORM into Transform closes [#5634](https://github.com/tobymao/sqlglot/pull/5634) *(commit by [@georgesittas](https://github.com/georgesittas))*: parse LIST_TRANSFORM into Transform closes #5634 - due to [`3ab1d44`](https://github.com/tobymao/sqlglot/commit/3ab1d4487279cab3be2d3764e51516c6db21629d) - Wrap CONCAT items with COALESCE less aggressively *(PR [#5641](https://github.com/tobymao/sqlglot/pull/5641) by [@VaggelisD](https://github.com/VaggelisD))*: Wrap CONCAT items with COALESCE less aggressively (#5641) - due to [`af0b299`](https://github.com/tobymao/sqlglot/commit/af0b299561914953b30ab36004e53dcb92d39e1c) - Qualify columns generated by exp.Aliases *(PR [#5647](https://github.com/tobymao/sqlglot/pull/5647) by [@VaggelisD](https://github.com/VaggelisD))*: Qualify columns generated by exp.Aliases (#5647) - due to [`53aa8fe`](https://github.com/tobymao/sqlglot/commit/53aa8fe7f188012f765066f32c4179035fff036d) - support alter table with check closes [#5649](https://github.com/tobymao/sqlglot/pull/5649) *(commit by [@georgesittas](https://github.com/georgesittas))*: support alter table with check closes #5649 - due to [`1a60a5a`](https://github.com/tobymao/sqlglot/commit/1a60a5a845c7431d7d3d7ccb71119699316f4b41) - Added parsing/generation of JSON_ARRAY_CONTAINS function *(PR [#5661](https://github.com/tobymao/sqlglot/pull/5661) by 
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))*: Added parsing/generation of JSON_ARRAY_CONTAINS function (#5661) - due to [`e0db0a9`](https://github.com/tobymao/sqlglot/commit/e0db0a95d3cb7614242dbd1b439d408e7e7bd475) - add parse and annotate type for bigquery FARM_FINGERPRINT *(PR [#5667](https://github.com/tobymao/sqlglot/pull/5667) by [@geooo109](https://github.com/geooo109))*: add parse and annotate type for bigquery FARM_FINGERPRINT (#5667) - due to [`56588c7`](https://github.com/tobymao/sqlglot/commit/56588c7e22b4db4f0e44696a460483ca1e549163) - Add support for vector_search function. Move predict to BigQuery dialect. *(PR [#5660](https://github.com/tobymao/sqlglot/pull/5660) by [@rloredo](https://github.com/rloredo))*: Add support for vector_search function. Move predict to BigQuery dialect. (#5660) - due to [`a688a0f`](https://github.com/tobymao/sqlglot/commit/a688a0f0d70f87139e531d1419b338b695bec384) - parse and annotate type for bigquery APPROX_TOP_COUNT *(PR [#5670](https://github.com/tobymao/sqlglot/pull/5670) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_TOP_COUNT (#5670) - due to [`3c93fcc`](https://github.com/tobymao/sqlglot/commit/3c93fcce96ec82e78753f6c9dd5fb0e730a82058) - parse and annotate type for bigquery APPROX_TOP_SUM *(PR [#5675](https://github.com/tobymao/sqlglot/pull/5675) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_TOP_SUM (#5675) - due to [`741d45a`](https://github.com/tobymao/sqlglot/commit/741d45a0ca7c1bad67da4393cd10cc9cfa49ea68) - parse and annotate type for bigquery FROM/TO_BASE32 *(PR [#5676](https://github.com/tobymao/sqlglot/pull/5676) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery FROM/TO_BASE32 (#5676) - due to [`9ae045c`](https://github.com/tobymao/sqlglot/commit/9ae045c0405e43b148e3b9261825288ebf09100c) - parse and annotate type for bigquery FROM_HEX *(PR 
[#5679](https://github.com/tobymao/sqlglot/pull/5679) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery FROM_HEX (#5679) - due to [`5a22a25`](https://github.com/tobymao/sqlglot/commit/5a22a254143978989027f6e7f6163019a34f112a) - annotate type for bigquery TO_HEX *(PR [#5680](https://github.com/tobymao/sqlglot/pull/5680) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TO_HEX (#5680) - due to [`5c1eb2d`](https://github.com/tobymao/sqlglot/commit/5c1eb2df5dd3dcc6ed2c8204cec56b5c3d276f87) - parse and annotate type for bq PARSE_BIG/NUMERIC *(PR [#5690](https://github.com/tobymao/sqlglot/pull/5690) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq PARSE_BIG/NUMERIC (#5690) - due to [`311373d`](https://github.com/tobymao/sqlglot/commit/311373d22134de906d1c1cef019541e85e2f7c9f) - parse and annotate type for bq CODE_POINTS_TO_BYTES *(PR [#5686](https://github.com/tobymao/sqlglot/pull/5686) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CODE_POINTS_TO_BYTES (#5686) - due to [`79d9de1`](https://github.com/tobymao/sqlglot/commit/79d9de1745598f8f3ae2c82c1389dd455c946a09) - parse and annotate type for bq TO_CODE_POINTS *(PR [#5685](https://github.com/tobymao/sqlglot/pull/5685) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq TO_CODE_POINTS (#5685) - due to [`5df3ea9`](https://github.com/tobymao/sqlglot/commit/5df3ea92f59125955124ea1883b777b489db3042) - parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING *(PR [#5681](https://github.com/tobymao/sqlglot/pull/5681) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING (#5681) - due to [`c832746`](https://github.com/tobymao/sqlglot/commit/c832746018fbc2c531d5b2a7c7f8cd5d78e511ff) - parse and annotate type for bigquery APPROX_QUANTILES *(PR [#5678](https://github.com/tobymao/sqlglot/pull/5678) by 
[@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_QUANTILES (#5678) - due to [`99e169e`](https://github.com/tobymao/sqlglot/commit/99e169ea13d5be3712a47f6b55b98a4764a3c24d) - parse and annotate type for bq BOOL *(PR [#5697](https://github.com/tobymao/sqlglot/pull/5697) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq BOOL (#5697) - due to [`3f31770`](https://github.com/tobymao/sqlglot/commit/3f31770c793f464fcac1ce2b8dfa03d4b7f0231c) - parse and annotate type for bq FLOAT64 *(PR [#5700](https://github.com/tobymao/sqlglot/pull/5700) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq FLOAT64 (#5700) - due to [`de2fe15`](https://github.com/tobymao/sqlglot/commit/de2fe1503b5bb003431d1f0c7b9ae87932a6cc1c) - annotate type for bq CONTAINS_SUBSTR *(PR [#5705](https://github.com/tobymao/sqlglot/pull/5705) by [@geooo109](https://github.com/geooo109))*: annotate type for bq CONTAINS_SUBSTR (#5705) - due to [`770888f`](https://github.com/tobymao/sqlglot/commit/770888f4e9a9061329e3c416f968f7dd9639fb81) - annotate type for bq NORMALIZE *(PR [#5711](https://github.com/tobymao/sqlglot/pull/5711) by [@geooo109](https://github.com/geooo109))*: annotate type for bq NORMALIZE (#5711) - due to [`506033f`](https://github.com/tobymao/sqlglot/commit/506033f299f7a4c28f6efd8bf715be5dcf73e929) - parse and annotate type for bq NORMALIZE_AND_CASEFOLD *(PR [#5712](https://github.com/tobymao/sqlglot/pull/5712) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq NORMALIZE_AND_CASEFOLD (#5712) - due to [`848aea1`](https://github.com/tobymao/sqlglot/commit/848aea1dbaaeb580b633796dcca06c28314b9c3e) - parse and annotate type for bq OCTET_LENGTH *(PR [#5713](https://github.com/tobymao/sqlglot/pull/5713) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq OCTET_LENGTH (#5713) - due to 
[`727bf83`](https://github.com/tobymao/sqlglot/commit/727bf8378f232188d35834d980b035552999ea3b) - add support for REVOKE DDL *(PR [#5703](https://github.com/tobymao/sqlglot/pull/5703) by [@newtonapple](https://github.com/newtonapple))*: add support for REVOKE DDL (#5703) - due to [`baffd2c`](https://github.com/tobymao/sqlglot/commit/baffd2c0be9657683781f3f8831c47e32dbf68bb) - parse and annotate type for bq REGEXP_INSTR *(PR [#5710](https://github.com/tobymao/sqlglot/pull/5710) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq REGEXP_INSTR (#5710) - due to [`b79eb19`](https://github.com/tobymao/sqlglot/commit/b79eb198cc21203efa82128b357d435338e9133d) - annotate type for bq ROW_NUMBER *(PR [#5716](https://github.com/tobymao/sqlglot/pull/5716) by [@geooo109](https://github.com/geooo109))*: annotate type for bq ROW_NUMBER (#5716) - due to [`f709bef`](https://github.com/tobymao/sqlglot/commit/f709bef3af7cd0daa25fe3d58b1753c3e65720ef) - annotate type for bq FIRST_VALUE *(PR [#5718](https://github.com/tobymao/sqlglot/pull/5718) by [@geooo109](https://github.com/geooo109))*: annotate type for bq FIRST_VALUE (#5718) - due to [`15a9061`](https://github.com/tobymao/sqlglot/commit/15a906170e5d5cdaa207ec7607edfdd7d4a8b774) - annotate type for bq PERCENTILE_DISC *(PR [#5722](https://github.com/tobymao/sqlglot/pull/5722) by [@geooo109](https://github.com/geooo109))*: annotate type for bq PERCENTILE_DISC (#5722) - due to [`7d49609`](https://github.com/tobymao/sqlglot/commit/7d4960963f0ef70b96f5b969bb008d2742e833ea) - annotate type for bq NTH_VALUE *(PR [#5720](https://github.com/tobymao/sqlglot/pull/5720) by [@geooo109](https://github.com/geooo109))*: annotate type for bq NTH_VALUE (#5720) - due to [`d41acf1`](https://github.com/tobymao/sqlglot/commit/d41acf11221bee30a5ae089cbac9b158ed3dd515) - annotate type for bq LEAD *(PR [#5719](https://github.com/tobymao/sqlglot/pull/5719) by [@geooo109](https://github.com/geooo109))*: annotate type for bq LEAD 
(#5719) - due to [`ff12130`](https://github.com/tobymao/sqlglot/commit/ff12130c23a215917f20fda7d50322f1cb7de599) - annotate type for bq PERCENTILE_CONT *(PR [#5729](https://github.com/tobymao/sqlglot/pull/5729) by [@geooo109](https://github.com/geooo109))*: annotate type for bq PERCENTILE_CONT (#5729) - due to [`fdb8a0a`](https://github.com/tobymao/sqlglot/commit/fdb8a0a6d0d74194255f313bd934db7fc1ce0d3f) - parse and annotate type for bq FORMAT *(PR [#5715](https://github.com/tobymao/sqlglot/pull/5715) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq FORMAT (#5715) - due to [`012bdd3`](https://github.com/tobymao/sqlglot/commit/012bdd3c8aeff180f85354ffd403fc1aa5815dcf) - parse and annotate type for bq CUME_DIST *(PR [#5735](https://github.com/tobymao/sqlglot/pull/5735) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CUME_DIST (#5735) - due to [`b99eaeb`](https://github.com/tobymao/sqlglot/commit/b99eaeb0c6eb3dc613e76d205e02632bd6af353b) - parse and annotate type for bq DENSE_RANK *(PR [#5736](https://github.com/tobymao/sqlglot/pull/5736) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq DENSE_RANK (#5736) - due to [`bb95c73`](https://github.com/tobymao/sqlglot/commit/bb95c7312c942ef987955f01e060604d60e32e83) - parse and annotate type for bq RANK *(PR [#5738](https://github.com/tobymao/sqlglot/pull/5738) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq RANK (#5738) - due to [`8713c08`](https://github.com/tobymao/sqlglot/commit/8713c082b0aa8454a5773fc2a85e08a132dc6ce3) - parse and annotate type for bq PERCENT_RANK *(PR [#5739](https://github.com/tobymao/sqlglot/pull/5739) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq PERCENT_RANK (#5739) - due to [`9ce4e31`](https://github.com/tobymao/sqlglot/commit/9ce4e31aecbde6ea1f227a7166c0f3dc9e302a66) - annotate type for bq JSON_OBJECT *(PR
[#5740](https://github.com/tobymao/sqlglot/pull/5740) by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_OBJECT (#5740) - due to [`d35ec6e`](https://github.com/tobymao/sqlglot/commit/d35ec6e37e21cf3cec848ed55bd73128c4633cd2) - annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY *(PR [#5741](https://github.com/tobymao/sqlglot/pull/5741) by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY (#5741) - due to [`4753642`](https://github.com/tobymao/sqlglot/commit/4753642cfcfb1f192ec4d21a492737b27affef09) - annotate type for bq JSON_EXTRACT_SCALAR *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_EXTRACT_SCALAR - due to [`113a530`](https://github.com/tobymao/sqlglot/commit/113a5308d050fd5ceacab4c6188e5eea5dd740b1) - parse and annotate type for bq JSON_ARRAY_APPEND *(PR [#5747](https://github.com/tobymao/sqlglot/pull/5747) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_ARRAY_APPEND (#5747) - due to [`268e2c6`](https://github.com/tobymao/sqlglot/commit/268e2c694d1eb99f1fe64477bc38ed4946bf1c32) - parse and annotate type for bq JSON_ARRAY_INSERT *(PR [#5748](https://github.com/tobymao/sqlglot/pull/5748) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_ARRAY_INSERT (#5748) - due to [`455ec1f`](https://github.com/tobymao/sqlglot/commit/455ec1f4f8aecb5435fa4cb2912bfc21db8dd44d) - parse and annotate type for bq JSON_KEYS *(PR [#5749](https://github.com/tobymao/sqlglot/pull/5749) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_KEYS (#5749) - due to [`59895fa`](https://github.com/tobymao/sqlglot/commit/59895faa23ebe1b27938c37a7b39df87de609844) - parse and annotate type for bq JSON_REMOVE *(PR [#5750](https://github.com/tobymao/sqlglot/pull/5750) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_REMOVE (#5750) - due to 
[`06d7df7`](https://github.com/tobymao/sqlglot/commit/06d7df7a05f2824cabf48e8d1e8a4ebca8fda496) - parse and annotate type for bq JSON_SET *(PR [#5751](https://github.com/tobymao/sqlglot/pull/5751) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_SET (#5751) - due to [`e72b341`](https://github.com/tobymao/sqlglot/commit/e72b3419c8a367caa0e5e80030979cd94e87a40d) - parse and annotate type for bq JSON_STRIP_NULLS *(PR [#5753](https://github.com/tobymao/sqlglot/pull/5753) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_STRIP_NULLS (#5753) - due to [`5de61a7`](https://github.com/tobymao/sqlglot/commit/5de61a7ab850d4e68fde4d76ee396d30d7bdef33) - parse and annotate type for bq JSON_EXTRACT_STRING_ARRAY *(PR [#5758](https://github.com/tobymao/sqlglot/pull/5758) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_EXTRACT_STRING_ARRAY (#5758) - due to [`36c9393`](https://github.com/tobymao/sqlglot/commit/36c93939575a19bd611269719c39d3d216be8cde) - parse and annotate type for bq JSON LAX funcs *(PR [#5760](https://github.com/tobymao/sqlglot/pull/5760) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON LAX funcs (#5760) - due to [`88862b5`](https://github.com/tobymao/sqlglot/commit/88862b56bc29c8a600b4d0e4693d5846d3a577ff) - annotate type for bq TO_JSON_STRING *(PR [#5762](https://github.com/tobymao/sqlglot/pull/5762) by [@geooo109](https://github.com/geooo109))*: annotate type for bq TO_JSON_STRING (#5762) - due to [`1c551d5`](https://github.com/tobymao/sqlglot/commit/1c551d5ed3315e314013c1f063deabd9d8613e5d) - parse and annotate type for bq TO_JSON *(PR [#5768](https://github.com/tobymao/sqlglot/pull/5768) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq TO_JSON (#5768) - due to [`1707f2d`](https://github.com/tobymao/sqlglot/commit/1707f2d7f9d3b58e8c216db638f8e572f9fe6f13) - annotate type for ABS *(PR 
[#5770](https://github.com/tobymao/sqlglot/pull/5770) by [@geooo109](https://github.com/geooo109))*: annotate type for ABS (#5770) - due to [`69acc51`](https://github.com/tobymao/sqlglot/commit/69acc5142b2d4f0b30832c350aa49f16d1adabef) - annotate type for bq IS_INF, IS_NAN *(PR [#5771](https://github.com/tobymao/sqlglot/pull/5771) by [@geooo109](https://github.com/geooo109))*: annotate type for bq IS_INF, IS_NAN (#5771) - due to [`0da2076`](https://github.com/tobymao/sqlglot/commit/0da207652331920416b29e2cc67bdc3c3f964466) - annotate type for bq CBRT *(PR [#5772](https://github.com/tobymao/sqlglot/pull/5772) by [@geooo109](https://github.com/geooo109))*: annotate type for bq CBRT (#5772) - due to [`a4968cb`](https://github.com/tobymao/sqlglot/commit/a4968cb5693670c1a2e9cd2c86404dd90fd76160) - annotate type for bq RAND *(PR [#5774](https://github.com/tobymao/sqlglot/pull/5774) by [@geooo109](https://github.com/geooo109))*: annotate type for bq RAND (#5774) - due to [`3e63350`](https://github.com/tobymao/sqlglot/commit/3e63350bd1d58b510cecd1a573d27be3fd2565ce) - parse and annotate type for bq ACOS *(PR [#5776](https://github.com/tobymao/sqlglot/pull/5776) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ACOS (#5776) - due to [`2be9d01`](https://github.com/tobymao/sqlglot/commit/2be9d01830c778186dc274c94c6db0dd6c4116d1) - parse and annotate type for bq ACOSH *(PR [#5779](https://github.com/tobymao/sqlglot/pull/5779) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ACOSH (#5779) - due to [`b77d3da`](https://github.com/tobymao/sqlglot/commit/b77d3da8f2548858d2b9d8590fcde83e1ec62b8a) - remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake *(PR [#5766](https://github.com/tobymao/sqlglot/pull/5766) by [@treysp](https://github.com/treysp))*: remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake (#5766) - due to [`7da2f31`](https://github.com/tobymao/sqlglot/commit/7da2f31d6613f16585e98c3fa1f592c617ae40c9) 
- parse and annotate type for bq ASIN/H *(PR [#5783](https://github.com/tobymao/sqlglot/pull/5783) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ASIN/H (#5783) - due to [`341ea83`](https://github.com/tobymao/sqlglot/commit/341ea83a07c707fdbf565b8d9ef4b9b6341ed1d5) - parse and annotate type for bq ATAN/H/2 *(PR [#5784](https://github.com/tobymao/sqlglot/pull/5784) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ATAN/H/2 (#5784) - due to [`aa360cb`](https://github.com/tobymao/sqlglot/commit/aa360cb0e204aa056557ff8b15aa2d4f678430e6) - use regexp_like as it exists *(PR [#5781](https://github.com/tobymao/sqlglot/pull/5781) by [@jasonthomassql](https://github.com/jasonthomassql))*: use regexp_like as it exists (#5781) - due to [`c2a1ad4`](https://github.com/tobymao/sqlglot/commit/c2a1ad4050771401a5b26bcadd90060e4527fbff) - parse and annotate type for bq COT/H *(PR [#5786](https://github.com/tobymao/sqlglot/pull/5786) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq COT/H (#5786) - due to [`316ae91`](https://github.com/tobymao/sqlglot/commit/316ae913d8b1a63f3071ebb1b826328108d74cef) - Added handling of UTC_DATE and exp.CurrentDate *(PR [#5785](https://github.com/tobymao/sqlglot/pull/5785) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))*: Added handling of UTC_DATE and exp.CurrentDate (#5785) - due to [`2c6d237`](https://github.com/tobymao/sqlglot/commit/2c6d23742ea9fcc2b9c784315d3d5364e360fea5) - parse and annotate type for bq CSC/H *(PR [#5787](https://github.com/tobymao/sqlglot/pull/5787) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CSC/H (#5787) - due to [`8a35076`](https://github.com/tobymao/sqlglot/commit/8a350763c2337f6910a5f0e19af387ba488fcb70) - parse and annotate type for bq SEC/H *(PR [#5788](https://github.com/tobymao/sqlglot/pull/5788) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SEC/H (#5788) - 
due to [`79901cb`](https://github.com/tobymao/sqlglot/commit/79901cb506737ae1932fa44a705858d2597ee587) - parse and annotate type for bq SIN/H *(PR [#5790](https://github.com/tobymao/sqlglot/pull/5790) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SIN/H (#5790) - due to [`74fb547`](https://github.com/tobymao/sqlglot/commit/74fb5476def1b389da425885db56bd6592fd7f78) - parse and annotate type for bq RANGE_BUCKET *(PR [#5793](https://github.com/tobymao/sqlglot/pull/5793) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq RANGE_BUCKET (#5793) - due to [`eca65e8`](https://github.com/tobymao/sqlglot/commit/eca65e8b79f65850b014a4cb7913ba4a5861dbe9) - parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE *(PR [#5792](https://github.com/tobymao/sqlglot/pull/5792) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE (#5792) - due to [`a180d3f`](https://github.com/tobymao/sqlglot/commit/a180d3f2f9f3938611027269028c03274aa1889c) - parse and annotate type for bq SAFE math funcs *(PR [#5797](https://github.com/tobymao/sqlglot/pull/5797) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SAFE math funcs (#5797) - due to [`fc7ad7a`](https://github.com/tobymao/sqlglot/commit/fc7ad7a4d953424b56542eacfe1835f5789921c7) - parse ALTER SESSION *(PR [#5734](https://github.com/tobymao/sqlglot/pull/5734) by [@tekumara](https://github.com/tekumara))*: parse ALTER SESSION (#5734) - due to [`8ec1a6c`](https://github.com/tobymao/sqlglot/commit/8ec1a6cf5a8edc2d834c713ce0fd8d87237f11ed) - annotate type for bq STRING_AGG *(PR [#5798](https://github.com/tobymao/sqlglot/pull/5798) by [@geooo109](https://github.com/geooo109))*: annotate type for bq STRING_AGG (#5798) - due to [`dd97bfa`](https://github.com/tobymao/sqlglot/commit/dd97bfa1dc2f86b727c55b06b3c54b18c02e360d) - annotate type for bq DATETIME_TRUNC *(PR [#5799](https://github.com/tobymao/sqlglot/pull/5799)
by [@geooo109](https://github.com/geooo109))*: annotate type for bq DATETIME_TRUNC (#5799) - due to [`d3e9dda`](https://github.com/tobymao/sqlglot/commit/d3e9dda183695dd1e4a9832a6671bccc6db561a0) - annotate type for bq GENERATE_UUID *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq GENERATE_UUID - due to [`3726b33`](https://github.com/tobymao/sqlglot/commit/3726b33bb6b4ab286617f510e96e1fbd27c429f3) - support nulls_first arg for array_sort *(PR [#5802](https://github.com/tobymao/sqlglot/pull/5802) by [@treysp](https://github.com/treysp))*: support nulls_first arg for array_sort (#5802) - due to [`cf1d1e3`](https://github.com/tobymao/sqlglot/commit/cf1d1e3e0ef9e6cd1b1c6128c63ddf06c30f1339) - annotate type for snowflake's REVERSE function *(PR [#5803](https://github.com/tobymao/sqlglot/pull/5803) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for snowflake's REVERSE function (#5803) - due to [`ad0b407`](https://github.com/tobymao/sqlglot/commit/ad0b407098e1611d4fc0e1f0916511337b9aefdb) - Mark 'BEGIN' as TokenType.BEGIN for transactions *(PR [#5826](https://github.com/tobymao/sqlglot/pull/5826) by [@VaggelisD](https://github.com/VaggelisD))*: Mark 'BEGIN' as TokenType.BEGIN for transactions (#5826) - due to [`0198282`](https://github.com/tobymao/sqlglot/commit/0198282a82bbf3e81476e164718d63fd1210acdc) - Update tests for concat string function *(PR [#5809](https://github.com/tobymao/sqlglot/pull/5809) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Update tests for concat string function (#5809) - due to [`db2c430`](https://github.com/tobymao/sqlglot/commit/db2c4303237a1244070c359245c398a724df6de2) - annotate the "contains" function *(PR [#5829](https://github.com/tobymao/sqlglot/pull/5829) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate the "contains" function (#5829) - due to
[`9c8a600`](https://github.com/tobymao/sqlglot/commit/9c8a6001f41816035f391d046eb9692d6f13cefc) - correct parsing of TO_VARCHAR *(PR [#5840](https://github.com/tobymao/sqlglot/pull/5840) by [@geooo109](https://github.com/geooo109))*: correct parsing of TO_VARCHAR (#5840) - due to [`1e9aef1`](https://github.com/tobymao/sqlglot/commit/1e9aef1bb20f4dc5e9c03d59cb3165c235c11ce1) - convert NULL annotations to UNKNOWN *(PR [#5842](https://github.com/tobymao/sqlglot/pull/5842) by [@georgesittas](https://github.com/georgesittas))*: convert NULL annotations to UNKNOWN (#5842) - due to [`44c9e70`](https://github.com/tobymao/sqlglot/commit/44c9e70bd8c9421035eb0e87e4286061ec5d2fa8) - add tests for snowflake STARTSWITH function *(PR [#5847](https://github.com/tobymao/sqlglot/pull/5847) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: add tests for snowflake STARTSWITH function (#5847) - due to [`0779c2d`](https://github.com/tobymao/sqlglot/commit/0779c2d4e8ce0228592de6882763940783fa5e87) - support BIT_X aggregates again for duckdb, postgres *(PR [#5851](https://github.com/tobymao/sqlglot/pull/5851) by [@georgesittas](https://github.com/georgesittas))*: support BIT_X aggregates again for duckdb, postgres (#5851) - due to [`c50d6e3`](https://github.com/tobymao/sqlglot/commit/c50d6e3c7b96f00d27c34a02c8e0dced21e6c373) - annotate type for snowflake LEFT, RIGHT and SUBSTRING functions *(PR [#5849](https://github.com/tobymao/sqlglot/pull/5849) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate type for snowflake LEFT, RIGHT and SUBSTRING functions (#5849) - due to [`e441e16`](https://github.com/tobymao/sqlglot/commit/e441e16991626c2da2d38bc9c3a2b408e3f773bd) - make dump/pickling non-recursive to avoid hitting stack limits *(PR [#5850](https://github.com/tobymao/sqlglot/pull/5850) by [@tobymao](https://github.com/tobymao))*: make dump/pickling non-recursive to avoid hitting stack limits (#5850) - due 
to [`b128339`](https://github.com/tobymao/sqlglot/commit/b12833977e2a395712481cf11e293fdbd70fd4ce) - annotate and add tests for snowflake LENGTH and LOWER functions *(PR [#5856](https://github.com/tobymao/sqlglot/pull/5856) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate and add tests for snowflake LENGTH and LOWER functions (#5856) - due to [`134957a`](https://github.com/tobymao/sqlglot/commit/134957af11c55a4ab16f58d0725d6bb8ab23eb28) - annotate types for Snowflake TRIM function *(PR [#5811](https://github.com/tobymao/sqlglot/pull/5811) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake TRIM function (#5811) - due to [`d3cd6bf`](https://github.com/tobymao/sqlglot/commit/d3cd6bf6e5fbaa490868ee3cd2cc99dd5e40a396) - Annotate and add tests for snowflake REPLACE and SPACE functions *(PR [#5871](https://github.com/tobymao/sqlglot/pull/5871) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate and add tests for snowflake REPLACE and SPACE functions (#5871) - due to [`96ae7a3`](https://github.com/tobymao/sqlglot/commit/96ae7a3bcbf9de1932150baa0bd704d4ce05c9f7) - Annotate and add tests for snowflake REPEAT and SPLIT functions *(PR [#5875](https://github.com/tobymao/sqlglot/pull/5875) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate and add tests for snowflake REPEAT and SPLIT functions (#5875) - due to [`f2d3bf7`](https://github.com/tobymao/sqlglot/commit/f2d3bf74e804e5a5e2ac6ca94210ba04df07e7f3) - annotate types for Snowflake UUID_STRING function *(PR [#5881](https://github.com/tobymao/sqlglot/pull/5881) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake UUID_STRING function (#5881) - due to [`ec80ff3`](https://github.com/tobymao/sqlglot/commit/ec80ff34957c3e3f80c44175383b06cf72988a68) - make dump a list 
instead of a nested dict to avoid all recursion errors *(PR [#5885](https://github.com/tobymao/sqlglot/pull/5885) by [@tobymao](https://github.com/tobymao))*: make dump a list instead of a nested dict to avoid all recursion errors (#5885) - due to [`2fdaccd`](https://github.com/tobymao/sqlglot/commit/2fdaccd1a9045bda3d529025a4706c397b8a836f) - annotate types for Snowflake SHA1, SHA2 functions *(PR [#5884](https://github.com/tobymao/sqlglot/pull/5884) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake SHA1, SHA2 functions (#5884) - due to [`faba309`](https://github.com/tobymao/sqlglot/commit/faba30905390e5efaf0ba9a05aab9ac2724b1b85) - annotate types for Snowflake AI_AGG function *(PR [#5894](https://github.com/tobymao/sqlglot/pull/5894) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_AGG function (#5894) - due to [`304bec5`](https://github.com/tobymao/sqlglot/commit/304bec5f7342501ad28ea4cd0a4b9aa092f2192f) - Annotate snowflake MD5 functions *(PR [#5883](https://github.com/tobymao/sqlglot/pull/5883) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate snowflake MD5 functions (#5883) - due to [`c0180ec`](https://github.com/tobymao/sqlglot/commit/c0180ec163a43836fed754efcb6f26ad37cdae50) - annotate types for Snowflake AI_SUMMARIZE_AGG function *(PR [#5902](https://github.com/tobymao/sqlglot/pull/5902) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_SUMMARIZE_AGG function (#5902) - due to [`f5409df`](https://github.com/tobymao/sqlglot/commit/f5409df64ed6069880669878db687e4b98c3e280) - use column name in struct type annotation *(PR [#5903](https://github.com/tobymao/sqlglot/pull/5903) by [@georgesittas](https://github.com/georgesittas))*: use column name in struct type annotation (#5903) - due to 
[`5a973e9`](https://github.com/tobymao/sqlglot/commit/5a973e9a88fa7f522a9bf91dc60fb0f6effef53d) - annotate types for Snowflake AI_CLASSIFY function *(PR [#5909](https://github.com/tobymao/sqlglot/pull/5909) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_CLASSIFY function (#5909) - due to [`2d0d908`](https://github.com/tobymao/sqlglot/commit/2d0d908b5bbc32ff3bc92eb1ae9fc6e5ac3409bc) - produce TableAlias instead of Alias for USING in merge builder *(PR [#5911](https://github.com/tobymao/sqlglot/pull/5911) by [@georgesittas](https://github.com/georgesittas))*: produce TableAlias instead of Alias for USING in merge builder (#5911) - due to [`f4ad258`](https://github.com/tobymao/sqlglot/commit/f4ad25882951de4e4442dfd5189a56d5a1c5e630) - Annotate types for Snowflake BASE64_DECODE_BINARY function *(PR [#5917](https://github.com/tobymao/sqlglot/pull/5917) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate types for Snowflake BASE64_DECODE_BINARY function (#5917) - due to [`6d0e3f8`](https://github.com/tobymao/sqlglot/commit/6d0e3f8dcae7ed1a7659ece69b1f94cec5e7300e) - Add parser support to ilike like function versions. *(PR [#5915](https://github.com/tobymao/sqlglot/pull/5915) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add parser support to ilike like function versions. 
(#5915) - due to [`22c7ed7`](https://github.com/tobymao/sqlglot/commit/22c7ed7734b41ca544bb67bcc1ca4151f6d5f05f) - parse tuple *(PR [#5920](https://github.com/tobymao/sqlglot/pull/5920) by [@geooo109](https://github.com/geooo109))*: parse tuple (#5920) - due to [`fc5624e`](https://github.com/tobymao/sqlglot/commit/fc5624eca43d2855ac350c92d85b184a6893d5ca) - annotate types for Snowflake ASCII function *(PR [#5926](https://github.com/tobymao/sqlglot/pull/5926) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake ASCII function (#5926) - due to [`4e81690`](https://github.com/tobymao/sqlglot/commit/4e8169045edcaa28ae43abeb07370df63846fbfd) - annotate type for Snowflake COLLATE function *(PR [#5931](https://github.com/tobymao/sqlglot/pull/5931) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COLLATE function (#5931) - due to [`f07d35d`](https://github.com/tobymao/sqlglot/commit/f07d35d29104c6203efaab738118d1903614b83c) - annotate type for Snowflake CHR function *(PR [#5929](https://github.com/tobymao/sqlglot/pull/5929) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CHR function (#5929) - due to [`f8c0ee4`](https://github.com/tobymao/sqlglot/commit/f8c0ee4d3c1a4d4a92b897d1cc85f9904c8e566b) - Add function and annotate snowflake hex decode string and binary functions *(PR [#5928](https://github.com/tobymao/sqlglot/pull/5928) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add function and annotate snowflake hex decode string and binary functions (#5928) - due to [`66f9501`](https://github.com/tobymao/sqlglot/commit/66f9501d76d087798bad93e578273ab2a45e2575) - annotate types for Snowflake BIT_LENGTH function *(PR [#5927](https://github.com/tobymao/sqlglot/pull/5927) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: 
annotate types for Snowflake BIT_LENGTH function (#5927) - due to [`7878437`](https://github.com/tobymao/sqlglot/commit/78784370712df65a2e1e79a1c2b441131ed7222a) - annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` *(PR [#5922](https://github.com/tobymao/sqlglot/pull/5922) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` (#5922) - due to [`9bcad04`](https://github.com/tobymao/sqlglot/commit/9bcad040bd51dd03821c68eea1a73534fc7a81b7) - Annotate type for HEX ENCODE function. *(PR [#5936](https://github.com/tobymao/sqlglot/pull/5936) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for HEX ENCODE function. (#5936) - due to [`590928f`](https://github.com/tobymao/sqlglot/commit/590928f4637306e8cf3f1302d5dd5d5dbc76e7e0) - annotate type for Snowflake INITCAP function *(PR [#5941](https://github.com/tobymao/sqlglot/pull/5941) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake INITCAP function (#5941) - due to [`ac04de1`](https://github.com/tobymao/sqlglot/commit/ac04de1944c7a976406581b489b3cf9b11dafb77) - annotate type for Snowflake EDITDISTANCE function *(PR [#5940](https://github.com/tobymao/sqlglot/pull/5940) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake EDITDISTANCE function (#5940) - due to [`9e28af8`](https://github.com/tobymao/sqlglot/commit/9e28af8a52ced951ecf7f4e85a6305e20a13de1f) - Annotate type for snowflake COMPRESS function *(PR [#5938](https://github.com/tobymao/sqlglot/pull/5938) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake COMPRESS function (#5938) - due to [`7f13eaf`](https://github.com/tobymao/sqlglot/commit/7f13eaf7769a3381a56c9209af590835be2f95cd) - Annotate type for snowflake DECOMPRESS_BINARY 
function *(PR [#5945](https://github.com/tobymao/sqlglot/pull/5945) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DECOMPRESS_BINARY function (#5945) - due to [`be12b29`](https://github.com/tobymao/sqlglot/commit/be12b29b5a7bd6d6e09dbd8c17086bd77c19abc0) - Annotate type for snowflake DECOMPRESS_STRING function *(PR [#5947](https://github.com/tobymao/sqlglot/pull/5947) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DECOMPRESS_STRING function (#5947) - due to [`1573fef`](https://github.com/tobymao/sqlglot/commit/1573fefac27b5b1215e3d458f8ccf1b9dadbb772) - annotate types for Snowflake JAROWINKLER_SIMILARITY function *(PR [#5950](https://github.com/tobymao/sqlglot/pull/5950) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake JAROWINKLER_SIMILARITY function (#5950) - due to [`883c6ab`](https://github.com/tobymao/sqlglot/commit/883c6abe589865f478d95604e8d670e57afd04af) - annotate type for Snowflake COLLATION function *(PR [#5939](https://github.com/tobymao/sqlglot/pull/5939) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COLLATION function (#5939) - due to [`68473ac`](https://github.com/tobymao/sqlglot/commit/68473ac3ec8dc76512dc76819892a1b0324c7ddc) - Annotate type for snowflake PARSE_URL function *(PR [#5962](https://github.com/tobymao/sqlglot/pull/5962) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake PARSE_URL function (#5962) - due to [`b015a9d`](https://github.com/tobymao/sqlglot/commit/b015a9d944d0a87069a7750ad74953c399d7da34) - annotate type for Snowflake REGEXP_INSTR function *(commit by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_INSTR function - due to 
[`1f29ba7`](https://github.com/tobymao/sqlglot/commit/1f29ba710f4213beb1a2f993244d7d824f3536ce) - annotate type for Snowflake PARSE_IP function *(PR [#5961](https://github.com/tobymao/sqlglot/pull/5961) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake PARSE_IP function (#5961) - due to [`bf45d5d`](https://github.com/tobymao/sqlglot/commit/bf45d5d3cb0c0f380824019eb32ec29049268a61) - annotate types for Snowflake RTRIMMED_LENGTH function *(PR [#5968](https://github.com/tobymao/sqlglot/pull/5968) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake RTRIMMED_LENGTH function (#5968) - due to [`13caa69`](https://github.com/tobymao/sqlglot/commit/13caa6991f003ad7abb590073451e591b6fd888c) - Annotate type for snowflake POSITION function *(PR [#5964](https://github.com/tobymao/sqlglot/pull/5964) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake POSITION function (#5964) - due to [`13a30df`](https://github.com/tobymao/sqlglot/commit/13a30dfa37096df5bfc2c31538325c40a49f7917) - Annotate type for snowflake TRY_BASE64_DECODE_BINARY function *(PR [#5972](https://github.com/tobymao/sqlglot/pull/5972) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_BASE64_DECODE_BINARY function (#5972) - due to [`1f5fdd7`](https://github.com/tobymao/sqlglot/commit/1f5fdd799c047de167a4572f7ac26b7ad92167f2) - Annotate type for snowflake TRY_BASE64_DECODE_STRING function *(PR [#5974](https://github.com/tobymao/sqlglot/pull/5974) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_BASE64_DECODE_STRING function (#5974) - due to [`324e82f`](https://github.com/tobymao/sqlglot/commit/324e82fe1fb11722f91341010602a743b151e055) - Annotate type for snowflake TRY_HEX_DECODE_BINARY function 
*(PR [#5975](https://github.com/tobymao/sqlglot/pull/5975) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_HEX_DECODE_BINARY function (#5975) - due to [`6caf99d`](https://github.com/tobymao/sqlglot/commit/6caf99d556a3357ffaa6c294a9babcd30dd5fac5) - Annotate type for snowflake TRY_HEX_DECODE_STRING function *(PR [#5976](https://github.com/tobymao/sqlglot/pull/5976) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_HEX_DECODE_STRING function (#5976) - due to [`73186a8`](https://github.com/tobymao/sqlglot/commit/73186a812ce422c108ee81b3de11da6ee9a9e902) - annotate type for Snowflake REGEXP_COUNT function *(PR [#5963](https://github.com/tobymao/sqlglot/pull/5963) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_COUNT function (#5963) - due to [`c3bdb3c`](https://github.com/tobymao/sqlglot/commit/c3bdb3cd1af1809ed82be0ae40744d9fffc8ce18) - array start index is 1, support array_flatten, fixes [#5983](https://github.com/tobymao/sqlglot/pull/5983) *(commit by [@georgesittas](https://github.com/georgesittas))*: array start index is 1, support array_flatten, fixes #5983 - due to [`244fb48`](https://github.com/tobymao/sqlglot/commit/244fb48fc9c4776f427c08b825d139b1c172fd26) - annotate type for Snowflake SPLIT_PART function *(PR [#5988](https://github.com/tobymao/sqlglot/pull/5988) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake SPLIT_PART function (#5988) - due to [`0d772e0`](https://github.com/tobymao/sqlglot/commit/0d772e0b9d687b24d49203c05d7a90cc1dce02d5) - add ast node for `DIRECTORY` source *(PR [#5990](https://github.com/tobymao/sqlglot/pull/5990) by [@georgesittas](https://github.com/georgesittas))*: add ast node for `DIRECTORY` source (#5990) - due to 
[`3c7b5c0`](https://github.com/tobymao/sqlglot/commit/3c7b5c0e2dc071b7b9f6da308ba58a3a43da93dc) - Annotate type for snowflake SOUNDEX_P123 function *(PR [#5987](https://github.com/tobymao/sqlglot/pull/5987) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake SOUNDEX_P123 function (#5987) - due to [`f25e42e`](https://github.com/tobymao/sqlglot/commit/f25e42e3f5b3b7b671bd724ba7b09a9b07d13995) - annotate type for Snowflake REGEXP_INSTR function *(PR [#5978](https://github.com/tobymao/sqlglot/pull/5978) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_INSTR function (#5978) - due to [`13cb26e`](https://github.com/tobymao/sqlglot/commit/13cb26e2f29373538d60a8124ddebf95fd22a8d8) - annotate type for Snowflake REGEXP_SUBSTR_ALL function *(PR [#5979](https://github.com/tobymao/sqlglot/pull/5979) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_SUBSTR_ALL function (#5979) - due to [`4ce683e`](https://github.com/tobymao/sqlglot/commit/4ce683eb8ac5716a334cbd7625438b9f89623c7a) - Annotate type for snowflake UNICODE function *(PR [#5993](https://github.com/tobymao/sqlglot/pull/5993) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake UNICODE function (#5993) - due to [`c7657fb`](https://github.com/tobymao/sqlglot/commit/c7657fbd27a4350c424ef65947471ab9ec086831) - remove `unalias_group_by` transformation since it is unsafe *(PR [#5997](https://github.com/tobymao/sqlglot/pull/5997) by [@georgesittas](https://github.com/georgesittas))*: remove `unalias_group_by` transformation since it is unsafe (#5997) - due to [`587196c`](https://github.com/tobymao/sqlglot/commit/587196c9c2d122f73f9deb7e87c2831f27f6ed02) - Annotate type for snowflake STRTOK_TO_ARRAY function *(PR [#5994](https://github.com/tobymao/sqlglot/pull/5994) 
by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake STRTOK_TO_ARRAY function (#5994) - due to [`bced710`](https://github.com/tobymao/sqlglot/commit/bced71084ffb3a8f7a11db843777f05b68f367da) - Annotate type for snowflake STRTOK function. *(PR [#5991](https://github.com/tobymao/sqlglot/pull/5991) by [@georgesittas](https://github.com/georgesittas))*: Annotate type for snowflake STRTOK function. (#5991) - due to [`be1cdc8`](https://github.com/tobymao/sqlglot/commit/be1cdc81b511d462b710b50941d5c2770d901e91) - Fix roundtrip of ~ operator *(PR [#6017](https://github.com/tobymao/sqlglot/pull/6017) by [@VaggelisD](https://github.com/VaggelisD))*: Fix roundtrip of ~ operator (#6017) - due to [`74a13f2`](https://github.com/tobymao/sqlglot/commit/74a13f2a548b9cd41061e835cb3cd9dd2a5a9fb3) - Annotate type for snowflake DIV0 and DIVNULL functions *(PR [#6008](https://github.com/tobymao/sqlglot/pull/6008) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DIV0 and DIVNULL functions (#6008) - due to [`fec2b31`](https://github.com/tobymao/sqlglot/commit/fec2b31956f2debdad7c53744a577894cd8d747c) - Annotate type for snowflake SEARCH function *(PR [#5985](https://github.com/tobymao/sqlglot/pull/5985) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake SEARCH function (#5985) - due to [`27a76cd`](https://github.com/tobymao/sqlglot/commit/27a76cdfe4212f16f945521eb3997580eacf1d61) - Annotate type for snowflake COT, SIN and TAN functions *(PR [#6022](https://github.com/tobymao/sqlglot/pull/6022) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake COT, SIN and TAN functions (#6022) - due to [`0911276`](https://github.com/tobymao/sqlglot/commit/091127663ab4cb94b02be5aa40c6a46dd7f89243) - annotate type for Snowflake EXP function *(PR 
[#6007](https://github.com/tobymao/sqlglot/pull/6007) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake EXP function (#6007) - due to [`a96d50e`](https://github.com/tobymao/sqlglot/commit/a96d50e14bed5e87ff2dce9c545e0c48897b64d6) - annotate type for Snowflake COSH function *(PR [#6006](https://github.com/tobymao/sqlglot/pull/6006) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COSH function (#6006) - due to [`4df58e0`](https://github.com/tobymao/sqlglot/commit/4df58e0f0b8985590fb29a8ab6ba0ced987ac5b9) - annotate type for Snowflake DEGREES function *(PR [#6027](https://github.com/tobymao/sqlglot/pull/6027) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DEGREES function (#6027) - due to [`db71a20`](https://github.com/tobymao/sqlglot/commit/db71a2023aaeca2ffda782ae7b91fdee356c402e) - annotate type for Snowflake COS function *(PR [#6028](https://github.com/tobymao/sqlglot/pull/6028) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COS function (#6028) - due to [`5dd2ed3`](https://github.com/tobymao/sqlglot/commit/5dd2ed3c69cf9e8c3e327297e0cc932f0954e108) - bump sqlglotrs to 0.7.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.7.0 - due to [`6beb917`](https://github.com/tobymao/sqlglot/commit/6beb9172dffd0aaea46b75477485060737e774b9) - Annotate type for snowflake ROUND function *(PR [#6032](https://github.com/tobymao/sqlglot/pull/6032) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake ROUND function (#6032) - due to [`0939d69`](https://github.com/tobymao/sqlglot/commit/0939d69223a860581b1c30cc2f762294946b93f3) - move odbc date literal handling in t-sql closes [#6037](https://github.com/tobymao/sqlglot/pull/6037) *(PR 
[#6044](https://github.com/tobymao/sqlglot/pull/6044) by [@georgesittas](https://github.com/georgesittas))*: move odbc date literal handling in t-sql closes #6037 (#6044) - due to [`56c8b3b`](https://github.com/tobymao/sqlglot/commit/56c8b3bbff7451b9049e1a168716bb41222a86ed) - Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark *(PR [#6004](https://github.com/tobymao/sqlglot/pull/6004) by [@tsamaras](https://github.com/tsamaras))*: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark (#6004) - due to [`7ac01c2`](https://github.com/tobymao/sqlglot/commit/7ac01c2ae9bc4375efb63c60e3221e85088fdd1f) - bump sqlglotrs to 0.7.1 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.7.1 - due to [`6f31b86`](https://github.com/tobymao/sqlglot/commit/6f31b86599258afe156aa3d9ccc42389cac37021) - Annotate type for snowflake FLOOR function *(PR [#6030](https://github.com/tobymao/sqlglot/pull/6030) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake FLOOR function (#6030) - due to [`cecab2f`](https://github.com/tobymao/sqlglot/commit/cecab2fd66d578ddc765b5fd0e7b155971280a0c) - annotate type for Snowflake ATANH function *(PR [#6054](https://github.com/tobymao/sqlglot/pull/6054) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake ATANH function (#6054) - due to [`08339a9`](https://github.com/tobymao/sqlglot/commit/08339a902138211f67cfb009d2576b22ea8d8e42) - annotate type for Snowflake FACTORIAL function *(PR [#6053](https://github.com/tobymao/sqlglot/pull/6053) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake FACTORIAL function (#6053) - due to [`9060f60`](https://github.com/tobymao/sqlglot/commit/9060f603818db863b7570a2c3c50c3eb88155e76) - Annotate type for snowflake ATAN2 function. 
*(PR [#6060](https://github.com/tobymao/sqlglot/pull/6060) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake ATAN2 function. (#6060) - due to [`b3eb2e4`](https://github.com/tobymao/sqlglot/commit/b3eb2e4ca6177ee61b27675e8ec8b4815587df31) - annotate type for Snowflake SINH function *(PR [#6052](https://github.com/tobymao/sqlglot/pull/6052) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake SINH function (#6052) - due to [`157d2fa`](https://github.com/tobymao/sqlglot/commit/157d2fa06ab110ebc760aa7567d7fda801a5ced9) - annotate type for Snowflake CEIL function *(PR [#6051](https://github.com/tobymao/sqlglot/pull/6051) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CEIL function (#6051) - due to [`e7833de`](https://github.com/tobymao/sqlglot/commit/e7833de9744a4aa69d244285e7f6f7281af178ba) - support DELETE with USING and multiple VALUES *(PR [#6072](https://github.com/tobymao/sqlglot/pull/6072) by [@geooo109](https://github.com/geooo109))*: support DELETE with USING and multiple VALUES (#6072) - due to [`354140d`](https://github.com/tobymao/sqlglot/commit/354140d0a279f317439bdb247e1ab9578f9a035d) - Annotate type for snowflake TANH and ATAN functions *(PR [#6069](https://github.com/tobymao/sqlglot/pull/6069) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TANH and ATAN functions (#6069) - due to [`c67276d`](https://github.com/tobymao/sqlglot/commit/c67276d5be970252e14d1817d8498fc9985222d9) - Annotate type for snowflake RADIANS function. *(PR [#6064](https://github.com/tobymao/sqlglot/pull/6064) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake RADIANS function. 
(#6064) - due to [`2238ac2`](https://github.com/tobymao/sqlglot/commit/2238ac27478bd272ba39928bbec1075c4191ee1b) - transpile timestamp literals in datediff fixes [#6083](https://github.com/tobymao/sqlglot/pull/6083) *(PR [#6086](https://github.com/tobymao/sqlglot/pull/6086) by [@georgesittas](https://github.com/georgesittas))*: transpile timestamp literals in datediff fixes #6083 (#6086) - due to [`c49ba0e`](https://github.com/tobymao/sqlglot/commit/c49ba0eee21f7776703d2a26c6641b4a32a1cff7) - Annotate type for snowflake WIDTH_BUCKET function *(PR [#6078](https://github.com/tobymao/sqlglot/pull/6078) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake WIDTH_BUCKET function (#6078) - due to [`fbc1f13`](https://github.com/tobymao/sqlglot/commit/fbc1f1335eecaaaab4fc93ddbb74611a4df0aea7) - annotate type for Snowflake CONVERT_TIMEZONE function *(PR [#6076](https://github.com/tobymao/sqlglot/pull/6076) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CONVERT_TIMEZONE function (#6076) - due to [`70e977c`](https://github.com/tobymao/sqlglot/commit/70e977c5edfb495529d38a9096cb40762a9b5d7b) - annotate type for Snowflake DATE_TRUNC function *(PR [#6080](https://github.com/tobymao/sqlglot/pull/6080) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATE_TRUNC function (#6080) - due to [`e9cf146`](https://github.com/tobymao/sqlglot/commit/e9cf146a4a6cd78f6a59c195e7ec12240b836e5e) - annotate type for Snowflake DATE_PART function *(PR [#6079](https://github.com/tobymao/sqlglot/pull/6079) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATE_PART function (#6079) - due to [`5109890`](https://github.com/tobymao/sqlglot/commit/510989043d18baa17502a971262462814a2eb5be) - VALUES with ORDER BY/LIMIT/OFFSET *(PR 
[#6094](https://github.com/tobymao/sqlglot/pull/6094) by [@geooo109](https://github.com/geooo109))*: VALUES with ORDER BY/LIMIT/OFFSET (#6094) - due to [`6fe5824`](https://github.com/tobymao/sqlglot/commit/6fe58247888c326093618657fb027e482d82d107) - Annotate type for hour, minute, second functions *(PR [#6100](https://github.com/tobymao/sqlglot/pull/6100) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for hour, minute, second functions (#6100) - due to [`a4d07a0`](https://github.com/tobymao/sqlglot/commit/a4d07a07eefbdaf88d30df2310a9533afdc75a82) - Annotate type for snowflake EXTRACT function *(PR [#6099](https://github.com/tobymao/sqlglot/pull/6099) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake EXTRACT function (#6099) - due to [`483770b`](https://github.com/tobymao/sqlglot/commit/483770b816fab14b7eb7222974ed2c99045302a7) - Annotate type for snowflake TIME_SLICE function *(PR [#6098](https://github.com/tobymao/sqlglot/pull/6098) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIME_SLICE function (#6098) - due to [`06f40f9`](https://github.com/tobymao/sqlglot/commit/06f40f900ce693ba4203514e422cba8cda0dbb07) - don't simplify x XOR x due to NULL semantics *(PR [#6115](https://github.com/tobymao/sqlglot/pull/6115) by [@geooo109](https://github.com/geooo109))*: don't simplify x XOR x due to NULL semantics (#6115) - due to [`c286cee`](https://github.com/tobymao/sqlglot/commit/c286cee54ab93e1fd0b3be658f7e767e3e00afe9) - Annotate type for snowflake MONTHNAME function *(PR [#6116](https://github.com/tobymao/sqlglot/pull/6116) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake MONTHNAME function (#6116) - due to [`1a34788`](https://github.com/tobymao/sqlglot/commit/1a34788025bdd8a018c4bb9214f72152e68bdd14) - Annotate 
type for snowflake PREVIOUS_DAY function *(PR [#6117](https://github.com/tobymao/sqlglot/pull/6117) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake PREVIOUS_DAY function (#6117) - due to [`533faf8`](https://github.com/tobymao/sqlglot/commit/533faf87b6df351070b565dd1fe9ce4e13b6c46e) - transpile duckdb `READ_PARQUET` to `parquet.` closes [#6122](https://github.com/tobymao/sqlglot/pull/6122) *(commit by [@georgesittas](https://github.com/georgesittas))*: transpile duckdb `READ_PARQUET` to `parquet.` closes #6122 - due to [`cd4e557`](https://github.com/tobymao/sqlglot/commit/cd4e557658b1384f36c9a1ef9da5a09b893229b1) - Annotate type for snowflake RANDOM function *(PR [#6124](https://github.com/tobymao/sqlglot/pull/6124) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake RANDOM function (#6124) - due to [`fe63d84`](https://github.com/tobymao/sqlglot/commit/fe63d84f1bd365b22221f348d79c0546aa3118b0) - annotate type for Snowflake MONTHS_BETWEEN function *(PR [#6120](https://github.com/tobymao/sqlglot/pull/6120) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for Snowflake MONTHS_BETWEEN function (#6120) - due to [`598d09b`](https://github.com/tobymao/sqlglot/commit/598d09b036d938c90a44955d67175ea868090ba2) - annotate type for Snowflake DATEADD function *(PR [#6089](https://github.com/tobymao/sqlglot/pull/6089) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATEADD function (#6089) - due to [`b98bcee`](https://github.com/tobymao/sqlglot/commit/b98bcee148ba426816e166dbfa9ba8e0979aae21) - Annotate type for snowflake next_day function *(PR [#6125](https://github.com/tobymao/sqlglot/pull/6125) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Annotate type for snowflake next_day function (#6125) - due to 
[`e2129c6`](https://github.com/tobymao/sqlglot/commit/e2129c6766ca1f10ff6663bec98be984abb33c91) - Do not consider BIT_COUNT an aggregate function *(PR [#6135](https://github.com/tobymao/sqlglot/pull/6135) by [@VaggelisD](https://github.com/VaggelisD))*: Do not consider BIT_COUNT an aggregate function (#6135) - due to [`d136414`](https://github.com/tobymao/sqlglot/commit/d136414e520270ac9ab2fd8e9df4691d269b3af0) - avoid simplifying AND with NULL *(PR [#6148](https://github.com/tobymao/sqlglot/pull/6148) by [@geooo109](https://github.com/geooo109))*: avoid simplifying AND with NULL (#6148) - due to [`3a334f3`](https://github.com/tobymao/sqlglot/commit/3a334f376b9766b6b99fdf195ae763bb44976ec4) - annotate type for boolnot snowflake function *(PR [#6141](https://github.com/tobymao/sqlglot/pull/6141) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for boolnot snowflake function (#6141) - due to [`99949cc`](https://github.com/tobymao/sqlglot/commit/99949ccd3ff81b524edeae437d874b86250dbb5b) - avoid needlessly copying in lineage *(PR [#6150](https://github.com/tobymao/sqlglot/pull/6150) by [@georgesittas](https://github.com/georgesittas))*: avoid needlessly copying in lineage (#6150) - due to [`4e36f9d`](https://github.com/tobymao/sqlglot/commit/4e36f9dd6a854b378c9bbf6b2e9811045affc63d) - Annotate type for snowflake TIMEADD function *(PR [#6134](https://github.com/tobymao/sqlglot/pull/6134) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIMEADD function (#6134) - due to [`5242cdd`](https://github.com/tobymao/sqlglot/commit/5242cddf487e367e7f543ca19d9bccae858f36ac) - annotate type for bq LENGTH *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq LENGTH - due to [`0fc6dbf`](https://github.com/tobymao/sqlglot/commit/0fc6dbf2e7b611fa0977e3c3e61be1cc84bcf4a9) - add GREATEST_IGNORE_NULLS function support *(PR 
[#6161](https://github.com/tobymao/sqlglot/pull/6161) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add GREATEST_IGNORE_NULLS function support (#6161) - due to [`d382a31`](https://github.com/tobymao/sqlglot/commit/d382a3106d5ce2e9b75527aacd4a37d1f8e16d18) - simplify double negation only if the inner expr is BOOLEAN *(PR [#6151](https://github.com/tobymao/sqlglot/pull/6151) by [@geooo109](https://github.com/geooo109))*: simplify double negation only if the inner expr is BOOLEAN (#6151) - due to [`bcf6c89`](https://github.com/tobymao/sqlglot/commit/bcf6c89a47abd3c2c4383d1c908f892b6619b6fa) - add type annotation tests for snowflake BOOLAND *(PR [#6153](https://github.com/tobymao/sqlglot/pull/6153) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: add type annotation tests for snowflake BOOLAND (#6153) - due to [`52d1eec`](https://github.com/tobymao/sqlglot/commit/52d1eecaad505703e8b22dcfe8954652f57985b6) - Annotate type for snowflake TIMESTAMP_FROM_PARTS function *(PR [#6139](https://github.com/tobymao/sqlglot/pull/6139) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIMESTAMP_FROM_PARTS function (#6139) - due to [`8651fe6`](https://github.com/tobymao/sqlglot/commit/8651fe6526dea865c0d54d6d53086359a7835d32) - annotate types for BOOLOR *(PR [#6159](https://github.com/tobymao/sqlglot/pull/6159) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BOOLOR (#6159) - due to [`812ba9a`](https://github.com/tobymao/sqlglot/commit/812ba9abad8247df81c8f8b514336c8766292112) - Annotate type for snowflake date parts functions *(PR [#6158](https://github.com/tobymao/sqlglot/pull/6158) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: Annotate type for snowflake date parts functions (#6158) - due to [`9f8c123`](https://github.com/tobymao/sqlglot/commit/9f8c123ae44249e274334d0aa551ac33814f2b32) - make qualify table 
callback more generic *(PR [#6171](https://github.com/tobymao/sqlglot/pull/6171) by [@tobymao](https://github.com/tobymao))*: make qualify table callback more generic (#6171) - due to [`74b4e7c`](https://github.com/tobymao/sqlglot/commit/74b4e7c311e9d4ff39ce2e4d91940eced96aa32f) - fix type annotation for Snowflake BOOLOR and BOOLAND *(PR [#6169](https://github.com/tobymao/sqlglot/pull/6169) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: fix type annotation for Snowflake BOOLOR and BOOLAND (#6169) - due to [`ef87520`](https://github.com/tobymao/sqlglot/commit/ef875204596b8529f3358025c7a61d757a999bdc) - Transpile `REGEXP_REPLACE` with 'g' option *(PR [#6174](https://github.com/tobymao/sqlglot/pull/6174) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile `REGEXP_REPLACE` with 'g' option (#6174) - due to [`93071e2`](https://github.com/tobymao/sqlglot/commit/93071e255406f62ea83dd89a3be4871b7edfb3fe) - Fix simplify_parens from removing negated *(PR [#6194](https://github.com/tobymao/sqlglot/pull/6194) by [@VaggelisD](https://github.com/VaggelisD))*: Fix simplify_parens from removing negated (#6194) - due to [`e90168a`](https://github.com/tobymao/sqlglot/commit/e90168a6829b85534edcecec7d0df2a8b1b56fc4) - annotate type for Snowflake's `IS_NULL_VALUE` function *(PR [#6186](https://github.com/tobymao/sqlglot/pull/6186) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate type for Snowflake's `IS_NULL_VALUE` function (#6186) - due to [`c93b535`](https://github.com/tobymao/sqlglot/commit/c93b5354827282c806899c36b11e7a7598e96e38) - annotate type for LEAST_IGNORE_NULLS *(PR [#6196](https://github.com/tobymao/sqlglot/pull/6196) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate type for LEAST_IGNORE_NULLS (#6196) - due to [`f60c71f`](https://github.com/tobymao/sqlglot/commit/f60c71fb03db91bfe90430d032ac16f4945d5dff) - annotate types for REGR_VALX *(PR 
[#6198](https://github.com/tobymao/sqlglot/pull/6198) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_VALX (#6198) - due to [`b82c571`](https://github.com/tobymao/sqlglot/commit/b82c57131707297abe174539023b9cb62b7cd6c7) - annotate types for REGR_VALY *(PR [#6206](https://github.com/tobymao/sqlglot/pull/6206) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_VALY (#6206) - due to [`39d8e19`](https://github.com/tobymao/sqlglot/commit/39d8e19419c2adbb80465be414d1cc3bbc6d007b) - include VARIABLE kind in SET transpilation to DuckDB *(PR [#6201](https://github.com/tobymao/sqlglot/pull/6201) by [@toriwei](https://github.com/toriwei))*: include VARIABLE kind in SET transpilation to DuckDB (#6201) - due to [`e7ddad1`](https://github.com/tobymao/sqlglot/commit/e7ddad10b5edf9b801d2151e3e5fca448754df0d) - ensure `NULL` coerces into any type *(PR [#6211](https://github.com/tobymao/sqlglot/pull/6211) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: ensure `NULL` coerces into any type (#6211) - due to [`0037266`](https://github.com/tobymao/sqlglot/commit/00372664bf6acf2b0fff9ad4b206b597ef5378f7) - annotate types for GETBIT *(PR [#6219](https://github.com/tobymao/sqlglot/pull/6219) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for GETBIT (#6219) - due to [`a5458ce`](https://github.com/tobymao/sqlglot/commit/a5458ceca3bc239fb611791e38020632dd0824c8) - add type annotation for DECODE function support *(PR [#6199](https://github.com/tobymao/sqlglot/pull/6199) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for DECODE function support (#6199) - due to [`417f1e8`](https://github.com/tobymao/sqlglot/commit/417f1e8ee50fb8f4377fad261660ffbd7444a429) - annotate types for BITNOT *(PR [#6234](https://github.com/tobymao/sqlglot/pull/6234) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: 
annotate types for BITNOT (#6234) - due to [`fe8ab40`](https://github.com/tobymao/sqlglot/commit/fe8ab40e8e0559201e0b1896a6f1a8fb6b5b932d) - 1st-class parsing support for BITAND, BIT_AND, BIT_NOT *(PR [#6243](https://github.com/tobymao/sqlglot/pull/6243) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: 1st-class parsing support for BITAND, BIT_AND, BIT_NOT (#6243) - due to [`5ae3c47`](https://github.com/tobymao/sqlglot/commit/5ae3c47b1c6993b87341472c08714f4a0f738168) - add type annotation for GROUPING() function *(PR [#6244](https://github.com/tobymao/sqlglot/pull/6244) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for GROUPING() function (#6244) - due to [`4133265`](https://github.com/tobymao/sqlglot/commit/413326514507ef06537dcc3d4b80a3fcbcd26f66) - parse `has` function into an `ArrayContains` expression *(PR [#6245](https://github.com/tobymao/sqlglot/pull/6245) by [@joeyutong](https://github.com/joeyutong))*: parse `has` function into an `ArrayContains` expression (#6245) - due to [`cdd45b9`](https://github.com/tobymao/sqlglot/commit/cdd45b949fd1eefb147053424279b56b8effcbcf) - annotate types for GROUPING_ID function. *(PR [#6249](https://github.com/tobymao/sqlglot/pull/6249) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate types for GROUPING_ID function. 
(#6249) - due to [`080ff3b`](https://github.com/tobymao/sqlglot/commit/080ff3bd93b36291d5bb0092d722f8307f0ae082) - annotate types for BITAND_AGG *(PR [#6248](https://github.com/tobymao/sqlglot/pull/6248) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITAND_AGG (#6248) - due to [`87a818a`](https://github.com/tobymao/sqlglot/commit/87a818a899f61a675c22c697f468b3f6f7e2787f) - annotate types for BITOR_AGG *(PR [#6251](https://github.com/tobymao/sqlglot/pull/6251) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITOR_AGG (#6251) - due to [`4c4189b`](https://github.com/tobymao/sqlglot/commit/4c4189b4083d272a6e678d83b5c567a2e9c0d672) - Transpile CONCAT function to double pipe operators when source … *(PR [#6241](https://github.com/tobymao/sqlglot/pull/6241) by [@vchan](https://github.com/vchan))*: Transpile CONCAT function to double pipe operators when source … (#6241) - due to [`a1b884d`](https://github.com/tobymao/sqlglot/commit/a1b884dc9ddfd2185de48cc9451a39f152879d39) - annotate types for BITXOR_AGG *(PR [#6253](https://github.com/tobymao/sqlglot/pull/6253) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITXOR_AGG (#6253) - due to [`fc78d20`](https://github.com/tobymao/sqlglot/commit/fc78d2016d8f7d20c094df791f746de323cd3639) - Unwrap subqueries without modifiers *(PR [#6247](https://github.com/tobymao/sqlglot/pull/6247) by [@VaggelisD](https://github.com/VaggelisD))*: Unwrap subqueries without modifiers (#6247) - due to [`ad2ad23`](https://github.com/tobymao/sqlglot/commit/ad2ad234b5a508040dce4f3920439be052742573) - add missing return type mapping for MAX_BY and MAX_BY function *(PR [#6250](https://github.com/tobymao/sqlglot/pull/6250) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add missing return type mapping for MAX_BY and MAX_BY function (#6250) - due to 
[`39c1d81`](https://github.com/tobymao/sqlglot/commit/39c1d81174f2390b6b0c9dd14c0e550ad452a1df) - annotate types for BOOLXOR_AGG *(PR [#6261](https://github.com/tobymao/sqlglot/pull/6261) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BOOLXOR_AGG (#6261) - due to [`71590d2`](https://github.com/tobymao/sqlglot/commit/71590d22cdb05594e2173a1500f763dc1a32a81d) - add type annotation for SKEW function. *(PR [#6262](https://github.com/tobymao/sqlglot/pull/6262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for SKEW function. (#6262) - due to [`5fd366d`](https://github.com/tobymao/sqlglot/commit/5fd366d9e6f7b3f1eb7a9cf41975cf13ce890ffe) - annotate types for OBJECT_AGG *(PR [#6265](https://github.com/tobymao/sqlglot/pull/6265) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for OBJECT_AGG (#6265) - due to [`00abc39`](https://github.com/tobymao/sqlglot/commit/00abc393c9042e839457c5a6582e95cdb74356f3) - handle casting for bytestrings *(PR [#6252](https://github.com/tobymao/sqlglot/pull/6252) by [@toriwei](https://github.com/toriwei))*: handle casting for bytestrings (#6252) - due to [`3dae0fb`](https://github.com/tobymao/sqlglot/commit/3dae0fbb528762e5d5fd446350d42e9c841e2959) - Support position and occurrence args for REGEXP_EXTRACT *(PR [#6266](https://github.com/tobymao/sqlglot/pull/6266) by [@vchan](https://github.com/vchan))*: Support position and occurrence args for REGEXP_EXTRACT (#6266) - due to [`ddea61d`](https://github.com/tobymao/sqlglot/commit/ddea61d83f6699c97cc7b25aabe01a138138bdb1) - simplify connector complements only for non-null operands *(PR [#6214](https://github.com/tobymao/sqlglot/pull/6214) by [@geooo109](https://github.com/geooo109))*: simplify connector complements only for non-null operands (#6214) - due to [`771732d`](https://github.com/tobymao/sqlglot/commit/771732d81459cc576f11eccc49794f33e62d14af) - annotate types for 
REGR_AVGY *(PR [#6271](https://github.com/tobymao/sqlglot/pull/6271) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_AVGY (#6271) - due to [`8470be0`](https://github.com/tobymao/sqlglot/commit/8470be00731a4d79518a533a5f7ba884fa2f047e) - add type annotation for BITMAP_COUNT function. *(PR [#6274](https://github.com/tobymao/sqlglot/pull/6274) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for BITMAP_COUNT function. (#6274) - due to [`98f25f9`](https://github.com/tobymao/sqlglot/commit/98f25f92cc1175ac7b2118a5a342db82adade13a) - support splitBy function *(PR [#6278](https://github.com/tobymao/sqlglot/pull/6278) by [@joeyutong](https://github.com/joeyutong))*: support splitBy function (#6278) - due to [`fabbf05`](https://github.com/tobymao/sqlglot/commit/fabbf057aba88f30205767d8c339727de45991c8) - Add support for shorthand struct array literals in duckDB. *(PR [#6233](https://github.com/tobymao/sqlglot/pull/6233) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support for shorthand struct array literals in duckDB. (#6233) - due to [`c02b64c`](https://github.com/tobymao/sqlglot/commit/c02b64c3524dd074c2108baaca668ab2607ac843) - Handle pseudocolumns differently than columns *(PR [#6273](https://github.com/tobymao/sqlglot/pull/6273) by [@VaggelisD](https://github.com/VaggelisD))*: Handle pseudocolumns differently than columns (#6273) - due to [`05c5181`](https://github.com/tobymao/sqlglot/commit/05c5181b36a7ada32b96fc91bdfbf73b38a1a408) - refactor `Connector` simplification to factor in types *(PR [#6152](https://github.com/tobymao/sqlglot/pull/6152) by [@geooo109](https://github.com/geooo109))*: refactor `Connector` simplification to factor in types (#6152) - due to [`9c1a222`](https://github.com/tobymao/sqlglot/commit/9c1a2221b0327ba6848542c7b906e92f25a05bea) - add type annotation for BITMAP_CONSTRUCT_AGG function. 
*(PR [#6285](https://github.com/tobymao/sqlglot/pull/6285) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for BITMAP_CONSTRUCT_AGG function. (#6285) - due to [`cb0bcff`](https://github.com/tobymao/sqlglot/commit/cb0bcff310e9acdf806fc98e99cb9938b747c771) - cast UUID() output to varchar when source dialect UUID() returns string *(PR [#6284](https://github.com/tobymao/sqlglot/pull/6284) by [@toriwei](https://github.com/toriwei))*: cast UUID() output to varchar when source dialect UUID() returns string (#6284) - due to [`358105d`](https://github.com/tobymao/sqlglot/commit/358105d1296c7425e071ccf3189a31a02c00c923) - type annotation for BITMAP_BIT_POSITION function *(PR [#6301](https://github.com/tobymao/sqlglot/pull/6301) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type annotation for BITMAP_BIT_POSITION function (#6301) - due to [`4ee7a50`](https://github.com/tobymao/sqlglot/commit/4ee7a500cc460b6f6a1ed103a12dca72e6d01c18) - type inference for BITMAP_OR_AGG *(PR [#6297](https://github.com/tobymao/sqlglot/pull/6297) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type inference for BITMAP_OR_AGG (#6297) - due to [`fcd537d`](https://github.com/tobymao/sqlglot/commit/fcd537de2c993ad0bd18acd84dbae354165f7d3f) - conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. Tests added all dialects that support BITMAP_BUCKET_NUMBER *(PR [#6299](https://github.com/tobymao/sqlglot/pull/6299) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. 
Tests added all dialects that support BITMAP_BUCKET_NUMBER (#6299) - due to [`3dffd59`](https://github.com/tobymao/sqlglot/commit/3dffd598496a9f2d94caec9d7f3dcb9791c94019) - annotate types for PERCENTILE_DISC and WithinGroup *(PR [#6300](https://github.com/tobymao/sqlglot/pull/6300) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for PERCENTILE_DISC and WithinGroup (#6300) - due to [`f9287f7`](https://github.com/tobymao/sqlglot/commit/f9287f7d596a6d8a1e1cd2c48978a4dec77a96cb) - robust deduplication of connectors *(PR [#6296](https://github.com/tobymao/sqlglot/pull/6296) by [@geooo109](https://github.com/geooo109))*: robust deduplication of connectors (#6296) - due to [`ea0ea79`](https://github.com/tobymao/sqlglot/commit/ea0ea79c1c611b62c79f82f744fe0c98803598a3) - Parse `LIKE` functions *(PR [#6314](https://github.com/tobymao/sqlglot/pull/6314) by [@VaggelisD](https://github.com/VaggelisD))*: Parse `LIKE` functions (#6314) - due to [`e903883`](https://github.com/tobymao/sqlglot/commit/e90388328fcf5b8061c99e325b87d5beb0046ffc) - type annotation for APPROX_TOP_K_ACCUMULATE functio… *(PR [#6309](https://github.com/tobymao/sqlglot/pull/6309) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type annotation for APPROX_TOP_K_ACCUMULATE functio… (#6309) - due to [`d3fefad`](https://github.com/tobymao/sqlglot/commit/d3fefad80d25ff5a6dd02426667ff0ea8478a1b2) - support `DATEDIFF_BIG` *(PR [#6323](https://github.com/tobymao/sqlglot/pull/6323) by [@lBilali](https://github.com/lBilali))*: support `DATEDIFF_BIG` (#6323) - due to [`21d1468`](https://github.com/tobymao/sqlglot/commit/21d1468377b9c8ad48c6cca1ae3b3744a807c29e) - annotate type for APPROX_TOP_K *(PR [#6286](https://github.com/tobymao/sqlglot/pull/6286) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for APPROX_TOP_K (#6286) - due to [`85ddcc5`](https://github.com/tobymao/sqlglot/commit/85ddcc5eca22ac726582de454f2f12b9d4877634) - 
Do not normalize JSON fields in dot notation *(PR [#6320](https://github.com/tobymao/sqlglot/pull/6320) by [@VaggelisD](https://github.com/VaggelisD))*: Do not normalize JSON fields in dot notation (#6320) - due to [`933e981`](https://github.com/tobymao/sqlglot/commit/933e98102fb39d24ae0350da13337d981287130a) - more robust NULL reduction *(PR [#6327](https://github.com/tobymao/sqlglot/pull/6327) by [@geooo109](https://github.com/geooo109))*: more robust NULL reduction (#6327) - due to [`e4d1a4f`](https://github.com/tobymao/sqlglot/commit/e4d1a4fcd6741d679c5444bf023077d2aaa8f980) - map date/timestamp `TRUNC` to `DATE_TRUNC` *(PR [#6328](https://github.com/tobymao/sqlglot/pull/6328) by [@nnamdi16](https://github.com/nnamdi16))*: map date/timestamp `TRUNC` to `DATE_TRUNC` (#6328) - due to [`e1b6558`](https://github.com/tobymao/sqlglot/commit/e1b6558cb1a860bbd695f25b66e52064b57c0a84) - handle all datepart alternatives *(PR [#6324](https://github.com/tobymao/sqlglot/pull/6324) by [@lBilali](https://github.com/lBilali))*: handle all datepart alternatives (#6324) - due to [`06daa47`](https://github.com/tobymao/sqlglot/commit/06daa47dedebac672548e1db230b89f5c9eae84e) - update annotated type of ARRAY_AGG to untyped array *(PR [#6347](https://github.com/tobymao/sqlglot/pull/6347) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: update annotated type of ARRAY_AGG to untyped array (#6347) - due to [`7484c06`](https://github.com/tobymao/sqlglot/commit/7484c06be4534cd22dee14da542d5e29ff2c13a2) - Support rounding mode argument for ROUND function *(PR [#6350](https://github.com/tobymao/sqlglot/pull/6350) by [@vchan](https://github.com/vchan))*: Support rounding mode argument for ROUND function (#6350) - due to [`c495a40`](https://github.com/tobymao/sqlglot/commit/c495a40ee4c1a69b14892e8455ae1bd2ceb5ea4f) - annotate type for MINHASH *(PR [#6355](https://github.com/tobymao/sqlglot/pull/6355) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: 
annotate type for MINHASH (#6355) - due to [`b1f9a97`](https://github.com/tobymao/sqlglot/commit/b1f9a976be3c0bcd895bef5bcdb95a013eeb28b7) - annotate type for APPROXIMATE_SIMILARITY *(PR [#6360](https://github.com/tobymao/sqlglot/pull/6360) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for APPROXIMATE_SIMILARITY (#6360) - due to [`3aafca7`](https://github.com/tobymao/sqlglot/commit/3aafca74546b932cea93ed830c021f347ae03ded) - annotate type for MINHASH_COMBINE *(PR [#6362](https://github.com/tobymao/sqlglot/pull/6362) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for MINHASH_COMBINE (#6362) - due to [`df13a65`](https://github.com/tobymao/sqlglot/commit/df13a655646bd2ef5d8b4613670bb5fe48845b73) - unnest deep stuff *(PR [#6366](https://github.com/tobymao/sqlglot/pull/6366) by [@tobymao](https://github.com/tobymao))*: unnest deep stuff (#6366) - due to [`d4c2256`](https://github.com/tobymao/sqlglot/commit/d4c2256fb493ed2f16c29694ae5c31517123d419) - at time zone precedence *(PR [#6383](https://github.com/tobymao/sqlglot/pull/6383) by [@geooo109](https://github.com/geooo109))*: at time zone precedence (#6383) - due to [`4fb4d08`](https://github.com/tobymao/sqlglot/commit/4fb4d08ef8896bda434d4f89c21c669c6146fd02) - properly support table alias in the `INSERT` DML *(PR [#6374](https://github.com/tobymao/sqlglot/pull/6374) by [@snovik75](https://github.com/snovik75))*: properly support table alias in the `INSERT` DML (#6374) - due to [`bf07abd`](https://github.com/tobymao/sqlglot/commit/bf07abd4ee9eb0f5510cb7d1f232bdcaea88941e) - annotation support for APPROX_TOP_K_COMBINE *(PR [#6378](https://github.com/tobymao/sqlglot/pull/6378) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_TOP_K_COMBINE (#6378) - due to [`50348ac`](https://github.com/tobymao/sqlglot/commit/50348ac31f784aa97bd09d5d6c6613fbd68402ee) - support order by clause for mysql delete 
statement *(PR [#6381](https://github.com/tobymao/sqlglot/pull/6381) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support order by clause for mysql delete statement (#6381) - due to [`21d3859`](https://github.com/tobymao/sqlglot/commit/21d38590fec6cb55a1a03aeb2621bd9fca677496) - Disable STRING_AGG sep canonicalization *(PR [#6395](https://github.com/tobymao/sqlglot/pull/6395) by [@VaggelisD](https://github.com/VaggelisD))*: Disable STRING_AGG sep canonicalization (#6395) - due to [`95727f6`](https://github.com/tobymao/sqlglot/commit/95727f60d601796b34c850dee9366d79f6e4a24b) - canonicalize table aliases *(PR [#6369](https://github.com/tobymao/sqlglot/pull/6369) by [@georgesittas](https://github.com/georgesittas))*: canonicalize table aliases (#6369) - due to [`c7cb098`](https://github.com/tobymao/sqlglot/commit/c7cb0983a0fa463c43d2c4ee925816e9a1628c79) - Fix underscore separator with scientific notation *(PR [#6401](https://github.com/tobymao/sqlglot/pull/6401) by [@VaggelisD](https://github.com/VaggelisD))*: Fix underscore separator with scientific notation (#6401) - due to [`bb4eda1`](https://github.com/tobymao/sqlglot/commit/bb4eda1beb68b92de9ab014a63c67797a07df2fa) - support transpiling SHA1 from BigQuery to DuckDB *(PR [#6404](https://github.com/tobymao/sqlglot/pull/6404) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpiling SHA1 from BigQuery to DuckDB (#6404) - due to [`d038ad7`](https://github.com/tobymao/sqlglot/commit/d038ad7f036a140f3eae4bdde15824437d4e44ee) - support named primary keys for mysql *(PR [#6389](https://github.com/tobymao/sqlglot/pull/6389) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support named primary keys for mysql (#6389) - due to [`05e83b5`](https://github.com/tobymao/sqlglot/commit/05e83b56f1bf9323cfa819a7f1beb542524c1219) - support transpilation of LEAST from BigQuery to DuckDB *(PR 
[#6415](https://github.com/tobymao/sqlglot/pull/6415) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of LEAST from BigQuery to DuckDB (#6415) - due to [`4f3bb0d`](https://github.com/tobymao/sqlglot/commit/4f3bb0d6714bf89ff72e13e1398d8f01cefafb00) - Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… *(PR [#6414](https://github.com/tobymao/sqlglot/pull/6414) by [@vchan](https://github.com/vchan))*: Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… (#6414) - due to [`8c314a8`](https://github.com/tobymao/sqlglot/commit/8c314a8b457a5c3ed470ac8fcff022fec881c248) - support cte pivot for duckdb *(PR [#6413](https://github.com/tobymao/sqlglot/pull/6413) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support cte pivot for duckdb (#6413) - due to [`c6b0a63`](https://github.com/tobymao/sqlglot/commit/c6b0a6342a21d79635a26d40001c916d05d47cf7) - change version to be a tuple so that it can be pickled, also simpler *(commit by [@tobymao](https://github.com/tobymao))*: change version to be a tuple so that it can be pickled, also simpler - due to [`07d9958`](https://github.com/tobymao/sqlglot/commit/07d99583b4aebdc682bb7604ccdf45bddb89f9c3) - replace direct comparison with dialect properties *(PR [#6398](https://github.com/tobymao/sqlglot/pull/6398) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: replace direct comparison with dialect properties (#6398) - due to [`38472ce`](https://github.com/tobymao/sqlglot/commit/38472ce14bce731ba4c309d515223ae99e2575ac) - transpile bigquery's %x format literal *(PR [#6375](https://github.com/tobymao/sqlglot/pull/6375) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpile bigquery's %x format literal (#6375) - due to [`92ee124`](https://github.com/tobymao/sqlglot/commit/92ee1241ea3088d4e63c094404252339c54ad0c1) - postgres qualify GENERATE_SERIES and table projection *(PR 
[#6373](https://github.com/tobymao/sqlglot/pull/6373) by [@geooo109](https://github.com/geooo109))*: postgres qualify GENERATE_SERIES and table projection (#6373) - due to [`0b9d8ac`](https://github.com/tobymao/sqlglot/commit/0b9d8acbe75457424436e8c0acc047ab66e9fdc0) - Annotate type for snowflake MAX function *(PR [#6422](https://github.com/tobymao/sqlglot/pull/6422) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake MAX function (#6422) - due to [`68e9414`](https://github.com/tobymao/sqlglot/commit/68e9414725a60b2842d870fa222d8466057a94f6) - Annotate type for snowflake MIN function *(PR [#6427](https://github.com/tobymao/sqlglot/pull/6427) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake MIN function (#6427) - due to [`1318de7`](https://github.com/tobymao/sqlglot/commit/1318de77a8aa514ec7eb9f9b8c03228e3f8eb008) - Annotate type for snowflake NORMAL *(PR [#6434](https://github.com/tobymao/sqlglot/pull/6434) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake NORMAL (#6434) - due to [`ffbb5c7`](https://github.com/tobymao/sqlglot/commit/ffbb5c7e40aa064ffcd4827e96ea66cfd045118e) - annotate type for HASH_AGG in Snowflake *(PR [#6438](https://github.com/tobymao/sqlglot/pull/6438) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for HASH_AGG in Snowflake (#6438) - due to [`161255f`](https://github.com/tobymao/sqlglot/commit/161255f6c90b9c3ed2074e734f6d074db1d7a6dd) - Add support for `LOCALTIME` function *(PR [#6443](https://github.com/tobymao/sqlglot/pull/6443) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for `LOCALTIME` function (#6443) - due to [`ca329f0`](https://github.com/tobymao/sqlglot/commit/ca329f037a230c315437d830638b514190764c5a) - support transpilation of SHA256 from bigquery to duckdb *(PR [#6421](https://github.com/tobymao/sqlglot/pull/6421) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of SHA256 from bigquery to duckdb (#6421) - due to [`e18ae24`](https://github.com/tobymao/sqlglot/commit/e18ae248423dbbca78a24a60ea0193da2ee7f68c) - Annotate type for snowflake REGR_SLOPE function *(PR [#6425](https://github.com/tobymao/sqlglot/pull/6425) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake REGR_SLOPE function (#6425) - due to [`1d847f0`](https://github.com/tobymao/sqlglot/commit/1d847f0a1f88fce5df340ab646a72c8abbc12a86) - parse & annotate `CHECK_JSON`, `CHECK_XML` *(PR [#6439](https://github.com/tobymao/sqlglot/pull/6439) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: parse & annotate `CHECK_JSON`, `CHECK_XML` (#6439) - due to [`cb3080d`](https://github.com/tobymao/sqlglot/commit/cb3080d4bed18b1bfbbd08380ed60deeefd15530) - annotation support for APPROX_TOP_K_ESTIMATE. Return type ARRAY *(PR [#6445](https://github.com/tobymao/sqlglot/pull/6445) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_TOP_K_ESTIMATE.
Return type ARRAY (#6445) - due to [`313afe5`](https://github.com/tobymao/sqlglot/commit/313afe540aa2cdc4cc179c4852c6ef37362bcb3e) - annotate type for snowflake func ARRAY_UNION_AGG *(PR [#6446](https://github.com/tobymao/sqlglot/pull/6446) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func ARRAY_UNION_AGG (#6446) - due to [`cd9f037`](https://github.com/tobymao/sqlglot/commit/cd9f037882eef253e86fdb1d51521e0acd7db3f9) - store pk name if provided *(PR [#6424](https://github.com/tobymao/sqlglot/pull/6424) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: store pk name if provided (#6424) - due to [`65194e4`](https://github.com/tobymao/sqlglot/commit/65194e465489151aa51859a6e3f5672f7d4c5f3b) - Annotate type for snowflake RANDSTR function *(PR [#6436](https://github.com/tobymao/sqlglot/pull/6436) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake RANDSTR function (#6436) - due to [`a56262e`](https://github.com/tobymao/sqlglot/commit/a56262e6b4276baae144855478807c173db77ab9) - Annotate type for snowflake MEDIAN *(PR [#6426](https://github.com/tobymao/sqlglot/pull/6426) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake MEDIAN (#6426) - due to [`2c56567`](https://github.com/tobymao/sqlglot/commit/2c56567755c8a6571d8b7d410c9de943e54df58b) - Annotate type for snowflake SEARCH_IP *(PR [#6440](https://github.com/tobymao/sqlglot/pull/6440) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake SEARCH_IP (#6440) - due to [`ac86568`](https://github.com/tobymao/sqlglot/commit/ac86568a939f692b99813da100297b61fb54e044) - Added decfloat type *(PR [#6444](https://github.com/tobymao/sqlglot/pull/6444) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Added decfloat type (#6444) - due to 
[`b321ca6`](https://github.com/tobymao/sqlglot/commit/b321ca6191fefc88da1a6de83a465886b5754b7a) - bump sqlglotrs to 0.8.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.8.0 - due to [`ebe718a`](https://github.com/tobymao/sqlglot/commit/ebe718a72d5b5871a8d6e67754ff50e873d55b41) - Add support for format elements used in date/time functions like FORMAT_DATETIME *(PR [#6428](https://github.com/tobymao/sqlglot/pull/6428) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support for format elements used in date/time functions like FORMAT_DATETIME (#6428) - due to [`c111f64`](https://github.com/tobymao/sqlglot/commit/c111f643d61064280024b4cc5c0fc250581fbe55) - annotation support for APPROX_PERCENTILE_ACCUMULATE *(PR [#6455](https://github.com/tobymao/sqlglot/pull/6455) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_ACCUMULATE (#6455) - due to [`f305305`](https://github.com/tobymao/sqlglot/commit/f305305e5cf3ef45afba822542aebeb944c00e0b) - Annotate types for BigQuery's AVG function *(PR [#6459](https://github.com/tobymao/sqlglot/pull/6459) by [@vchan](https://github.com/vchan))*: Annotate types for BigQuery's AVG function (#6459) - due to [`910349f`](https://github.com/tobymao/sqlglot/commit/910349f3c30af59ce1820e48cae0cbb77539877d) - Annotate types for BigQuery's SAFE_DIVIDE function *(PR [#6464](https://github.com/tobymao/sqlglot/pull/6464) by [@vchan](https://github.com/vchan))*: Annotate types for BigQuery's SAFE_DIVIDE function (#6464) - due to [`5e75621`](https://github.com/tobymao/sqlglot/commit/5e75621e90defd50076383485f6a4689a8c551ac) - annotate type for snowflake func ARRAY_UNIQUE_AGG *(PR [#6465](https://github.com/tobymao/sqlglot/pull/6465) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func ARRAY_UNIQUE_AGG (#6465) - due to 
[`94d46b8`](https://github.com/tobymao/sqlglot/commit/94d46b8eafd5abe252407d2bbe306ca579a29b20) - annotation support for APPROX_PERCENTILE_ESTIMATE. Return type DOUBLE *(PR [#6461](https://github.com/tobymao/sqlglot/pull/6461) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_ESTIMATE. Return type DOUBLE (#6461) - due to [`2ac30b0`](https://github.com/tobymao/sqlglot/commit/2ac30b08bd663bbaf00ae075c4db0c3d27ab6640) - annotation support for APPROX_PERCENTILE_COMBINE *(PR [#6460](https://github.com/tobymao/sqlglot/pull/6460) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_COMBINE (#6460) - due to [`d44bda3`](https://github.com/tobymao/sqlglot/commit/d44bda376c06956947a09a9f279cce886a63b981) - Annotate type for ZIPF *(PR [#6453](https://github.com/tobymao/sqlglot/pull/6453) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for ZIPF (#6453) - due to [`34dbd47`](https://github.com/tobymao/sqlglot/commit/34dbd478957c1796998d0b263f63c8ce1db7a320) - Annotate type for XMLGET *(PR [#6457](https://github.com/tobymao/sqlglot/pull/6457) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for XMLGET (#6457) - due to [`0d211f2`](https://github.com/tobymao/sqlglot/commit/0d211f2b36167cfb7856b8ec25f597f70317a9c7) - annotate type for MODE function snowflake *(PR [#6447](https://github.com/tobymao/sqlglot/pull/6447) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for MODE function snowflake (#6447) - due to [`cc4c8ab`](https://github.com/tobymao/sqlglot/commit/cc4c8ab43ab71790bc2bb9f8f3c06e34f89f999f) - annotate type for PERCENTILE_CONT in Snowflake *(PR [#6470](https://github.com/tobymao/sqlglot/pull/6470) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for PERCENTILE_CONT in Snowflake (#6470) - due to 
[`7dbc242`](https://github.com/tobymao/sqlglot/commit/7dbc242a637a8890511cc14f22bce4d425f1f55d) - annotation support for CURRENT_REGION. Return type VARCHAR *(PR [#6473](https://github.com/tobymao/sqlglot/pull/6473) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_REGION. Return type VARCHAR (#6473) - due to [`43a6a5c`](https://github.com/tobymao/sqlglot/commit/43a6a5c601421e15a7f94dd489cb4fbcf9d2c8c3) - annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR *(PR [#6475](https://github.com/tobymao/sqlglot/pull/6475) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR (#6475) - due to [`f1f7c6a`](https://github.com/tobymao/sqlglot/commit/f1f7c6ae6b6aa3f6f2251d0f81ee667440ca53d1) - annotation support for CURRENT_ORGANIZATION_USER. *(PR [#6476](https://github.com/tobymao/sqlglot/pull/6476) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ORGANIZATION_USER.
(#6476) - due to [`88dfd26`](https://github.com/tobymao/sqlglot/commit/88dfd26b832d13e517fe7c18d2c086885bf4954d) - annotate type for snowflake func TO_BINARY *(PR [#6474](https://github.com/tobymao/sqlglot/pull/6474) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func TO_BINARY (#6474) - due to [`d268203`](https://github.com/tobymao/sqlglot/commit/d268203e1dbae4e3aff863108f6d09a6f8274db5) - annotation support for CURRENT_ROLE_TYPE *(PR [#6479](https://github.com/tobymao/sqlglot/pull/6479) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ROLE_TYPE (#6479) - due to [`fd4431b`](https://github.com/tobymao/sqlglot/commit/fd4431bf9550c03aa761c642a68a21a146fd8548) - annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions *(PR [#6468](https://github.com/tobymao/sqlglot/pull/6468) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions (#6468) - due to [`e6adba7`](https://github.com/tobymao/sqlglot/commit/e6adba76cc2f27633a9d38bfaea3356e71d00a4c) - Add support for coercing STRING literals to temporal types *(PR [#6482](https://github.com/tobymao/sqlglot/pull/6482) by [@vchan](https://github.com/vchan))*: Add support for coercing STRING literals to temporal types (#6482) - due to [`68a5e61`](https://github.com/tobymao/sqlglot/commit/68a5e615b24e518cb90c9b80cf25355fcabdb468) - annotate type for REGR_* functions *(PR [#6452](https://github.com/tobymao/sqlglot/pull/6452) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for REGR_* functions (#6452) - due to [`f7458a4`](https://github.com/tobymao/sqlglot/commit/f7458a40d3b09a2e212f6705ac4a77c99714508e) - annotate type for snowflake func TO_BOOLEAN *(PR [#6481](https://github.com/tobymao/sqlglot/pull/6481) by 
[@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func TO_BOOLEAN (#6481) - due to [`1531a67`](https://github.com/tobymao/sqlglot/commit/1531a67ac7806f3b4582f6cf1ea02342a517de74) - annotate type for VECTOR_INNER_PRODUCT *(PR [#6486](https://github.com/tobymao/sqlglot/pull/6486) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for VECTOR_INNER_PRODUCT (#6486) - due to [`df4c1d3`](https://github.com/tobymao/sqlglot/commit/df4c1d37ff77151a74b5de3d119c7e03f5db85f4) - REGEXP_EXTRACT position arg overflow *(PR [#6458](https://github.com/tobymao/sqlglot/pull/6458) by [@treysp](https://github.com/treysp))*: REGEXP_EXTRACT position arg overflow (#6458) - due to [`f6b2b3b`](https://github.com/tobymao/sqlglot/commit/f6b2b3bc6e1c95340149be65d80ef7e177b28d82) - support padside argument for BIT[OR|AND|XOR] *(PR [#6487](https://github.com/tobymao/sqlglot/pull/6487) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support padside argument for BIT[OR|AND|XOR] (#6487) - due to [`5a49c3f`](https://github.com/tobymao/sqlglot/commit/5a49c3f7a7619ad9e711ff2cd9e85b8606969b36) - support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions *(PR [#6463](https://github.com/tobymao/sqlglot/pull/6463) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions (#6463) - due to [`ef130f1`](https://github.com/tobymao/sqlglot/commit/ef130f1b944b4be835d4a6831fec9a333a825a34) - Annotated type for ARRAY_CONSTRUCT_COMPACT [#6496](https://github.com/tobymao/sqlglot/pull/6496) *(commit by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotated type for ARRAY_CONSTRUCT_COMPACT #6496 - due to [`1b6076b`](https://github.com/tobymao/sqlglot/commit/1b6076bd5a64b044f52f5366244ba0746aca75e1) - wrap connectives generated due to transpiling LIKE ANY closes 
[#6493](https://github.com/tobymao/sqlglot/pull/6493) *(commit by [@georgesittas](https://github.com/georgesittas))*: wrap connectives generated due to transpiling LIKE ANY closes #6493 - due to [`36ad534`](https://github.com/tobymao/sqlglot/commit/36ad534b14eabe9ee197017f5087e8e5190f8526) - qualified select list with "LOCAL" *(PR [#6450](https://github.com/tobymao/sqlglot/pull/6450) by [@nnamdi16](https://github.com/nnamdi16))*: qualified select list with "LOCAL" (#6450) - due to [`36cf0bf`](https://github.com/tobymao/sqlglot/commit/36cf0bf6671f622344afee52d7aafe30f19ecf9a) - annotation support for CURRENT_ROLE. *(PR [#6478](https://github.com/tobymao/sqlglot/pull/6478) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ROLE. (#6478) - due to [`cbba04c`](https://github.com/tobymao/sqlglot/commit/cbba04cb292fe8b3fd38c87d9ccb624cdcb52843) - support comma-separated syntax for OVERLAY function *(PR [#6497](https://github.com/tobymao/sqlglot/pull/6497) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support comma-separated syntax for OVERLAY function (#6497) - due to [`dc8f26a`](https://github.com/tobymao/sqlglot/commit/dc8f26a3a5e023a0e54caa345b129fb1b4fe805f) - bq annotate type for NULL *(PR [#6491](https://github.com/tobymao/sqlglot/pull/6491) by [@geooo109](https://github.com/geooo109))*: bq annotate type for NULL (#6491) - due to [`52aceaa`](https://github.com/tobymao/sqlglot/commit/52aceaaa887dddb35f8ede5c2d9577fdeee35c48) - annotate `HavingMax` by `this` *(PR [#6499](https://github.com/tobymao/sqlglot/pull/6499) by [@georgesittas](https://github.com/georgesittas))*: annotate `HavingMax` by `this` (#6499) - due to [`c97a81d`](https://github.com/tobymao/sqlglot/commit/c97a81d68a1584fad48475725665a7678fcad9d1) - annotate TO_HEX(MD5(...)) in BigQuery *(PR [#6500](https://github.com/tobymao/sqlglot/pull/6500) by [@georgesittas](https://github.com/georgesittas))*: annotate TO_HEX(MD5(...)) in BigQuery (#6500) - 
due to [`a5797a1`](https://github.com/tobymao/sqlglot/commit/a5797a1c867c4ade71ae4ddf93232576993cf5bc) - handle named arguments and non-integer scale input for ROUND *(PR [#6495](https://github.com/tobymao/sqlglot/pull/6495) by [@toriwei](https://github.com/toriwei))*: handle named arguments and non-integer scale input for ROUND (#6495) - due to [`3224235`](https://github.com/tobymao/sqlglot/commit/3224235c1b7a80511af11f7dbffe608a747a3df0) - make CTE builder produce AST consistent with parser closes [#6503](https://github.com/tobymao/sqlglot/pull/6503) *(commit by [@georgesittas](https://github.com/georgesittas))*: make CTE builder produce AST consistent with parser closes #6503 - due to [`8b5298a`](https://github.com/tobymao/sqlglot/commit/8b5298a6578af80fd9676eb222422862d5468859) - Transpile BQ's WEEK based `DATE_DIFF` *(PR [#6507](https://github.com/tobymao/sqlglot/pull/6507) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile BQ's WEEK based `DATE_DIFF` (#6507) - due to [`9454a18`](https://github.com/tobymao/sqlglot/commit/9454a18cca41a510e61522f6b785d646980e2100) - uppercase join method, side, kind for consistency fixes [#6510](https://github.com/tobymao/sqlglot/pull/6510) *(PR [#6511](https://github.com/tobymao/sqlglot/pull/6511) by [@georgesittas](https://github.com/georgesittas))*: uppercase join method, side, kind for consistency fixes #6510 (#6511) - due to [`41b776b`](https://github.com/tobymao/sqlglot/commit/41b776bdc6936f18accd9f7308b55acd383bb596) - added support for current_catalog *(PR [#6492](https://github.com/tobymao/sqlglot/pull/6492) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: added support for current_catalog (#6492) - due to [`dd19bea`](https://github.com/tobymao/sqlglot/commit/dd19beae95f077cfd8b6e315eca7ff212817b250) - annotation support for CURRENT_ACCOUNT *(PR [#6512](https://github.com/tobymao/sqlglot/pull/6512) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ACCOUNT 
(#6512) - due to [`2e8105e`](https://github.com/tobymao/sqlglot/commit/2e8105eebaec25fc8f94f1e68951198660f404e1) - Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP *(PR [#6488](https://github.com/tobymao/sqlglot/pull/6488) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP (#6488) - due to [`cfb02c1`](https://github.com/tobymao/sqlglot/commit/cfb02c1aa676e801b2d13a84467b4904cd834ffe) - annotation support for CURRENT_ACCOUNT_NAME *(PR [#6513](https://github.com/tobymao/sqlglot/pull/6513) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ACCOUNT_NAME (#6513) - due to [`1004e31`](https://github.com/tobymao/sqlglot/commit/1004e31cce62cce2e2afb7eab85ed8bdecaede3b) - annotation support for CURRENT_AVAILABLE_ROLES *(PR [#6514](https://github.com/tobymao/sqlglot/pull/6514) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_AVAILABLE_ROLES (#6514) - due to [`ff201fe`](https://github.com/tobymao/sqlglot/commit/ff201febd27937a97674dd091928456dde733254) - annotation support for CURRENT_CLIENT *(PR [#6515](https://github.com/tobymao/sqlglot/pull/6515) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_CLIENT (#6515) - due to [`d777a9c`](https://github.com/tobymao/sqlglot/commit/d777a9c0feef15ac036f7b413112de4d7cc8bea4) - annotation support for CURRENT_IP_ADDRESS *(PR [#6518](https://github.com/tobymao/sqlglot/pull/6518) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_IP_ADDRESS (#6518) - due to [`c296061`](https://github.com/tobymao/sqlglot/commit/c2960615a3bd279b7c5f775d5b93ae12aa27a3b8) - Transpilation of TO_BINARY from snowflake to duckdb *(PR [#6504](https://github.com/tobymao/sqlglot/pull/6504) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Transpilation of TO_BINARY from snowflake to duckdb (#6504) - due to [`7a70164`](https://github.com/tobymao/sqlglot/commit/7a70164d8cf361cf4c0a7d5789bb51676f772959) - transpile Snowflake's `RANDSTR` function *(PR [#6502](https://github.com/tobymao/sqlglot/pull/6502) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpile Snowflake's `RANDSTR` function (#6502) - due to [`a26d419`](https://github.com/tobymao/sqlglot/commit/a26d4191e5468e39eafdf7a981e7b890d438b2c9) - annotation support for CURRENT_DATABASE *(PR [#6516](https://github.com/tobymao/sqlglot/pull/6516) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_DATABASE (#6516) - due to [`0acdf7f`](https://github.com/tobymao/sqlglot/commit/0acdf7fc783f2722536ec24dcf8600957febf7ca) - annotation support for CURRENT_SCHEMAS *(PR [#6519](https://github.com/tobymao/sqlglot/pull/6519) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SCHEMAS (#6519) - due to [`43cce89`](https://github.com/tobymao/sqlglot/commit/43cce895da80d21abc89d40de5d7fddd68871bf0) - annotation support for CURRENT_SECONDARY_ROLES *(PR [#6520](https://github.com/tobymao/sqlglot/pull/6520) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SECONDARY_ROLES (#6520) - due to [`c21b4b1`](https://github.com/tobymao/sqlglot/commit/c21b4b1134b368ee5144339b59e70ddcc54f3dbc) - annotation support for CURRENT_SESSION *(PR [#6521](https://github.com/tobymao/sqlglot/pull/6521) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SESSION (#6521) - due to [`57a83c0`](https://github.com/tobymao/sqlglot/commit/57a83c018dace690f7bb363c25ee6bde33c3d60f) - annotation support for CURRENT_STATEMENT *(PR [#6522](https://github.com/tobymao/sqlglot/pull/6522) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_STATEMENT (#6522) - due to [`4b240e4`](https://github.com/tobymao/sqlglot/commit/4b240e40a8809a6eea2a279370a884f4a7b03dfa) - annotation support for CURRENT_VERSION *(PR [#6524](https://github.com/tobymao/sqlglot/pull/6524) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_VERSION (#6524) - due to [`c1a831f`](https://github.com/tobymao/sqlglot/commit/c1a831f5bf662ab8d8e07dc2bb949f2adcbe7d7c) - annotation support for CURRENT_TRANSACTION *(PR [#6523](https://github.com/tobymao/sqlglot/pull/6523) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_TRANSACTION (#6523) - due to [`2e162b0`](https://github.com/tobymao/sqlglot/commit/2e162b0d34066e7aa7edac3156739bcd31a634fc) - annotation support for CURRENT_WAREHOUSE *(PR [#6525](https://github.com/tobymao/sqlglot/pull/6525) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_WAREHOUSE (#6525) - due to [`9d06859`](https://github.com/tobymao/sqlglot/commit/9d0685923209c04747fa6fa2b35ee2e516453abc) - annotate bigquery ARRAY when arg contains set operations *(PR [#6517](https://github.com/tobymao/sqlglot/pull/6517) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate bigquery ARRAY when arg contains set operations (#6517) - due to [`2fd14ed`](https://github.com/tobymao/sqlglot/commit/2fd14ed32b3793444405005fb98342222b4d7956) - query schema directly when type annotation fails for processing UNNEST source *(PR [#6451](https://github.com/tobymao/sqlglot/pull/6451) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: query schema directly when type annotation fails for processing UNNEST source (#6451) - due to [`41a9e88`](https://github.com/tobymao/sqlglot/commit/41a9e88bb9800205df0b3e10a1976699dc4fe4f9) - Add support to transpile 
binary args for bitwise operators *(PR [#6508](https://github.com/tobymao/sqlglot/pull/6508) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support to transpile binary args for bitwise operators (#6508) - due to [`06c7ffb`](https://github.com/tobymao/sqlglot/commit/06c7ffbe14985a4da35a97d47322021e79525adf) - cleanup bitwise operator fixes *(commit by [@georgesittas](https://github.com/georgesittas))*: cleanup bitwise operator fixes - due to [`62b348c`](https://github.com/tobymao/sqlglot/commit/62b348ce46d014895bd17d89ccb0b3e186e46d15) - add support for noop string escapes *(PR [#6526](https://github.com/tobymao/sqlglot/pull/6526) by [@nian0114](https://github.com/nian0114))*: add support for noop string escapes (#6526) - due to [`1876c5a`](https://github.com/tobymao/sqlglot/commit/1876c5a86c3b737b7360c4fef25c44dc010b66db) - consolidate can_quote logic and fix an issue with identify=False *(PR [#6534](https://github.com/tobymao/sqlglot/pull/6534) by [@tobymao](https://github.com/tobymao))*: consolidate can_quote logic and fix an issue with identify=False (#6534) - due to [`edb8964`](https://github.com/tobymao/sqlglot/commit/edb8964ed064a687e52323143d52281eaa391c9a) - bump sqlglotrs to 0.9.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.9.0 - due to [`2555856`](https://github.com/tobymao/sqlglot/commit/2555856cac7434ef91cc1584d52610178e45c4b9) - annotate scalar subqueries *(PR [#6536](https://github.com/tobymao/sqlglot/pull/6536) by [@georgesittas](https://github.com/georgesittas))*: annotate scalar subqueries (#6536) - due to [`71e7630`](https://github.com/tobymao/sqlglot/commit/71e763096462aa888a353ac1ad3675a9e5b4841a) - normalize FLOAT to DOUBLE *(PR [#6501](https://github.com/tobymao/sqlglot/pull/6501) by [@toriwei](https://github.com/toriwei))*: normalize FLOAT to DOUBLE (#6501) - due to 
[`9badf6a`](https://github.com/tobymao/sqlglot/commit/9badf6a6b1972fc37164b29aa416bb897d7ec6a6) - Annotate type for TRY_* functions *(PR [#6509](https://github.com/tobymao/sqlglot/pull/6509) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for TRY_* functions (#6509) - due to [`aad1332`](https://github.com/tobymao/sqlglot/commit/aad1332fee7c82c29dae3caed9a6a1c882c1d4a0) - support transpilation of BITMAP_BIT_POSITION from snowflake to duckdb *(PR [#6541](https://github.com/tobymao/sqlglot/pull/6541) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support transpilation of BITMAP_BIT_POSITION from snowflake to duckdb (#6541) - due to [`f21cf76`](https://github.com/tobymao/sqlglot/commit/f21cf763575b67084ea81a377c5bdb3e86041e4c) - bq annotate SAFE_DIVIDE with both args as INT64 *(PR [#6543](https://github.com/tobymao/sqlglot/pull/6543) by [@geooo109](https://github.com/geooo109))*: bq annotate SAFE_DIVIDE with both args as INT64 (#6543) - due to [`4a57302`](https://github.com/tobymao/sqlglot/commit/4a5730242787920d0a2412aef495eb2eeaaa2119) - ensure structs are annotated as unknown if any argument is unknown *(PR [#6544](https://github.com/tobymao/sqlglot/pull/6544) by [@georgesittas](https://github.com/georgesittas))*: ensure structs are annotated as unknown if any argument is unknown (#6544) - due to [`8a12611`](https://github.com/tobymao/sqlglot/commit/8a12611e9499497d0c8b1e1e418986b2d91a6505) - New type + type annotation for TO_FILE *(PR [#6548](https://github.com/tobymao/sqlglot/pull/6548) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: New type + type annotation for TO_FILE (#6548) - due to [`63a2e49`](https://github.com/tobymao/sqlglot/commit/63a2e49485f237e1c7e16358c412acb5df50e22c) - stop treating `None` args as leaves to be diffed *(PR [#6556](https://github.com/tobymao/sqlglot/pull/6556) by [@georgesittas](https://github.com/georgesittas))*: stop treating `None` args as 
leaves to be diffed (#6556) - due to [`906c933`](https://github.com/tobymao/sqlglot/commit/906c933235c82598b0d08f8c66dd3db0b8f409a5) - overlap operator *(PR [#6545](https://github.com/tobymao/sqlglot/pull/6545) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: overlap operator (#6545) - due to [`370b1f6`](https://github.com/tobymao/sqlglot/commit/370b1f621844d3ac8831c998ea2046f1e1b91b65) - add support for session_user *(PR [#6555](https://github.com/tobymao/sqlglot/pull/6555) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: add support for session_user (#6555) - due to [`dbbace0`](https://github.com/tobymao/sqlglot/commit/dbbace01cd5f1fc44f5ad278def25f547686f9c5) - remove transpilation support of APPROX_TOP_K to duckdb *(PR [#6560](https://github.com/tobymao/sqlglot/pull/6560) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: remove transpilation support of APPROX_TOP_K to duckdb (#6560) - due to [`2bc2506`](https://github.com/tobymao/sqlglot/commit/2bc2506e0e0b26e82661a08217855d693f30dc25) - support SAFE.TIMESTAMP annotation *(PR [#6550](https://github.com/tobymao/sqlglot/pull/6550) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support SAFE.TIMESTAMP annotation (#6550) - due to [`a51cc7b`](https://github.com/tobymao/sqlglot/commit/a51cc7b6e02c5b37bf43b82a0d76b83d41248ac9) - elt function in mysql *(PR [#6568](https://github.com/tobymao/sqlglot/pull/6568) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: elt function in mysql (#6568) - due to [`14dc1e5`](https://github.com/tobymao/sqlglot/commit/14dc1e5bc74b3b8907ba02bf89ad1763940c9ea2) - make `DATE_PART` roundtrip *(PR [#6573](https://github.com/tobymao/sqlglot/pull/6573) by [@georgesittas](https://github.com/georgesittas))*: make `DATE_PART` roundtrip (#6573) - due to [`4339b26`](https://github.com/tobymao/sqlglot/commit/4339b26db546862b10a0e8d746506b406ecfa306) - expose struct fields using UNNEST without aliases *(PR 
[#6566](https://github.com/tobymao/sqlglot/pull/6566) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: expose struct fields using UNNEST without aliases (#6566) - due to [`8a44ad5`](https://github.com/tobymao/sqlglot/commit/8a44ad560cb65a34a722b257a82e69a41e7e45e0) - Mark _DBT_MAX_PARTITION as pseudocolumn *(PR [#6572](https://github.com/tobymao/sqlglot/pull/6572) by [@VaggelisD](https://github.com/VaggelisD))*: Mark _DBT_MAX_PARTITION as pseudocolumn (#6572) - due to [`7bfffe5`](https://github.com/tobymao/sqlglot/commit/7bfffe5d894c60bd0139d57c53bb1816c2739d74) - support transpilation of TO_BOOLEAN from snowflake to duckdb *(PR [#6564](https://github.com/tobymao/sqlglot/pull/6564) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of TO_BOOLEAN from snowflake to duckdb (#6564) ### :sparkles: New Features - [`1fb90db`](https://github.com/tobymao/sqlglot/commit/1fb90db52b59e6e3a40597c6f611d0476b72025b) - **teradata**: Add support for Teradata set query band expression *(PR [#5519](https://github.com/tobymao/sqlglot/pull/5519) by [@treff7es](https://github.com/treff7es))* - [`a49baaf`](https://github.com/tobymao/sqlglot/commit/a49baaf717cb41abb25ca51ae5adddc8473baa8b) - **doris**: Override table_sql to avoid AS keyword in UPDATE and DELETE statements *(PR [#5517](https://github.com/tobymao/sqlglot/pull/5517) by [@peterylh](https://github.com/peterylh))* - [`75fd6d2`](https://github.com/tobymao/sqlglot/commit/75fd6d21fb7bc8399432e73d10b4837ae62d2ab5) - **exasol**: Add support for date difference functions in Exasol dialect *(PR [#5510](https://github.com/tobymao/sqlglot/pull/5510) by [@nnamdi16](https://github.com/nnamdi16))* - [`2a91bb4`](https://github.com/tobymao/sqlglot/commit/2a91bb4f17c7569a5b409cc07e970e5d68235149) - **teradata**: Add support for Teradata locking select *(PR [#5524](https://github.com/tobymao/sqlglot/pull/5524) by [@treff7es](https://github.com/treff7es))* - 
[`938f4b6`](https://github.com/tobymao/sqlglot/commit/938f4b6ebc1c0d26bd3c1400883978c79a435189) - **optimizer**: annotate type for LAST_DAY *(PR [#5528](https://github.com/tobymao/sqlglot/pull/5528) by [@geooo109](https://github.com/geooo109))* - [`7d12dac`](https://github.com/tobymao/sqlglot/commit/7d12dac613ba5119334408f2c52cb270067156d9) - **optimizer**: annotate type for bigquery GENERATE_TIMESTAMP_ARRAY *(PR [#5529](https://github.com/tobymao/sqlglot/pull/5529) by [@geooo109](https://github.com/geooo109))* - [`d50ebe2`](https://github.com/tobymao/sqlglot/commit/d50ebe286dd8e2836b9eb2a3406f15976db3aa05) - **optimizer**: annotate type for bigquery TIME_TRUNC *(PR [#5530](https://github.com/tobymao/sqlglot/pull/5530) by [@geooo109](https://github.com/geooo109))* - [`29748be`](https://github.com/tobymao/sqlglot/commit/29748be7dfc10edc9f29665c98327883dd25c13d) - **optimizer**: annotate type for bigquery TIME *(PR [#5531](https://github.com/tobymao/sqlglot/pull/5531) by [@geooo109](https://github.com/geooo109))* - [`7003b3f`](https://github.com/tobymao/sqlglot/commit/7003b3fa39cd455e3643066364696708d1ac4f38) - **optimizer**: parse and annotate type for bigquery DATE_FROM_UNIX_DATE *(PR [#5532](https://github.com/tobymao/sqlglot/pull/5532) by [@geooo109](https://github.com/geooo109))* - [`a276ca6`](https://github.com/tobymao/sqlglot/commit/a276ca6fd5f9d47fa8c90fcfa19f9864e7a28f8f) - **optimizer**: parse and annotate type for bigquery JUSTIFY funcs *(PR [#5534](https://github.com/tobymao/sqlglot/pull/5534) by [@geooo109](https://github.com/geooo109))* - [`374178e`](https://github.com/tobymao/sqlglot/commit/374178e22fe8d2d2275b65fe08e27ef66c611220) - **optimizer**: parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS *(PR [#5535](https://github.com/tobymao/sqlglot/pull/5535) by [@geooo109](https://github.com/geooo109))* - [`1d8d1ab`](https://github.com/tobymao/sqlglot/commit/1d8d1abe459053a135a46525d0a13bb861220927) - **optimizer**: annotate type for 
bigquery DATE_TRUNC *(PR [#5540](https://github.com/tobymao/sqlglot/pull/5540) by [@geooo109](https://github.com/geooo109))* - [`306ba65`](https://github.com/tobymao/sqlglot/commit/306ba6531839ea2823f5165de7bde01d17560845) - **optimizer**: annotate type for bigquery TIMESTAMP_TRUNC *(PR [#5541](https://github.com/tobymao/sqlglot/pull/5541) by [@geooo109](https://github.com/geooo109))* - [`6a68cca`](https://github.com/tobymao/sqlglot/commit/6a68cca97ad4bdd75c544ada0a5af0fa92ec4664) - **dremio**: support lowercase `TIME_MAPPING` formats *(PR [#5556](https://github.com/tobymao/sqlglot/pull/5556) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`f3ffe19`](https://github.com/tobymao/sqlglot/commit/f3ffe19ec01533c5f27b9d3a7b6704b83c005118) - **optimizer**: annotate type for bigquery format_time *(PR [#5559](https://github.com/tobymao/sqlglot/pull/5559) by [@geooo109](https://github.com/geooo109))* - [`3ab3690`](https://github.com/tobymao/sqlglot/commit/3ab369096313b418699b7942b1c513c0c66a5331) - **optimizer**: parse and annotate type for bigquery PARSE_DATETIME *(PR [#5558](https://github.com/tobymao/sqlglot/pull/5558) by [@geooo109](https://github.com/geooo109))* - [`e5da951`](https://github.com/tobymao/sqlglot/commit/e5da951542eb55691bc43fbbfbec4a30100de038) - **optimizer**: parse and annotate type for bigquery PARSE_TIME *(PR [#5561](https://github.com/tobymao/sqlglot/pull/5561) by [@geooo109](https://github.com/geooo109))* - [`902a0cd`](https://github.com/tobymao/sqlglot/commit/902a0cdfe46f693aa55612d45a2de2def21f0b8c) - **singlestore**: Added parsing/generation of UNIXTIME functions *(PR [#5562](https://github.com/tobymao/sqlglot/pull/5562) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`798e213`](https://github.com/tobymao/sqlglot/commit/798e213fd10c3b61afbd8cef621546de65fa6f26) - **duckdb**: improve transpilability of ANY_VALUE closes [#5563](https://github.com/tobymao/sqlglot/pull/5563) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`c7041c7`](https://github.com/tobymao/sqlglot/commit/c7041c71250b17192c2f25fb8f33407324d332c2) - **optimizer**: parse and annotate type for bigquery BYTE_LENGTH *(PR [#5568](https://github.com/tobymao/sqlglot/pull/5568) by [@geooo109](https://github.com/geooo109))* - [`a6c61c3`](https://github.com/tobymao/sqlglot/commit/a6c61c34f1e168c97dd5c2b8ec071372ba593992) - **optimizer**: parse and annotate type for bigquery CODE_POINTS_TO_STRING *(PR [#5569](https://github.com/tobymao/sqlglot/pull/5569) by [@geooo109](https://github.com/geooo109))* - [`2a33339`](https://github.com/tobymao/sqlglot/commit/2a333395cde71936df911488afcff92cae735e11) - **optimizer**: annotate type for bigquery REPLACE *(PR [#5572](https://github.com/tobymao/sqlglot/pull/5572) by [@geooo109](https://github.com/geooo109))* - [`1e6f813`](https://github.com/tobymao/sqlglot/commit/1e6f81343de641e588f1a05ce7dc01bed72bd849) - **optimizer**: annotate type for bigquery REGEXP_EXTRACT_ALL *(PR [#5573](https://github.com/tobymao/sqlglot/pull/5573) by [@geooo109](https://github.com/geooo109))* - [`eb09e6e`](https://github.com/tobymao/sqlglot/commit/eb09e6e32491a05846488de7b72b1dca0e0a2669) - **optimizer**: parse and annotate type for bigquery TRANSLATE *(PR [#5575](https://github.com/tobymao/sqlglot/pull/5575) by [@geooo109](https://github.com/geooo109))* - [`f9a522b`](https://github.com/tobymao/sqlglot/commit/f9a522b26cd5d643b8b18fa64d70f2a3f0ff2d2c) - **optimizer**: parse and annotate type for bigquery SOUNDEX *(PR [#5576](https://github.com/tobymao/sqlglot/pull/5576) by [@geooo109](https://github.com/geooo109))* - [`51da41b`](https://github.com/tobymao/sqlglot/commit/51da41b90ce421b154e45add28353ac044640a1c) - **optimizer**: annotate type for bigquery MD5 *(PR [#5577](https://github.com/tobymao/sqlglot/pull/5577) by [@geooo109](https://github.com/geooo109))* -
[`bcf302f`](https://github.com/tobymao/sqlglot/commit/bcf302ff6ad2d0adfc29f708a8b53b5c0e547619) - **optimizer**: annotate type for bigquery MIN/MAX BY *(PR [#5579](https://github.com/tobymao/sqlglot/pull/5579) by [@geooo109](https://github.com/geooo109))* - [`c501d9e`](https://github.com/tobymao/sqlglot/commit/c501d9e6f58e4880e4d23f21f53f72dcb5fdaa8c) - **optimizer**: parse and annotate type for bigquery GROUPING *(PR [#5581](https://github.com/tobymao/sqlglot/pull/5581) by [@geooo109](https://github.com/geooo109))* - [`8612825`](https://github.com/tobymao/sqlglot/commit/86128253f911b733d45b073356e3b8ddf261c22b) - **spark**: generate date/time ops as interval binary ops *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8fda774`](https://github.com/tobymao/sqlglot/commit/8fda774b7a9b0c66948349dfe030d3c122ff6eee) - **singlestore**: Added parsing and generation of JSON_EXTRACT *(PR [#5555](https://github.com/tobymao/sqlglot/pull/5555) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`82cc954`](https://github.com/tobymao/sqlglot/commit/82cc9549a875211a400e5c4e818b05ca48a0a9f4) - **exasol**: map div function to IntDiv in exasol dialect *(PR [#5593](https://github.com/tobymao/sqlglot/pull/5593) by [@nnamdi16](https://github.com/nnamdi16))* - [`eb0fe68`](https://github.com/tobymao/sqlglot/commit/eb0fe68d6b5977053c871badf2f5c1895b3e1c66) - **trino**: add JSON_VALUE function support with RETURNING clause *(PR [#5590](https://github.com/tobymao/sqlglot/pull/5590) by [@rev-rwasilewski](https://github.com/rev-rwasilewski))* - [`9e95c11`](https://github.com/tobymao/sqlglot/commit/9e95c115ea0304d9ccb4cb0be8389f5ff5f2a952) - **exasol**: mapped weekofyear to week in Exasol dialect *(PR [#5594](https://github.com/tobymao/sqlglot/pull/5594) by [@nnamdi16](https://github.com/nnamdi16))* - [`8f013c3`](https://github.com/tobymao/sqlglot/commit/8f013c37a412ca5978889c1e47b0c6f7add0715d) - **singlestore**: Fixed parsing of DATE function *(PR 
[#5601](https://github.com/tobymao/sqlglot/pull/5601) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a4a299a`](https://github.com/tobymao/sqlglot/commit/a4a299acbaf4461f0c2b470bc4e9e9590515eda7) - transpile `TO_CHAR` from Dremio to Databricks *(PR [#5598](https://github.com/tobymao/sqlglot/pull/5598) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`093f35c`](https://github.com/tobymao/sqlglot/commit/093f35c201c3c22c3a14c6f8de26c06246bdf19c) - **dremio**: handle `DATE_FORMAT`, `TO_DATE`, and `TO_TIMESTAMP` *(PR [#5597](https://github.com/tobymao/sqlglot/pull/5597) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`02e60e7`](https://github.com/tobymao/sqlglot/commit/02e60e73fc0c2dae815aa225be247a17ccdf4b82) - **singlestore**: desugarize DAYNAME into DATE_FORMAT *(PR [#5610](https://github.com/tobymao/sqlglot/pull/5610) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7b180bd`](https://github.com/tobymao/sqlglot/commit/7b180bdc3da9e39946c22970bd2523f7d8beaf29) - **parser**: raise if query modifier is specified multiple times *(PR [#5608](https://github.com/tobymao/sqlglot/pull/5608) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5604](https://github.com/tobymao/sqlglot/issues/5604) opened by [@bricct](https://github.com/bricct)* - [`442eafc`](https://github.com/tobymao/sqlglot/commit/442eafcb00a2650930bd6023aa9a5febfebbe796) - **singlestore**: Added parsing of HOUR function *(PR [#5612](https://github.com/tobymao/sqlglot/pull/5612) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5320359`](https://github.com/tobymao/sqlglot/commit/532035978605efd1d43de75aafca750e2894c0b9) - **singlestore**: Added parsing of MICROSECOND function *(PR [#5619](https://github.com/tobymao/sqlglot/pull/5619) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`db1db97`](https://github.com/tobymao/sqlglot/commit/db1db9732352187629df853ad937ebaf4abfe487) - **doris**: 
update exp.UniqueKeyProperty SQL generation logic *(PR [#5613](https://github.com/tobymao/sqlglot/pull/5613) by [@xinge-ji](https://github.com/xinge-ji))* - [`54623a6`](https://github.com/tobymao/sqlglot/commit/54623a6b85432272703f12a197b05ced78529f90) - **singlestore**: Added parsing of MINUTE function *(PR [#5620](https://github.com/tobymao/sqlglot/pull/5620) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`565c9f8`](https://github.com/tobymao/sqlglot/commit/565c9f8c55cfbef5d3a9e1470551f1dc4416825e) - **singlestore**: Added generation of DAYOFWEEK_ISO function *(PR [#5627](https://github.com/tobymao/sqlglot/pull/5627) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`8db916e`](https://github.com/tobymao/sqlglot/commit/8db916e2f2ce241bdff130d626f98df182b48f3e) - **singlestore**: Added parsing of WEEKDAY function *(PR [#5624](https://github.com/tobymao/sqlglot/pull/5624) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`aa6274a`](https://github.com/tobymao/sqlglot/commit/aa6274a0ea647df1251563945635260a6ddd4972) - **singlestore**: Fixed generation of DAY_OF_MONTH function *(PR [#5629](https://github.com/tobymao/sqlglot/pull/5629) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`dee44b8`](https://github.com/tobymao/sqlglot/commit/dee44b8c1d70ca6079867896fb68cad256909dad) - **singlestore**: Added parsing of MONTHNAME function *(PR [#5623](https://github.com/tobymao/sqlglot/pull/5623) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`deebf0c`](https://github.com/tobymao/sqlglot/commit/deebf0c3cc379e28c4ab66b6bb7a9c84c14e88c6) - **singlestore**: Added parsing of SECOND function *(PR [#5621](https://github.com/tobymao/sqlglot/pull/5621) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`12a60b9`](https://github.com/tobymao/sqlglot/commit/12a60b99b6b2b0673b57218c691794deb67aa3a5) - **singlestore**: Removed redundant deletions from TRANSFORMS *(PR [#5632](https://github.com/tobymao/sqlglot/pull/5632) 
by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`36602a2`](https://github.com/tobymao/sqlglot/commit/36602a2ecc9ffca98e89044d23e40f33c6ed71e4) - **duckdb**: parse LIST_FILTER into ArrayFilter closes [#5633](https://github.com/tobymao/sqlglot/pull/5633) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0188d21`](https://github.com/tobymao/sqlglot/commit/0188d21d443c991a528eb9d220459890b7dca477) - **duckdb**: parse LIST_TRANSFORM into Transform closes [#5634](https://github.com/tobymao/sqlglot/pull/5634) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b117d59`](https://github.com/tobymao/sqlglot/commit/b117d59f3c43f6f44cd0ccdf22717f7bcd990889) - **dremio**: add dremio date_add and date_sub parsing *(PR [#5617](https://github.com/tobymao/sqlglot/pull/5617) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`999b9e7`](https://github.com/tobymao/sqlglot/commit/999b9e793c0819a4d2af6400fc924946d26b3e6f) - **singlestore**: Changed generation of exp.TsOrDsToDate to handle case when format is not provided *(PR [#5639](https://github.com/tobymao/sqlglot/pull/5639) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`b556e97`](https://github.com/tobymao/sqlglot/commit/b556e97f8cfbde21c0a921ac1c01c9e4f2ec2535) - **singlestore**: Marked exp.All as unsupported *(PR [#5640](https://github.com/tobymao/sqlglot/pull/5640) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`c076694`](https://github.com/tobymao/sqlglot/commit/c0766946e6799fb61c38e855fd18812d08a5c251) - **clickhouse**: support custom partition key expressions *(PR [#5645](https://github.com/tobymao/sqlglot/pull/5645) by [@GaliFFun](https://github.com/GaliFFun))* - [`cab62b0`](https://github.com/tobymao/sqlglot/commit/cab62b06ce926e3116a6a45a9c57e4901cd8a281) - **doris**: add support for BUILD and REFRESH properties in materialized view *(PR [#5614](https://github.com/tobymao/sqlglot/pull/5614) by [@xinge-ji](https://github.com/xinge-ji))* - 
[`af0b299`](https://github.com/tobymao/sqlglot/commit/af0b299561914953b30ab36004e53dcb92d39e1c) - **optimizer**: Qualify columns generated by exp.Aliases *(PR [#5647](https://github.com/tobymao/sqlglot/pull/5647) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5638](https://github.com/tobymao/sqlglot/issues/5638) opened by [@catlynkong](https://github.com/catlynkong)* - [`981e0e7`](https://github.com/tobymao/sqlglot/commit/981e0e70a304665e746158c859bcc81f99384685) - **doris**: add support for PARTITION BY LIST *(PR [#5615](https://github.com/tobymao/sqlglot/pull/5615) by [@xinge-ji](https://github.com/xinge-ji))* - [`53aa8fe`](https://github.com/tobymao/sqlglot/commit/53aa8fe7f188012f765066f32c4179035fff036d) - **tsql**: support alter table with check closes [#5649](https://github.com/tobymao/sqlglot/pull/5649) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`23cac6c`](https://github.com/tobymao/sqlglot/commit/23cac6c58099a9ac818ac5d3970a427ca3579cca) - **exasol**: Add support for GROUP_CONCAT and LISTAGG functions *(PR [#5646](https://github.com/tobymao/sqlglot/pull/5646) by [@nnamdi16](https://github.com/nnamdi16))* - [`d087ac8`](https://github.com/tobymao/sqlglot/commit/d087ac89376df5ab16de99c8b67f99060f0a6170) - **bigquery**: Add support for ml.generate_embedding function *(PR [#5652](https://github.com/tobymao/sqlglot/pull/5652) by [@rloredo](https://github.com/rloredo))* - [`e71bcb5`](https://github.com/tobymao/sqlglot/commit/e71bcb51181de63c8ad13004216506529fcf9644) - **dremio**: support array_generate_range *(PR [#5653](https://github.com/tobymao/sqlglot/pull/5653) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`edbd04b`](https://github.com/tobymao/sqlglot/commit/edbd04b6a91b1a6f76e4fa938098ba5ed581ba72) - **singlestore**: Fixed generation of exp.RegexpLike *(PR [#5663](https://github.com/tobymao/sqlglot/pull/5663) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`4992edb`](https://github.com/tobymao/sqlglot/commit/4992edbb79f4922917cc5ce5aa687e6f7da7798c) - **singlestore**: Fixed exp.Xor generation *(PR [#5662](https://github.com/tobymao/sqlglot/pull/5662) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`20de3d3`](https://github.com/tobymao/sqlglot/commit/20de3d37cdae0705c67f80fbacbe024a62f34657) - **singlestore**: Fixed parsing/generation of exp.Hll *(PR [#5664](https://github.com/tobymao/sqlglot/pull/5664) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`1a60a5a`](https://github.com/tobymao/sqlglot/commit/1a60a5a845c7431d7d3d7ccb71119699316f4b41) - **singlestore**: Added parsing/generation of JSON_ARRAY_CONTAINS function *(PR [#5661](https://github.com/tobymao/sqlglot/pull/5661) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`f662dc0`](https://github.com/tobymao/sqlglot/commit/f662dc0b47fd14d00899c14a899756a5ba1fe9da) - **singlestore**: Fixed generation of exp.ApproxDistinct *(PR [#5666](https://github.com/tobymao/sqlglot/pull/5666) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e0db0a9`](https://github.com/tobymao/sqlglot/commit/e0db0a95d3cb7614242dbd1b439d408e7e7bd475) - **optimizer**: add parse and annotate type for bigquery FARM_FINGERPRINT *(PR [#5667](https://github.com/tobymao/sqlglot/pull/5667) by [@geooo109](https://github.com/geooo109))* - [`dcd4ef7`](https://github.com/tobymao/sqlglot/commit/dcd4ef769727ed1227911f2d9a85244d61173003) - **singlestore**: Fixed exp.CountIf generation *(PR [#5668](https://github.com/tobymao/sqlglot/pull/5668) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e431e85`](https://github.com/tobymao/sqlglot/commit/e431e851c2c5d20f049adbc38e370a64d39c346f) - **singlestore**: Fixed generation of exp.LogicalOr *(PR [#5669](https://github.com/tobymao/sqlglot/pull/5669) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`56588c7`](https://github.com/tobymao/sqlglot/commit/56588c7e22b4db4f0e44696a460483ca1e549163) - 
**bigquery**: Add support for vector_search function. Move predict to BigQuery dialect. *(PR [#5660](https://github.com/tobymao/sqlglot/pull/5660) by [@rloredo](https://github.com/rloredo))* - [`f0d2cc2`](https://github.com/tobymao/sqlglot/commit/f0d2cc2b0f72340172ecd154f632aa6a24c15512) - **singlestore**: Fixed generation of exp.LogicalAnd *(PR [#5671](https://github.com/tobymao/sqlglot/pull/5671) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a688a0f`](https://github.com/tobymao/sqlglot/commit/a688a0f0d70f87139e531d1419b338b695bec384) - **optimizer**: parse and annotate type for bigquery APPROX_TOP_COUNT *(PR [#5670](https://github.com/tobymao/sqlglot/pull/5670) by [@geooo109](https://github.com/geooo109))* - [`fa8d571`](https://github.com/tobymao/sqlglot/commit/fa8d57132b1d21d92eb5de3ba88b41f880e14889) - **singlestore**: Fixed generation/parsing of exp.ApproxQuantile *(PR [#5672](https://github.com/tobymao/sqlglot/pull/5672) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9955ebe`](https://github.com/tobymao/sqlglot/commit/9955ebe90d3421815738ecb643806add755c5df3) - **singlestore**: Fixed parsing/generation of exp.Variance *(PR [#5673](https://github.com/tobymao/sqlglot/pull/5673) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`3c93fcc`](https://github.com/tobymao/sqlglot/commit/3c93fcce96ec82e78753f6c9dd5fb0e730a82058) - **optimizer**: parse and annotate type for bigquery APPROX_TOP_SUM *(PR [#5675](https://github.com/tobymao/sqlglot/pull/5675) by [@geooo109](https://github.com/geooo109))* - [`60cbb9d`](https://github.com/tobymao/sqlglot/commit/60cbb9d0e3c9b5a36c1368c9b5bb05def8ce8658) - **dremio**: add CURRENT_DATE_UTC *(PR [#5674](https://github.com/tobymao/sqlglot/pull/5674) by [@jasonthomassql](https://github.com/jasonthomassql))* - :arrow_lower_right: *addresses issue [#5655](https://github.com/tobymao/sqlglot/issues/5655) opened by [@jasonthomassql](https://github.com/jasonthomassql)* - 
[`741d45a`](https://github.com/tobymao/sqlglot/commit/741d45a0ca7c1bad67da4393cd10cc9cfa49ea68) - **optimizer**: parse and annotate type for bigquery FROM/TO_BASE32 *(PR [#5676](https://github.com/tobymao/sqlglot/pull/5676) by [@geooo109](https://github.com/geooo109))* - [`9ae045c`](https://github.com/tobymao/sqlglot/commit/9ae045c0405e43b148e3b9261825288ebf09100c) - **optimizer**: parse and annotate type for bigquery FROM_HEX *(PR [#5679](https://github.com/tobymao/sqlglot/pull/5679) by [@geooo109](https://github.com/geooo109))* - [`5a22a25`](https://github.com/tobymao/sqlglot/commit/5a22a254143978989027f6e7f6163019a34f112a) - **optimizer**: annotate type for bigquery TO_HEX *(PR [#5680](https://github.com/tobymao/sqlglot/pull/5680) by [@geooo109](https://github.com/geooo109))* - [`d920ac3`](https://github.com/tobymao/sqlglot/commit/d920ac3886ce006d76616bc31884ee2f5c4162bc) - **singlestore**: Fixed parsing/generation of exp.RegexpExtractAll *(PR [#5692](https://github.com/tobymao/sqlglot/pull/5692) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`260c72b`](https://github.com/tobymao/sqlglot/commit/260c72befc0510ebe1d007284c0eef9343de20d7) - **singlestore**: Fixed parsing/generation of exp.Contains *(PR [#5684](https://github.com/tobymao/sqlglot/pull/5684) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`081dc67`](https://github.com/tobymao/sqlglot/commit/081dc673b89d3d8d0709b29e359142297ff64536) - **singlestore**: Fixed generation/parsing of exp.VariancePop *(PR [#5682](https://github.com/tobymao/sqlglot/pull/5682) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`eb538bf`](https://github.com/tobymao/sqlglot/commit/eb538bf225645d0a54d614733e447c13cf91a37a) - **singlestore**: Fixed generation of exp.Chr *(PR [#5683](https://github.com/tobymao/sqlglot/pull/5683) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`32d9dd1`](https://github.com/tobymao/sqlglot/commit/32d9dd1309ce0876114f57993596c4456aa1d50f) - 
**singlestore**: Fixed exp.MD5Digest generation *(PR [#5688](https://github.com/tobymao/sqlglot/pull/5688) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5c1eb2d`](https://github.com/tobymao/sqlglot/commit/5c1eb2df5dd3dcc6ed2c8204cec56b5c3d276f87) - **optimizer**: parse and annotate type for bq PARSE_BIG/NUMERIC *(PR [#5690](https://github.com/tobymao/sqlglot/pull/5690) by [@geooo109](https://github.com/geooo109))* - [`6f88500`](https://github.com/tobymao/sqlglot/commit/6f885007a075339cf20034459571a6ae821c61c0) - **singlestore**: Fixed exp.IsAscii generation *(PR [#5687](https://github.com/tobymao/sqlglot/pull/5687) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`311373d`](https://github.com/tobymao/sqlglot/commit/311373d22134de906d1c1cef019541e85e2f7c9f) - **optimizer**: parse and annotate type for bq CODE_POINTS_TO_BYTES *(PR [#5686](https://github.com/tobymao/sqlglot/pull/5686) by [@geooo109](https://github.com/geooo109))* - [`79d9de1`](https://github.com/tobymao/sqlglot/commit/79d9de1745598f8f3ae2c82c1389dd455c946a09) - **optimizer**: parse and annotate type for bq TO_CODE_POINTS *(PR [#5685](https://github.com/tobymao/sqlglot/pull/5685) by [@geooo109](https://github.com/geooo109))* - [`5df3ea9`](https://github.com/tobymao/sqlglot/commit/5df3ea92f59125955124ea1883b777b489db3042) - **optimizer**: parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING *(PR [#5681](https://github.com/tobymao/sqlglot/pull/5681) by [@geooo109](https://github.com/geooo109))* - [`c832746`](https://github.com/tobymao/sqlglot/commit/c832746018fbc2c531d5b2a7c7f8cd5d78e511ff) - **optimizer**: parse and annotate type for bigquery APPROX_QUANTILES *(PR [#5678](https://github.com/tobymao/sqlglot/pull/5678) by [@geooo109](https://github.com/geooo109))* - [`8fa5ae8`](https://github.com/tobymao/sqlglot/commit/8fa5ae8a61c698abaea265b4950390ea3ddfa7e9) - **singlestore**: Fixed generation/parsing of exp.RegexpExtract *(PR 
[#5691](https://github.com/tobymao/sqlglot/pull/5691) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`d6d409a`](https://github.com/tobymao/sqlglot/commit/d6d409a548042063f80d02dfaf5b61a0096d1d50) - **singlestore**: Fixed generation of exp.Repeat *(PR [#5693](https://github.com/tobymao/sqlglot/pull/5693) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`b7db08b`](https://github.com/tobymao/sqlglot/commit/b7db08b96c7d7d02ec54f26b8749b3d57f021d8b) - **singlestore**: Fixed generation of exp.StartsWith *(PR [#5694](https://github.com/tobymao/sqlglot/pull/5694) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`87b04ef`](https://github.com/tobymao/sqlglot/commit/87b04ef0fc2df5064be9e6b75b264cff0639face) - **singlestore**: Fixed generation of exp.FromBase *(PR [#5695](https://github.com/tobymao/sqlglot/pull/5695) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9c1d0fd`](https://github.com/tobymao/sqlglot/commit/9c1d0fdac9acd3fb3109ca3d3cae9c9ffaed1a7d) - **duckdb**: transpile array unique aggregation closes [#5689](https://github.com/tobymao/sqlglot/pull/5689) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99e169e`](https://github.com/tobymao/sqlglot/commit/99e169ea13d5be3712a47f6b55b98a4764a3c24d) - **optimizer**: parse and annotate type for bq BOOL *(PR [#5697](https://github.com/tobymao/sqlglot/pull/5697) by [@geooo109](https://github.com/geooo109))* - [`3f31770`](https://github.com/tobymao/sqlglot/commit/3f31770c793f464fcac1ce2b8dfa03d4b7f0231c) - **optimizer**: parse and annotate type for bq FLOAT64 *(PR [#5700](https://github.com/tobymao/sqlglot/pull/5700) by [@geooo109](https://github.com/geooo109))* - [`f6f8f56`](https://github.com/tobymao/sqlglot/commit/f6f8f56a59d550dfc7dfcab0c3b9a6885c7e758a) - **singlestore**: Fixed parsing/generation of exp.JSONFormat *(PR [#5706](https://github.com/tobymao/sqlglot/pull/5706) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`de2fe15`](https://github.com/tobymao/sqlglot/commit/de2fe1503b5bb003431d1f0c7b9ae87932a6cc1c) - **optimizer**: annotate type for bq CONTAINS_SUBSTR *(PR [#5705](https://github.com/tobymao/sqlglot/pull/5705) by [@geooo109](https://github.com/geooo109))* - [`a78146e`](https://github.com/tobymao/sqlglot/commit/a78146e37bfc972050b4467c39769407061e9bc3) - **singlestore**: Fixed parsing/generation of exp.DateBin *(PR [#5709](https://github.com/tobymao/sqlglot/pull/5709) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`ab0c985`](https://github.com/tobymao/sqlglot/commit/ab0c985424ae9d9340eafd15ecdc9b31bdd8837c) - **singlestore**: Marked exp.Reduce finish argument as unsupported *(PR [#5707](https://github.com/tobymao/sqlglot/pull/5707) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`770888f`](https://github.com/tobymao/sqlglot/commit/770888f4e9a9061329e3c416f968f7dd9639fb81) - **optimizer**: annotate type for bq NORMALIZE *(PR [#5711](https://github.com/tobymao/sqlglot/pull/5711) by [@geooo109](https://github.com/geooo109))* - [`506033f`](https://github.com/tobymao/sqlglot/commit/506033f299f7a4c28f6efd8bf715be5dcf73e929) - **optimizer**: parse and annotate type for bq NORMALIZE_AND_CASEFOLD *(PR [#5712](https://github.com/tobymao/sqlglot/pull/5712) by [@geooo109](https://github.com/geooo109))* - [`848aea1`](https://github.com/tobymao/sqlglot/commit/848aea1dbaaeb580b633796dcca06c28314b9c3e) - **optimizer**: parse and annotate type for bq OCTET_LENGTH *(PR [#5713](https://github.com/tobymao/sqlglot/pull/5713) by [@geooo109](https://github.com/geooo109))* - [`727bf83`](https://github.com/tobymao/sqlglot/commit/727bf8378f232188d35834d980b035552999ea3b) - add support for REVOKE DDL *(PR [#5703](https://github.com/tobymao/sqlglot/pull/5703) by [@newtonapple](https://github.com/newtonapple))* - [`baffd2c`](https://github.com/tobymao/sqlglot/commit/baffd2c0be9657683781f3f8831c47e32dbf68bb) - **optimizer**: parse and annotate type for bq REGEXP_INSTR 
*(PR [#5710](https://github.com/tobymao/sqlglot/pull/5710) by [@geooo109](https://github.com/geooo109))* - [`b79eb19`](https://github.com/tobymao/sqlglot/commit/b79eb198cc21203efa82128b357d435338e9133d) - **optimizer**: annotate type for bq ROW_NUMBER *(PR [#5716](https://github.com/tobymao/sqlglot/pull/5716) by [@geooo109](https://github.com/geooo109))* - [`f709bef`](https://github.com/tobymao/sqlglot/commit/f709bef3af7cd0daa25fe3d58b1753c3e65720ef) - **optimizer**: annotate type for bq FIRST_VALUE *(PR [#5718](https://github.com/tobymao/sqlglot/pull/5718) by [@geooo109](https://github.com/geooo109))* - [`b9ae9e5`](https://github.com/tobymao/sqlglot/commit/b9ae9e534dee1e32fccbf22cab9bc17fbd920629) - **singlestore**: Implemented generation of exp.TsOrDiToDi *(PR [#5724](https://github.com/tobymao/sqlglot/pull/5724) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9b14fff`](https://github.com/tobymao/sqlglot/commit/9b14fffd2c9404f76a3faced2ec9d6eaac8feb01) - **singlestore**: Implemented generation of exp.DateToDi *(PR [#5717](https://github.com/tobymao/sqlglot/pull/5717) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`07d8c23`](https://github.com/tobymao/sqlglot/commit/07d8c2347baba6523310c4d31cddfb0e5c0eddc1) - **singlestore**: Implemented generation of exp.DiToDate *(PR [#5721](https://github.com/tobymao/sqlglot/pull/5721) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`ad34a85`](https://github.com/tobymao/sqlglot/commit/ad34a855a433bc0f51a707cbcb66f8dce667a562) - **singlestore**: Implemented generation of exp.FromTimeZone *(PR [#5723](https://github.com/tobymao/sqlglot/pull/5723) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`29d5e4f`](https://github.com/tobymao/sqlglot/commit/29d5e4f62a799f35c0904a23cedacc6efa95a63b) - **singlestore**: Implemented generation of exp.DatetimeAdd *(PR [#5728](https://github.com/tobymao/sqlglot/pull/5728) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`15a9061`](https://github.com/tobymao/sqlglot/commit/15a906170e5d5cdaa207ec7607edfdd7d4a8b774) - **optimizer**: annotate type for bq PERCENTILE_DISC *(PR [#5722](https://github.com/tobymao/sqlglot/pull/5722) by [@geooo109](https://github.com/geooo109))* - [`7d49609`](https://github.com/tobymao/sqlglot/commit/7d4960963f0ef70b96f5b969bb008d2742e833ea) - **optimizer**: annotate type for bq NTH_VALUE *(PR [#5720](https://github.com/tobymao/sqlglot/pull/5720) by [@geooo109](https://github.com/geooo109))* - [`d41acf1`](https://github.com/tobymao/sqlglot/commit/d41acf11221bee30a5ae089cbac9b158ed3dd515) - **optimizer**: annotate type for bq LEAD *(PR [#5719](https://github.com/tobymao/sqlglot/pull/5719) by [@geooo109](https://github.com/geooo109))* - [`113809a`](https://github.com/tobymao/sqlglot/commit/113809a07efee0f12758bd2571c8515885568466) - **singlestore**: Implemented exp.TimeStrToDate generation *(PR [#5725](https://github.com/tobymao/sqlglot/pull/5725) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cf63d0d`](https://github.com/tobymao/sqlglot/commit/cf63d0df4c2f58b2cf0c87e2a3a6f63f836a50a1) - **dremio**: add regexp_like and alias regexp_matches *(PR [#5731](https://github.com/tobymao/sqlglot/pull/5731) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`e42160f`](https://github.com/tobymao/sqlglot/commit/e42160f27fa68828898969073f2f4a0014f5e3e9) - **dremio**: support alias repeatstr *(PR [#5730](https://github.com/tobymao/sqlglot/pull/5730) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`ff12130`](https://github.com/tobymao/sqlglot/commit/ff12130c23a215917f20fda7d50322f1cb7de599) - **optimizer**: annotate type for bq PERCENTILE_CONT *(PR [#5729](https://github.com/tobymao/sqlglot/pull/5729) by [@geooo109](https://github.com/geooo109))* - [`fdb8a0a`](https://github.com/tobymao/sqlglot/commit/fdb8a0a6d0d74194255f313bd934db7fc1ce0d3f) - **optimizer**: parse and annotate type for bq FORMAT *(PR 
[#5715](https://github.com/tobymao/sqlglot/pull/5715) by [@geooo109](https://github.com/geooo109))* - [`e272292`](https://github.com/tobymao/sqlglot/commit/e272292197f2bb81ccfad1de06a95f321f0b565f) - **singlestore**: Implemented generation of exp.Time *(PR [#5727](https://github.com/tobymao/sqlglot/pull/5727) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`012bdd3`](https://github.com/tobymao/sqlglot/commit/012bdd3c8aeff180f85354ffd403fc1aa5815dcf) - **optimizer**: parse and annotate type for bq CUME_DIST *(PR [#5735](https://github.com/tobymao/sqlglot/pull/5735) by [@geooo109](https://github.com/geooo109))* - [`b99eaeb`](https://github.com/tobymao/sqlglot/commit/b99eaeb0c6eb3dc613e76d205e02632bd6af353b) - **optimizer**: parse and annotate type for bq DENSE_RANK *(PR [#5736](https://github.com/tobymao/sqlglot/pull/5736) by [@geooo109](https://github.com/geooo109))* - [`8cf6ef9`](https://github.com/tobymao/sqlglot/commit/8cf6ef92a0f43943efb0fe380f41dc09f43aca85) - **optimizer**: parse and annotate_type for bq NTILE *(PR [#5737](https://github.com/tobymao/sqlglot/pull/5737) by [@geooo109](https://github.com/geooo109))* - [`bb95c73`](https://github.com/tobymao/sqlglot/commit/bb95c7312c942ef987955f01e060604d60e32e83) - **optimizer**: parse and annotate type for bq RANK *(PR [#5738](https://github.com/tobymao/sqlglot/pull/5738) by [@geooo109](https://github.com/geooo109))* - [`8713c08`](https://github.com/tobymao/sqlglot/commit/8713c082b0aa8454a5773fc2a85e08a132dc6ce3) - **optimizer**: parse and annotate type for bq PERCENT_RANK *(PR [#5739](https://github.com/tobymao/sqlglot/pull/5739) by [@geooo109](https://github.com/geooo109))* - [`9ce4e31`](https://github.com/tobymao/sqlglot/commit/9ce4e31aecbde6ea1f227a7166c0f3dc9e302a66) - **optimizer**: annotate type for bq JSON_OBJECT *(PR [#5740](https://github.com/tobymao/sqlglot/pull/5740) by [@geooo109](https://github.com/geooo109))* - 
[`d35ec6e`](https://github.com/tobymao/sqlglot/commit/d35ec6e37e21cf3cec848ed55bd73128c4633cd2) - **optimizer**: annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY *(PR [#5741](https://github.com/tobymao/sqlglot/pull/5741) by [@geooo109](https://github.com/geooo109))* - [`4753642`](https://github.com/tobymao/sqlglot/commit/4753642cfcfb1f192ec4d21a492737b27affef09) - **optimizer**: annotate type for bq JSON_EXTRACT_SCALAR *(commit by [@geooo109](https://github.com/geooo109))* - [`6249dbe`](https://github.com/tobymao/sqlglot/commit/6249dbe4173ad5278adf84452dcf7253a2395b91) - **singlestore**: Added generation of exp.DatetimeDiff *(PR [#5743](https://github.com/tobymao/sqlglot/pull/5743) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`113a530`](https://github.com/tobymao/sqlglot/commit/113a5308d050fd5ceacab4c6188e5eea5dd740b1) - **optimizer**: parse and annotate type for bq JSON_ARRAY_APPEND *(PR [#5747](https://github.com/tobymao/sqlglot/pull/5747) by [@geooo109](https://github.com/geooo109))* - [`8603705`](https://github.com/tobymao/sqlglot/commit/8603705a8e5513699adc2499389c67412eee70cb) - **singlestore**: feat(singlestore): Implemented generation of exp.DatetimeSub *(PR [#5744](https://github.com/tobymao/sqlglot/pull/5744) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7d71c0b`](https://github.com/tobymao/sqlglot/commit/7d71c0bb576f9de3447b4780ab64a3f4d92c6432) - **singlestore**: Fixed generation of exp.DatetimeTrunc and exp.DateTrunc *(PR [#5745](https://github.com/tobymao/sqlglot/pull/5745) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`268e2c6`](https://github.com/tobymao/sqlglot/commit/268e2c694d1eb99f1fe64477bc38ed4946bf1c32) - **optimizer**: parse and annotate type for bq JSON_ARRAY_INSERT *(PR [#5748](https://github.com/tobymao/sqlglot/pull/5748) by [@geooo109](https://github.com/geooo109))* - [`455ec1f`](https://github.com/tobymao/sqlglot/commit/455ec1f4f8aecb5435fa4cb2912bfc21db8dd44d) - **optimizer**: parse and 
annotate type for bq JSON_KEYS *(PR [#5749](https://github.com/tobymao/sqlglot/pull/5749) by [@geooo109](https://github.com/geooo109))* - [`59895fa`](https://github.com/tobymao/sqlglot/commit/59895faa23ebe1b27938c37a7b39df87de609844) - **optimizer**: parse and annotate type for bq JSON_REMOVE *(PR [#5750](https://github.com/tobymao/sqlglot/pull/5750) by [@geooo109](https://github.com/geooo109))* - [`06d7df7`](https://github.com/tobymao/sqlglot/commit/06d7df7a05f2824cabf48e8d1e8a4ebca8fda496) - **optimizer**: parse and annotate type for bq JSON_SET *(PR [#5751](https://github.com/tobymao/sqlglot/pull/5751) by [@geooo109](https://github.com/geooo109))* - [`7f5079a`](https://github.com/tobymao/sqlglot/commit/7f5079a1b71c4dd28e98b77b5b749e074fce862c) - **singlestore**: Improved generation of exp.DataType *(PR [#5746](https://github.com/tobymao/sqlglot/pull/5746) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`ad9405c`](https://github.com/tobymao/sqlglot/commit/ad9405cd43108ff80d16711f8b33ff57430ed686) - **singlestore**: fixed generation of exp.TimestampTrunc *(PR [#5754](https://github.com/tobymao/sqlglot/pull/5754) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a1852f9`](https://github.com/tobymao/sqlglot/commit/a1852f93fdfe926072c12954c95796d038e15140) - **dremio**: parse date_part *(PR [#5756](https://github.com/tobymao/sqlglot/pull/5756) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`0db1df6`](https://github.com/tobymao/sqlglot/commit/0db1df617ec4f05b1ee6cf1d606272f6e799a9b9) - **singlestore**: Fixed generation of exp.DateDiff *(PR [#5752](https://github.com/tobymao/sqlglot/pull/5752) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e72b341`](https://github.com/tobymao/sqlglot/commit/e72b3419c8a367caa0e5e80030979cd94e87a40d) - **optimizer**: parse and annotate type for bq JSON_STRIP_NULLS *(PR [#5753](https://github.com/tobymao/sqlglot/pull/5753) by [@geooo109](https://github.com/geooo109))* - 
[`5de61a7`](https://github.com/tobymao/sqlglot/commit/5de61a7ab850d4e68fde4d76ee396d30d7bdef33) - **optimizer**: parse and annotate type for bq JSON_EXTRACT_STRING_ARRAY *(PR [#5758](https://github.com/tobymao/sqlglot/pull/5758) by [@geooo109](https://github.com/geooo109))* - [`36c9393`](https://github.com/tobymao/sqlglot/commit/36c93939575a19bd611269719c39d3d216be8cde) - **optimizer**: parse and annotate type for bq JSON LAX funcs *(PR [#5760](https://github.com/tobymao/sqlglot/pull/5760) by [@geooo109](https://github.com/geooo109))* - [`c443d5c`](https://github.com/tobymao/sqlglot/commit/c443d5caf2d9695856103eebfff21cb215777112) - **dremio**: parse datetype *(PR [#5759](https://github.com/tobymao/sqlglot/pull/5759) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`5172a99`](https://github.com/tobymao/sqlglot/commit/5172a99fc4d5e21a1dbe4509d6d7ab1ccfe8bff7) - **singlestore**: Fixed parsing of columns with table name *(PR [#5767](https://github.com/tobymao/sqlglot/pull/5767) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`88862b5`](https://github.com/tobymao/sqlglot/commit/88862b56bc29c8a600b4d0e4693d5846d3a577ff) - **optimizer**: annotate type for bq TO_JSON_STRING *(PR [#5762](https://github.com/tobymao/sqlglot/pull/5762) by [@geooo109](https://github.com/geooo109))* - [`1c551d5`](https://github.com/tobymao/sqlglot/commit/1c551d5ed3315e314013c1f063deabd9d8613e5d) - **optimizer**: parse and annotate type for bq TO_JSON *(PR [#5768](https://github.com/tobymao/sqlglot/pull/5768) by [@geooo109](https://github.com/geooo109))* - [`a024d48`](https://github.com/tobymao/sqlglot/commit/a024d48fedd049796329050a1f51822dd1388695) - **singlestore**: Added generation of exp.TsOrDsDiff *(PR [#5769](https://github.com/tobymao/sqlglot/pull/5769) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`1707f2d`](https://github.com/tobymao/sqlglot/commit/1707f2d7f9d3b58e8c216db638f8e572f9fe6f13) - **optimizer**: annotate type for ABS *(PR 
[#5770](https://github.com/tobymao/sqlglot/pull/5770) by [@geooo109](https://github.com/geooo109))* - [`69acc51`](https://github.com/tobymao/sqlglot/commit/69acc5142b2d4f0b30832c350aa49f16d1adabef) - **optimizer**: annotate type for bq IS_INF, IS_NAN *(PR [#5771](https://github.com/tobymao/sqlglot/pull/5771) by [@geooo109](https://github.com/geooo109))* - [`0da2076`](https://github.com/tobymao/sqlglot/commit/0da207652331920416b29e2cc67bdc3c3f964466) - **optimizer**: annotate type for bq CBRT *(PR [#5772](https://github.com/tobymao/sqlglot/pull/5772) by [@geooo109](https://github.com/geooo109))* - [`a4968cb`](https://github.com/tobymao/sqlglot/commit/a4968cb5693670c1a2e9cd2c86404dd90fd76160) - **optimizer**: annotate type for bq RAND *(PR [#5774](https://github.com/tobymao/sqlglot/pull/5774) by [@geooo109](https://github.com/geooo109))* - [`dd7781a`](https://github.com/tobymao/sqlglot/commit/dd7781a15b842a5826714958ed7af9024903cd1e) - **singlestore**: Fixed generation of exp.Collate *(PR [#5775](https://github.com/tobymao/sqlglot/pull/5775) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`fb684cb`](https://github.com/tobymao/sqlglot/commit/fb684cbdb6178ddc441f598cc1a6e914291cd00e) - **singlestore**: Fixed generation of exp.RegexpILike *(PR [#5777](https://github.com/tobymao/sqlglot/pull/5777) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`3e63350`](https://github.com/tobymao/sqlglot/commit/3e63350bd1d58b510cecd1a573d27be3fd2565ce) - **optimizer**: parse and annotate type for bq ACOS *(PR [#5776](https://github.com/tobymao/sqlglot/pull/5776) by [@geooo109](https://github.com/geooo109))* - [`8705a78`](https://github.com/tobymao/sqlglot/commit/8705a787df034b4cecb4ba95e9599772c5561ba9) - **singlestore**: Fixed generation of exp.CastToStrType *(PR [#5778](https://github.com/tobymao/sqlglot/pull/5778) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`e3c35ad`](https://github.com/tobymao/sqlglot/commit/e3c35ade797f46549cc803e1acd8816041713a10) - **singlestore**: Fixed generation of exp.UnicodeString *(PR [#5773](https://github.com/tobymao/sqlglot/pull/5773) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2be9d01`](https://github.com/tobymao/sqlglot/commit/2be9d01830c778186dc274c94c6db0dd6c4116d1) - **optimizer**: parse and annotate type for bq ACOSH *(PR [#5779](https://github.com/tobymao/sqlglot/pull/5779) by [@geooo109](https://github.com/geooo109))* - [`7da2f31`](https://github.com/tobymao/sqlglot/commit/7da2f31d6613f16585e98c3fa1f592c617ae40c9) - **optimizer**: parse and annotate type for bq ASIN/H *(PR [#5783](https://github.com/tobymao/sqlglot/pull/5783) by [@geooo109](https://github.com/geooo109))* - [`341ea83`](https://github.com/tobymao/sqlglot/commit/341ea83a07c707fdbf565b8d9ef4b9b6341ed1d5) - **optimizer**: parse and annotate type for bq ATAN/H/2 *(PR [#5784](https://github.com/tobymao/sqlglot/pull/5784) by [@geooo109](https://github.com/geooo109))* - [`be54a45`](https://github.com/tobymao/sqlglot/commit/be54a458413ce3be6c321e5f4feb3e5df5ee6d08) - **singlestore**: Implemented generation of exp.Cbrt *(PR [#5782](https://github.com/tobymao/sqlglot/pull/5782) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`aa360cb`](https://github.com/tobymao/sqlglot/commit/aa360cb0e204aa056557ff8b15aa2d4f678430e6) - **databricks**: use regexp_like as it exists *(PR [#5781](https://github.com/tobymao/sqlglot/pull/5781) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`c2a1ad4`](https://github.com/tobymao/sqlglot/commit/c2a1ad4050771401a5b26bcadd90060e4527fbff) - **optimizer**: parse and annotate type for bq COT/H *(PR [#5786](https://github.com/tobymao/sqlglot/pull/5786) by [@geooo109](https://github.com/geooo109))* - [`316ae91`](https://github.com/tobymao/sqlglot/commit/316ae913d8b1a63f3071ebb1b826328108d74cef) - **singlestore**: Added handling of UTC_DATE and exp.CurrentDate 
*(PR [#5785](https://github.com/tobymao/sqlglot/pull/5785) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2c6d237`](https://github.com/tobymao/sqlglot/commit/2c6d23742ea9fcc2b9c784315d3d5364e360fea5) - **optimizer**: parse and annotate type for bq CSC/H *(PR [#5787](https://github.com/tobymao/sqlglot/pull/5787) by [@geooo109](https://github.com/geooo109))* - [`8a35076`](https://github.com/tobymao/sqlglot/commit/8a350763c2337f6910a5f0e19af387ba488fcb70) - **optimizer**: parse and annotate type for bq SEC/H *(PR [#5788](https://github.com/tobymao/sqlglot/pull/5788) by [@geooo109](https://github.com/geooo109))* - [`566bfb2`](https://github.com/tobymao/sqlglot/commit/566bfb2a64a64b74da63b3a89d68caf702ab6522) - **singlestore**: Added support of UTC_TIME and CURRENT_TIME *(PR [#5789](https://github.com/tobymao/sqlglot/pull/5789) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`79901cb`](https://github.com/tobymao/sqlglot/commit/79901cb506737ae1932fa44a705858d2597ee587) - **optimizer**: parse and annotate type for bq SIN\H *(PR [#5790](https://github.com/tobymao/sqlglot/pull/5790) by [@geooo109](https://github.com/geooo109))* - [`74fb547`](https://github.com/tobymao/sqlglot/commit/74fb5476def1b389da425885db56bd6592fd7f78) - **optimizer**: parse and annotate type for bq RANGE_BUCKET *(PR [#5793](https://github.com/tobymao/sqlglot/pull/5793) by [@geooo109](https://github.com/geooo109))* - [`eca65e8`](https://github.com/tobymao/sqlglot/commit/eca65e8b79f65850b014a4cb7913ba4a5861dbe9) - **optimizer**: parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE *(PR [#5792](https://github.com/tobymao/sqlglot/pull/5792) by [@geooo109](https://github.com/geooo109))* - [`a180d3f`](https://github.com/tobymao/sqlglot/commit/a180d3f2f9f3938611027269028c03274aa1889c) - **optimizer**: parse and annotate type for bq SAFE math funcs *(PR [#5797](https://github.com/tobymao/sqlglot/pull/5797) by [@geooo109](https://github.com/geooo109))* - 
[`fc7ad7a`](https://github.com/tobymao/sqlglot/commit/fc7ad7a4d953424b56542eacfe1835f5789921c7) - **snowflake**: parse ALTER SESSION *(PR [#5734](https://github.com/tobymao/sqlglot/pull/5734) by [@tekumara](https://github.com/tekumara))* - [`8ec1a6c`](https://github.com/tobymao/sqlglot/commit/8ec1a6cf5a8edc2d834c713ce0fd8d87237f11ed) - **optimizer**: annotate type for bq STRING_AGG *(PR [#5798](https://github.com/tobymao/sqlglot/pull/5798) by [@geooo109](https://github.com/geooo109))* - [`dd97bfa`](https://github.com/tobymao/sqlglot/commit/dd97bfa1dc2f86b727c55b06b3c54b18c02e360d) - **optimizer**: annotate type for bq DATETIME_TRUNC *(PR [#5799](https://github.com/tobymao/sqlglot/pull/5799) by [@geooo109](https://github.com/geooo109))* - [`d3e9dda`](https://github.com/tobymao/sqlglot/commit/d3e9dda183695dd1e4a9832a6671bccc6db561a0) - **optimizer**: annotate type for bq GENERATE_UUID *(commit by [@geooo109](https://github.com/geooo109))* - [`cf1d1e3`](https://github.com/tobymao/sqlglot/commit/cf1d1e3e0ef9e6cd1b1c6128c63ddf06c30f1339) - **optimizer**: annotate type for snowflake's REVERSE function *(PR [#5803](https://github.com/tobymao/sqlglot/pull/5803) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1d07c52`](https://github.com/tobymao/sqlglot/commit/1d07c52badb2e392e6895cbb275d2224789366c9) - **SingleStore**: Implemented generation of CURRENT_DATETIME *(PR [#5816](https://github.com/tobymao/sqlglot/pull/5816) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cad4fd0`](https://github.com/tobymao/sqlglot/commit/cad4fd0c5b0ec90e693fa6883af0ab287b921019) - **singlestore**: Added handling of exp.JSONObject *(PR [#5817](https://github.com/tobymao/sqlglot/pull/5817) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e3cb076`](https://github.com/tobymao/sqlglot/commit/e3cb0766bd5c3ccb31ea52cfc76201f548798dc1) - **singlestore**: Implemented generation of exp.StandardHash *(PR 
[#5823](https://github.com/tobymao/sqlglot/pull/5823) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`0198282`](https://github.com/tobymao/sqlglot/commit/0198282a82bbf3e81476e164718d63fd1210acdc) - **optimizer**: Update tests for concat string function *(PR [#5809](https://github.com/tobymao/sqlglot/pull/5809) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`4e8a436`](https://github.com/tobymao/sqlglot/commit/4e8a436c16f487a72bd1ac2432bcb1c46599d901) - **singlestore**: Added generation of exp.JSONExists *(PR [#5820](https://github.com/tobymao/sqlglot/pull/5820) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`82bea49`](https://github.com/tobymao/sqlglot/commit/82bea49978ae459492b5127a2a52049826e2fd06) - **singlestore**: Refactored parsing of JSON_BUILD_OBJECT *(PR [#5828](https://github.com/tobymao/sqlglot/pull/5828) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`f7d38c3`](https://github.com/tobymao/sqlglot/commit/f7d38c3a10c505346f04e39a2712d60b4c96370f) - **singlestore**: Implemented generation of exp.Stuff *(PR [#5825](https://github.com/tobymao/sqlglot/pull/5825) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`030a5b5`](https://github.com/tobymao/sqlglot/commit/030a5b5ea03ecee869b07cfd27f4ea044732822e) - **singlestore**: Added generation of exp.JSONBExists *(PR [#5821](https://github.com/tobymao/sqlglot/pull/5821) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e58fef1`](https://github.com/tobymao/sqlglot/commit/e58fef1d6dc654a3b36461bcbea21c99cdc96477) - **singlestore**: Implemented parsing and generation of exp.MatchAgainst *(PR [#5822](https://github.com/tobymao/sqlglot/pull/5822) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e94f530`](https://github.com/tobymao/sqlglot/commit/e94f530af0e0cdad995b4c8dc5ed86953490d37f) - **singlestore**: Added handling of exp.JSONArray *(PR [#5818](https://github.com/tobymao/sqlglot/pull/5818) by 
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`1c42ef4`](https://github.com/tobymao/sqlglot/commit/1c42ef4374aeab8a1ee9848892d7f8c4511c7f04) - **singlestore**: Fixed parsing/generation of exp.JSONArrayAgg *(PR [#5819](https://github.com/tobymao/sqlglot/pull/5819) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`67219f0`](https://github.com/tobymao/sqlglot/commit/67219f0606231514f430e146e2fdb99e796f718b) - **singlestore**: Added support of UTC_TIMESTAMP and CURRENT_TIMESTAMP *(PR [#5808](https://github.com/tobymao/sqlglot/pull/5808) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`db2c430`](https://github.com/tobymao/sqlglot/commit/db2c4303237a1244070c359245c398a724df6de2) - **optimizer**: annotate the "contains" function *(PR [#5829](https://github.com/tobymao/sqlglot/pull/5829) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`a398fb4`](https://github.com/tobymao/sqlglot/commit/a398fb4df28c868f4cfc34530044b9d7b78e2e90) - **singlestore**: Split truncation of multiple tables into several queries *(PR [#5839](https://github.com/tobymao/sqlglot/pull/5839) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cd27c96`](https://github.com/tobymao/sqlglot/commit/cd27c96fe85aba5f54116f38649edd8db064a5e6) - **snowflake**: transpile `TO_HEX` from bigquery *(PR [#5838](https://github.com/tobymao/sqlglot/pull/5838) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`d2e4ab7`](https://github.com/tobymao/sqlglot/commit/d2e4ab7df41ae3601e9b66e1338db3d851729339) - **snowflake**: add tests for endswith function *(PR [#5846](https://github.com/tobymao/sqlglot/pull/5846) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c50d6e3`](https://github.com/tobymao/sqlglot/commit/c50d6e3c7b96f00d27c34a02c8e0dced21e6c373) - **optimizer**: annotate type for snowflake LEFT, RIGHT and SUBSTRING functions *(PR 
[#5849](https://github.com/tobymao/sqlglot/pull/5849) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ca6c8f7`](https://github.com/tobymao/sqlglot/commit/ca6c8f753ba8458544439e20671f0981c98d168d) - **singlestore**: Improved parsing/generation of exp.Show *(PR [#5853](https://github.com/tobymao/sqlglot/pull/5853) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`722eceb`](https://github.com/tobymao/sqlglot/commit/722ecebfa43aa5948031edd1828b6482a241d9ef) - **snowflake**: MD5Digest transpiling to MD5_BINARY *(PR [#5855](https://github.com/tobymao/sqlglot/pull/5855) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`b128339`](https://github.com/tobymao/sqlglot/commit/b12833977e2a395712481cf11e293fdbd70fd4ce) - **optimizer**: annotate and add tests for snowflake LENGTH and LOWER functions *(PR [#5856](https://github.com/tobymao/sqlglot/pull/5856) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`134957a`](https://github.com/tobymao/sqlglot/commit/134957af11c55a4ab16f58d0725d6bb8ab23eb28) - **optimizer**: annotate types for Snowflake TRIM function *(PR [#5811](https://github.com/tobymao/sqlglot/pull/5811) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`0475dae`](https://github.com/tobymao/sqlglot/commit/0475dae21231b85407bf778fd9f1abaecdeb68de) - **singlestore**: Marked several exp.Describe args as unsupported *(PR [#5861](https://github.com/tobymao/sqlglot/pull/5861) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7a07b41`](https://github.com/tobymao/sqlglot/commit/7a07b41b2357149adc6afb50bb98e37e6a3175f1) - **optimizer**: Add tests for snowflake LTRIM and RTRIM functions *(PR [#5857](https://github.com/tobymao/sqlglot/pull/5857) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fb90666`](https://github.com/tobymao/sqlglot/commit/fb90666ff3e710d70815a68defde3dc85aeef7b3) - 
**singlestore**: Added collate handling to exp.AlterColumn *(PR [#5864](https://github.com/tobymao/sqlglot/pull/5864) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2f27692`](https://github.com/tobymao/sqlglot/commit/2f276929d6b6f788eb5b3ee0b1a8a8c108833474) - **snowflake**: JSONFormat transpiling to TO_JSON *(PR [#5860](https://github.com/tobymao/sqlglot/pull/5860) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`487c811`](https://github.com/tobymao/sqlglot/commit/487c8119cbfaf2783f5f17ec90c8e69e4432a4fa) - **singlestore**: Fixed parsing/generation of exp.RenameColumn *(PR [#5865](https://github.com/tobymao/sqlglot/pull/5865) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`76cf4d8`](https://github.com/tobymao/sqlglot/commit/76cf4d892a6d011a2e0020fb1ea82518d4f49e71) - **bigquery**: add support for ML.TRANSLATE func *(PR [#5859](https://github.com/tobymao/sqlglot/pull/5859) by [@geooo109](https://github.com/geooo109))* - [`a899eb1`](https://github.com/tobymao/sqlglot/commit/a899eb188d5e354d3ed56d1e7c32861eecf3e906) - **singlestore**: Fixed parsing and generation of VECTOR type *(PR [#5854](https://github.com/tobymao/sqlglot/pull/5854) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`0acf076`](https://github.com/tobymao/sqlglot/commit/0acf0769773061fca3ec03125a5d43a4aa9c8e4b) - **postgres**: Support `?|` JSONB operator *(PR [#5866](https://github.com/tobymao/sqlglot/pull/5866) by [@VaggelisD](https://github.com/VaggelisD))* - [`bd4b278`](https://github.com/tobymao/sqlglot/commit/bd4b2780c32ee52d25b6539d7b4479b6a7f80d18) - **optimizer**: annotate types for Snowflake UPPER function *(PR [#5812](https://github.com/tobymao/sqlglot/pull/5812) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`edab189`](https://github.com/tobymao/sqlglot/commit/edab1890e2c790b737be4995a31667448eff148e) - **postgres**: Support ?& JSONB operator *(PR [#5867](https://github.com/tobymao/sqlglot/pull/5867) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`960ec06`](https://github.com/tobymao/sqlglot/commit/960ec069eb275b7b8cc6705dbbb1143159f06237) - **postgres**: Support #- JSONB operator *(PR [#5868](https://github.com/tobymao/sqlglot/pull/5868) by [@VaggelisD](https://github.com/VaggelisD))* - [`d3cd6bf`](https://github.com/tobymao/sqlglot/commit/d3cd6bf6e5fbaa490868ee3cd2cc99dd5e40a396) - **optimizer**: Annotate and add tests for snowflake REPLACE and SPACE functions *(PR [#5871](https://github.com/tobymao/sqlglot/pull/5871) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ba22531`](https://github.com/tobymao/sqlglot/commit/ba2253113ea5a7c76c8df7ec9b6faf37da698fa4) - **bigquery**: Add support for ML.FORECAST(...) *(PR [#5873](https://github.com/tobymao/sqlglot/pull/5873) by [@VaggelisD](https://github.com/VaggelisD))* - [`cd818ba`](https://github.com/tobymao/sqlglot/commit/cd818bad51e93ec349b97675e4c1f5bd7c4c1522) - **singlestore**: Fixed generation/parsing of computed columns *(PR [#5878](https://github.com/tobymao/sqlglot/pull/5878) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5d1f241`](https://github.com/tobymao/sqlglot/commit/5d1f241209197419111e9eda37fb6f2a5ec2bc4b) - **tsql**: support JSON_ARRAYAGG *(PR [#5879](https://github.com/tobymao/sqlglot/pull/5879) by [@geooo109](https://github.com/geooo109))* - [`96ae7a3`](https://github.com/tobymao/sqlglot/commit/96ae7a3bcbf9de1932150baa0bd704d4ce05c9f7) - **optimizer**: Annotate and add tests for snowflake REPEAT and SPLIT functions *(PR [#5875](https://github.com/tobymao/sqlglot/pull/5875) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0fe6a25`](https://github.com/tobymao/sqlglot/commit/0fe6a25e366dcbc5a4a0878b285d147a6aa00412) - **postgres**: support JSON_AGG *(PR [#5880](https://github.com/tobymao/sqlglot/pull/5880) by [@geooo109](https://github.com/geooo109))* - 
[`854eeeb`](https://github.com/tobymao/sqlglot/commit/854eeeb5b25954cc26b91135d58eb8370271f1de) - **optimizer**: annotate types for Snowflake REGEXP_LIKE, REGEXP_REPLACE, REGEXP_SUBSTR functions *(PR [#5876](https://github.com/tobymao/sqlglot/pull/5876) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f2d3bf7`](https://github.com/tobymao/sqlglot/commit/f2d3bf74e804e5a5e2ac6ca94210ba04df07e7f3) - **optimizer**: annotate types for Snowflake UUID_STRING function *(PR [#5881](https://github.com/tobymao/sqlglot/pull/5881) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`5b9463a`](https://github.com/tobymao/sqlglot/commit/5b9463ad11a49c821585985c35394ebb30e827dd) - **mysql**: add support for binary `MOD` operator fixes [#5887](https://github.com/tobymao/sqlglot/pull/5887) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d24eabc`](https://github.com/tobymao/sqlglot/commit/d24eabcbe30dc0f7c2dbae346e429efef58b5680) - **bigquery**: Add support for ML.GENERATE_TEXT_EMBEDDING(...) 
*(PR [#5891](https://github.com/tobymao/sqlglot/pull/5891) by [@VaggelisD](https://github.com/VaggelisD))* - [`950a3fa`](https://github.com/tobymao/sqlglot/commit/950a3fa6d6307f7713f40117655da2f9710ebfa9) - **mysql**: SOUNDS LIKE, SUBSTR *(PR [#5886](https://github.com/tobymao/sqlglot/pull/5886) by [@vuvova](https://github.com/vuvova))* - [`688afc5`](https://github.com/tobymao/sqlglot/commit/688afc55ab08588636eba92893c603ca68e43e6e) - **singlestore**: Fixed generation of exp.National *(PR [#5890](https://github.com/tobymao/sqlglot/pull/5890) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`c77147e`](https://github.com/tobymao/sqlglot/commit/c77147ebaafa6942f80af75dd6c2d7a62a7e6fe2) - **parser**: Extend support for `IS UNKNOWN` across all dialects *(PR [#5888](https://github.com/tobymao/sqlglot/pull/5888) by [@VaggelisD](https://github.com/VaggelisD))* - [`ec80ff3`](https://github.com/tobymao/sqlglot/commit/ec80ff34957c3e3f80c44175383b06cf72988a68) - make dump a list instead of a nested dict to avoid all recursion errors *(PR [#5885](https://github.com/tobymao/sqlglot/pull/5885) by [@tobymao](https://github.com/tobymao))* - [`2fdaccd`](https://github.com/tobymao/sqlglot/commit/2fdaccd1a9045bda3d529025a4706c397b8a836f) - **optimizer**: annotate types for Snowflake SHA1, SHA2 functions *(PR [#5884](https://github.com/tobymao/sqlglot/pull/5884) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`faba309`](https://github.com/tobymao/sqlglot/commit/faba30905390e5efaf0ba9a05aab9ac2724b1b85) - **optimizer**: annotate types for Snowflake AI_AGG function *(PR [#5894](https://github.com/tobymao/sqlglot/pull/5894) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`dd27844`](https://github.com/tobymao/sqlglot/commit/dd2784435c7bdd2ceaaaaa359fcd112ad1f8190c) - **snowflake**: transpile `BYTE_LENGTH` *(PR [#5899](https://github.com/tobymao/sqlglot/pull/5899) by [@ozadari](https://github.com/ozadari))* 
- [`304bec5`](https://github.com/tobymao/sqlglot/commit/304bec5f7342501ad28ea4cd0a4b9aa092f2192f) - **optimizer**: Annotate snowflake MD5 functions *(PR [#5883](https://github.com/tobymao/sqlglot/pull/5883) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ec3006d`](https://github.com/tobymao/sqlglot/commit/ec3006d815951fdc1a80d6722ce6f1176417d595) - **optimizer**: Add tests for snowflake NOT ILIKE and NOT LIKE *(PR [#5901](https://github.com/tobymao/sqlglot/pull/5901) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c0180ec`](https://github.com/tobymao/sqlglot/commit/c0180ec163a43836fed754efcb6f26ad37cdae50) - **optimizer**: annotate types for Snowflake AI_SUMMARIZE_AGG function *(PR [#5902](https://github.com/tobymao/sqlglot/pull/5902) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1ee026d`](https://github.com/tobymao/sqlglot/commit/1ee026d22d4f6c3613c1809a6738cdea846c48a9) - **postgres**: support `SUBSTRING(value FOR length FROM start)` variant *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d5cf114`](https://github.com/tobymao/sqlglot/commit/d5cf1149932850a91cb5f1ebecda2652616729ef) - **duckdb**: support INSTALL *(PR [#5904](https://github.com/tobymao/sqlglot/pull/5904) by [@geooo109](https://github.com/geooo109))* - [`73e05bb`](https://github.com/tobymao/sqlglot/commit/73e05bb15bb86e4a07cc09bf02028a6cf7fa1e6f) - **snowflake**: properly generate `BITNOT` *(PR [#5906](https://github.com/tobymao/sqlglot/pull/5906) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`16f317c`](https://github.com/tobymao/sqlglot/commit/16f317c04f7c0a398c38b461e05f4d4c30baf98b) - **snowflake**: add support for `!` syntax *(PR [#5907](https://github.com/tobymao/sqlglot/pull/5907) by [@georgesittas](https://github.com/georgesittas))* - [`5a973e9`](https://github.com/tobymao/sqlglot/commit/5a973e9a88fa7f522a9bf91dc60fb0f6effef53d) - 
**optimizer**: annotate types for Snowflake AI_CLASSIFY function *(PR [#5909](https://github.com/tobymao/sqlglot/pull/5909) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f4ad258`](https://github.com/tobymao/sqlglot/commit/f4ad25882951de4e4442dfd5189a56d5a1c5e630) - **optimizer**: Annotate types for Snowflake BASE64_DECODE_BINARY function *(PR [#5917](https://github.com/tobymao/sqlglot/pull/5917) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6d0e3f8`](https://github.com/tobymao/sqlglot/commit/6d0e3f8dcae7ed1a7659ece69b1f94cec5e7300e) - **optimizer**: Add parser support to ilike like function versions. *(PR [#5915](https://github.com/tobymao/sqlglot/pull/5915) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fc5624e`](https://github.com/tobymao/sqlglot/commit/fc5624eca43d2855ac350c92d85b184a6893d5ca) - **optimizer**: annotate types for Snowflake ASCII function *(PR [#5926](https://github.com/tobymao/sqlglot/pull/5926) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`4e81690`](https://github.com/tobymao/sqlglot/commit/4e8169045edcaa28ae43abeb07370df63846fbfd) - **optimizer**: annotate type for Snowflake COLLATE function *(PR [#5931](https://github.com/tobymao/sqlglot/pull/5931) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f07d35d`](https://github.com/tobymao/sqlglot/commit/f07d35d29104c6203efaab738118d1903614b83c) - **optimizer**: annotate type for Snowflake CHR function *(PR [#5929](https://github.com/tobymao/sqlglot/pull/5929) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f8c0ee4`](https://github.com/tobymao/sqlglot/commit/f8c0ee4d3c1a4d4a92b897d1cc85f9904c8e566b) - **optimizer**: Add function and annotate snowflake hex decode string and binary functions *(PR [#5928](https://github.com/tobymao/sqlglot/pull/5928) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`66f9501`](https://github.com/tobymao/sqlglot/commit/66f9501d76d087798bad93e578273ab2a45e2575) - **optimizer**: annotate types for Snowflake BIT_LENGTH function *(PR [#5927](https://github.com/tobymao/sqlglot/pull/5927) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f4c810e`](https://github.com/tobymao/sqlglot/commit/f4c810e043d9379e94efb185e368e27ad9c15715) - transpile Trino `FORMAT` to DuckDB and Snowflake, closes [#5933](https://github.com/tobymao/sqlglot/pull/5933) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7878437`](https://github.com/tobymao/sqlglot/commit/78784370712df65a2e1e79a1c2b441131ed7222a) - **optimizer**: annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` *(PR [#5922](https://github.com/tobymao/sqlglot/pull/5922) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`9bcad04`](https://github.com/tobymao/sqlglot/commit/9bcad040bd51dd03821c68eea1a73534fc7a81b7) - **optimizer**: Annotate type for HEX ENCODE function. 
*(PR [#5936](https://github.com/tobymao/sqlglot/pull/5936) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`590928f`](https://github.com/tobymao/sqlglot/commit/590928f4637306e8cf3f1302d5dd5d5dbc76e7e0) - **optimizer**: annotate type for Snowflake INITCAP function *(PR [#5941](https://github.com/tobymao/sqlglot/pull/5941) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`ac04de1`](https://github.com/tobymao/sqlglot/commit/ac04de1944c7a976406581b489b3cf9b11dafb77) - **optimizer**: annotate type for Snowflake EDITDISTANCE function *(PR [#5940](https://github.com/tobymao/sqlglot/pull/5940) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a385990`](https://github.com/tobymao/sqlglot/commit/a38599080932a8b54a169df8b7a69650cb47b6bc) - **parser**: support wrapped aggregate functions *(PR [#5943](https://github.com/tobymao/sqlglot/pull/5943) by [@geooo109](https://github.com/geooo109))* - [`9e28af8`](https://github.com/tobymao/sqlglot/commit/9e28af8a52ced951ecf7f4e85a6305e20a13de1f) - **optimizer**: Annotate type for snowflake COMPRESS function *(PR [#5938](https://github.com/tobymao/sqlglot/pull/5938) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`7f13eaf`](https://github.com/tobymao/sqlglot/commit/7f13eaf7769a3381a56c9209af590835be2f95cd) - **optimizer**: Annotate type for snowflake DECOMPRESS_BINARY function *(PR [#5945](https://github.com/tobymao/sqlglot/pull/5945) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`be12b29`](https://github.com/tobymao/sqlglot/commit/be12b29b5a7bd6d6e09dbd8c17086bd77c19abc0) - **optimizer**: Annotate type for snowflake DECOMPRESS_STRING function *(PR [#5947](https://github.com/tobymao/sqlglot/pull/5947) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - 
[`a55fce5`](https://github.com/tobymao/sqlglot/commit/a55fce5310a50af132c5d06bb299fe3f025442c4) - **optimizer**: Annotate type for snowflake LPAD function *(PR [#5948](https://github.com/tobymao/sqlglot/pull/5948) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`05e07aa`](https://github.com/tobymao/sqlglot/commit/05e07aa740d7977a6b42ec15ae4fa9c2168a15f5) - **optimizer**: annotate type for Snowflake INSERT function *(PR [#5942](https://github.com/tobymao/sqlglot/pull/5942) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`6268e10`](https://github.com/tobymao/sqlglot/commit/6268e107a947badaa00508544f5389412806ecd0) - **solr**: initial dialect implementation *(PR [#5946](https://github.com/tobymao/sqlglot/pull/5946) by [@aadel](https://github.com/aadel))* - [`1573fef`](https://github.com/tobymao/sqlglot/commit/1573fefac27b5b1215e3d458f8ccf1b9dadbb772) - **optimizer**: annotate types for Snowflake JAROWINKLER_SIMILARITY function *(PR [#5950](https://github.com/tobymao/sqlglot/pull/5950) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`883c6ab`](https://github.com/tobymao/sqlglot/commit/883c6abe589865f478d95604e8d670e57afd04af) - **optimizer**: annotate type for Snowflake COLLATION function *(PR [#5939](https://github.com/tobymao/sqlglot/pull/5939) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`627c18d`](https://github.com/tobymao/sqlglot/commit/627c18d7da6bf644bc14c0f17963dea0be20604a) - **mysql**: add valid INTERVAL units *(PR [#5951](https://github.com/tobymao/sqlglot/pull/5951) by [@geooo109](https://github.com/geooo109))* - [`88e4e4c`](https://github.com/tobymao/sqlglot/commit/88e4e4c55f3a113127eb3c82c0be46c29bcf15ab) - **optimizer**: Annotate type for OCTET_LENGTH function *(PR [#5960](https://github.com/tobymao/sqlglot/pull/5960) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`68473ac`](https://github.com/tobymao/sqlglot/commit/68473ac3ec8dc76512dc76819892a1b0324c7ddc) - **optimizer**: Annotate type for snowflake PARSE_URL function *(PR [#5962](https://github.com/tobymao/sqlglot/pull/5962) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b015a9d`](https://github.com/tobymao/sqlglot/commit/b015a9d944d0a87069a7750ad74953c399d7da34) - **optimizer**: annotate type for Snowflake REGEXP_INSTR function *(commit by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1f29ba7`](https://github.com/tobymao/sqlglot/commit/1f29ba710f4213beb1a2f993244d7d824f3536ce) - **optimizer**: annotate type for Snowflake PARSE_IP function *(PR [#5961](https://github.com/tobymao/sqlglot/pull/5961) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`bf45d5d`](https://github.com/tobymao/sqlglot/commit/bf45d5d3cb0c0f380824019eb32ec29049268a61) - **optimizer**: annotate types for Snowflake RTRIMMED_LENGTH function *(PR [#5968](https://github.com/tobymao/sqlglot/pull/5968) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`13caa69`](https://github.com/tobymao/sqlglot/commit/13caa6991f003ad7abb590073451e591b6fd888c) - **optimizer**: Annotate type for snowflake POSITION function *(PR [#5964](https://github.com/tobymao/sqlglot/pull/5964) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1471306`](https://github.com/tobymao/sqlglot/commit/1471306ed317830c294e3654075f55424d14bf5a) - support parse into grant principal and privilege *(PR [#5971](https://github.com/tobymao/sqlglot/pull/5971) by [@eakmanrq](https://github.com/eakmanrq))* - [`13a30df`](https://github.com/tobymao/sqlglot/commit/13a30dfa37096df5bfc2c31538325c40a49f7917) - **optimizer**: Annotate type for snowflake TRY_BASE64_DECODE_BINARY 
function *(PR [#5972](https://github.com/tobymao/sqlglot/pull/5972) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1f5fdd7`](https://github.com/tobymao/sqlglot/commit/1f5fdd799c047de167a4572f7ac26b7ad92167f2) - **optimizer**: Annotate type for snowflake TRY_BASE64_DECODE_STRING function *(PR [#5974](https://github.com/tobymao/sqlglot/pull/5974) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`324e82f`](https://github.com/tobymao/sqlglot/commit/324e82fe1fb11722f91341010602a743b151e055) - **optimizer**: Annotate type for snowflake TRY_HEX_DECODE_BINARY function *(PR [#5975](https://github.com/tobymao/sqlglot/pull/5975) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6caf99d`](https://github.com/tobymao/sqlglot/commit/6caf99d556a3357ffaa6c294a9babcd30dd5fac5) - **optimizer**: Annotate type for snowflake TRY_HEX_DECODE_STRING function *(PR [#5976](https://github.com/tobymao/sqlglot/pull/5976) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`73186a8`](https://github.com/tobymao/sqlglot/commit/73186a812ce422c108ee81b3de11da6ee9a9e902) - **optimizer**: annotate type for Snowflake REGEXP_COUNT function *(PR [#5963](https://github.com/tobymao/sqlglot/pull/5963) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`6124de7`](https://github.com/tobymao/sqlglot/commit/6124de76fa6d6725e844cd37e09ebfe99469b0ec) - **optimizer**: Annotate type for snowflake SOUNDEX function *(PR [#5986](https://github.com/tobymao/sqlglot/pull/5986) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`244fb48`](https://github.com/tobymao/sqlglot/commit/244fb48fc9c4776f427c08b825d139b1c172fd26) - **optimizer**: annotate type for Snowflake SPLIT_PART function *(PR [#5988](https://github.com/tobymao/sqlglot/pull/5988) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`0d772e0`](https://github.com/tobymao/sqlglot/commit/0d772e0b9d687b24d49203c05d7a90cc1dce02d5) - **snowflake**: add ast node for `DIRECTORY` source *(PR [#5990](https://github.com/tobymao/sqlglot/pull/5990) by [@georgesittas](https://github.com/georgesittas))* - [`3c7b5c0`](https://github.com/tobymao/sqlglot/commit/3c7b5c0e2dc071b7b9f6da308ba58a3a43da93dc) - **optimizer**: Annotate type for snowflake SOUNDEX_P123 function *(PR [#5987](https://github.com/tobymao/sqlglot/pull/5987) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`475c09b`](https://github.com/tobymao/sqlglot/commit/475c09bd27179db4d186638645698dd4ad6553cd) - **optimizer**: Annotate type for snowflake TRANSLATE function *(PR [#5992](https://github.com/tobymao/sqlglot/pull/5992) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`f25e42e`](https://github.com/tobymao/sqlglot/commit/f25e42e3f5b3b7b671bd724ba7b09a9b07d13995) - **optimizer**: annotate type for Snowflake REGEXP_INSTR function *(PR [#5978](https://github.com/tobymao/sqlglot/pull/5978) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`13cb26e`](https://github.com/tobymao/sqlglot/commit/13cb26e2f29373538d60a8124ddebf95fd22a8d8) - **optimizer**: annotate type for Snowflake REGEXP_SUBSTR_ALL function *(PR [#5979](https://github.com/tobymao/sqlglot/pull/5979) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`4ce683e`](https://github.com/tobymao/sqlglot/commit/4ce683eb8ac5716a334cbd7625438b9f89623c7a) - **optimizer**: Annotate type for snowflake UNICODE function *(PR [#5993](https://github.com/tobymao/sqlglot/pull/5993) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`587196c`](https://github.com/tobymao/sqlglot/commit/587196c9c2d122f73f9deb7e87c2831f27f6ed02) - 
**optimizer**: Annotate type for snowflake STRTOK_TO_ARRAY function *(PR [#5994](https://github.com/tobymao/sqlglot/pull/5994) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`bced710`](https://github.com/tobymao/sqlglot/commit/bced71084ffb3a8f7a11db843777f05b68f367da) - **optimizer**: Annotate type for snowflake STRTOK function. *(PR [#5991](https://github.com/tobymao/sqlglot/pull/5991) by [@georgesittas](https://github.com/georgesittas))* - [`74a13f2`](https://github.com/tobymao/sqlglot/commit/74a13f2a548b9cd41061e835cb3cd9dd2a5a9fb3) - **optimizer**: Annotate type for snowflake DIV0 and DIVNULL functions *(PR [#6008](https://github.com/tobymao/sqlglot/pull/6008) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fec2b31`](https://github.com/tobymao/sqlglot/commit/fec2b31956f2debdad7c53744a577894cd8d747c) - **optimizer**: Annotate type for snowflake SEARCH function *(PR [#5985](https://github.com/tobymao/sqlglot/pull/5985) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`27a76cd`](https://github.com/tobymao/sqlglot/commit/27a76cdfe4212f16f945521eb3997580eacf1d61) - **optimizer**: Annotate type for snowflake COT, SIN and TAN functions *(PR [#6022](https://github.com/tobymao/sqlglot/pull/6022) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8b48f7b`](https://github.com/tobymao/sqlglot/commit/8b48f7b985342cfcc45bc2b94540a1a2bf5995c4) - **optimizer**: Annotate type for snowflake SIGN and ABS functions *(PR [#6025](https://github.com/tobymao/sqlglot/pull/6025) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0911276`](https://github.com/tobymao/sqlglot/commit/091127663ab4cb94b02be5aa40c6a46dd7f89243) - **optimizer**: annotate type for Snowflake EXP function *(PR [#6007](https://github.com/tobymao/sqlglot/pull/6007) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a96d50e`](https://github.com/tobymao/sqlglot/commit/a96d50e14bed5e87ff2dce9c545e0c48897b64d6) - **optimizer**: annotate type for Snowflake COSH function *(PR [#6006](https://github.com/tobymao/sqlglot/pull/6006) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`4df58e0`](https://github.com/tobymao/sqlglot/commit/4df58e0f0b8985590fb29a8ab6ba0ced987ac5b9) - **optimizer**: annotate type for Snowflake DEGREES function *(PR [#6027](https://github.com/tobymao/sqlglot/pull/6027) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`db71a20`](https://github.com/tobymao/sqlglot/commit/db71a2023aaeca2ffda782ae7b91fdee356c402e) - **optimizer**: annotate type for Snowflake COS function *(PR [#6028](https://github.com/tobymao/sqlglot/pull/6028) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`6beb917`](https://github.com/tobymao/sqlglot/commit/6beb9172dffd0aaea46b75477485060737e774b9) - **optimizer**: Annotate type for snowflake ROUND function *(PR [#6032](https://github.com/tobymao/sqlglot/pull/6032) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8e03ad9`](https://github.com/tobymao/sqlglot/commit/8e03ad9dd087ebc72bf58cb6383607c0ce2e8f8f) - **optimizer**: Annotate type for snowflake MOD function *(PR [#6031](https://github.com/tobymao/sqlglot/pull/6031) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`15b3fac`](https://github.com/tobymao/sqlglot/commit/15b3fac3dd5efd4c347ac40055f07a9be5906802) - **mysql**: support `FOR ORDINALITY` clause in `COLUMN` expression *(PR [#6046](https://github.com/tobymao/sqlglot/pull/6046) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6039](https://github.com/tobymao/sqlglot/issues/6039) opened by 
[@jdddog](https://github.com/jdddog)* - [`56c8b3b`](https://github.com/tobymao/sqlglot/commit/56c8b3bbff7451b9049e1a168716bb41222a86ed) - **hive,spark**: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark *(PR [#6004](https://github.com/tobymao/sqlglot/pull/6004) by [@tsamaras](https://github.com/tsamaras))* - [`6f31b86`](https://github.com/tobymao/sqlglot/commit/6f31b86599258afe156aa3d9ccc42389cac37021) - **optimizer**: Annotate type for snowflake FLOOR function *(PR [#6030](https://github.com/tobymao/sqlglot/pull/6030) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b7463d5`](https://github.com/tobymao/sqlglot/commit/b7463d5b0a1e286498d7ccfd9a07ef7edfa80bb2) - **optimizer**: Annotate type for snowflake ASIN function. *(PR [#6049](https://github.com/tobymao/sqlglot/pull/6049) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fe959a5`](https://github.com/tobymao/sqlglot/commit/fe959a5598508526ed5910a4c62372116b5d3c30) - **optimizer**: Annotate type for snowflake CBRT function *(PR [#6050](https://github.com/tobymao/sqlglot/pull/6050) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`cecab2f`](https://github.com/tobymao/sqlglot/commit/cecab2fd66d578ddc765b5fd0e7b155971280a0c) - **optimizer**: annotate type for Snowflake ATANH function *(PR [#6054](https://github.com/tobymao/sqlglot/pull/6054) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`08339a9`](https://github.com/tobymao/sqlglot/commit/08339a902138211f67cfb009d2576b22ea8d8e42) - **optimizer**: annotate type for Snowflake FACTORIAL function *(PR [#6053](https://github.com/tobymao/sqlglot/pull/6053) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`9060f60`](https://github.com/tobymao/sqlglot/commit/9060f603818db863b7570a2c3c50c3eb88155e76) - **optimizer**: Annotate type for 
snowflake ATAN2 function. *(PR [#6060](https://github.com/tobymao/sqlglot/pull/6060) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b3eb2e4`](https://github.com/tobymao/sqlglot/commit/b3eb2e4ca6177ee61b27675e8ec8b4815587df31) - **optimizer**: annotate type for Snowflake SINH function *(PR [#6052](https://github.com/tobymao/sqlglot/pull/6052) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`440b960`](https://github.com/tobymao/sqlglot/commit/440b960529801674fa23708212485fda95749699) - **duckdb**: support `USING KEY (...)` in recursive DuckDB CTEs *(PR [#6068](https://github.com/tobymao/sqlglot/pull/6068) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6066](https://github.com/tobymao/sqlglot/issues/6066) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`157d2fa`](https://github.com/tobymao/sqlglot/commit/157d2fa06ab110ebc760aa7567d7fda801a5ced9) - **optimizer**: annotate type for Snowflake CEIL function *(PR [#6051](https://github.com/tobymao/sqlglot/pull/6051) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`eb6d6e7`](https://github.com/tobymao/sqlglot/commit/eb6d6e7ccde37456ab56ad976e7d95cea23c14e3) - **duckdb**: support `DEFAULT VALUES` clause in `INSERT` DML *(PR [#6067](https://github.com/tobymao/sqlglot/pull/6067) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6065](https://github.com/tobymao/sqlglot/issues/6065) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`354140d`](https://github.com/tobymao/sqlglot/commit/354140d0a279f317439bdb247e1ab9578f9a035d) - **optimizer**: Annotate type for snowflake TANH and ATAN functions *(PR [#6069](https://github.com/tobymao/sqlglot/pull/6069) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - 
[`c94e3e0`](https://github.com/tobymao/sqlglot/commit/c94e3e0e4e20bd76d4cf630123d2c05a0e3044c3) - add ColumnDef expression parser *(PR [#6075](https://github.com/tobymao/sqlglot/pull/6075) by [@geooo109](https://github.com/geooo109))* - [`c67276d`](https://github.com/tobymao/sqlglot/commit/c67276d5be970252e14d1817d8498fc9985222d9) - **optimizer**: Annotate type for snowflake RADIANS function. *(PR [#6064](https://github.com/tobymao/sqlglot/pull/6064) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c49ba0e`](https://github.com/tobymao/sqlglot/commit/c49ba0eee21f7776703d2a26c6641b4a32a1cff7) - **optimizer**: Annotate type for snowflake WIDTH_BUCKET function *(PR [#6078](https://github.com/tobymao/sqlglot/pull/6078) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fbc1f13`](https://github.com/tobymao/sqlglot/commit/fbc1f1335eecaaaab4fc93ddbb74611a4df0aea7) - **optimizer**: annotate type for Snowflake CONVERT_TIMEZONE function *(PR [#6076](https://github.com/tobymao/sqlglot/pull/6076) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`70e977c`](https://github.com/tobymao/sqlglot/commit/70e977c5edfb495529d38a9096cb40762a9b5d7b) - **optimizer**: annotate type for Snowflake DATE_TRUNC function *(PR [#6080](https://github.com/tobymao/sqlglot/pull/6080) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`e9cf146`](https://github.com/tobymao/sqlglot/commit/e9cf146a4a6cd78f6a59c195e7ec12240b836e5e) - **optimizer**: annotate type for Snowflake DATE_PART function *(PR [#6079](https://github.com/tobymao/sqlglot/pull/6079) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`cdf3b1b`](https://github.com/tobymao/sqlglot/commit/cdf3b1b34dc044064d0a5ba7ff22723b8ae33e5d) - **optimizer**: Annotate type for snowflake add_months function *(PR [#6097](https://github.com/tobymao/sqlglot/pull/6097) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6fe5824`](https://github.com/tobymao/sqlglot/commit/6fe58247888c326093618657fb027e482d82d107) - **optimizer**: Annotate type for hour, minute, second functions *(PR [#6100](https://github.com/tobymao/sqlglot/pull/6100) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`483770b`](https://github.com/tobymao/sqlglot/commit/483770b816fab14b7eb7222974ed2c99045302a7) - **optimizer**: Annotate type for snowflake TIME_SLICE function *(PR [#6098](https://github.com/tobymao/sqlglot/pull/6098) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`071a995`](https://github.com/tobymao/sqlglot/commit/071a9954aad220c1e13ba7a6714a083058a8e03f) - **tsql**: add support for iso_week on DATEPART *(PR [#6111](https://github.com/tobymao/sqlglot/pull/6111) by [@lBilali](https://github.com/lBilali))* - :arrow_lower_right: *addresses issue [#6110](https://github.com/tobymao/sqlglot/issues/6110) opened by [@lBilali](https://github.com/lBilali)* - [`c286cee`](https://github.com/tobymao/sqlglot/commit/c286cee54ab93e1fd0b3be658f7e767e3e00afe9) - **optimizer**: Annotate type for snowflake MONTHNAME function *(PR [#6116](https://github.com/tobymao/sqlglot/pull/6116) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1a34788`](https://github.com/tobymao/sqlglot/commit/1a34788025bdd8a018c4bb9214f72152e68bdd14) - **optimizer**: Annotate type for snowflake PREVIOUS_DAY function *(PR [#6117](https://github.com/tobymao/sqlglot/pull/6117) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`533faf8`](https://github.com/tobymao/sqlglot/commit/533faf87b6df351070b565dd1fe9ce4e13b6c46e) - **spark**: transpile duckdb `READ_PARQUET` to `parquet.` closes [#6122](https://github.com/tobymao/sqlglot/pull/6122) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`cd4e557`](https://github.com/tobymao/sqlglot/commit/cd4e557658b1384f36c9a1ef9da5a09b893229b1) - **optimizer**: Annotate type for snowflake RANDOM function *(PR [#6124](https://github.com/tobymao/sqlglot/pull/6124) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fe63d84`](https://github.com/tobymao/sqlglot/commit/fe63d84f1bd365b22221f348d79c0546aa3118b0) - **optimizer**: annotate type for Snowflake MONTHS_BETWEEN function *(PR [#6120](https://github.com/tobymao/sqlglot/pull/6120) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`7cb7598`](https://github.com/tobymao/sqlglot/commit/7cb7598e13260aa45c851dc620b4994ddfa089fe) - **optimizer**: Annotate type for snowflake TIME_FROM_PARTS function *(PR [#6119](https://github.com/tobymao/sqlglot/pull/6119) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`598d09b`](https://github.com/tobymao/sqlglot/commit/598d09b036d938c90a44955d67175ea868090ba2) - **optimizer**: annotate type for Snowflake DATEADD function *(PR [#6089](https://github.com/tobymao/sqlglot/pull/6089) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`b98bcee`](https://github.com/tobymao/sqlglot/commit/b98bcee148ba426816e166dbfa9ba8e0979aae21) - **optimizer**: Annotate type for snowflake next_day function *(PR [#6125](https://github.com/tobymao/sqlglot/pull/6125) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fe1927f`](https://github.com/tobymao/sqlglot/commit/fe1927f28600e2d8863a4e7f06e6a21bf6ff7f9c) - **duckdb**: Transpile unix_micros to epoch_us *(PR [#6127](https://github.com/tobymao/sqlglot/pull/6127) by [@vchan](https://github.com/vchan))* - [`a531f10`](https://github.com/tobymao/sqlglot/commit/a531f107235c29ac6a7e627a323f00b8ecf7023d) - **duckdb**: transpile TimeSub *(PR [#6142](https://github.com/tobymao/sqlglot/pull/6142) by 
[@toriwei](https://github.com/toriwei))* - [`b1a9dff`](https://github.com/tobymao/sqlglot/commit/b1a9dfff52a0ffbb0b7c8bfedb0a90e245b97851) - make qualify faster by only resetting scope when needed *(PR [#6081](https://github.com/tobymao/sqlglot/pull/6081) by [@tobymao](https://github.com/tobymao))* - [`3a334f3`](https://github.com/tobymao/sqlglot/commit/3a334f376b9766b6b99fdf195ae763bb44976ec4) - **optimizer**: annotate type for boolnot snowflake function *(PR [#6141](https://github.com/tobymao/sqlglot/pull/6141) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`4e36f9d`](https://github.com/tobymao/sqlglot/commit/4e36f9dd6a854b378c9bbf6b2e9811045affc63d) - **optimizer**: Annotate type for snowflake TIMEADD function *(PR [#6134](https://github.com/tobymao/sqlglot/pull/6134) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`5242cdd`](https://github.com/tobymao/sqlglot/commit/5242cddf487e367e7f543ca19d9bccae858f36ac) - **optimizer**: annotate type for bq LENGTH *(commit by [@geooo109](https://github.com/geooo109))* - [`0fc6dbf`](https://github.com/tobymao/sqlglot/commit/0fc6dbf2e7b611fa0977e3c3e61be1cc84bcf4a9) - **snowflake**: add GREATEST_IGNORE_NULLS function support *(PR [#6161](https://github.com/tobymao/sqlglot/pull/6161) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`54ecadc`](https://github.com/tobymao/sqlglot/commit/54ecadc57b8f1e87fd2a2ba35a5366d75231ea85) - **duckdb**: support `KV_METADATA` in `COPY` statement closes [#6165](https://github.com/tobymao/sqlglot/pull/6165) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e241964`](https://github.com/tobymao/sqlglot/commit/e2419642a4966a4da194147aa488793eae152af4) - **duckdb**: support `USING` condition for `MERGE` closes [#6162](https://github.com/tobymao/sqlglot/pull/6162) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`bcf6c89`](https://github.com/tobymao/sqlglot/commit/bcf6c89a47abd3c2c4383d1c908f892b6619b6fa) - **optimizer**: add type annotation tests for snowflake BOOLAND *(PR [#6153](https://github.com/tobymao/sqlglot/pull/6153) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`52d1eec`](https://github.com/tobymao/sqlglot/commit/52d1eecaad505703e8b22dcfe8954652f57985b6) - **optimizer**: Annotate type for snowflake TIMESTAMP_FROM_PARTS function *(PR [#6139](https://github.com/tobymao/sqlglot/pull/6139) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8651fe6`](https://github.com/tobymao/sqlglot/commit/8651fe6526dea865c0d54d6d53086359a7835d32) - **optimizer**: annotate types for BOOLOR *(PR [#6159](https://github.com/tobymao/sqlglot/pull/6159) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`812ba9a`](https://github.com/tobymao/sqlglot/commit/812ba9abad8247df81c8f8b514336c8766292112) - **optimizer**: Annotate type for snowflake date parts functions *(PR [#6158](https://github.com/tobymao/sqlglot/pull/6158) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`9f8c123`](https://github.com/tobymao/sqlglot/commit/9f8c123ae44249e274334d0aa551ac33814f2b32) - make qualify table callback more generic *(PR [#6171](https://github.com/tobymao/sqlglot/pull/6171) by [@tobymao](https://github.com/tobymao))* - [`74b4e7c`](https://github.com/tobymao/sqlglot/commit/74b4e7c311e9d4ff39ce2e4d91940eced96aa32f) - **optimizer**: fix type annotation for Snowflake BOOLOR and BOOLAND *(PR [#6169](https://github.com/tobymao/sqlglot/pull/6169) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`e90168a`](https://github.com/tobymao/sqlglot/commit/e90168a6829b85534edcecec7d0df2a8b1b56fc4) - **optimizer**: annotate type for Snowflake's `IS_NULL_VALUE` function *(PR [#6186](https://github.com/tobymao/sqlglot/pull/6186) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cea2595`](https://github.com/tobymao/sqlglot/commit/cea25952c98e70f2a4c35e675fe7ee4df0af02cd) - **duckdb**: Transpile DATE function from BQ->DuckDB *(PR [#6178](https://github.com/tobymao/sqlglot/pull/6178) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`00aaa47`](https://github.com/tobymao/sqlglot/commit/00aaa47feff1cf9e69320074c35d9adfc8538026) - **duckDB**: Transpile BigQuery's CURRENT_DATE (Conversion) function to DuckDB *(PR [#6189](https://github.com/tobymao/sqlglot/pull/6189) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c93b535`](https://github.com/tobymao/sqlglot/commit/c93b5354827282c806899c36b11e7a7598e96e38) - **snowflake**: annotate type for LEAST_IGNORE_NULLS *(PR [#6196](https://github.com/tobymao/sqlglot/pull/6196) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d2162fb`](https://github.com/tobymao/sqlglot/commit/d2162fbece0747b8ee42fa1f78e26baa0c944d41) - check same ref on Expression.__eq__ *(PR [#6200](https://github.com/tobymao/sqlglot/pull/6200) by [@georgesittas](https://github.com/georgesittas))* - [`f60c71f`](https://github.com/tobymao/sqlglot/commit/f60c71fb03db91bfe90430d032ac16f4945d5dff) - **optimizer**: annotate types for REGR_VALX *(PR [#6198](https://github.com/tobymao/sqlglot/pull/6198) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`42e0ae4`](https://github.com/tobymao/sqlglot/commit/42e0ae43b3531bf6c593bcac2ece2ab1d969e5e1) - **duckdb**: transpile BigQuery function TIMESTAMP_SUB to DuckDB *(PR [#6202](https://github.com/tobymao/sqlglot/pull/6202) by [@toriwei](https://github.com/toriwei))* - [`b82c571`](https://github.com/tobymao/sqlglot/commit/b82c57131707297abe174539023b9cb62b7cd6c7) - **snowflake**: annotate types for REGR_VALY *(PR [#6206](https://github.com/tobymao/sqlglot/pull/6206) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`39d8e19`](https://github.com/tobymao/sqlglot/commit/39d8e19419c2adbb80465be414d1cc3bbc6d007b) - **snowflake**: include VARIABLE kind in SET transpilation to DuckDB *(PR [#6201](https://github.com/tobymao/sqlglot/pull/6201) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *addresses issue [#6177](https://github.com/tobymao/sqlglot/issues/6177) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`0037266`](https://github.com/tobymao/sqlglot/commit/00372664bf6acf2b0fff9ad4b206b597ef5378f7) - **snowflake**: annotate types for GETBIT *(PR [#6219](https://github.com/tobymao/sqlglot/pull/6219) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`a5458ce`](https://github.com/tobymao/sqlglot/commit/a5458ceca3bc239fb611791e38020632dd0824c8) - **snowflake**: add type annotation for DECODE function support *(PR [#6199](https://github.com/tobymao/sqlglot/pull/6199) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`a9d0f63`](https://github.com/tobymao/sqlglot/commit/a9d0f6333c38ffb0b5afc3c213ac7bf008d98ad6) - **DuckDB**: Transpile unix_millis to epoch_ms *(PR [#6224](https://github.com/tobymao/sqlglot/pull/6224) by [@vchan](https://github.com/vchan))* - [`238f705`](https://github.com/tobymao/sqlglot/commit/238f705940751f09464ee0f8260186f3f8124374) - **DuckDB**: Transpile unix_seconds to epoch *(PR [#6225](https://github.com/tobymao/sqlglot/pull/6225) by [@vchan](https://github.com/vchan))* - [`c8b0129`](https://github.com/tobymao/sqlglot/commit/c8b0129380df389be6ff22cafb4251181e919d23) - **exasol**: support bracket-delimited identifiers *(PR [#6231](https://github.com/tobymao/sqlglot/pull/6231) by [@JoepvandenHoven-Bluemine](https://github.com/JoepvandenHoven-Bluemine))* - [`417f1e8`](https://github.com/tobymao/sqlglot/commit/417f1e8ee50fb8f4377fad261660ffbd7444a429) - **snowflake**: annotate types for BITNOT *(PR 
[#6234](https://github.com/tobymao/sqlglot/pull/6234) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fe8ab40`](https://github.com/tobymao/sqlglot/commit/fe8ab40e8e0559201e0b1896a6f1a8fb6b5b932d) - **snowflake**: 1st-class parsing support for BITAND, BIT_AND, BIT_NOT *(PR [#6243](https://github.com/tobymao/sqlglot/pull/6243) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`5ae3c47`](https://github.com/tobymao/sqlglot/commit/5ae3c47b1c6993b87341472c08714f4a0f738168) - **snowflake**: add type annotation for GROUPING() function *(PR [#6244](https://github.com/tobymao/sqlglot/pull/6244) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4133265`](https://github.com/tobymao/sqlglot/commit/413326514507ef06537dcc3d4b80a3fcbcd26f66) - **clickhouse**: parse `has` function into an `ArrayContains` expression *(PR [#6245](https://github.com/tobymao/sqlglot/pull/6245) by [@joeyutong](https://github.com/joeyutong))* - [`b722aa2`](https://github.com/tobymao/sqlglot/commit/b722aa2d4b65c698921066426838f080a31bdc35) - **duckdb**: cast LOWER() result to BLOB if input is bytes *(PR [#6218](https://github.com/tobymao/sqlglot/pull/6218) by [@toriwei](https://github.com/toriwei))* - [`cdd45b9`](https://github.com/tobymao/sqlglot/commit/cdd45b949fd1eefb147053424279b56b8effcbcf) - **optimizer**: annotate types for GROUPING_ID function. 
*(PR [#6249](https://github.com/tobymao/sqlglot/pull/6249) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`080ff3b`](https://github.com/tobymao/sqlglot/commit/080ff3bd93b36291d5bb0092d722f8307f0ae082) - **snowflake**: annotate types for BITAND_AGG *(PR [#6248](https://github.com/tobymao/sqlglot/pull/6248) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`87a818a`](https://github.com/tobymao/sqlglot/commit/87a818a899f61a675c22c697f468b3f6f7e2787f) - **snowflake**: annotate types for BITOR_AGG *(PR [#6251](https://github.com/tobymao/sqlglot/pull/6251) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`a1b884d`](https://github.com/tobymao/sqlglot/commit/a1b884dc9ddfd2185de48cc9451a39f152879d39) - **snowflake**: annotate types for BITXOR_AGG *(PR [#6253](https://github.com/tobymao/sqlglot/pull/6253) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`71d93b1`](https://github.com/tobymao/sqlglot/commit/71d93b181d2aa3a77a022820446d6fec0133291f) - **duckdb**: implement casting to blob for UPPER() and move to helper method *(PR [#6254](https://github.com/tobymao/sqlglot/pull/6254) by [@toriwei](https://github.com/toriwei))* - [`ad2ad23`](https://github.com/tobymao/sqlglot/commit/ad2ad234b5a508040dce4f3920439be052742573) - **snowflake**: add missing return type mapping for MAX_BY and MIN_BY functions *(PR [#6250](https://github.com/tobymao/sqlglot/pull/6250) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`39c1d81`](https://github.com/tobymao/sqlglot/commit/39c1d81174f2390b6b0c9dd14c0e550ad452a1df) - **snowflake**: annotate types for BOOLXOR_AGG *(PR [#6261](https://github.com/tobymao/sqlglot/pull/6261) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`71590d2`](https://github.com/tobymao/sqlglot/commit/71590d22cdb05594e2173a1500f763dc1a32a81d) - **snowflake**: add type annotation for SKEW function. 
*(PR [#6262](https://github.com/tobymao/sqlglot/pull/6262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`5fd366d`](https://github.com/tobymao/sqlglot/commit/5fd366d9e6f7b3f1eb7a9cf41975cf13ce890ffe) - **snowflake**: annotate types for OBJECT_AGG *(PR [#6265](https://github.com/tobymao/sqlglot/pull/6265) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`3dae0fb`](https://github.com/tobymao/sqlglot/commit/3dae0fbb528762e5d5fd446350d42e9c841e2959) - **duckdb**: Support position and occurrence args for REGEXP_EXTRACT *(PR [#6266](https://github.com/tobymao/sqlglot/pull/6266) by [@vchan](https://github.com/vchan))* - [`dba0414`](https://github.com/tobymao/sqlglot/commit/dba04145c4bcda8c55890b4d7173dd6c0a64c37e) - **clickhouse**: Parse toStartOfxxx into exp.TimestampTrunc *(PR [#6268](https://github.com/tobymao/sqlglot/pull/6268) by [@joeyutong](https://github.com/joeyutong))* - [`d959ad0`](https://github.com/tobymao/sqlglot/commit/d959ad02140d692483a63b67d69d2a5d49954ea3) - transpile DuckDB exclusive end RANGE to SEQUENCE *(PR [#6270](https://github.com/tobymao/sqlglot/pull/6270) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6267](https://github.com/tobymao/sqlglot/issues/6267) opened by [@joeyutong](https://github.com/joeyutong)* - [`771732d`](https://github.com/tobymao/sqlglot/commit/771732d81459cc576f11eccc49794f33e62d14af) - **snowflake**: annotate types for REGR_AVGY *(PR [#6271](https://github.com/tobymao/sqlglot/pull/6271) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`8470be0`](https://github.com/tobymao/sqlglot/commit/8470be00731a4d79518a533a5f7ba884fa2f047e) - **snowflake**: add type annotation for BITMAP_COUNT function. 
*(PR [#6274](https://github.com/tobymao/sqlglot/pull/6274) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`98f25f9`](https://github.com/tobymao/sqlglot/commit/98f25f92cc1175ac7b2118a5a342db82adade13a) - **clickhouse**: support splitBy function *(PR [#6278](https://github.com/tobymao/sqlglot/pull/6278) by [@joeyutong](https://github.com/joeyutong))* - [`fabbf05`](https://github.com/tobymao/sqlglot/commit/fabbf057aba88f30205767d8c339727de45991c8) - **duckDB**: Add support for shorthand struct array literals in duckDB. *(PR [#6233](https://github.com/tobymao/sqlglot/pull/6233) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`a909fde`](https://github.com/tobymao/sqlglot/commit/a909fde068919823dc4cccc2655af48e4290137a) - **duckdb**: Add support for CREATE MACRO *(PR [#6292](https://github.com/tobymao/sqlglot/pull/6292) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#6290](https://github.com/tobymao/sqlglot/issues/6290) opened by [@francescomucio](https://github.com/francescomucio)* - [`11989be`](https://github.com/tobymao/sqlglot/commit/11989be34153ccdedeab3ab18ccf735f86e8b822) - add more expressions with positional meta *(PR [#6289](https://github.com/tobymao/sqlglot/pull/6289) by [@tobymao](https://github.com/tobymao))* - [`87651a6`](https://github.com/tobymao/sqlglot/commit/87651a671db2fe6162f06e2dcdef0b98e229bea5) - semantic facts closes [#6287](https://github.com/tobymao/sqlglot/pull/6287) *(PR [#6288](https://github.com/tobymao/sqlglot/pull/6288) by [@tobymao](https://github.com/tobymao))* - [`9c1a222`](https://github.com/tobymao/sqlglot/commit/9c1a2221b0327ba6848542c7b906e92f25a05bea) - **snowflake**: add type annotation for BITMAP_CONSTRUCT_AGG function. 
*(PR [#6285](https://github.com/tobymao/sqlglot/pull/6285) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`358105d`](https://github.com/tobymao/sqlglot/commit/358105d1296c7425e071ccf3189a31a02c00c923) - **snowflake**: type annotation for BITMAP_BIT_POSITION function *(PR [#6301](https://github.com/tobymao/sqlglot/pull/6301) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4ee7a50`](https://github.com/tobymao/sqlglot/commit/4ee7a500cc460b6f6a1ed103a12dca72e6d01c18) - **snowflake**: type inference for BITMAP_OR_AGG *(PR [#6297](https://github.com/tobymao/sqlglot/pull/6297) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fcd537d`](https://github.com/tobymao/sqlglot/commit/fcd537de2c993ad0bd18acd84dbae354165f7d3f) - **snowflake**: conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. Tests added for all dialects that support BITMAP_BUCKET_NUMBER *(PR [#6299](https://github.com/tobymao/sqlglot/pull/6299) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`3dffd59`](https://github.com/tobymao/sqlglot/commit/3dffd598496a9f2d94caec9d7f3dcb9791c94019) - **snowflake**: annotate types for PERCENTILE_DISC and WithinGroup *(PR [#6300](https://github.com/tobymao/sqlglot/pull/6300) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`6ce3cd7`](https://github.com/tobymao/sqlglot/commit/6ce3cd7de958d9f3773579ab22ae6cbbcb56ceb0) - **sqlite**: support binary `MATCH` operator closes [#6305](https://github.com/tobymao/sqlglot/pull/6305) *(PR [#6306](https://github.com/tobymao/sqlglot/pull/6306) by [@georgesittas](https://github.com/georgesittas))* - [`e903883`](https://github.com/tobymao/sqlglot/commit/e90388328fcf5b8061c99e325b87d5beb0046ffc) - **snowflake**: type annotation for APPROX_TOP_K_ACCUMULATE function *(PR [#6309](https://github.com/tobymao/sqlglot/pull/6309) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* -
[`afc0242`](https://github.com/tobymao/sqlglot/commit/afc0242c564f8de53e11865c2fba43fb36df0694) - **duckDB**: Cast inputs (BLOB → VARCHAR) for duckDB STARTS_WITH *(PR [#6240](https://github.com/tobymao/sqlglot/pull/6240) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`d170bbd`](https://github.com/tobymao/sqlglot/commit/d170bbde800a0308aaf8c81e59152c65be312155) - **duckdb**: transpile bigquery's `BYTES` variant of `REPLACE` *(PR [#6312](https://github.com/tobymao/sqlglot/pull/6312) by [@toriwei](https://github.com/toriwei))* - [`d3fefad`](https://github.com/tobymao/sqlglot/commit/d3fefad80d25ff5a6dd02426667ff0ea8478a1b2) - **tsql**: support `DATEDIFF_BIG` *(PR [#6323](https://github.com/tobymao/sqlglot/pull/6323) by [@lBilali](https://github.com/lBilali))* - [`21d1468`](https://github.com/tobymao/sqlglot/commit/21d1468377b9c8ad48c6cca1ae3b3744a807c29e) - **optimizer**: annotate type for APPROX_TOP_K *(PR [#6286](https://github.com/tobymao/sqlglot/pull/6286) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`93b4039`](https://github.com/tobymao/sqlglot/commit/93b4039f957f3eefbaaed2cb147bfa8c8c2a304e) - **duckdb**: preserve time zone and timestamp in DATE_TRUNC() *(PR [#6318](https://github.com/tobymao/sqlglot/pull/6318) by [@toriwei](https://github.com/toriwei))* - [`b71990f`](https://github.com/tobymao/sqlglot/commit/b71990f528d55c845f5771bfc4c5f6098eb97ad7) - **duckdb**: Add transpilation support for ANY_VALUE function with HAVING MAX and MIN clauses *(PR [#6325](https://github.com/tobymao/sqlglot/pull/6325) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`64c0d55`](https://github.com/tobymao/sqlglot/commit/64c0d554207ad40bcd6a93c20d15020752a5929d) - **sqlite**: support indexed table clause closes [#6331](https://github.com/tobymao/sqlglot/pull/6331) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`6725217`](https://github.com/tobymao/sqlglot/commit/6725217d4058b5202006576bdf6ff4ec7230a9b9) - **sqlite**: support `NOT NULL` operator closes [#6334](https://github.com/tobymao/sqlglot/pull/6334) closes [#6335](https://github.com/tobymao/sqlglot/pull/6335) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ca81217`](https://github.com/tobymao/sqlglot/commit/ca812171ab800e3faa73ea1874dd6814c8d6f701) - **duckdb**: Transpile INITCAP with custom delimiters *(PR [#6302](https://github.com/tobymao/sqlglot/pull/6302) by [@treysp](https://github.com/treysp))* - [`7484c06`](https://github.com/tobymao/sqlglot/commit/7484c06be4534cd22dee14da542d5e29ff2c13a2) - **DuckDB**: Support rounding mode argument for ROUND function *(PR [#6350](https://github.com/tobymao/sqlglot/pull/6350) by [@vchan](https://github.com/vchan))* - [`79e314d`](https://github.com/tobymao/sqlglot/commit/79e314df76161319ba8495b95f54603cfef0c08a) - **duckdb**: handle casting BLOB input for TRIM() *(PR [#6353](https://github.com/tobymao/sqlglot/pull/6353) by [@toriwei](https://github.com/toriwei))* - [`c495a40`](https://github.com/tobymao/sqlglot/commit/c495a40ee4c1a69b14892e8455ae1bd2ceb5ea4f) - **optimizer**: annotate type for MINHASH *(PR [#6355](https://github.com/tobymao/sqlglot/pull/6355) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`f16f8a0`](https://github.com/tobymao/sqlglot/commit/f16f8a08072556fd617b5125300262d9bb8c1e48) - improve validate qualify column message closes [#6348](https://github.com/tobymao/sqlglot/pull/6348) *(PR [#6356](https://github.com/tobymao/sqlglot/pull/6356) by [@tobymao](https://github.com/tobymao))* - [`17abe23`](https://github.com/tobymao/sqlglot/commit/17abe231bc4d59912952f266ad4df86ece22c8d2) - make simplify more efficient in number of iterations *(PR [#6351](https://github.com/tobymao/sqlglot/pull/6351) by [@tobymao](https://github.com/tobymao))* - 
[`b1f9a97`](https://github.com/tobymao/sqlglot/commit/b1f9a976be3c0bcd895bef5bcdb95a013eeb28b7) - **optimizer**: annotate type for APPROXIMATE_SIMILARITY *(PR [#6360](https://github.com/tobymao/sqlglot/pull/6360) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`3aafca7`](https://github.com/tobymao/sqlglot/commit/3aafca74546b932cea93ed830c021f347ae03ded) - **optimizer**: annotate type for MINHASH_COMBINE *(PR [#6362](https://github.com/tobymao/sqlglot/pull/6362) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`09a4bd8`](https://github.com/tobymao/sqlglot/commit/09a4bd8870a075e641c6e3e4cee74d73a39e760a) - Trigger integration tests *(PR [#6339](https://github.com/tobymao/sqlglot/pull/6339) by [@erindru](https://github.com/erindru))* - [`7769129`](https://github.com/tobymao/sqlglot/commit/7769129eba7ae5f3594e0061bdb1079fedc5aafd) - bignum and time_ns to duckdb closes [#6379](https://github.com/tobymao/sqlglot/pull/6379) *(commit by [@tobymao](https://github.com/tobymao))* - [`90a3fa9`](https://github.com/tobymao/sqlglot/commit/90a3fa9f6ddf0aa32b41118c59d4facd9fdb3398) - mark IgnoreNulls and RespectNulls as unsupported on postgres and mysql *(PR [#6377](https://github.com/tobymao/sqlglot/pull/6377) by [@NickCrews](https://github.com/NickCrews))* - :arrow_lower_right: *addresses issue [#6376](https://github.com/tobymao/sqlglot/issues/6376) opened by [@NickCrews](https://github.com/NickCrews)* - [`5bb1170`](https://github.com/tobymao/sqlglot/commit/5bb117082caeee719442d783ce6742d027b1492e) - transpile bigquery `greatest` null handling to duckdb *(PR [#6361](https://github.com/tobymao/sqlglot/pull/6361) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`bf07abd`](https://github.com/tobymao/sqlglot/commit/bf07abd4ee9eb0f5510cb7d1f232bdcaea88941e) - **snowflake**: annotation support for APPROX_TOP_K_COMBINE *(PR [#6378](https://github.com/tobymao/sqlglot/pull/6378) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`01890eb`](https://github.com/tobymao/sqlglot/commit/01890eb16d6624de4f26b7d8eadf850df6f2a042) - **trino**: support refresh materialized view statement closes [#6387](https://github.com/tobymao/sqlglot/pull/6387) *(PR [#6388](https://github.com/tobymao/sqlglot/pull/6388) by [@georgesittas](https://github.com/georgesittas))* - [`e4ea6cc`](https://github.com/tobymao/sqlglot/commit/e4ea6ccf08c0ff4063424bf538bc3b22f4b4cfaf) - transpile BQ APPROX_QUANTILES to DuckDB *(PR [#6349](https://github.com/tobymao/sqlglot/pull/6349) by [@treysp](https://github.com/treysp))* - [`95727f6`](https://github.com/tobymao/sqlglot/commit/95727f60d601796b34c850dee9366d79f6e4a24b) - **optimizer**: canonicalize table aliases *(PR [#6369](https://github.com/tobymao/sqlglot/pull/6369) by [@georgesittas](https://github.com/georgesittas))* - [`3b6855b`](https://github.com/tobymao/sqlglot/commit/3b6855b9787111f27225108241fbe4f389443e29) - **mysql**: support ZEROFILL column attribute *(PR [#6400](https://github.com/tobymao/sqlglot/pull/6400) by [@nian0114](https://github.com/nian0114))* - :arrow_lower_right: *addresses issue [#6399](https://github.com/tobymao/sqlglot/issues/6399) opened by [@nian0114](https://github.com/nian0114)* - [`bb4eda1`](https://github.com/tobymao/sqlglot/commit/bb4eda1beb68b92de9ab014a63c67797a07df2fa) - **duckdb**: support transpiling SHA1 from BigQuery to DuckDB *(PR [#6404](https://github.com/tobymao/sqlglot/pull/6404) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`05e83b5`](https://github.com/tobymao/sqlglot/commit/05e83b56f1bf9323cfa819a7f1beb542524c1219) - **duckdb**: support transpilation of LEAST from BigQuery to DuckDB *(PR [#6415](https://github.com/tobymao/sqlglot/pull/6415) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`38472ce`](https://github.com/tobymao/sqlglot/commit/38472ce14bce731ba4c309d515223ae99e2575ac) - **duckdb**: transpile 
bigquery's %x format literal *(PR [#6375](https://github.com/tobymao/sqlglot/pull/6375) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`a6e1581`](https://github.com/tobymao/sqlglot/commit/a6e15811cf5643bcc18e1e227fea20922b05c54a) - **DuckDB**: Cast BIGNUMERIC and BIGDECIMAL types to DECIMAL(38, 5) *(PR [#6419](https://github.com/tobymao/sqlglot/pull/6419) by [@vchan](https://github.com/vchan))* - [`0b9d8ac`](https://github.com/tobymao/sqlglot/commit/0b9d8acbe75457424436e8c0acc047ab66e9fdc0) - **snowflake**: Annotate type for snowflake MAX function *(PR [#6422](https://github.com/tobymao/sqlglot/pull/6422) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`68e9414`](https://github.com/tobymao/sqlglot/commit/68e9414725a60b2842d870fa222d8466057a94f6) - **snowflake**: Annotate type for snowflake MIN function *(PR [#6427](https://github.com/tobymao/sqlglot/pull/6427) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`1318de7`](https://github.com/tobymao/sqlglot/commit/1318de77a8aa514ec7eb9f9b8c03228e3f8eb008) - **snowflake**: Annotate type for snowflake NORMAL *(PR [#6434](https://github.com/tobymao/sqlglot/pull/6434) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ffbb5c7`](https://github.com/tobymao/sqlglot/commit/ffbb5c7e40aa064ffcd4827e96ea66cfd045118e) - **snowflake**: annotate type for HASH_AGG in Snowflake *(PR [#6438](https://github.com/tobymao/sqlglot/pull/6438) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`161255f`](https://github.com/tobymao/sqlglot/commit/161255f6c90b9c3ed2074e734f6d074db1d7a6dd) - Add support for `LOCALTIME` function *(PR [#6443](https://github.com/tobymao/sqlglot/pull/6443) by [@VaggelisD](https://github.com/VaggelisD))* - [`ca329f0`](https://github.com/tobymao/sqlglot/commit/ca329f037a230c315437d830638b514190764c5a) - **duckdb**: support transpilation of SHA256 from bigquery to duckdb *(PR 
[#6421](https://github.com/tobymao/sqlglot/pull/6421) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`e18ae24`](https://github.com/tobymao/sqlglot/commit/e18ae248423dbbca78a24a60ea0193da2ee7f68c) - **snowflake**: Annotate type for snowflake REGR_SLOPE function *(PR [#6425](https://github.com/tobymao/sqlglot/pull/6425) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`1d847f0`](https://github.com/tobymao/sqlglot/commit/1d847f0a1f88fce5df340ab646a72c8abbc12a86) - **snowflake**: parse & annotate `CHECK_JSON`, `CHECK_XML` *(PR [#6439](https://github.com/tobymao/sqlglot/pull/6439) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`6843812`](https://github.com/tobymao/sqlglot/commit/68438129ceeea70f801e0ae728c51c19291fc7d8) - add correlation id to remote workflow trigger *(PR [#6441](https://github.com/tobymao/sqlglot/pull/6441) by [@erindru](https://github.com/erindru))* - [`cb3080d`](https://github.com/tobymao/sqlglot/commit/cb3080d4bed18b1bfbbd08380ed60deeefd15530) - **snowflake**: annotation support for APPROX_TOP_K_ESTIMATE.
Return type ARRAY *(PR [#6445](https://github.com/tobymao/sqlglot/pull/6445) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cd9f037`](https://github.com/tobymao/sqlglot/commit/cd9f037882eef253e86fdb1d51521e0acd7db3f9) - **optimizer**: store pk name if provided *(PR [#6424](https://github.com/tobymao/sqlglot/pull/6424) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`65194e4`](https://github.com/tobymao/sqlglot/commit/65194e465489151aa51859a6e3f5672f7d4c5f3b) - **snowflake**: Annotate type for snowflake RANDSTR function *(PR [#6436](https://github.com/tobymao/sqlglot/pull/6436) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`351d783`](https://github.com/tobymao/sqlglot/commit/351d7834915e02a9f4949f9925437e2731f3a8b4) - add support for LOCALTIMESTAMP *(PR [#6448](https://github.com/tobymao/sqlglot/pull/6448) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`a56262e`](https://github.com/tobymao/sqlglot/commit/a56262e6b4276baae144855478807c173db77ab9) - **snowflake**: Annotate type for snowflake MEDIAN *(PR [#6426](https://github.com/tobymao/sqlglot/pull/6426) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`2c56567`](https://github.com/tobymao/sqlglot/commit/2c56567755c8a6571d8b7d410c9de943e54df58b) - **snowflake**: Annotate type for snowflake SEARCH_IP *(PR [#6440](https://github.com/tobymao/sqlglot/pull/6440) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ac86568`](https://github.com/tobymao/sqlglot/commit/ac86568a939f692b99813da100297b61fb54e044) - **snowflake**: Added decfloat type *(PR [#6444](https://github.com/tobymao/sqlglot/pull/6444) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ebe718a`](https://github.com/tobymao/sqlglot/commit/ebe718a72d5b5871a8d6e67754ff50e873d55b41) - **duckdb**: Add support for format elements used in date/time functions like FORMAT_DATETIME *(PR 
[#6428](https://github.com/tobymao/sqlglot/pull/6428) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c111f64`](https://github.com/tobymao/sqlglot/commit/c111f643d61064280024b4cc5c0fc250581fbe55) - **snowflake**: annotation support for APPROX_PERCENTILE_ACCUMULATE *(PR [#6455](https://github.com/tobymao/sqlglot/pull/6455) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`a7d211e`](https://github.com/tobymao/sqlglot/commit/a7d211e6fdce968c64b050c77e026cc23fdc07e5) - **duckdb**: transpile DECFLOAT type to DECIMAL(38, 5) *(PR [#6462](https://github.com/tobymao/sqlglot/pull/6462) by [@toriwei](https://github.com/toriwei))* - [`94d46b8`](https://github.com/tobymao/sqlglot/commit/94d46b8eafd5abe252407d2bbe306ca579a29b20) - **snowflake**: annotation support for APPROX_PERCENTILE_ESTIMATE. Return type DOUBLE *(PR [#6461](https://github.com/tobymao/sqlglot/pull/6461) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2ac30b0`](https://github.com/tobymao/sqlglot/commit/2ac30b08bd663bbaf00ae075c4db0c3d27ab6640) - **snowflake**: annotation support for APPROX_PERCENTILE_COMBINE *(PR [#6460](https://github.com/tobymao/sqlglot/pull/6460) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d44bda3`](https://github.com/tobymao/sqlglot/commit/d44bda376c06956947a09a9f279cce886a63b981) - **optimizer**: Annotate type for ZIPF *(PR [#6453](https://github.com/tobymao/sqlglot/pull/6453) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`34dbd47`](https://github.com/tobymao/sqlglot/commit/34dbd478957c1796998d0b263f63c8ce1db7a320) - **optimizer**: Annotate type for XMLGET *(PR [#6457](https://github.com/tobymao/sqlglot/pull/6457) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ff3f0f9`](https://github.com/tobymao/sqlglot/commit/ff3f0f998674f5b2741c3f6cadbe24fa8fb607ad) - **databricks**: add support for ?:: operator *(PR 
[#6469](https://github.com/tobymao/sqlglot/pull/6469) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`0d211f2`](https://github.com/tobymao/sqlglot/commit/0d211f2b36167cfb7856b8ec25f597f70317a9c7) - **snowflake**: annotate type for MODE function snowflake *(PR [#6447](https://github.com/tobymao/sqlglot/pull/6447) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`cc4c8ab`](https://github.com/tobymao/sqlglot/commit/cc4c8ab43ab71790bc2bb9f8f3c06e34f89f999f) - **snowflake**: annotate type for PERCENTILE_CONT in Snowflake *(PR [#6470](https://github.com/tobymao/sqlglot/pull/6470) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7dbc242`](https://github.com/tobymao/sqlglot/commit/7dbc242a637a8890511cc14f22bce4d425f1f55d) - **snowflake**: annotation support for CURRENT REGION. Return type VARCHAR *(PR [#6473](https://github.com/tobymao/sqlglot/pull/6473) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`43a6a5c`](https://github.com/tobymao/sqlglot/commit/43a6a5c601421e15a7f94dd489cb4fbcf9d2c8c3) - **snowflake**: annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR *(PR [#6475](https://github.com/tobymao/sqlglot/pull/6475) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`f1f7c6a`](https://github.com/tobymao/sqlglot/commit/f1f7c6ae6b6aa3f6f2251d0f81ee667440ca53d1) - **snowflake**: annotation support for CURRENT_ORGANIZATION_USER. 
*(PR [#6476](https://github.com/tobymao/sqlglot/pull/6476) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d268203`](https://github.com/tobymao/sqlglot/commit/d268203e1dbae4e3aff863108f6d09a6f8274db5) - **snowflake**: annotation support for CURRENT_ROLE_TYPE *(PR [#6479](https://github.com/tobymao/sqlglot/pull/6479) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fd4431b`](https://github.com/tobymao/sqlglot/commit/fd4431bf9550c03aa761c642a68a21a146fd8548) - **snowflake**: annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions *(PR [#6468](https://github.com/tobymao/sqlglot/pull/6468) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`e6adba7`](https://github.com/tobymao/sqlglot/commit/e6adba76cc2f27633a9d38bfaea3356e71d00a4c) - **BigQuery**: Add support for coercing STRING literals to temporal types *(PR [#6482](https://github.com/tobymao/sqlglot/pull/6482) by [@vchan](https://github.com/vchan))* - [`68a5e61`](https://github.com/tobymao/sqlglot/commit/68a5e615b24e518cb90c9b80cf25355fcabdb468) - **snowflake**: annotate type for REGR_* functions *(PR [#6452](https://github.com/tobymao/sqlglot/pull/6452) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`f7458a4`](https://github.com/tobymao/sqlglot/commit/f7458a40d3b09a2e212f6705ac4a77c99714508e) - **optimizer**: annotate type for snowflake func TO_BOOLEAN *(PR [#6481](https://github.com/tobymao/sqlglot/pull/6481) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`1531a67`](https://github.com/tobymao/sqlglot/commit/1531a67ac7806f3b4582f6cf1ea02342a517de74) - **snowflake**: annotate type for VECTOR_INNER_PRODUCT *(PR [#6486](https://github.com/tobymao/sqlglot/pull/6486) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`f6b2b3b`](https://github.com/tobymao/sqlglot/commit/f6b2b3bc6e1c95340149be65d80ef7e177b28d82) - **snowflake**: support padside argument 
for BIT[OR|AND|XOR] *(PR [#6487](https://github.com/tobymao/sqlglot/pull/6487) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`e891397`](https://github.com/tobymao/sqlglot/commit/e89139714aefd8a6481a90d9753c81910c9f88e9) - **BigQuery**: Add support for the NET.HOST function *(PR [#6480](https://github.com/tobymao/sqlglot/pull/6480) by [@vchan](https://github.com/vchan))* - [`2cc67cd`](https://github.com/tobymao/sqlglot/commit/2cc67cd7386914043a9cb4eb322fb1fa9af15c8b) - **singlestore**: support dcolonqmark *(PR [#6485](https://github.com/tobymao/sqlglot/pull/6485) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7d485c7`](https://github.com/tobymao/sqlglot/commit/7d485c7cffe7b6d0113cfcfcf0736de0383bd380) - **duckdb**: Add transpilation support for the negative integer args for BITNOT *(PR [#6490](https://github.com/tobymao/sqlglot/pull/6490) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ef130f1`](https://github.com/tobymao/sqlglot/commit/ef130f1b944b4be835d4a6831fec9a333a825a34) - **snowflake**: Annotated type for ARRAY_CONSTRUCT_COMPACT [#6496](https://github.com/tobymao/sqlglot/pull/6496) *(commit by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`36cf0bf`](https://github.com/tobymao/sqlglot/commit/36cf0bf6671f622344afee52d7aafe30f19ecf9a) - **snowflake**: annotation support for CURRENT_ROLE. 
*(PR [#6478](https://github.com/tobymao/sqlglot/pull/6478) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cbba04c`](https://github.com/tobymao/sqlglot/commit/cbba04cb292fe8b3fd38c87d9ccb624cdcb52843) - **databricks**: support comma-separated syntax for OVERLAY function *(PR [#6497](https://github.com/tobymao/sqlglot/pull/6497) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`dc8f26a`](https://github.com/tobymao/sqlglot/commit/dc8f26a3a5e023a0e54caa345b129fb1b4fe805f) - **optimizer**: bq annotate type for NULL *(PR [#6491](https://github.com/tobymao/sqlglot/pull/6491) by [@geooo109](https://github.com/geooo109))* - [`c97a81d`](https://github.com/tobymao/sqlglot/commit/c97a81d68a1584fad48475725665a7678fcad9d1) - **optimizer**: annotate TO_HEX(MD5(...)) in BigQuery *(PR [#6500](https://github.com/tobymao/sqlglot/pull/6500) by [@georgesittas](https://github.com/georgesittas))* - [`a5797a1`](https://github.com/tobymao/sqlglot/commit/a5797a1c867c4ade71ae4ddf93232576993cf5bc) - **duckdb**: handle named arguments and non-integer scale input for ROUND *(PR [#6495](https://github.com/tobymao/sqlglot/pull/6495) by [@toriwei](https://github.com/toriwei))* - [`8b5298a`](https://github.com/tobymao/sqlglot/commit/8b5298a6578af80fd9676eb222422862d5468859) - **duckdb**: Transpile BQ's WEEK based `DATE_DIFF` *(PR [#6507](https://github.com/tobymao/sqlglot/pull/6507) by [@VaggelisD](https://github.com/VaggelisD))* - [`2c013a5`](https://github.com/tobymao/sqlglot/commit/2c013a5cc8e37cde8a8f9443e0397191ce82f0f5) - **exasol**: qualify bare stars to facilitate transpilation *(PR [#6431](https://github.com/tobymao/sqlglot/pull/6431) by [@nnamdi16](https://github.com/nnamdi16))* - [`41b776b`](https://github.com/tobymao/sqlglot/commit/41b776bdc6936f18accd9f7308b55acd383bb596) - **postgres,trino,duckdb**: added support for current_catalog *(PR [#6492](https://github.com/tobymao/sqlglot/pull/6492) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`dd19bea`](https://github.com/tobymao/sqlglot/commit/dd19beae95f077cfd8b6e315eca7ff212817b250) - **snowflake**: annotation support for CURRENT_ACCOUNT *(PR [#6512](https://github.com/tobymao/sqlglot/pull/6512) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2e8105e`](https://github.com/tobymao/sqlglot/commit/2e8105eebaec25fc8f94f1e68951198660f404e1) - **snowflake**: Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP *(PR [#6488](https://github.com/tobymao/sqlglot/pull/6488) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`cfb02c1`](https://github.com/tobymao/sqlglot/commit/cfb02c1aa676e801b2d13a84467b4904cd834ffe) - **snowflake**: annotation support for CURRENT_ACCOUNT_NAME *(PR [#6513](https://github.com/tobymao/sqlglot/pull/6513) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`1004e31`](https://github.com/tobymao/sqlglot/commit/1004e31cce62cce2e2afb7eab85ed8bdecaede3b) - **snowflake**: annotation support for CURRENT_AVAILABLE_ROLES *(PR [#6514](https://github.com/tobymao/sqlglot/pull/6514) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`ff201fe`](https://github.com/tobymao/sqlglot/commit/ff201febd27937a97674dd091928456dde733254) - **snowflake**: annotation support for CURRENT_CLIENT *(PR [#6515](https://github.com/tobymao/sqlglot/pull/6515) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d777a9c`](https://github.com/tobymao/sqlglot/commit/d777a9c0feef15ac036f7b413112de4d7cc8bea4) - **snowflake**: annotation support for CURRENT_IP_ADDRESS *(PR [#6518](https://github.com/tobymao/sqlglot/pull/6518) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c296061`](https://github.com/tobymao/sqlglot/commit/c2960615a3bd279b7c5f775d5b93ae12aa27a3b8) - **snowflake**: Transpilation of TO_BINARY from snowflake to duckdb *(PR [#6504](https://github.com/tobymao/sqlglot/pull/6504) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7a70164`](https://github.com/tobymao/sqlglot/commit/7a70164d8cf361cf4c0a7d5789bb51676f772959) - **duckdb**: transpile Snowflake's `RANDSTR` function *(PR [#6502](https://github.com/tobymao/sqlglot/pull/6502) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`a26d419`](https://github.com/tobymao/sqlglot/commit/a26d4191e5468e39eafdf7a981e7b890d438b2c9) - **snowflake**: annotation support for CURRENT_DATABASE *(PR [#6516](https://github.com/tobymao/sqlglot/pull/6516) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`0acdf7f`](https://github.com/tobymao/sqlglot/commit/0acdf7fc783f2722536ec24dcf8600957febf7ca) - **snowflake**: annotation support for CURRENT_SCHEMAS *(PR [#6519](https://github.com/tobymao/sqlglot/pull/6519) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`43cce89`](https://github.com/tobymao/sqlglot/commit/43cce895da80d21abc89d40de5d7fddd68871bf0) - **snowflake**: annotation support for CURRENT_SECONDARY_ROLES *(PR [#6520](https://github.com/tobymao/sqlglot/pull/6520) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c21b4b1`](https://github.com/tobymao/sqlglot/commit/c21b4b1134b368ee5144339b59e70ddcc54f3dbc) - **snowflake**: annotation support for CURRENT_SESSION *(PR [#6521](https://github.com/tobymao/sqlglot/pull/6521) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`57a83c0`](https://github.com/tobymao/sqlglot/commit/57a83c018dace690f7bb363c25ee6bde33c3d60f) - **snowflake**: annotation support for CURRENT_STATEMENT *(PR [#6522](https://github.com/tobymao/sqlglot/pull/6522) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4b240e4`](https://github.com/tobymao/sqlglot/commit/4b240e40a8809a6eea2a279370a884f4a7b03dfa) - **snowflake**: annotation support for CURRENT_VERSION *(PR [#6524](https://github.com/tobymao/sqlglot/pull/6524) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c1a831f`](https://github.com/tobymao/sqlglot/commit/c1a831f5bf662ab8d8e07dc2bb949f2adcbe7d7c) - **snowflake**: annotation support for CURRENT_TRANSACTION *(PR [#6523](https://github.com/tobymao/sqlglot/pull/6523) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2e162b0`](https://github.com/tobymao/sqlglot/commit/2e162b0d34066e7aa7edac3156739bcd31a634fc) - **snowflake**: annotation support for CURRENT_WAREHOUSE *(PR [#6525](https://github.com/tobymao/sqlglot/pull/6525) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`18e9814`](https://github.com/tobymao/sqlglot/commit/18e98145906eaa5b769af49cf46b58a1d9448aee) - **snowflake**: support DAYOFWEEK_ISO date part *(PR [#6531](https://github.com/tobymao/sqlglot/pull/6531) by [@toriwei](https://github.com/toriwei))* - [`ee5e7b9`](https://github.com/tobymao/sqlglot/commit/ee5e7b931ca745a000dc8a720b56aee7b44186b2) - Automatically trigger integration tests scoped to modified dialects *(PR [#6505](https://github.com/tobymao/sqlglot/pull/6505) by [@erindru](https://github.com/erindru))* - [`e60634f`](https://github.com/tobymao/sqlglot/commit/e60634f0e1c396b54ad357132606286bd21d3e36) - **clickhouse**: Add support for quantilesExactExclusive agg func *(PR [#6535](https://github.com/tobymao/sqlglot/pull/6535) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#6533](https://github.com/tobymao/sqlglot/issues/6533) opened by [@vargasj-ms](https://github.com/vargasj-ms)* - [`41a9e88`](https://github.com/tobymao/sqlglot/commit/41a9e88bb9800205df0b3e10a1976699dc4fe4f9) - **duckdb**: Add support to transpile binary args for bitwise operators *(PR [#6508](https://github.com/tobymao/sqlglot/pull/6508) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`2555856`](https://github.com/tobymao/sqlglot/commit/2555856cac7434ef91cc1584d52610178e45c4b9) 
- **optimizer**: annotate scalar subqueries *(PR [#6536](https://github.com/tobymao/sqlglot/pull/6536) by [@georgesittas](https://github.com/georgesittas))* - [`71e7630`](https://github.com/tobymao/sqlglot/commit/71e763096462aa888a353ac1ad3675a9e5b4841a) - **snowflake**: normalize FLOAT to DOUBLE *(PR [#6501](https://github.com/tobymao/sqlglot/pull/6501) by [@toriwei](https://github.com/toriwei))* - [`9badf6a`](https://github.com/tobymao/sqlglot/commit/9badf6a6b1972fc37164b29aa416bb897d7ec6a6) - **snowflake**: Annotate type for TRY_* functions *(PR [#6509](https://github.com/tobymao/sqlglot/pull/6509) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`aad1332`](https://github.com/tobymao/sqlglot/commit/aad1332fee7c82c29dae3caed9a6a1c882c1d4a0) - **duckdb**: support transpilation of BITMAP_BIT_POSITION from snowflake to duckdb *(PR [#6541](https://github.com/tobymao/sqlglot/pull/6541) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`980f538`](https://github.com/tobymao/sqlglot/commit/980f53852444ec38c45483cbbf63a286244262a6) - **databricks, snowflake**: support UNIFORM function *(PR [#6547](https://github.com/tobymao/sqlglot/pull/6547) by [@toriwei](https://github.com/toriwei))* - [`8a12611`](https://github.com/tobymao/sqlglot/commit/8a12611e9499497d0c8b1e1e418986b2d91a6505) - **snowflake**: New type + type annotation for TO_FILE *(PR [#6548](https://github.com/tobymao/sqlglot/pull/6548) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`906c933`](https://github.com/tobymao/sqlglot/commit/906c933235c82598b0d08f8c66dd3db0b8f409a5) - **postgres**: overlap operator *(PR [#6545](https://github.com/tobymao/sqlglot/pull/6545) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`6fa5e9c`](https://github.com/tobymao/sqlglot/commit/6fa5e9ce017185d721fab23d271f2fcb6098190e) - **snowflake**: transpile FLOOR with scale arg to DuckDB *(PR [#6549](https://github.com/tobymao/sqlglot/pull/6549) by 
[@treysp](https://github.com/treysp))* - [`487b1db`](https://github.com/tobymao/sqlglot/commit/487b1dbff86f05acbd3caa769cd330a8a373480b) - make parser more conservative as assignments are relatively rare *(commit by [@tobymao](https://github.com/tobymao))* - [`370b1f6`](https://github.com/tobymao/sqlglot/commit/370b1f621844d3ac8831c998ea2046f1e1b91b65) - **postgres,tsql,duckdb**: add support for session_user *(PR [#6555](https://github.com/tobymao/sqlglot/pull/6555) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`8d83ee2`](https://github.com/tobymao/sqlglot/commit/8d83ee207070cc17ce0e433d636464255e09748f) - **duckdb**: Transpiled REGR_VALX and VALY *(PR [#6538](https://github.com/tobymao/sqlglot/pull/6538) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ad546b3`](https://github.com/tobymao/sqlglot/commit/ad546b30d40abe0da97b38bd300452e0354231e3) - **databricks**: add support for getdate(), now(), and current_timezone() *(PR [#6567](https://github.com/tobymao/sqlglot/pull/6567) by [@toriwei](https://github.com/toriwei))* - [`dbbace0`](https://github.com/tobymao/sqlglot/commit/dbbace01cd5f1fc44f5ad278def25f547686f9c5) - **snowflake**: remove transpilation support of APPROX_TOP_K to duckdb *(PR [#6560](https://github.com/tobymao/sqlglot/pull/6560) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2bc2506`](https://github.com/tobymao/sqlglot/commit/2bc2506e0e0b26e82661a08217855d693f30dc25) - **bigquery**: support SAFE.TIMESTAMP annotation *(PR [#6550](https://github.com/tobymao/sqlglot/pull/6550) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a51cc7b`](https://github.com/tobymao/sqlglot/commit/a51cc7b6e02c5b37bf43b82a0d76b83d41248ac9) - **mysql**: elt function in mysql *(PR [#6568](https://github.com/tobymao/sqlglot/pull/6568) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`4339b26`](https://github.com/tobymao/sqlglot/commit/4339b26db546862b10a0e8d746506b406ecfa306) - 
**optimizer**: expose struct fields using UNNEST without aliases *(PR [#6566](https://github.com/tobymao/sqlglot/pull/6566) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`7bfffe5`](https://github.com/tobymao/sqlglot/commit/7bfffe5d894c60bd0139d57c53bb1816c2739d74) - **snowflake**: support transpilation of TO_BOOLEAN from snowflake to duckdb *(PR [#6564](https://github.com/tobymao/sqlglot/pull/6564) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`d1cd9d6`](https://github.com/tobymao/sqlglot/commit/d1cd9d65be7779ea442260a92978d569f3d6ccd8) - **databricks**: support CURDATE *(PR [#6578](https://github.com/tobymao/sqlglot/pull/6578) by [@toriwei](https://github.com/toriwei))* - [`c906a96`](https://github.com/tobymao/sqlglot/commit/c906a96b33d28dfc1191840817dbf2bab0731d17) - **snowflake**: support GETDATE, SYSDATE, SYSTIMESTAMP *(PR [#6575](https://github.com/tobymao/sqlglot/pull/6575) by [@toriwei](https://github.com/toriwei))* ### :bug: Bug Fixes - [`9020684`](https://github.com/tobymao/sqlglot/commit/9020684a7e984a10fa4775339596ac5a0d6a6d93) - nested natural join performance closes [#5514](https://github.com/tobymao/sqlglot/pull/5514) *(PR [#5515](https://github.com/tobymao/sqlglot/pull/5515) by [@tobymao](https://github.com/tobymao))* - [`394870a`](https://github.com/tobymao/sqlglot/commit/394870a7ee9bb3bc814b7c3847193687f06b432b) - **duckdb**: transpile ADD_MONTHS *(PR [#5523](https://github.com/tobymao/sqlglot/pull/5523) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5505](https://github.com/tobymao/sqlglot/issues/5505) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`249692c`](https://github.com/tobymao/sqlglot/commit/249692c67450a1fe3775e1f35b6f62fdb0a62e1a) - **duckdb**: put guard in AddMonths generator before annotating it *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`d799c5a`](https://github.com/tobymao/sqlglot/commit/d799c5af23010a67c29edb6d45a40fb24903e1a3) - **optimizer**: preserve projection names when merging subqueries *(commit by [@snovik75](https://github.com/snovik75))* - [`8130bd4`](https://github.com/tobymao/sqlglot/commit/8130bd40815803a6781ee8f20fccd30987516192) - **parser**: WEEKDAY of WEEK as VAR *(PR [#5552](https://github.com/tobymao/sqlglot/pull/5552) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5547](https://github.com/tobymao/sqlglot/issues/5547) opened by [@rloredo](https://github.com/rloredo)* - [`4e1373f`](https://github.com/tobymao/sqlglot/commit/4e1373f301cbea3cb5762fc1430b65deae3f9d04) - **doris**: Rename Table *(PR [#5549](https://github.com/tobymao/sqlglot/pull/5549) by [@xinge-ji](https://github.com/xinge-ji))* - [`16f544d`](https://github.com/tobymao/sqlglot/commit/16f544dc25d5d61277d32f02e4be18c10d16cf9f) - **doris**: fix DATE_TRUNC and partition by *(PR [#5553](https://github.com/tobymao/sqlglot/pull/5553) by [@xinge-ji](https://github.com/xinge-ji))* - [`6295414`](https://github.com/tobymao/sqlglot/commit/6295414fb41401f92993e661b880a0727e74c087) - convert unit to Var instead of choosing default in `unit_to_var` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6872b43`](https://github.com/tobymao/sqlglot/commit/6872b43ba17a39137172fd2fa9f0d059ce595ef9) - **parser**: use dialect in DataType.build fixes [#5560](https://github.com/tobymao/sqlglot/pull/5560) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6f354d9`](https://github.com/tobymao/sqlglot/commit/6f354d958fb9ca9242b7fc1d2da86af74d57fedc) - **clickhouse**: add ROWS keyword in OFFSET followed by FETCH fixes [#5564](https://github.com/tobymao/sqlglot/pull/5564) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8c0cb76`](https://github.com/tobymao/sqlglot/commit/8c0cb764fd825062fb7334032b8eeffbc39627d5) - **parser**: more robust CREATE SEQUENCE *(PR 
[#5566](https://github.com/tobymao/sqlglot/pull/5566) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5537](https://github.com/tobymao/sqlglot/issues/5537) opened by [@tekumara](https://github.com/tekumara)* - [`7e9df88`](https://github.com/tobymao/sqlglot/commit/7e9df880bc118d0dbb2dbd6344f805f79af2fe5e) - **doris**: CURRENT_DATE *(PR [#5567](https://github.com/tobymao/sqlglot/pull/5567) by [@xinge-ji](https://github.com/xinge-ji))* - [`51e0335`](https://github.com/tobymao/sqlglot/commit/51e0335377fe2bc2e2a94a623475791e9dd19fb9) - **optimizer**: parse and annotate type for bigquery REVERSE *(PR [#5571](https://github.com/tobymao/sqlglot/pull/5571) by [@geooo109](https://github.com/geooo109))* - [`d0d62ed`](https://github.com/tobymao/sqlglot/commit/d0d62ede6320b3fd0eee04b7073f5708676dc58c) - **dremio**: support `TO_CHAR` with numeric inputs *(PR [#5570](https://github.com/tobymao/sqlglot/pull/5570) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`7928985`](https://github.com/tobymao/sqlglot/commit/7928985a655c3d0244bc9175a37f502b19a5c5f0) - **bigquery**: allow dashes in JSONPath keys *(PR [#5574](https://github.com/tobymao/sqlglot/pull/5574) by [@georgesittas](https://github.com/georgesittas))* - [`866042d`](https://github.com/tobymao/sqlglot/commit/866042d0268da0cebce042c0868878c0fb39c3d1) - Remove TokenType.APPLY from table alias tokens *(PR [#5592](https://github.com/tobymao/sqlglot/pull/5592) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5591](https://github.com/tobymao/sqlglot/issues/5591) opened by [@saadbelgi](https://github.com/saadbelgi)* - [`b485f66`](https://github.com/tobymao/sqlglot/commit/b485f6666fa8625b7da45ef832b5d666fbb707ea) - **dremio**: improve `TO_CHAR` transpilability *(PR [#5580](https://github.com/tobymao/sqlglot/pull/5580) by [@jasonthomassql](https://github.com/jasonthomassql))* - 
[`81874e9`](https://github.com/tobymao/sqlglot/commit/81874e9c3aafcc2cf8fb443f65146c5b3598b9b3) - handle unknown types in `unit_to_str` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`f1269f5`](https://github.com/tobymao/sqlglot/commit/f1269f5ecfccfee4cdeeda5bfd10eb1c47994fad) - **tsql**: do not attach limit modifier to set operation *(PR [#5609](https://github.com/tobymao/sqlglot/pull/5609) by [@georgesittas](https://github.com/georgesittas))* - [`a6edf8e`](https://github.com/tobymao/sqlglot/commit/a6edf8ee3273a7736ed801ef8dea302613b119da) - **tsql**: Remove ORDER from set op modifiers too *(PR [#5626](https://github.com/tobymao/sqlglot/pull/5626) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5618](https://github.com/tobymao/sqlglot/issues/5618) opened by [@MQMMMQM](https://github.com/MQMMMQM)* - [`ce5840e`](https://github.com/tobymao/sqlglot/commit/ce5840ed615e162a93cd911ab6207160878fcc64) - **exasol**: update several dialect properties to correctly reflect semantics *(PR [#5642](https://github.com/tobymao/sqlglot/pull/5642) by [@nnamdi16](https://github.com/nnamdi16))* - [`3ab1d44`](https://github.com/tobymao/sqlglot/commit/3ab1d4487279cab3be2d3764e51516c6db21629d) - **generator**: Wrap CONCAT items with COALESCE less aggressively *(PR [#5641](https://github.com/tobymao/sqlglot/pull/5641) by [@VaggelisD](https://github.com/VaggelisD))* - [`045d2f0`](https://github.com/tobymao/sqlglot/commit/045d2f02649b0e6dc178c079e4e0db201ed9bf08) - **duckdb**: Transpile Spark's FIRST(col, TRUE) *(PR [#5644](https://github.com/tobymao/sqlglot/pull/5644) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5643](https://github.com/tobymao/sqlglot/issues/5643) opened by [@michal-clutch](https://github.com/michal-clutch)* - [`0427c7b`](https://github.com/tobymao/sqlglot/commit/0427c7b7aa9f8161324085a98c5f531fa35c8b0c) - **optimizer**: qualify columns for AggFunc with DISTINCT *(PR 
[#5708](https://github.com/tobymao/sqlglot/pull/5708) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5698](https://github.com/tobymao/sqlglot/issues/5698) opened by [@georgesittas](https://github.com/georgesittas)* - [`ec93497`](https://github.com/tobymao/sqlglot/commit/ec93497bac82090b88c6e749ec2adc99bbc23a61) - **bigquery**: support commands inside for loops *(PR [#5732](https://github.com/tobymao/sqlglot/pull/5732) by [@treysp](https://github.com/treysp))* - [`85845bb`](https://github.com/tobymao/sqlglot/commit/85845bb941ac9a4ee090a89cd3d3dab4ab5835a7) - **snowflake**: allow exclude as id var *(PR [#5764](https://github.com/tobymao/sqlglot/pull/5764) by [@treysp](https://github.com/treysp))* - [`db2d9cc`](https://github.com/tobymao/sqlglot/commit/db2d9cca9718fb196066dbf60840124917d1f8ac) - **tokenizer**: handle empty hex strings *(PR [#5763](https://github.com/tobymao/sqlglot/pull/5763) by [@paulolieuthier](https://github.com/paulolieuthier))* - :arrow_lower_right: *fixes issue [#5761](https://github.com/tobymao/sqlglot/issues/5761) opened by [@paulolieuthier](https://github.com/paulolieuthier)* - [`982257b`](https://github.com/tobymao/sqlglot/commit/982257b40973cdfc20a8d6dd9a1674cda7eb75c4) - **bigquery**: Crash when ARRAY_CONCAT is called with no expressions *(PR [#5755](https://github.com/tobymao/sqlglot/pull/5755) by [@ozadari](https://github.com/ozadari))* - [`24ca504`](https://github.com/tobymao/sqlglot/commit/24ca504360779c8a20a58accf506eb9600ac9bf8) - **bigquery**: Crash when ARRAY_CONCAT is called with no expressions *(PR [#5755](https://github.com/tobymao/sqlglot/pull/5755) by [@ozadari](https://github.com/ozadari))* - [`d8f6a37`](https://github.com/tobymao/sqlglot/commit/d8f6a376ba1fcca48e4a65923dd7a319ce6cfb91) - **optimizer**: allow aliased negative integer literal as group by column *(PR [#5791](https://github.com/tobymao/sqlglot/pull/5791) by [@treysp](https://github.com/treysp))* - 
[`1259576`](https://github.com/tobymao/sqlglot/commit/1259576283f1d45abb70ec40c60e500214a27b6f) - **hive**: DATE_SUB to DATE_ADD use parens if needed *(PR [#5796](https://github.com/tobymao/sqlglot/pull/5796) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5794](https://github.com/tobymao/sqlglot/issues/5794) opened by [@mingelchan](https://github.com/mingelchan)* - [`b0516b4`](https://github.com/tobymao/sqlglot/commit/b0516b4bc9cf2bba2cb57e6bb79ff09b5e2244e3) - **optimizer**: Do not qualify columns if a projection conflicts with a source *(PR [#5780](https://github.com/tobymao/sqlglot/pull/5780) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5262](https://github.com/TobikoData/sqlmesh/issues/5262) opened by [@mChlopek](https://github.com/mChlopek)* - [`8af0d40`](https://github.com/tobymao/sqlglot/commit/8af0d40055450f71b7e36e576f4a9a1104bc02b2) - **parser**: address edge case where `values` is used as an identifier *(PR [#5801](https://github.com/tobymao/sqlglot/pull/5801) by [@georgesittas](https://github.com/georgesittas))* - [`3726b33`](https://github.com/tobymao/sqlglot/commit/3726b33bb6b4ab286617f510e96e1fbd27c429f3) - **snowflake**: support nulls_first arg for array_sort *(PR [#5802](https://github.com/tobymao/sqlglot/pull/5802) by [@treysp](https://github.com/treysp))* - [`3408de0`](https://github.com/tobymao/sqlglot/commit/3408de09e50d2510c1a6f511dc2dec357059044f) - parsing quoted built-in data types *(PR [#5810](https://github.com/tobymao/sqlglot/pull/5810) by [@treysp](https://github.com/treysp))* - [`ad0b407`](https://github.com/tobymao/sqlglot/commit/ad0b407098e1611d4fc0e1f0916511337b9aefdb) - **postgres**: Mark 'BEGIN' as TokenType.BEGIN for transactions *(PR [#5826](https://github.com/tobymao/sqlglot/pull/5826) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5815](https://github.com/tobymao/sqlglot/issues/5815) opened by
[@karakanb](https://github.com/karakanb)* - [`e1a1b5b`](https://github.com/tobymao/sqlglot/commit/e1a1b5befefb0ca30ac1310cecb82a44f6089034) - **snowflake**: transpile BigQuery's `&` to `BITAND` *(PR [#5827](https://github.com/tobymao/sqlglot/pull/5827) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`32d0278`](https://github.com/tobymao/sqlglot/commit/32d027827eaa7aa0cd9faf2ac1f84739f914050f) - parse and generation of BITWISE AGG funcs across dialects *(PR [#5831](https://github.com/tobymao/sqlglot/pull/5831) by [@geooo109](https://github.com/geooo109))* - [`5f39a83`](https://github.com/tobymao/sqlglot/commit/5f39a83f1ff957aca57eb4745f83c296436acaac) - **bigquery**: properly generate `LIMIT` for `STRING_AGG` *(PR [#5830](https://github.com/tobymao/sqlglot/pull/5830) by [@georgesittas](https://github.com/georgesittas))* - [`f3d55c0`](https://github.com/tobymao/sqlglot/commit/f3d55c05c8411c9871f8ca4d23f726f976c9236b) - remove always token *(PR [#5832](https://github.com/tobymao/sqlglot/pull/5832) by [@tobymao](https://github.com/tobymao))* - [`1724775`](https://github.com/tobymao/sqlglot/commit/1724775429f66c2768864c8f96ace861eaa435fd) - support types() with no args *(PR [#5833](https://github.com/tobymao/sqlglot/pull/5833) by [@tobymao](https://github.com/tobymao))* - [`31c82c6`](https://github.com/tobymao/sqlglot/commit/31c82c6d6cd402e59cb59a94daafd22410eae0f6) - support `case.*` *(PR [#5835](https://github.com/tobymao/sqlglot/pull/5835) by [@georgesittas](https://github.com/georgesittas))* - [`c00f73b`](https://github.com/tobymao/sqlglot/commit/c00f73bac2530a62c25093c60bf02d0a4231bb0b) - window spec no and only exclude *(PR [#5834](https://github.com/tobymao/sqlglot/pull/5834) by [@tobymao](https://github.com/tobymao))* - [`5e7979f`](https://github.com/tobymao/sqlglot/commit/5e7979f3cf5f7996e198ddd81069d49a4a3b9391) - select session *(PR [#5836](https://github.com/tobymao/sqlglot/pull/5836) by [@tobymao](https://github.com/tobymao))* -
[`9c8a600`](https://github.com/tobymao/sqlglot/commit/9c8a6001f41816035f391d046eb9692d6f13cefc) - **snowflake**: correct parsing of TO_VARCHAR *(PR [#5840](https://github.com/tobymao/sqlglot/pull/5840) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5837](https://github.com/tobymao/sqlglot/issues/5837) opened by [@ultrabear](https://github.com/ultrabear)* - [`f3d07fd`](https://github.com/tobymao/sqlglot/commit/f3d07fd8a106b034f64bb100291671c0fe39a106) - **snowflake**: Enable parsing of COPY INTO without files list *(PR [#5841](https://github.com/tobymao/sqlglot/pull/5841) by [@whummer](https://github.com/whummer))* - [`0ffb1fa`](https://github.com/tobymao/sqlglot/commit/0ffb1faac3b32aad845306eed0e000ff0d055554) - **duckdb**: transpile joins without ON/USING to CROSS JOIN *(PR [#5804](https://github.com/tobymao/sqlglot/pull/5804) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5795](https://github.com/tobymao/sqlglot/issues/5795) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`1e9aef1`](https://github.com/tobymao/sqlglot/commit/1e9aef1bb20f4dc5e9c03d59cb3165c235c11ce1) - **optimizer**: convert NULL annotations to UNKNOWN *(PR [#5842](https://github.com/tobymao/sqlglot/pull/5842) by [@georgesittas](https://github.com/georgesittas))* - [`bbcf0d4`](https://github.com/tobymao/sqlglot/commit/bbcf0d4404ea014f08319c44313719b4377adcdb) - **duckdb**: support trailing commas before `FOR` in pivot, fixes [#5843](https://github.com/tobymao/sqlglot/pull/5843) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ad8a408`](https://github.com/tobymao/sqlglot/commit/ad8a408a4e3e26e32472fc55c67b44687992ae47) - **parser**: more robust nested pipe syntax *(PR [#5845](https://github.com/tobymao/sqlglot/pull/5845) by [@geooo109](https://github.com/geooo109))* - [`44c9e70`](https://github.com/tobymao/sqlglot/commit/44c9e70bd8c9421035eb0e87e4286061ec5d2fa8) - **optimizer**: add tests for 
snowflake STARTSWITH function *(PR [#5847](https://github.com/tobymao/sqlglot/pull/5847) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0779c2d`](https://github.com/tobymao/sqlglot/commit/0779c2d4e8ce0228592de6882763940783fa5e87) - support BIT_X aggregates again for duckdb, postgres *(PR [#5851](https://github.com/tobymao/sqlglot/pull/5851) by [@georgesittas](https://github.com/georgesittas))* - [`d131aab`](https://github.com/tobymao/sqlglot/commit/d131aab6815bf77d444a763d9bb4028d8f0e742d) - **redshift**: convert FETCH clauses to LIMIT for Redshift dialect *(PR [#5848](https://github.com/tobymao/sqlglot/pull/5848) by [@tomasmontielp](https://github.com/tomasmontielp))* - [`b22c4ec`](https://github.com/tobymao/sqlglot/commit/b22c4ecf4c032d89ca737f01d614102aa9c2b1ed) - **fabric**: UUID to UNIQUEIDENTIFIER *(PR [#5863](https://github.com/tobymao/sqlglot/pull/5863) by [@fresioAS](https://github.com/fresioAS))* - [`03d4f49`](https://github.com/tobymao/sqlglot/commit/03d4f49d92cd034d37074359b8c2cf96c5c3f5cf) - **clickhouse**: arrays are 1-indexed *(PR [#5862](https://github.com/tobymao/sqlglot/pull/5862) by [@joeyutong](https://github.com/joeyutong))* - [`1d9e357`](https://github.com/tobymao/sqlglot/commit/1d9e357fb7549635ca25c6c42299880d7864e074) - **optimizer**: expand columns on the LHS of recursive CTEs *(PR [#5872](https://github.com/tobymao/sqlglot/pull/5872) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5814](https://github.com/tobymao/sqlglot/issues/5814) opened by [@suresh-summation](https://github.com/suresh-summation)* - [`7fcc52a`](https://github.com/tobymao/sqlglot/commit/7fcc52a22241c480c22b3e6f843e7a210c75a0ec) - **parser**: Require an explicit alias in EXCLUDE/RENAME/REPLACE star ops *(PR [#5892](https://github.com/tobymao/sqlglot/pull/5892) by [@VaggelisD](https://github.com/VaggelisD))* - 
[`5fdcc65`](https://github.com/tobymao/sqlglot/commit/5fdcc651277ba4e86e11d0c5952a56e40299a998) - **snowflake**: parse OCTET_LENGTH *(PR [#5900](https://github.com/tobymao/sqlglot/pull/5900) by [@geooo109](https://github.com/geooo109))* - [`f5409df`](https://github.com/tobymao/sqlglot/commit/f5409df64ed6069880669878db687e4b98c3e280) - **optimizer**: use column name in struct type annotation *(PR [#5903](https://github.com/tobymao/sqlglot/pull/5903) by [@georgesittas](https://github.com/georgesittas))* - [`74886d8`](https://github.com/tobymao/sqlglot/commit/74886d82f70c9317af51c77b322e67a6aa260a5e) - **snowflake**: transpile BQ UNNEST with alias *(PR [#5897](https://github.com/tobymao/sqlglot/pull/5897) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5895](https://github.com/tobymao/sqlglot/issues/5895) opened by [@YuvalOmerRep](https://github.com/YuvalOmerRep)* - [`bd3e965`](https://github.com/tobymao/sqlglot/commit/bd3e9655aa72ffef8a9e0221205fa2c3915ef58b) - allow `lock` to be used as an identifier *(commit by [@georgesittas](https://github.com/georgesittas))* - [`2d0d908`](https://github.com/tobymao/sqlglot/commit/2d0d908b5bbc32ff3bc92eb1ae9fc6e5ac3409bc) - produce TableAlias instead of Alias for USING in merge builder *(PR [#5911](https://github.com/tobymao/sqlglot/pull/5911) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5910](https://github.com/tobymao/sqlglot/issues/5910) opened by [@deepyaman](https://github.com/deepyaman)* - [`0e256b3`](https://github.com/tobymao/sqlglot/commit/0e256b3f864bc2d026817bd08e89ee89f44ad256) - edge case with parsing `interval` as identifier *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d127051`](https://github.com/tobymao/sqlglot/commit/d1270517c3e124ca59caf29e4506eb3848f7452e) - precedence issue with column operator parsing *(PR [#5914](https://github.com/tobymao/sqlglot/pull/5914) by 
[@georgesittas](https://github.com/georgesittas))* - [`6807a32`](https://github.com/tobymao/sqlglot/commit/6807a32cccf984dc13a30b815750b2c41374b845) - escape byte string delimiters *(PR [#5916](https://github.com/tobymao/sqlglot/pull/5916) by [@georgesittas](https://github.com/georgesittas))* - [`22c7ed7`](https://github.com/tobymao/sqlglot/commit/22c7ed7734b41ca544bb67bcc1ca4151f6d5f05f) - **clickhouse**: parse tuple *(PR [#5920](https://github.com/tobymao/sqlglot/pull/5920) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5913](https://github.com/tobymao/sqlglot/issues/5913) opened by [@tiagoskaneta](https://github.com/tiagoskaneta)* - [`223160b`](https://github.com/tobymao/sqlglot/commit/223160bd7914d51e9ec1abb8d0f1053e13a65c98) - **parser**: NULLABLE as an identifier *(PR [#5921](https://github.com/tobymao/sqlglot/pull/5921) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5919](https://github.com/tobymao/sqlglot/issues/5919) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`42cfc79`](https://github.com/tobymao/sqlglot/commit/42cfc79ce120dee83084e2bb6b8bbd19f45bf06f) - **snowflake**: parse DAYOFWEEKISO *(PR [#5925](https://github.com/tobymao/sqlglot/pull/5925) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5924](https://github.com/tobymao/sqlglot/issues/5924) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`0be2cb4`](https://github.com/tobymao/sqlglot/commit/0be2cb448ee1a5ac020ac47e9944875c30e42632) - **postgres**: support `DISTINCT` qualifier in `JSON_AGG` fixes [#5935](https://github.com/tobymao/sqlglot/pull/5935) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e34b2e1`](https://github.com/tobymao/sqlglot/commit/e34b2e14d1f87d095955765173a5e17fc9985220) - allow grouping set parser to consume more syntax fixes [#5937](https://github.com/tobymao/sqlglot/pull/5937) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`3846d4d`](https://github.com/tobymao/sqlglot/commit/3846d4dcdf8cbf8e90b2661083a567ab0547ad3c) - **solr**: properly support OR alternative operator *(commit by [@georgesittas](https://github.com/georgesittas))* - [`df428d5`](https://github.com/tobymao/sqlglot/commit/df428d516113a47ae50d04cd50a250830589c072) - **parser**: interval identifier followed by END *(PR [#5944](https://github.com/tobymao/sqlglot/pull/5944) by [@geooo109](https://github.com/geooo109))* - [`e178d16`](https://github.com/tobymao/sqlglot/commit/e178d1674a71e6f35a6acfa8f4a317f0fe2e4516) - **duckdb**: UNNEST as table *(PR [#5953](https://github.com/tobymao/sqlglot/pull/5953) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5952](https://github.com/tobymao/sqlglot/issues/5952) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`24feb8e`](https://github.com/tobymao/sqlglot/commit/24feb8ee0bc43f3f14fd768c9a0d986355becea2) - **parser**: parse `UPDATE` clauses in any order *(PR [#5958](https://github.com/tobymao/sqlglot/pull/5958) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5956](https://github.com/tobymao/sqlglot/issues/5956) opened by [@sfc-gh-clathrope](https://github.com/sfc-gh-clathrope)* - [`980f99a`](https://github.com/tobymao/sqlglot/commit/980f99a4cc0613012a189ee5636af37ec736040c) - **snowflake**: properly generate inferred `STRUCT` data types *(PR [#5954](https://github.com/tobymao/sqlglot/pull/5954) by [@georgesittas](https://github.com/georgesittas))* - [`5432976`](https://github.com/tobymao/sqlglot/commit/543297680755344185e0f306843bc4909f4f75ed) - **bigquery**: allow GRANT as an id var *(PR [#5965](https://github.com/tobymao/sqlglot/pull/5965) by [@treysp](https://github.com/treysp))* - [`7a3744f`](https://github.com/tobymao/sqlglot/commit/7a3744f203b93211e5dd97e6730b6bf59d6d96e0) - **sqlite**: support `RANGE CURRENT ROW` in window spec 
*(commit by [@georgesittas](https://github.com/georgesittas))* - [`c3bdb3c`](https://github.com/tobymao/sqlglot/commit/c3bdb3cd1af1809ed82be0ae40744d9fffc8ce18) - **starrocks**: array start index is 1, support array_flatten, fixes [#5983](https://github.com/tobymao/sqlglot/pull/5983) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`51b1bb1`](https://github.com/tobymao/sqlglot/commit/51b1bb178fa952edc13b2cbc6f624d30b0bde798) - move `WATERMARK` logic to risingwave fixes [#5989](https://github.com/tobymao/sqlglot/pull/5989) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`033ddf0`](https://github.com/tobymao/sqlglot/commit/033ddf04da895f1f5d38aff5361b2ae0793fefea) - **optimizer**: convert INNER JOINs to LEFT JOINs when merging LEFT JOIN subqueries *(PR [#5980](https://github.com/tobymao/sqlglot/pull/5980) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5969](https://github.com/tobymao/sqlglot/issues/5969) opened by [@karta0807913](https://github.com/karta0807913)* - [`c7657fb`](https://github.com/tobymao/sqlglot/commit/c7657fbd27a4350c424ef65947471ab9ec086831) - remove `unalias_group_by` transformation since it is unsafe *(PR [#5997](https://github.com/tobymao/sqlglot/pull/5997) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5995](https://github.com/tobymao/sqlglot/issues/5995) opened by [@capricornsky0119](https://github.com/capricornsky0119)* - [`b6f9694`](https://github.com/tobymao/sqlglot/commit/b6f9694c535cdd1403a63036cc246fda4e6d4d22) - **optimizer**: avoid merging subquery with JOIN when outer query uses JOIN *(PR [#5999](https://github.com/tobymao/sqlglot/pull/5999) by [@geooo109](https://github.com/geooo109))* - [`23fd7b9`](https://github.com/tobymao/sqlglot/commit/23fd7b9116541b96e5d89389e862c6004e92d109) - respect multi-part Column units instead of converting to Var *(PR [#6005](https://github.com/tobymao/sqlglot/pull/6005) by 
[@georgesittas](https://github.com/georgesittas))* - [`be1cdc8`](https://github.com/tobymao/sqlglot/commit/be1cdc81b511d462b710b50941d5c2770d901e91) - **duckdb**: Fix roundtrip of ~ operator *(PR [#6017](https://github.com/tobymao/sqlglot/pull/6017) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6016](https://github.com/tobymao/sqlglot/issues/6016) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`27c278f`](https://github.com/tobymao/sqlglot/commit/27c278f562f5ce98a1a4d31f8e66f148a1f42236) - **parser**: Allow LIMIT with % percentage *(PR [#6019](https://github.com/tobymao/sqlglot/pull/6019) by [@VaggelisD](https://github.com/VaggelisD))* - [`39bf3f8`](https://github.com/tobymao/sqlglot/commit/39bf3f893389663796cdd799ef0f1e684f315a01) - **parser**: Allow CUBE & ROLLUP inside GROUPING SETS *(PR [#6018](https://github.com/tobymao/sqlglot/pull/6018) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6015](https://github.com/tobymao/sqlglot/issues/6015) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`ba7ad34`](https://github.com/tobymao/sqlglot/commit/ba7ad341d5ee1298b8fe54be11ca6252c1a44c99) - **duckdb**: Parse ROW type as STRUCT *(PR [#6020](https://github.com/tobymao/sqlglot/pull/6020) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6012](https://github.com/tobymao/sqlglot/issues/6012) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`718d6bb`](https://github.com/tobymao/sqlglot/commit/718d6bbf7f40e5b3e99563e2f1ac9eadeff57c3d) - handle unicode heredoc tags & Rust grapheme clusters properly *(PR [#6024](https://github.com/tobymao/sqlglot/pull/6024) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6010](https://github.com/tobymao/sqlglot/issues/6010) opened by [@denis-komarov](https://github.com/denis-komarov)* - 
[`c8cfb9d`](https://github.com/tobymao/sqlglot/commit/c8cfb9db2e789be2dc7f8a154082a9210b736502) - **snowflake**: transpile ARRAY_CONTAINS with VARIANT CAST *(PR [#6029](https://github.com/tobymao/sqlglot/pull/6029) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6026](https://github.com/tobymao/sqlglot/issues/6026) opened by [@Birkman](https://github.com/Birkman)* - [`6a6ca92`](https://github.com/tobymao/sqlglot/commit/6a6ca927c4e6e06f5cb38ad1153a8b556999ef90) - **parser**: Allow nested GROUPING SETS *(PR [#6041](https://github.com/tobymao/sqlglot/pull/6041) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6038](https://github.com/tobymao/sqlglot/issues/6038) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`41baeaa`](https://github.com/tobymao/sqlglot/commit/41baeaa1530c5419c945409133e3b7caa5250ec7) - **optimizer**: more robust CROSS JOIN substitution and JOIN reordering *(PR [#6021](https://github.com/tobymao/sqlglot/pull/6021) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6009](https://github.com/tobymao/sqlglot/issues/6009) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`e2f299f`](https://github.com/tobymao/sqlglot/commit/e2f299f5ad18d75a394e55bd1ee59ed243d77e54) - allow subqueries to have modifiers closes [#6014](https://github.com/tobymao/sqlglot/pull/6014) *(PR [#6034](https://github.com/tobymao/sqlglot/pull/6034) by [@tobymao](https://github.com/tobymao))* - [`0d65266`](https://github.com/tobymao/sqlglot/commit/0d6526693f8e7dda9b7c180d31c364bde91afc72) - parse lambda for arg_min/max arguments closes [#6036](https://github.com/tobymao/sqlglot/pull/6036) *(PR [#6042](https://github.com/tobymao/sqlglot/pull/6042) by [@georgesittas](https://github.com/georgesittas))* - [`0939d69`](https://github.com/tobymao/sqlglot/commit/0939d69223a860581b1c30cc2f762294946b93f3) - move odbc date literal handling in t-sql closes 
[#6037](https://github.com/tobymao/sqlglot/pull/6037) *(PR [#6044](https://github.com/tobymao/sqlglot/pull/6044) by [@georgesittas](https://github.com/georgesittas))* - [`65848e5`](https://github.com/tobymao/sqlglot/commit/65848e5a3e4c1cb26e6ca4deb7819a282838c3c2) - **tsql**: UPDATE with OPTIONS *(PR [#6043](https://github.com/tobymao/sqlglot/pull/6043) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6033](https://github.com/tobymao/sqlglot/issues/6033) opened by [@ligfx](https://github.com/ligfx)* - [`3bb6bb3`](https://github.com/tobymao/sqlglot/commit/3bb6bb3e5193ed53c803c3786a1791f15cd2f89a) - **parser**: support :: cast operator after IS NULL/IS NOT NULL *(PR [#6056](https://github.com/tobymao/sqlglot/pull/6056) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6055](https://github.com/tobymao/sqlglot/issues/6055) opened by [@vchan](https://github.com/vchan)* - [`2c7cc29`](https://github.com/tobymao/sqlglot/commit/2c7cc29a329dcbaaa90a6f857d2383d2967ea6cc) - **duckdb**: Transform exp.HexString to BLOB in hex notation *(PR [#6045](https://github.com/tobymao/sqlglot/pull/6045) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6035](https://github.com/tobymao/sqlglot/issues/6035) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`e7833de`](https://github.com/tobymao/sqlglot/commit/e7833de9744a4aa69d244285e7f6f7281af178ba) - **parser**: support DELETE with USING and multiple VALUES *(PR [#6072](https://github.com/tobymao/sqlglot/pull/6072) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6070](https://github.com/tobymao/sqlglot/issues/6070) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`2238ac2`](https://github.com/tobymao/sqlglot/commit/2238ac27478bd272ba39928bbec1075c4191ee1b) - **duckdb**: transpile timestamp literals in datediff fixes [#6083](https://github.com/tobymao/sqlglot/pull/6083) *(PR 
[#6086](https://github.com/tobymao/sqlglot/pull/6086) by [@georgesittas](https://github.com/georgesittas))* - [`bef541c`](https://github.com/tobymao/sqlglot/commit/bef541cec36f8c4295f815c3f5cd22491738901b) - **parser**: query mods and set ops in FROM-first syntax *(PR [#6092](https://github.com/tobymao/sqlglot/pull/6092) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6088](https://github.com/tobymao/sqlglot/issues/6088) opened by [@denis-komarov](https://github.com/denis-komarov)* - :arrow_lower_right: *fixes issue [#6091](https://github.com/tobymao/sqlglot/issues/6091) opened by [@denis-komarov](https://github.com/denis-komarov)* - :arrow_lower_right: *fixes issue [#6093](https://github.com/tobymao/sqlglot/issues/6093) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`5109890`](https://github.com/tobymao/sqlglot/commit/510989043d18baa17502a971262462814a2eb5be) - **parser**: VALUES with ORDER BY/LIMIT/OFFSET *(PR [#6094](https://github.com/tobymao/sqlglot/pull/6094) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6087](https://github.com/tobymao/sqlglot/issues/6087) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`4b062c8`](https://github.com/tobymao/sqlglot/commit/4b062c850bd9867be0d622f3f526762fa2b72302) - consume more syntax for cubes/rollups fixes [#6101](https://github.com/tobymao/sqlglot/pull/6101) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`f00866a`](https://github.com/tobymao/sqlglot/commit/f00866aeb8b7f51e27173c688225fe16d777eb1a) - **duckdb**: 1 arg FORMAT func *(PR [#6109](https://github.com/tobymao/sqlglot/pull/6109) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6108](https://github.com/tobymao/sqlglot/issues/6108) opened by [@erindru](https://github.com/erindru)* - [`77dfd5a`](https://github.com/tobymao/sqlglot/commit/77dfd5a41bb9ce5450e0f6b7a78c953c8ade14d5) - lineage does not modify sql 
input if expression *(PR [#6113](https://github.com/tobymao/sqlglot/pull/6113) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6112](https://github.com/tobymao/sqlglot/issues/6112) opened by [@snovik75](https://github.com/snovik75)* - [`06f40f9`](https://github.com/tobymao/sqlglot/commit/06f40f900ce693ba4203514e422cba8cda0dbb07) - **optimizer**: don't simplify x XOR x due to NULL semantics *(PR [#6115](https://github.com/tobymao/sqlglot/pull/6115) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6104](https://github.com/tobymao/sqlglot/issues/6104) opened by [@dllggyx](https://github.com/dllggyx)* - [`03e2dff`](https://github.com/tobymao/sqlglot/commit/03e2dff9b074dc228cf3854ff1f4357e091aa9b3) - allow parsing `analyze` as an identifier fixes [#6123](https://github.com/tobymao/sqlglot/pull/6123) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8744431`](https://github.com/tobymao/sqlglot/commit/874443148c8ec2a773dfaca5da10d3587a49de3e) - transpile bigquery DATETIME_DIFF to duckdb *(PR [#6126](https://github.com/tobymao/sqlglot/pull/6126) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *fixes issue [#6107](https://github.com/tobymao/sqlglot/issues/6107) opened by [@izeigerman](https://github.com/izeigerman)* - [`b94e81b`](https://github.com/tobymao/sqlglot/commit/b94e81b42b89c75625b2da779c0f53777d9b6b48) - **optimizer**: avoid removing string literals from WHERE clause *(PR [#6131](https://github.com/tobymao/sqlglot/pull/6131) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6128](https://github.com/tobymao/sqlglot/issues/6128) opened by [@dllggyx](https://github.com/dllggyx)* - [`e2129c6`](https://github.com/tobymao/sqlglot/commit/e2129c6766ca1f10ff6663bec98be984abb33c91) - **optimizer**: Do not consider BIT_COUNT an aggregate function *(PR [#6135](https://github.com/tobymao/sqlglot/pull/6135) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6130](https://github.com/tobymao/sqlglot/issues/6130) opened by [@dllggyx](https://github.com/dllggyx)* - [`03bfeed`](https://github.com/tobymao/sqlglot/commit/03bfeed56c5c2f143ce2e1be38d519f902d19961) - **starrocks**: disable IS TRUE/FALSE syntax support *(PR [#6145](https://github.com/tobymao/sqlglot/pull/6145) by [@petrikoro](https://github.com/petrikoro))* - :arrow_lower_right: *fixes issue [#6144](https://github.com/tobymao/sqlglot/issues/6144) opened by [@petrikoro](https://github.com/petrikoro)* - [`d136414`](https://github.com/tobymao/sqlglot/commit/d136414e520270ac9ab2fd8e9df4691d269b3af0) - **optimizer**: avoid simplifying AND with NULL *(PR [#6148](https://github.com/tobymao/sqlglot/pull/6148) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6136](https://github.com/tobymao/sqlglot/issues/6136) opened by [@dllggyx](https://github.com/dllggyx)* - [`1fd9991`](https://github.com/tobymao/sqlglot/commit/1fd99911a60f0543fbc79221a8c6a6f232ed0a2a) - **clickhouse**: NOT + IN precedence *(PR [#6149](https://github.com/tobymao/sqlglot/pull/6149) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6143](https://github.com/tobymao/sqlglot/issues/6143) opened by [@mlipiev](https://github.com/mlipiev)* - [`3acf796`](https://github.com/tobymao/sqlglot/commit/3acf7965105a098fea6336df0c304d94acbd05ec) - **duckdb**: Allow ESCAPE NULL *(PR [#6164](https://github.com/tobymao/sqlglot/pull/6164) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6160](https://github.com/tobymao/sqlglot/issues/6160) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`f7f1fca`](https://github.com/tobymao/sqlglot/commit/f7f1fca39a75df16ebb93f038e6277a25b8be6b9) - **duckdb**: Support positional index in list comprehension *(PR [#6163](https://github.com/tobymao/sqlglot/pull/6163) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6156](https://github.com/tobymao/sqlglot/issues/6156) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`d382a31`](https://github.com/tobymao/sqlglot/commit/d382a3106d5ce2e9b75527aacd4a37d1f8e16d18) - **optimizer**: simplify double negation only if the inner expr is BOOLEAN *(PR [#6151](https://github.com/tobymao/sqlglot/pull/6151) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6129](https://github.com/tobymao/sqlglot/issues/6129) opened by [@dllggyx](https://github.com/dllggyx)* - [`dfe6b3c`](https://github.com/tobymao/sqlglot/commit/dfe6b3c8e6db40e22e626e2d56e9a7008dd75c32) - **optimizer**: Disambiguate JOIN ON columns during qualify *(PR [#6155](https://github.com/tobymao/sqlglot/pull/6155) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6132](https://github.com/tobymao/sqlglot/issues/6132) opened by [@Fosly](https://github.com/Fosly)* - [`f267ece`](https://github.com/tobymao/sqlglot/commit/f267ecea92b0751f6b35a4ad0c70fe6754e49038) - normalize before qualifying tables *(PR [#6176](https://github.com/tobymao/sqlglot/pull/6176) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6167](https://github.com/tobymao/sqlglot/issues/6167) opened by [@schelip](https://github.com/schelip)* - [`ef87520`](https://github.com/tobymao/sqlglot/commit/ef875204596b8529f3358025c7a61d757a999bdc) - **postgres, duckdb**: Transpile `REGEXP_REPLACE` with 'g' option *(PR [#6174](https://github.com/tobymao/sqlglot/pull/6174) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6170](https://github.com/tobymao/sqlglot/issues/6170) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`51a8d70`](https://github.com/tobymao/sqlglot/commit/51a8d700a9602278d1e98425af0fa87d02c739fe) - **parser**: allow LIMIT % OFFSET *(PR 
[#6184](https://github.com/tobymao/sqlglot/pull/6184) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *fixes issue [#6166](https://github.com/tobymao/sqlglot/issues/6166) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`8bf0a9f`](https://github.com/tobymao/sqlglot/commit/8bf0a9fe8e167984dc2e7b43d52d3850e063da3f) - **duckdb**: Cast literal arg to timestamp for epoch_us function *(PR [#6190](https://github.com/tobymao/sqlglot/pull/6190) by [@vchan](https://github.com/vchan))* - [`93071e2`](https://github.com/tobymao/sqlglot/commit/93071e255406f62ea83dd89a3be4871b7edfb3fe) - **optimizer**: Fix simplify_parens from removing negated *(PR [#6194](https://github.com/tobymao/sqlglot/pull/6194) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6179](https://github.com/tobymao/sqlglot/issues/6179) opened by [@dllggyx](https://github.com/dllggyx)* - [`2ac3a03`](https://github.com/tobymao/sqlglot/commit/2ac3a03409d9239d0cf7fb265843d7837a0a3fcd) - **lineage**: correct star detection and add join star tests *(PR [#6185](https://github.com/tobymao/sqlglot/pull/6185) by [@lancewl](https://github.com/lancewl))* - [`c9ae2eb`](https://github.com/tobymao/sqlglot/commit/c9ae2ebdb86abdb767f2fcb00da0b6277b4aea45) - **duckdb**: transpile BigQuery TIMESTAMP_ADD to duckdb *(PR [#6188](https://github.com/tobymao/sqlglot/pull/6188) by [@toriwei](https://github.com/toriwei))* - [`ba0e17a`](https://github.com/tobymao/sqlglot/commit/ba0e17a25af417e24162bfab49c3074454a5c1a8) - **snowflake**: Transpile `ARRAY_CONCAT_AGG` to `ARRAY_FLATTEN(ARRAY_AGG(...))` *(PR [#6192](https://github.com/tobymao/sqlglot/pull/6192) by [@ozadari](https://github.com/ozadari))* - [`730e4cc`](https://github.com/tobymao/sqlglot/commit/730e4cc5b77bff9135667193cc0a65c24cdfb6b5) - **trino**: Allow 2nd arg for FIRST/LAST functions *(PR [#6205](https://github.com/tobymao/sqlglot/pull/6205) by [@VaggelisD](https://github.com/VaggelisD))* - 
:arrow_lower_right: *fixes issue [#6204](https://github.com/tobymao/sqlglot/issues/6204) opened by [@Harmuth94](https://github.com/Harmuth94)* - [`e7ddad1`](https://github.com/tobymao/sqlglot/commit/e7ddad10b5edf9b801d2151e3e5fca448754df0d) - **optimizer**: ensure `NULL` coerces into any type *(PR [#6211](https://github.com/tobymao/sqlglot/pull/6211) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4c4189b`](https://github.com/tobymao/sqlglot/commit/4c4189b4083d272a6e678d83b5c567a2e9c0d672) - Transpile CONCAT function to double pipe operators when source … *(PR [#6241](https://github.com/tobymao/sqlglot/pull/6241) by [@vchan](https://github.com/vchan))* - [`fc78d20`](https://github.com/tobymao/sqlglot/commit/fc78d2016d8f7d20c094df791f746de323cd3639) - **parser**: Unwrap subqueries without modifiers *(PR [#6247](https://github.com/tobymao/sqlglot/pull/6247) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6237](https://github.com/tobymao/sqlglot/issues/6237) opened by [@preet-sheth](https://github.com/preet-sheth)* - [`7ad4c17`](https://github.com/tobymao/sqlglot/commit/7ad4c177fbf8dda78aa8de1ca112f606b2fd5456) - **databricks**: Support table names in FROM STREAM *(PR [#6259](https://github.com/tobymao/sqlglot/pull/6259) by [@roveo](https://github.com/roveo))* - [`00abc39`](https://github.com/tobymao/sqlglot/commit/00abc393c9042e839457c5a6582e95cdb74356f3) - **generator**: handle casting for bytestrings *(PR [#6252](https://github.com/tobymao/sqlglot/pull/6252) by [@toriwei](https://github.com/toriwei))* - [`bcf2eac`](https://github.com/tobymao/sqlglot/commit/bcf2eace0baf1d85047841f36cb5c0082c61b29c) - **duckdb**: map int8 to bigint instead of tinyint fixes [#6269](https://github.com/tobymao/sqlglot/pull/6269) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ddea61d`](https://github.com/tobymao/sqlglot/commit/ddea61d83f6699c97cc7b25aabe01a138138bdb1) - **optimizer**: simplify connector 
complements only for non-null operands *(PR [#6214](https://github.com/tobymao/sqlglot/pull/6214) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6213](https://github.com/tobymao/sqlglot/issues/6213) opened by [@geooo109](https://github.com/geooo109)* - [`e17320e`](https://github.com/tobymao/sqlglot/commit/e17320ee3bdd0ef541d616c447b4973d12780dae) - Handle edge cases for DuckDB RANGE to Spark SEQUENCE transpilation *(PR [#6276](https://github.com/tobymao/sqlglot/pull/6276) by [@joeyutong](https://github.com/joeyutong))* - [`33b6218`](https://github.com/tobymao/sqlglot/commit/33b62183a15cdedf0b1ebd96fcb856afbe8879a0) - sqlsecurityproperty parseerror *(PR [#6280](https://github.com/tobymao/sqlglot/pull/6280) by [@ds-cbo](https://github.com/ds-cbo))* - :arrow_lower_right: *fixes issue [#6279](https://github.com/tobymao/sqlglot/issues/6279) opened by [@ds-cbo](https://github.com/ds-cbo)* - [`c02b64c`](https://github.com/tobymao/sqlglot/commit/c02b64c3524dd074c2108baaca668ab2607ac843) - **optimizer**: Handle pseudocolumns differently than columns *(PR [#6273](https://github.com/tobymao/sqlglot/pull/6273) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6256](https://github.com/tobymao/sqlglot/issues/6256) opened by [@azilya](https://github.com/azilya)* - [`05c5181`](https://github.com/tobymao/sqlglot/commit/05c5181b36a7ada32b96fc91bdfbf73b38a1a408) - **optimizer**: refactor `Connector` simplification to factor in types *(PR [#6152](https://github.com/tobymao/sqlglot/pull/6152) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6137](https://github.com/tobymao/sqlglot/issues/6137) opened by [@dllggyx](https://github.com/dllggyx)* - [`cb0bcff`](https://github.com/tobymao/sqlglot/commit/cb0bcff310e9acdf806fc98e99cb9938b747c771) - **duckdb**: cast UUID() output to varchar when source dialect UUID() returns string *(PR 
[#6284](https://github.com/tobymao/sqlglot/pull/6284) by [@toriwei](https://github.com/toriwei))* - [`f9287f7`](https://github.com/tobymao/sqlglot/commit/f9287f7d596a6d8a1e1cd2c48978a4dec77a96cb) - **optimizer**: robust deduplication of connectors *(PR [#6296](https://github.com/tobymao/sqlglot/pull/6296) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6182](https://github.com/tobymao/sqlglot/issues/6182) opened by [@dllggyx](https://github.com/dllggyx)* - [`ea0ea79`](https://github.com/tobymao/sqlglot/commit/ea0ea79c1c611b62c79f82f744fe0c98803598a3) - **clickhouse**: Parse `LIKE` functions *(PR [#6314](https://github.com/tobymao/sqlglot/pull/6314) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6313](https://github.com/tobymao/sqlglot/issues/6313) opened by [@CainYang](https://github.com/CainYang)* - [`bbd4c90`](https://github.com/tobymao/sqlglot/commit/bbd4c901a9550beb363758e6be1e1877d4e56f2c) - **sqlite**: support IS with identifier as RHS *(PR [#6316](https://github.com/tobymao/sqlglot/pull/6316) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6315](https://github.com/tobymao/sqlglot/issues/6315) opened by [@VLDB2026](https://github.com/VLDB2026)* - [`65d213a`](https://github.com/tobymao/sqlglot/commit/65d213a7662962d4226368590508fbf61675c055) - **dialect**: fix typo from millenium to millennium [#6321](https://github.com/tobymao/sqlglot/pull/6321) *(commit by [@lBilali](https://github.com/lBilali))* - [`c9d1615`](https://github.com/tobymao/sqlglot/commit/c9d16150a408a41daf704d2d0b0ebfce57425b81) - **tsql**: map iso_week with the correct python directive from strftime *(PR [#6322](https://github.com/tobymao/sqlglot/pull/6322) by [@lBilali](https://github.com/lBilali))* - [`85ddcc5`](https://github.com/tobymao/sqlglot/commit/85ddcc5eca22ac726582de454f2f12b9d4877634) - **bigquery**: Do not normalize JSON fields in dot notation *(PR 
[#6320](https://github.com/tobymao/sqlglot/pull/6320) by [@VaggelisD](https://github.com/VaggelisD))* - [`933e981`](https://github.com/tobymao/sqlglot/commit/933e98102fb39d24ae0350da13337d981287130a) - **optimizer**: more robust NULL reduction *(PR [#6327](https://github.com/tobymao/sqlglot/pull/6327) by [@geooo109](https://github.com/geooo109))* - [`e1c6d57`](https://github.com/tobymao/sqlglot/commit/e1c6d5716f80eb24b6d0a9c93e187a8c9f05e555) - **parser**: improve between .. preceding .. following parser fixes [#6332](https://github.com/tobymao/sqlglot/pull/6332) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`65706e8`](https://github.com/tobymao/sqlglot/commit/65706e8c7edeb7de674d427718eac181df206dc9) - avoid full traversal for pushdown_cte_alias_columns *(commit by [@tobymao](https://github.com/tobymao))* - [`c81258e`](https://github.com/tobymao/sqlglot/commit/c81258e9c26f637f6f8520051c159685c8b1cb7e) - **parser**: allow using OVER token as unquoted identifier *(PR [#6338](https://github.com/tobymao/sqlglot/pull/6338) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6337](https://github.com/tobymao/sqlglot/issues/6337) opened by [@VLDB2026](https://github.com/VLDB2026)* - [`73abfac`](https://github.com/tobymao/sqlglot/commit/73abfac4cec27350754c942be71175fa7bdfd1d0) - **redshift**: do not inherit postgres `ROUND` generator closes [#6340](https://github.com/tobymao/sqlglot/pull/6340) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0f79f2a`](https://github.com/tobymao/sqlglot/commit/0f79f2a55c4ba14d4a5fcfd01a0a727271992b8c) - **snowflake**: MAX_BY and MIN_BY with count should return plain `ARRAY` *(PR [#6343](https://github.com/tobymao/sqlglot/pull/6343) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`e1b6558`](https://github.com/tobymao/sqlglot/commit/e1b6558cb1a860bbd695f25b66e52064b57c0a84) - **tsql**: handle all datepart alternatives *(PR 
[#6324](https://github.com/tobymao/sqlglot/pull/6324) by [@lBilali](https://github.com/lBilali))* - [`06daa47`](https://github.com/tobymao/sqlglot/commit/06daa47dedebac672548e1db230b89f5c9eae84e) - **optimizer**: update annotated type of ARRAY_AGG to untyped array *(PR [#6347](https://github.com/tobymao/sqlglot/pull/6347) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`826db4d`](https://github.com/tobymao/sqlglot/commit/826db4d3c413941e3b0b31e1f907fabd017bd461) - **redshift**: properly parse default IAM_ROLE and AVRO/JSON formats in COPY *(PR [#6346](https://github.com/tobymao/sqlglot/pull/6346) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6345](https://github.com/tobymao/sqlglot/issues/6345) opened by [@zachary-povey](https://github.com/zachary-povey)* - [`c367bac`](https://github.com/tobymao/sqlglot/commit/c367bac878a3c17773009b54b9836e7b9a5b84fe) - **duckdb**: Support update without set in DuckDB merge when matched *(PR [#6357](https://github.com/tobymao/sqlglot/pull/6357) by [@themisvaltinos](https://github.com/themisvaltinos))* - [`df13a65`](https://github.com/tobymao/sqlglot/commit/df13a655646bd2ef5d8b4613670bb5fe48845b73) - unnest deep stuff *(PR [#6366](https://github.com/tobymao/sqlglot/pull/6366) by [@tobymao](https://github.com/tobymao))* - [`20e33fd`](https://github.com/tobymao/sqlglot/commit/20e33fd0d1bc1899727d023411e604f1ea9347b8) - **duckdb**: regexp_extract_all closes [#6380](https://github.com/tobymao/sqlglot/pull/6380) *(commit by [@tobymao](https://github.com/tobymao))* - [`d4c2256`](https://github.com/tobymao/sqlglot/commit/d4c2256fb493ed2f16c29694ae5c31517123d419) - **parser**: at time zone precedence *(PR [#6383](https://github.com/tobymao/sqlglot/pull/6383) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6359](https://github.com/tobymao/sqlglot/issues/6359) opened by [@parth-wisdom](https://github.com/parth-wisdom)* - 
[`4fb4d08`](https://github.com/tobymao/sqlglot/commit/4fb4d08ef8896bda434d4f89c21c669c6146fd02) - **oracle**: properly support table alias in the `INSERT` DML *(PR [#6374](https://github.com/tobymao/sqlglot/pull/6374) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6371](https://github.com/tobymao/sqlglot/issues/6371) opened by [@snovik75](https://github.com/snovik75)* - [`2169f5b`](https://github.com/tobymao/sqlglot/commit/2169f5b8f30b6c8be1635bb5648a1abf636e49a6) - **parser**: support SET with := *(PR [#6385](https://github.com/tobymao/sqlglot/pull/6385) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6384](https://github.com/tobymao/sqlglot/issues/6384) opened by [@AndyVW77](https://github.com/AndyVW77)* - [`50348ac`](https://github.com/tobymao/sqlglot/commit/50348ac31f784aa97bd09d5d6c6613fbd68402ee) - **mysql**: support order by clause for mysql delete statement *(PR [#6381](https://github.com/tobymao/sqlglot/pull/6381) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6372](https://github.com/tobymao/sqlglot/issues/6372) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`21d3859`](https://github.com/tobymao/sqlglot/commit/21d38590fec6cb55a1a03aeb2621bd9fca677496) - **bigquery**: Disable STRING_AGG sep canonicalization *(PR [#6395](https://github.com/tobymao/sqlglot/pull/6395) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6392](https://github.com/tobymao/sqlglot/issues/6392) opened by [@erindru](https://github.com/erindru)* - [`67f499d`](https://github.com/tobymao/sqlglot/commit/67f499dd497efdf4f3fc49dd75e49a77e036ee63) - **duckdb**: Make exp.DateFromParts more lenient *(PR [#6397](https://github.com/tobymao/sqlglot/pull/6397) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6394](https://github.com/tobymao/sqlglot/issues/6394) opened by 
[@denis-komarov](https://github.com/denis-komarov)* - [`39f8c37`](https://github.com/tobymao/sqlglot/commit/39f8c37aca755d97e1e41f232042d1c649e58908) - **parser**: support FROM-syntax with joins *(PR [#6402](https://github.com/tobymao/sqlglot/pull/6402) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6396](https://github.com/tobymao/sqlglot/issues/6396) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`9ddae4d`](https://github.com/tobymao/sqlglot/commit/9ddae4d56d1e3a15fc3b4b76ce3b3040683c220f) - **duckdb**: support IN with no paren *(PR [#6409](https://github.com/tobymao/sqlglot/pull/6409) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6407](https://github.com/tobymao/sqlglot/issues/6407) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`c7cb098`](https://github.com/tobymao/sqlglot/commit/c7cb0983a0fa463c43d2c4ee925816e9a1628c79) - **tokenizer**: Fix underscore separator with scientific notation *(PR [#6401](https://github.com/tobymao/sqlglot/pull/6401) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6393](https://github.com/tobymao/sqlglot/issues/6393) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`f5635d2`](https://github.com/tobymao/sqlglot/commit/f5635d2cc2a5612d6403bbf508b545f2a4e8f773) - **duckdb**: splice with col named after type closes [#6411](https://github.com/tobymao/sqlglot/pull/6411) *(commit by [@tobymao](https://github.com/tobymao))* - [`097d865`](https://github.com/tobymao/sqlglot/commit/097d865554d9ba2e226962fa71778ae0a6c596cb) - **duckdb**: pivot using cast closes [#6410](https://github.com/tobymao/sqlglot/pull/6410) *(commit by [@tobymao](https://github.com/tobymao))* - [`d038ad7`](https://github.com/tobymao/sqlglot/commit/d038ad7f036a140f3eae4bdde15824437d4e44ee) - **mysql**: support named primary keys for mysql *(PR [#6389](https://github.com/tobymao/sqlglot/pull/6389) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6382](https://github.com/tobymao/sqlglot/issues/6382) opened by [@AndyVW77](https://github.com/AndyVW77)* - [`4f3bb0d`](https://github.com/tobymao/sqlglot/commit/4f3bb0d6714bf89ff72e13e1398d8f01cefafb00) - **DuckDB**: Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… *(PR [#6414](https://github.com/tobymao/sqlglot/pull/6414) by [@vchan](https://github.com/vchan))* - [`e2f306f`](https://github.com/tobymao/sqlglot/commit/e2f306f1893a3f565cbbf7857ffd9795850aba7b) - interval column ops closes [#6416](https://github.com/tobymao/sqlglot/pull/6416) *(commit by [@tobymao](https://github.com/tobymao))* - [`8c314a8`](https://github.com/tobymao/sqlglot/commit/8c314a8b457a5c3ed470ac8fcff022fec881c248) - **duckdb**: support cte pivot for duckdb *(PR [#6413](https://github.com/tobymao/sqlglot/pull/6413) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6405](https://github.com/tobymao/sqlglot/issues/6405) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`92ee124`](https://github.com/tobymao/sqlglot/commit/92ee1241ea3088d4e63c094404252339c54ad0c1) - **optimizer**: postgres qualify GENERATE_SERIES and table projection *(PR [#6373](https://github.com/tobymao/sqlglot/pull/6373) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6358](https://github.com/tobymao/sqlglot/issues/6358) opened by [@metahexane](https://github.com/metahexane)* - [`7021d54`](https://github.com/tobymao/sqlglot/commit/7021d54ecf0ceab3c3606642cbfca8e080cc8613) - **tsql**: CEILING generation *(PR [#6477](https://github.com/tobymao/sqlglot/pull/6477) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6472](https://github.com/tobymao/sqlglot/issues/6472) opened by [@ricky-ho](https://github.com/ricky-ho)* - 
[`df4c1d3`](https://github.com/tobymao/sqlglot/commit/df4c1d37ff77151a74b5de3d119c7e03f5db85f4) - REGEXP_EXTRACT position arg overflow *(PR [#6458](https://github.com/tobymao/sqlglot/pull/6458) by [@treysp](https://github.com/treysp))* - :arrow_lower_right: *fixes issue [#6442](https://github.com/tobymao/sqlglot/issues/6442) opened by [@erindru](https://github.com/erindru)* - [`5a49c3f`](https://github.com/tobymao/sqlglot/commit/5a49c3f7a7619ad9e711ff2cd9e85b8606969b36) - **optimizer**: support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions *(PR [#6463](https://github.com/tobymao/sqlglot/pull/6463) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1b6076b`](https://github.com/tobymao/sqlglot/commit/1b6076bd5a64b044f52f5366244ba0746aca75e1) - wrap connectives generated due to transpiling LIKE ANY closes [#6493](https://github.com/tobymao/sqlglot/pull/6493) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`36ad534`](https://github.com/tobymao/sqlglot/commit/36ad534b14eabe9ee197017f5087e8e5190f8526) - **exasol**: qualified select list with "LOCAL" *(PR [#6450](https://github.com/tobymao/sqlglot/pull/6450) by [@nnamdi16](https://github.com/nnamdi16))* - [`52aceaa`](https://github.com/tobymao/sqlglot/commit/52aceaaa887dddb35f8ede5c2d9577fdeee35c48) - **optimizer**: annotate `HavingMax` by `this` *(PR [#6499](https://github.com/tobymao/sqlglot/pull/6499) by [@georgesittas](https://github.com/georgesittas))* - [`ce5487e`](https://github.com/tobymao/sqlglot/commit/ce5487ef2ec0a3de8fa79b9febf41236c05c04cc) - sources doesn't store columns, clean up this old code *(commit by [@tobymao](https://github.com/tobymao))* - [`3224235`](https://github.com/tobymao/sqlglot/commit/3224235c1b7a80511af11f7dbffe608a747a3df0) - make CTE builder produce AST consistent with parser closes [#6503](https://github.com/tobymao/sqlglot/pull/6503) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`9454a18`](https://github.com/tobymao/sqlglot/commit/9454a18cca41a510e61522f6b785d646980e2100) - uppercase join method, side, kind for consistency fixes [#6510](https://github.com/tobymao/sqlglot/pull/6510) *(PR [#6511](https://github.com/tobymao/sqlglot/pull/6511) by [@georgesittas](https://github.com/georgesittas))* - [`a6ec4b6`](https://github.com/tobymao/sqlglot/commit/a6ec4b688891691b26ab874a3401e370c0b8d574) - reorder join mark check in eliminate_join_marks *(PR [#6528](https://github.com/tobymao/sqlglot/pull/6528) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6527](https://github.com/tobymao/sqlglot/issues/6527) opened by [@snovik75](https://github.com/snovik75)* - [`9d06859`](https://github.com/tobymao/sqlglot/commit/9d0685923209c04747fa6fa2b35ee2e516453abc) - **optimizer**: annotate bigquery ARRAY when arg contains set operations *(PR [#6517](https://github.com/tobymao/sqlglot/pull/6517) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`241073d`](https://github.com/tobymao/sqlglot/commit/241073d886e0b4ad7b2252a8c8c394e717ef700a) - on_qualify type *(commit by [@tobymao](https://github.com/tobymao))* - [`2fd14ed`](https://github.com/tobymao/sqlglot/commit/2fd14ed32b3793444405005fb98342222b4d7956) - **optimizer**: query schema directly when type annotation fails for processing UNNEST source *(PR [#6451](https://github.com/tobymao/sqlglot/pull/6451) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`62b348c`](https://github.com/tobymao/sqlglot/commit/62b348ce46d014895bd17d89ccb0b3e186e46d15) - **tokenizer**: add support for noop string escapes *(PR [#6526](https://github.com/tobymao/sqlglot/pull/6526) by [@nian0114](https://github.com/nian0114))* - [`724e4b3`](https://github.com/tobymao/sqlglot/commit/724e4b3657018430e23976cf6a69989298521180) - **snowflake**: don't simplify match_condition closes [#6537](https://github.com/tobymao/sqlglot/pull/6537) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`12a6f7d`](https://github.com/tobymao/sqlglot/commit/12a6f7dc1b8604a2c9f4937654bd8bc458336110) - Only trigger integration tests on PR events *(PR [#6539](https://github.com/tobymao/sqlglot/pull/6539) by [@erindru](https://github.com/erindru))* - [`f21cf76`](https://github.com/tobymao/sqlglot/commit/f21cf763575b67084ea81a377c5bdb3e86041e4c) - **optimizer**: bq annotate SAFE_DIVIDE with both args as INT64 *(PR [#6543](https://github.com/tobymao/sqlglot/pull/6543) by [@geooo109](https://github.com/geooo109))* - [`4a57302`](https://github.com/tobymao/sqlglot/commit/4a5730242787920d0a2412aef495eb2eeaaa2119) - **optimizer**: ensure structs are annotated as unknown if any argument is unknown *(PR [#6544](https://github.com/tobymao/sqlglot/pull/6544) by [@georgesittas](https://github.com/georgesittas))* - [`63a2e49`](https://github.com/tobymao/sqlglot/commit/63a2e49485f237e1c7e16358c412acb5df50e22c) - **diff**: stop treating `None` args as leaves to be diffed *(PR [#6556](https://github.com/tobymao/sqlglot/pull/6556) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6554](https://github.com/tobymao/sqlglot/issues/6554) opened by [@GaryLiuGTA](https://github.com/GaryLiuGTA)* - [`c118af2`](https://github.com/tobymao/sqlglot/commit/c118af2f78af3e557f569c31b1561802338a48c4) - **lineage**: Fix GraphHTML edge 'from' key *(PR [#6571](https://github.com/tobymao/sqlglot/pull/6571) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6570](https://github.com/tobymao/sqlglot/issues/6570) opened by [@PhilHenson82](https://github.com/PhilHenson82)* - [`baeb656`](https://github.com/tobymao/sqlglot/commit/baeb656ee2ae354f91f36ebcaee60848e09f43b4) - **optimizer**: fallback scenario in get_table of resolver raises with wrong message *(PR [#6563](https://github.com/tobymao/sqlglot/pull/6563) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue 
[#6562](https://github.com/tobymao/sqlglot/issues/6562) opened by [@snovik75](https://github.com/snovik75)* - [`14dc1e5`](https://github.com/tobymao/sqlglot/commit/14dc1e5bc74b3b8907ba02bf89ad1763940c9ea2) - **snowflake**: make `DATE_PART` roundtrip *(PR [#6573](https://github.com/tobymao/sqlglot/pull/6573) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6565](https://github.com/tobymao/sqlglot/issues/6565) opened by [@erindru](https://github.com/erindru)* - [`8a44ad5`](https://github.com/tobymao/sqlglot/commit/8a44ad560cb65a34a722b257a82e69a41e7e45e0) - **bigquery**: Mark _DBT_MAX_PARTITION as pseudocolumn *(PR [#6572](https://github.com/tobymao/sqlglot/pull/6572) by [@VaggelisD](https://github.com/VaggelisD))* ### :recycle: Refactors - [`e441e16`](https://github.com/tobymao/sqlglot/commit/e441e16991626c2da2d38bc9c3a2b408e3f773bd) - make dump/pickling non-recursive to avoid hitting stack limits *(PR [#5850](https://github.com/tobymao/sqlglot/pull/5850) by [@tobymao](https://github.com/tobymao))* - [`095b2ac`](https://github.com/tobymao/sqlglot/commit/095b2ac3af230eff86d9bc1b0fd3a0a2095f151c) - clean up duckdb INSTALL tests *(commit by [@geooo109](https://github.com/geooo109))* - [`d425ba2`](https://github.com/tobymao/sqlglot/commit/d425ba26b96b368801f8f486fa375cd75105993d) - make hash and eq non recursive *(PR [#5966](https://github.com/tobymao/sqlglot/pull/5966) by [@tobymao](https://github.com/tobymao))* - [`8f00c80`](https://github.com/tobymao/sqlglot/commit/8f00c804a67209a5eca1fcb28aeb95941c58e583) - _parse_in expr len check *(commit by [@geooo109](https://github.com/geooo109))* - [`2c9d15c`](https://github.com/tobymao/sqlglot/commit/2c9d15c92da25c8456b2463c69aa56c8ec47c453) - replace direct arg manipulation *(PR [#6073](https://github.com/tobymao/sqlglot/pull/6073) by [@geooo109](https://github.com/geooo109))* - [`58dbce3`](https://github.com/tobymao/sqlglot/commit/58dbce30da5ab94af82247ab8a7eb85200d9b8af) - bq 
static type annotators *(PR [#6103](https://github.com/tobymao/sqlglot/pull/6103) by [@geooo109](https://github.com/geooo109))* - [`c970235`](https://github.com/tobymao/sqlglot/commit/c97023549623fe5974d6bff57e64339eff74187e) - clean up MONTHNAME test *(commit by [@geooo109](https://github.com/geooo109))* - [`6d775fd`](https://github.com/tobymao/sqlglot/commit/6d775fdb6091cb866c27c0f1141514b23d689284) - snowflake GREATEST type checks *(commit by [@geooo109](https://github.com/geooo109))* - [`e797fb1`](https://github.com/tobymao/sqlglot/commit/e797fb105f7fa4e7bd42698eda71037cae9fd155) - update `LIKE` operator when using functional syntax with spark dialect *(PR [#6173](https://github.com/tobymao/sqlglot/pull/6173) by [@themattmorris](https://github.com/themattmorris))* - :arrow_lower_right: *addresses issue [#6172](https://github.com/tobymao/sqlglot/issues/6172) opened by [@themattmorris](https://github.com/themattmorris)* - [`9c98fc2`](https://github.com/tobymao/sqlglot/commit/9c98fc2b39fef2bd052b60ba4e15a4b93fd66c00) - **optimizer**: avoid extra copy in simplify *(commit by [@geooo109](https://github.com/geooo109))* - [`43985fb`](https://github.com/tobymao/sqlglot/commit/43985fbcb9edea088119951c5c245a9606cf92ae) - **snowflake**: remove redundant tests for ANY_VALUE *(commit by [@geooo109](https://github.com/geooo109))* - [`bf7b032`](https://github.com/tobymao/sqlglot/commit/bf7b032baae0c0fd112054a7bed6fa2f56f32890) - clean up struct name inheritance *(PR [#6295](https://github.com/tobymao/sqlglot/pull/6295) by [@georgesittas](https://github.com/georgesittas))* - [`49e0f43`](https://github.com/tobymao/sqlglot/commit/49e0f43ba19739575987f2e9c52c2061a6f59717) - extra test for spark approx_top_k_accumulate *(commit by [@geooo109](https://github.com/geooo109))* - [`e4d1a4f`](https://github.com/tobymao/sqlglot/commit/e4d1a4fcd6741d679c5444bf023077d2aaa8f980) - **exasol**: map date/timestamp `TRUNC` to `DATE_TRUNC` *(PR 
[#6328](https://github.com/tobymao/sqlglot/pull/6328) by [@nnamdi16](https://github.com/nnamdi16))* - [`c6b0a63`](https://github.com/tobymao/sqlglot/commit/c6b0a6342a21d79635a26d40001c916d05d47cf7) - change version to be a tuple so that it can be pickled, also simpler *(commit by [@tobymao](https://github.com/tobymao))* - [`625654a`](https://github.com/tobymao/sqlglot/commit/625654a9623cc5407bfde922c29f32a8ee905a3b) - move resolver to own file *(commit by [@tobymao](https://github.com/tobymao))* - [`2d380e7`](https://github.com/tobymao/sqlglot/commit/2d380e72c9e3b842a8fe57c191f494c8872c00ee) - add test to make sure callback doesn't trigger ctes *(commit by [@tobymao](https://github.com/tobymao))* - [`1876c5a`](https://github.com/tobymao/sqlglot/commit/1876c5a86c3b737b7360c4fef25c44dc010b66db) - consolidate can_quote logic and fix an issue with identify=False *(PR [#6534](https://github.com/tobymao/sqlglot/pull/6534) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`720f634`](https://github.com/tobymao/sqlglot/commit/720f6343f6144e8986ec6b7e50419c3d7a331f0a) - Fix style on main, refactor exasol tests *(PR [#5527](https://github.com/tobymao/sqlglot/pull/5527) by [@VaggelisD](https://github.com/VaggelisD))* - [`5653501`](https://github.com/tobymao/sqlglot/commit/5653501606f041282b6315c3efa33b9a3baf8d98) - Refactor PR 5517 *(PR [#5526](https://github.com/tobymao/sqlglot/pull/5526) by [@VaggelisD](https://github.com/VaggelisD))* - [`d15dfe3`](https://github.com/tobymao/sqlglot/commit/d15dfe3f0f4444e4999ad65051b2474e62f422b3) - build type using dialect for bigquery *(PR [#5539](https://github.com/tobymao/sqlglot/pull/5539) by [@geooo109](https://github.com/geooo109))* - [`173e442`](https://github.com/tobymao/sqlglot/commit/173e4425b692728abffa8542324690823f984303) - refactor JSON_VALUE handling for MySQL and Trino *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`4c04c0c`](https://github.com/tobymao/sqlglot/commit/4c04c0ce859ab8314ed36fb8779f14c0fc2f1094) - use a valid SPDX identifier as license classifier *(PR [#5606](https://github.com/tobymao/sqlglot/pull/5606) by [@ecederstrand](https://github.com/ecederstrand))* - [`249f638`](https://github.com/tobymao/sqlglot/commit/249f638877ddd2a1732d1e6bc859793f3bc0622d) - add table to document dialect support level *(PR [#5628](https://github.com/tobymao/sqlglot/pull/5628) by [@georgesittas](https://github.com/georgesittas))* - [`3357125`](https://github.com/tobymao/sqlglot/commit/33571250d172d64a3e0450738b3ad330e5c0a795) - **doris**: refactor unique key prop generation *(PR [#5625](https://github.com/tobymao/sqlglot/pull/5625) by [@georgesittas](https://github.com/georgesittas))* - [`545f1ac`](https://github.com/tobymao/sqlglot/commit/545f1acd76bdc4e537209266984137f6c69ce622) - Clean up of PR5614 *(PR [#5648](https://github.com/tobymao/sqlglot/pull/5648) by [@VaggelisD](https://github.com/VaggelisD))* - [`41521e3`](https://github.com/tobymao/sqlglot/commit/41521e31b465acd51ab02b1ac4e5512b98175b7e) - bump sqlglotrs to 0.6.2 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b77d3da`](https://github.com/tobymao/sqlglot/commit/b77d3da8f2548858d2b9d8590fcde83e1ec62b8a) - remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake *(PR [#5766](https://github.com/tobymao/sqlglot/pull/5766) by [@treysp](https://github.com/treysp))* - [`005564a`](https://github.com/tobymao/sqlglot/commit/005564ab28cb14be469f09e89b01275d6e25874e) - **snowflake**: refactor logic related to ALTER SESSION *(commit by [@georgesittas](https://github.com/georgesittas))* - [`164fec1`](https://github.com/tobymao/sqlglot/commit/164fec1b36e3c7df41e2e5a5ad6b226fc5f76305) - **optimizer**: test type annotation for snowflake CHARINDEX function *(PR [#5805](https://github.com/tobymao/sqlglot/pull/5805) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - 
[`b244f30`](https://github.com/tobymao/sqlglot/commit/b244f30524846bd08d03a73410ae9b4674254ecd) - move `exp.Contains` to `BOOLEAN` entry in `TYPE_TO_EXPRESSIONS` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e8974e7`](https://github.com/tobymao/sqlglot/commit/e8974e70d9956ce7a5cb119ba465660f5f172a17) - **optimizer**: Add tests for snowflake likeall, likeany and ilikeany functions *(PR [#5908](https://github.com/tobymao/sqlglot/pull/5908) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`837890c`](https://github.com/tobymao/sqlglot/commit/837890c7e8bcc3695541bbe32fd8088eee70fea3) - handle badly formed binary expressions gracefully in type inference *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c18aaf8`](https://github.com/tobymao/sqlglot/commit/c18aaf80fd7375e89dfc8863da619d84f3257353) - cleanup *(commit by [@tobymao](https://github.com/tobymao))* - [`1514bc6`](https://github.com/tobymao/sqlglot/commit/1514bc640ec129a96aedd9e89bfd5d61e832d6b1) - **optimizer**: add type inference tests for Snowflake RPAD function *(PR [#5967](https://github.com/tobymao/sqlglot/pull/5967) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`050b89d`](https://github.com/tobymao/sqlglot/commit/050b89deb9be842f2ddd07c78ea201ec4eae4779) - **optimizer**: Annotate type for snowflake regexp function *(PR [#5970](https://github.com/tobymao/sqlglot/pull/5970) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`345c6a1`](https://github.com/tobymao/sqlglot/commit/345c6a153481a22d6df1b12ef1863e2133688fdf) - add uv support to Makefile *(PR [#5973](https://github.com/tobymao/sqlglot/pull/5973) by [@eakmanrq](https://github.com/eakmanrq))* - [`1b1c6f8`](https://github.com/tobymao/sqlglot/commit/1b1c6f8d418371d49f0d3511baf3c5e35dd3ef42) - coerce type for EXTRACT canonicalization *(PR [#5998](https://github.com/tobymao/sqlglot/pull/5998) by 
[@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5996](https://github.com/tobymao/sqlglot/issues/5996) opened by [@snovik75](https://github.com/snovik75)* - [`f00ae73`](https://github.com/tobymao/sqlglot/commit/f00ae735c8f185b4c6c132373c9fa9bbe58e37b7) - **optimizer**: Annotate type for sqrt function *(PR [#6003](https://github.com/tobymao/sqlglot/pull/6003) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ac97f14`](https://github.com/tobymao/sqlglot/commit/ac97f14ee1a576a276018f6c9ae1237ecf9ceda7) - simplify `SEARCH` Snowflake instantiation *(commit by [@georgesittas](https://github.com/georgesittas))* - [`5dd2ed3`](https://github.com/tobymao/sqlglot/commit/5dd2ed3c69cf9e8c3e327297e0cc932f0954e108) - bump sqlglotrs to 0.7.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7ac01c2`](https://github.com/tobymao/sqlglot/commit/7ac01c2ae9bc4375efb63c60e3221e85088fdd1f) - bump sqlglotrs to 0.7.1 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`9ab3a96`](https://github.com/tobymao/sqlglot/commit/9ab3a96a853639224c80a9daff4674187a1a84ef) - bump sqlglotrs to 0.7.2 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`15030a3`](https://github.com/tobymao/sqlglot/commit/15030a3996d005d79f27408a68d17f94c98aec68) - **optimizer**: Add tests for snowflake LN and LOG functions *(PR [#6048](https://github.com/tobymao/sqlglot/pull/6048) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`2ae8dbd`](https://github.com/tobymao/sqlglot/commit/2ae8dbd4d1b43bb27647144c32b2a781ff3edbeb) - push docs to `api-docs` branch instead of main *(commit by [@georgesittas](https://github.com/georgesittas))* - [`75b8d16`](https://github.com/tobymao/sqlglot/commit/75b8d16e41b677ea7e150c89d713795073aae6e3) - remove docs from main branch *(PR [#6057](https://github.com/tobymao/sqlglot/pull/6057) by 
[@georgesittas](https://github.com/georgesittas))* - [`cfa2493`](https://github.com/tobymao/sqlglot/commit/cfa249328eef31ab0e0688dcc03521da3343ce47) - **optimizer**: Annotate type for snowflake SQUARE function *(PR [#6059](https://github.com/tobymao/sqlglot/pull/6059) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`e26c394`](https://github.com/tobymao/sqlglot/commit/e26c3949beb7f73020fcd099237dbe31a4db8d84) - **optimizer**: Annotate type for snowflake POW function *(PR [#6058](https://github.com/tobymao/sqlglot/pull/6058) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`7d303ad`](https://github.com/tobymao/sqlglot/commit/7d303adc5efe9d51eb62aeab80bfa4f844e1911d) - include Python 3.14 in the testing matrix *(PR [#6074](https://github.com/tobymao/sqlglot/pull/6074) by [@georgesittas](https://github.com/georgesittas))* - [`dab2a3f`](https://github.com/tobymao/sqlglot/commit/dab2a3fbdb8a523f05319eb34a1fd34534272206) - bump sqlglotrs version to 0.7.3 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d36ba87`](https://github.com/tobymao/sqlglot/commit/d36ba8774a2a4b53c122e3b78086ce0f09e77244) - **optimizer**: add tests for Snowflake DATE_FROM_PARTS function *(PR [#6077](https://github.com/tobymao/sqlglot/pull/6077) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`2bc05cf`](https://github.com/tobymao/sqlglot/commit/2bc05cf3bd53b874a1505c747e38f8a6a1dbf8c7) - **optimizer**: add tests for Snowflake DATEDIFF function *(PR [#6090](https://github.com/tobymao/sqlglot/pull/6090) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a4d07a0`](https://github.com/tobymao/sqlglot/commit/a4d07a07eefbdaf88d30df2310a9533afdc75a82) - **optimizer**: Annotate type for snowflake EXTRACT function *(PR [#6099](https://github.com/tobymao/sqlglot/pull/6099) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ab1da2e`](https://github.com/tobymao/sqlglot/commit/ab1da2e54a83e29d708047d4b3f8abcc1094229d) - **optimizer**: add type annotation tests for snowflake LAST_DAY function *(PR [#6105](https://github.com/tobymao/sqlglot/pull/6105) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`4e24c0a`](https://github.com/tobymao/sqlglot/commit/4e24c0ad92e7071a1f1537886173e29999b46f72) - **optimizer**: add type annotation tests for snowflake TIMESTAMPDIFF function *(PR [#6138](https://github.com/tobymao/sqlglot/pull/6138) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`ae8571f`](https://github.com/tobymao/sqlglot/commit/ae8571fdec71587188e45fe087e1967f5ba641bc) - **optimizer**: add type annotation tests for snowflake TIMEDIFF *(PR [#6140](https://github.com/tobymao/sqlglot/pull/6140) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`3059320`](https://github.com/tobymao/sqlglot/commit/30593202b30001933f05747937975013754b75fa) - copy by default in `lineage` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99949cc`](https://github.com/tobymao/sqlglot/commit/99949ccd3ff81b524edeae437d874b86250dbb5b) - avoid needlessly copying in lineage *(PR [#6150](https://github.com/tobymao/sqlglot/pull/6150) by [@georgesittas](https://github.com/georgesittas))* - [`e7756d8`](https://github.com/tobymao/sqlglot/commit/e7756d8e9f347bfba3f861463890bf57e532cc54) - **optimizer**: add annotation tests for snowflake's BOOLXOR *(PR [#6154](https://github.com/tobymao/sqlglot/pull/6154) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`72e43e3`](https://github.com/tobymao/sqlglot/commit/72e43e3ea08f9dce5a32654060a56f2ee31bea8f) - **optimizer**: add type annotation tests for snowflake's TIMESTAMPADD function *(PR [#6146](https://github.com/tobymao/sqlglot/pull/6146) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`aca106c`](https://github.com/tobymao/sqlglot/commit/aca106c660b8aaf229065ec5c5a4a80d10e8daf6) - **optimizer**: add type annotation tests for snowflake GREATEST *(PR [#6157](https://github.com/tobymao/sqlglot/pull/6157) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`f763604`](https://github.com/tobymao/sqlglot/commit/f7636041d7b796545ed923ffd4803521f05fa7ea) - add `IS [NOT]` tests *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1ab5854`](https://github.com/tobymao/sqlglot/commit/1ab5854216da591e6036ac103239ac0280e09c3d) - **optimizer**: add snowflake test for [NOT] IN *(PR [#6180](https://github.com/tobymao/sqlglot/pull/6180) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`64939ce`](https://github.com/tobymao/sqlglot/commit/64939ce9926f4740387a151311e918e807bfa681) - **optimizer**: add annotation tests for ZEROIFNULL *(PR [#6187](https://github.com/tobymao/sqlglot/pull/6187) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`4b6bcdd`](https://github.com/tobymao/sqlglot/commit/4b6bcdd4dc297bd42ad000ffda98d14110565dc9) - **optimizer**: Add tests for snowflake's `NULLIFZERO` *(PR [#6197](https://github.com/tobymao/sqlglot/pull/6197) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`ef68075`](https://github.com/tobymao/sqlglot/commit/ef680756c33da180ed2f21fb6113a0123db341c9) - **optimizer**: add annotation tests for NVL2 *(PR [#6208](https://github.com/tobymao/sqlglot/pull/6208) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7f550f2`](https://github.com/tobymao/sqlglot/commit/7f550f22da40d8c1cfc8afb183d6e4dbd50241ea) - **optimizer**: add annotation tests for NVL *(PR [#6207](https://github.com/tobymao/sqlglot/pull/6207) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - 
[`d7be4a5`](https://github.com/tobymao/sqlglot/commit/d7be4a5da3dca6bcc44230b2a176c8b17b81c46e) - **optimizer**: add annotation test for COALESCE *(PR [#6210](https://github.com/tobymao/sqlglot/pull/6210) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`8aa7356`](https://github.com/tobymao/sqlglot/commit/8aa7356ab8adee26193086754ca1a1805957d944) - **optimizer**: add annotation tests for IFF *(PR [#6215](https://github.com/tobymao/sqlglot/pull/6215) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`160a1b9`](https://github.com/tobymao/sqlglot/commit/160a1b90f4ce39a2fce6f7f0e9e854d974fed053) - **optimizer**: mixed type annotation test for sf IFNULL *(commit by [@geooo109](https://github.com/geooo109))* - [`893ad2a`](https://github.com/tobymao/sqlglot/commit/893ad2a5b1a28339ccc65c85ac813506e6ad56f1) - **optimizer**: add annotation tests for NULLIF *(PR [#6221](https://github.com/tobymao/sqlglot/pull/6221) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`78d7733`](https://github.com/tobymao/sqlglot/commit/78d77335819d1796fa3989ef072d3f8fd4b83559) - remove redundant or term for unknown in annotate_types *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b202f3a`](https://github.com/tobymao/sqlglot/commit/b202f3ad64e88a47e52c45e32c9e4faae6c8ac45) - **optimizer**: add test for BITXOR *(PR [#6223](https://github.com/tobymao/sqlglot/pull/6223) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`b20f2e8`](https://github.com/tobymao/sqlglot/commit/b20f2e88d86038f1a98f4b97b5a2ae0b86652e33) - **optimizer**: add test for BITSHIFTLEFT *(PR [#6227](https://github.com/tobymao/sqlglot/pull/6227) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7f93e85`](https://github.com/tobymao/sqlglot/commit/7f93e8551b00cc32014236a07c8794bd7a3a2b91) - **optimizer**: add annotation tests for BITSHIFTRIGHT *(PR [#6228](https://github.com/tobymao/sqlglot/pull/6228) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fcf017c`](https://github.com/tobymao/sqlglot/commit/fcf017cfb95923fea8ae5669340713a326f4f306) - rename `EXPRESSION_SPEC` to `EXPRESSION_METADATA` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`55bc9e4`](https://github.com/tobymao/sqlglot/commit/55bc9e4019f8ef8d7e571256d7b0e07b30d9240c) - remove predicate/connector/not from typing metadata *(commit by [@georgesittas](https://github.com/georgesittas))* - [`349ab29`](https://github.com/tobymao/sqlglot/commit/349ab29aa84fb087388b6a1494fea70273a4a560) - **optimizer**: add annotation test for BOOLAND_OR *(PR [#6260](https://github.com/tobymao/sqlglot/pull/6260) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`75ec424`](https://github.com/tobymao/sqlglot/commit/75ec424667b95462bb1750a251a5096da0d5161b) - **optimizer**: add annotation test for BOOLAND_AGG *(PR [#6257](https://github.com/tobymao/sqlglot/pull/6257) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`bb574aa`](https://github.com/tobymao/sqlglot/commit/bb574aa0cf0a8c0b92f9af7ef3dfddb7de725a8b) - **optimizer**: add annotation test for ARRAY_AGG *(PR [#6264](https://github.com/tobymao/sqlglot/pull/6264) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`a95c5cc`](https://github.com/tobymao/sqlglot/commit/a95c5ccf411dc4d28ef9c19fb03bd8a3615d7c4b) - **optimizer**: add nonnull clickhouse column test case *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6d6c689`](https://github.com/tobymao/sqlglot/commit/6d6c68915ca699da7cb707675aece963df97f80b) - **optimizer**: add annotation tests for ANY_VALUE *(PR [#6275](https://github.com/tobymao/sqlglot/pull/6275) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2459f88`](https://github.com/tobymao/sqlglot/commit/2459f8832ae398aa1381025724a4286f7f5e3e9d) - Follow up of 6280 *(PR [#6281](https://github.com/tobymao/sqlglot/pull/6281) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`a7d33d0`](https://github.com/tobymao/sqlglot/commit/a7d33d0e190fc5c9f23a1ab43082ac017d20fd18) - **optimizer**: add annotation tests for APPROX_PERCENTILE *(PR [#6283](https://github.com/tobymao/sqlglot/pull/6283) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`1b2d139`](https://github.com/tobymao/sqlglot/commit/1b2d139d3338c7053dee333914323236a2d15d97) - **optimizer**: add type annotation tests with window for sf APPROX_PERCENTILE *(commit by [@geooo109](https://github.com/geooo109))* - [`d059648`](https://github.com/tobymao/sqlglot/commit/d05964851c99553ba06e318bbbda39f9851120db) - **optimizer**: add annotation tests for APPROX_COUNT_DISTINCT *(PR [#6282](https://github.com/tobymao/sqlglot/pull/6282) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`6bd59ac`](https://github.com/tobymao/sqlglot/commit/6bd59acf2288da5bfe6151c5adf6f2a63792dc1e) - Follow up of PR 6288 *(PR [#6293](https://github.com/tobymao/sqlglot/pull/6293) by [@VaggelisD](https://github.com/VaggelisD))* - [`546fd2a`](https://github.com/tobymao/sqlglot/commit/546fd2a2588f7b385bdbb9e39490bd6a422283ca) - Remove dead line in qualify_columns *(PR [#6304](https://github.com/tobymao/sqlglot/pull/6304) by [@VaggelisD](https://github.com/VaggelisD))* - [`ac7ac19`](https://github.com/tobymao/sqlglot/commit/ac7ac198a3b915e63ba8a055e9a0193c3dd3e26a) - **exasol**: Implement ODBC date time literals in Exasol Sqlglot *(PR [#6311](https://github.com/tobymao/sqlglot/pull/6311) by [@nnamdi16](https://github.com/nnamdi16))* - [`8d1d25c`](https://github.com/tobymao/sqlglot/commit/8d1d25c6de7ad03c50e3efe892d16d16329d8ee9) - **exasol**: Implement local qualifier for-aliases, in GROUP BY, WHERE AND HAVING clause in exasol dialect *(PR [#6277](https://github.com/tobymao/sqlglot/pull/6277) by [@nnamdi16](https://github.com/nnamdi16))* - 
[`509b0aa`](https://github.com/tobymao/sqlglot/commit/509b0aaada0e27542864771ba14777d398b6cee0) - **exasol**: Implement day_of_week function *(PR [#6319](https://github.com/tobymao/sqlglot/pull/6319) by [@nnamdi16](https://github.com/nnamdi16))* - [`487d218`](https://github.com/tobymao/sqlglot/commit/487d218a6fcad4e28c65c6df55435ba218826186) - iterative annotate types *(PR [#6342](https://github.com/tobymao/sqlglot/pull/6342) by [@geooo109](https://github.com/geooo109))* - [`8201062`](https://github.com/tobymao/sqlglot/commit/8201062ac41b85e5a89aa8e1c5973852f105c66e) - clean up derived table traversal in table qualification *(PR [#6363](https://github.com/tobymao/sqlglot/pull/6363) by [@georgesittas](https://github.com/georgesittas))* - [`6b7084d`](https://github.com/tobymao/sqlglot/commit/6b7084d0c9f4735432afc12509c77c286cc50513) - **optimizer**: refactor costly scope walking loop in qualify tables *(PR [#6364](https://github.com/tobymao/sqlglot/pull/6364) by [@georgesittas](https://github.com/georgesittas))* - [`0319241`](https://github.com/tobymao/sqlglot/commit/0319241162bbe6d278a626100eac73999b250968) - **mysql,postgres**: tests for unsupported IGNORE/RESPECT NULLS *(PR [#6386](https://github.com/tobymao/sqlglot/pull/6386) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#6376](https://github.com/tobymao/sqlglot/issues/6376) opened by [@NickCrews](https://github.com/NickCrews)* - [`11354cc`](https://github.com/tobymao/sqlglot/commit/11354cc85d116cd24c28114a437111965ba828a9) - Make integration test workflow more robust *(PR [#6403](https://github.com/tobymao/sqlglot/pull/6403) by [@erindru](https://github.com/erindru))* - [`f758cea`](https://github.com/tobymao/sqlglot/commit/f758cea0e9fca5850895a730c554c17b488d29ca) - **exasol**: transformed rank function, ignoring parameters *(PR [#6408](https://github.com/tobymao/sqlglot/pull/6408) by [@nnamdi16](https://github.com/nnamdi16))* - 
[`07d9958`](https://github.com/tobymao/sqlglot/commit/07d99583b4aebdc682bb7604ccdf45bddb89f9c3) - **optimizer**: replace direct comparison with dialect properties *(PR [#6398](https://github.com/tobymao/sqlglot/pull/6398) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`137549e`](https://github.com/tobymao/sqlglot/commit/137549e5e803416d46e13e9a8123cef9b53d349a) - **exasol**: transform substring_index using substr and instr *(PR [#6406](https://github.com/tobymao/sqlglot/pull/6406) by [@nnamdi16](https://github.com/nnamdi16))* - [`78f1824`](https://github.com/tobymao/sqlglot/commit/78f1824c790f523845cbda488ecf4c43a92ac0f0) - **exasol**: transform substring_index using substr and instr *(PR [#6406](https://github.com/tobymao/sqlglot/pull/6406) by [@nnamdi16](https://github.com/nnamdi16))* - [`39cc555`](https://github.com/tobymao/sqlglot/commit/39cc55586ed76a4a583e6db22a9ee51e09bff92e) - **snowflake**: annotate type for COUNT *(PR [#6437](https://github.com/tobymao/sqlglot/pull/6437) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`61f39ba`](https://github.com/tobymao/sqlglot/commit/61f39bab9a0668c338e8c1b5e0fa953f22c0a886) - **optimizer**: improve error message for ambiguous columns *(PR [#6423](https://github.com/tobymao/sqlglot/pull/6423) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`313afe5`](https://github.com/tobymao/sqlglot/commit/313afe540aa2cdc4cc179c4852c6ef37362bcb3e) - **optimizer**: annotate type for snowflake func ARRAY_UNION_AGG *(PR [#6446](https://github.com/tobymao/sqlglot/pull/6446) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`b321ca6`](https://github.com/tobymao/sqlglot/commit/b321ca6191fefc88da1a6de83a465886b5754b7a) - bump sqlglotrs to 0.8.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`4a061e2`](https://github.com/tobymao/sqlglot/commit/4a061e26b638c9acb0c8a77d9347914b35082bb3) - **optimizer**: Include BIGDECIMAL in numeric 
precedence *(PR [#6456](https://github.com/tobymao/sqlglot/pull/6456) by [@vchan](https://github.com/vchan))* - [`f305305`](https://github.com/tobymao/sqlglot/commit/f305305e5cf3ef45afba822542aebeb944c00e0b) - **optimizer**: Annotate types for BigQuery's AVG function *(PR [#6459](https://github.com/tobymao/sqlglot/pull/6459) by [@vchan](https://github.com/vchan))* - [`910349f`](https://github.com/tobymao/sqlglot/commit/910349f3c30af59ce1820e48cae0cbb77539877d) - **optimizer**: Annotate types for BigQuery's SAFE_DIVIDE function *(PR [#6464](https://github.com/tobymao/sqlglot/pull/6464) by [@vchan](https://github.com/vchan))* - [`5e75621`](https://github.com/tobymao/sqlglot/commit/5e75621e90defd50076383485f6a4689a8c551ac) - **optimizer**: annotate type for snowflake func ARRAY_UNIQUE_AGG *(PR [#6465](https://github.com/tobymao/sqlglot/pull/6465) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`4d77500`](https://github.com/tobymao/sqlglot/commit/4d775007d2ceb997ff33721def768493c95f98a5) - **optimizer**: add tests for snowflake CAST function *(PR [#6471](https://github.com/tobymao/sqlglot/pull/6471) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`88dfd26`](https://github.com/tobymao/sqlglot/commit/88dfd26b832d13e517fe7c18d2c086885bf4954d) - **optimizer**: annotate type for snowflake func TO_BINARY *(PR [#6474](https://github.com/tobymao/sqlglot/pull/6474) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`483318b`](https://github.com/tobymao/sqlglot/commit/483318bc25e4ee4fa2731be1a0aea02858872ab5) - clean up TO_BINARY tests *(commit by [@geooo109](https://github.com/geooo109))* - [`bff7084`](https://github.com/tobymao/sqlglot/commit/bff70841d0bfbede6ea0fae2e7b37d68735a53d8) - remove duckdb TO_BINARY 2 arg test *(commit by [@geooo109](https://github.com/geooo109))* - [`80591f9`](https://github.com/tobymao/sqlglot/commit/80591f9513dff9160884e4bbbd48d9c26cf8f253) - starrocks TO_BINARY tests *(commit by 
[@geooo109](https://github.com/geooo109))* - [`01e5a05`](https://github.com/tobymao/sqlglot/commit/01e5a050c76f728ef542f0127209e2cd1c5f5558) - **exasol**: implementing the last day function in exasol sql dialect *(PR [#6483](https://github.com/tobymao/sqlglot/pull/6483) by [@nnamdi16](https://github.com/nnamdi16))* - [`aacc981`](https://github.com/tobymao/sqlglot/commit/aacc98105fb381c17a80ee011f107157279312d7) - **duckdb**: tests for MAX_BY and MIN_BY *(PR [#6489](https://github.com/tobymao/sqlglot/pull/6489) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`06c7ffb`](https://github.com/tobymao/sqlglot/commit/06c7ffbe14985a4da35a97d47322021e79525adf) - cleanup bitwise operator fixes *(commit by [@georgesittas](https://github.com/georgesittas))* - [`edb8964`](https://github.com/tobymao/sqlglot/commit/edb8964ed064a687e52323143d52281eaa391c9a) - bump sqlglotrs to 0.9.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0211328`](https://github.com/tobymao/sqlglot/commit/021132821fb33620643295533ce1517a172e7dc6) - add test-fast and test-fast-rs Makefile targets *(PR [#6546](https://github.com/tobymao/sqlglot/pull/6546) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`60b00bc`](https://github.com/tobymao/sqlglot/commit/60b00bc4d462c5ac03410a804305ad57ed6fbfbb) - Refactor PR 6555 *(PR [#6569](https://github.com/tobymao/sqlglot/pull/6569) by [@VaggelisD](https://github.com/VaggelisD))* ## [v28.3.0] - 2025-12-11 ### :boom: BREAKING CHANGES - due to [`62b348c`](https://github.com/tobymao/sqlglot/commit/62b348ce46d014895bd17d89ccb0b3e186e46d15) - add support for noop string escapes *(PR [#6526](https://github.com/tobymao/sqlglot/pull/6526) by [@nian0114](https://github.com/nian0114))*: add support for noop string escapes (#6526) - due to [`1876c5a`](https://github.com/tobymao/sqlglot/commit/1876c5a86c3b737b7360c4fef25c44dc010b66db) - consolidate can_quote logic and fix an issue with identify=False *(PR 
[#6534](https://github.com/tobymao/sqlglot/pull/6534) by [@tobymao](https://github.com/tobymao))*: consolidate can_quote logic and fix an issue with identify=False (#6534) - due to [`edb8964`](https://github.com/tobymao/sqlglot/commit/edb8964ed064a687e52323143d52281eaa391c9a) - bump sqlglotrs to 0.9.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.9.0 ### :bug: Bug Fixes - [`62b348c`](https://github.com/tobymao/sqlglot/commit/62b348ce46d014895bd17d89ccb0b3e186e46d15) - **tokenizer**: add support for noop string escapes *(PR [#6526](https://github.com/tobymao/sqlglot/pull/6526) by [@nian0114](https://github.com/nian0114))* ### :recycle: Refactors - [`1876c5a`](https://github.com/tobymao/sqlglot/commit/1876c5a86c3b737b7360c4fef25c44dc010b66db) - consolidate can_quote logic and fix an issue with identify=False *(PR [#6534](https://github.com/tobymao/sqlglot/pull/6534) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`edb8964`](https://github.com/tobymao/sqlglot/commit/edb8964ed064a687e52323143d52281eaa391c9a) - bump sqlglotrs to 0.9.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v28.2.0] - 2025-12-11 ### :boom: BREAKING CHANGES - due to [`ebe718a`](https://github.com/tobymao/sqlglot/commit/ebe718a72d5b5871a8d6e67754ff50e873d55b41) - Add support for format elements used in date/time functions like FORMAT_DATETIME *(PR [#6428](https://github.com/tobymao/sqlglot/pull/6428) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support for format elements used in date/time functions like FORMAT_DATETIME (#6428) - due to [`c111f64`](https://github.com/tobymao/sqlglot/commit/c111f643d61064280024b4cc5c0fc250581fbe55) - annotation support for APPROX_PERCENTILE_ACCUMULATE *(PR [#6455](https://github.com/tobymao/sqlglot/pull/6455) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_ACCUMULATE (#6455) - due 
to [`f305305`](https://github.com/tobymao/sqlglot/commit/f305305e5cf3ef45afba822542aebeb944c00e0b) - Annotate types for BigQuery's AVG function *(PR [#6459](https://github.com/tobymao/sqlglot/pull/6459) by [@vchan](https://github.com/vchan))*: Annotate types for BigQuery's AVG function (#6459) - due to [`910349f`](https://github.com/tobymao/sqlglot/commit/910349f3c30af59ce1820e48cae0cbb77539877d) - Annotate types for BigQuery's SAFE_DIVIDE function *(PR [#6464](https://github.com/tobymao/sqlglot/pull/6464) by [@vchan](https://github.com/vchan))*: Annotate types for BigQuery's SAFE_DIVIDE function (#6464) - due to [`5e75621`](https://github.com/tobymao/sqlglot/commit/5e75621e90defd50076383485f6a4689a8c551ac) - annotate type for snowflake func ARRAY_UNIQUE_AGG *(PR [#6465](https://github.com/tobymao/sqlglot/pull/6465) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func ARRAY_UNIQUE_AGG (#6465) - due to [`94d46b8`](https://github.com/tobymao/sqlglot/commit/94d46b8eafd5abe252407d2bbe306ca579a29b20) - annotation support for APPROX_PERCENTILE_ESTIMATE. Return type DOUBLE *(PR [#6461](https://github.com/tobymao/sqlglot/pull/6461) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_ESTIMATE. 
Return type DOUBLE (#6461) - due to [`2ac30b0`](https://github.com/tobymao/sqlglot/commit/2ac30b08bd663bbaf00ae075c4db0c3d27ab6640) - annotation support for APPROX_PERCENTILE_COMBINE *(PR [#6460](https://github.com/tobymao/sqlglot/pull/6460) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_PERCENTILE_COMBINE (#6460) - due to [`d44bda3`](https://github.com/tobymao/sqlglot/commit/d44bda376c06956947a09a9f279cce886a63b981) - Annotate type for ZIPF *(PR [#6453](https://github.com/tobymao/sqlglot/pull/6453) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for ZIPF (#6453) - due to [`34dbd47`](https://github.com/tobymao/sqlglot/commit/34dbd478957c1796998d0b263f63c8ce1db7a320) - Annotate type for XMLGET *(PR [#6457](https://github.com/tobymao/sqlglot/pull/6457) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for XMLGET (#6457) - due to [`0d211f2`](https://github.com/tobymao/sqlglot/commit/0d211f2b36167cfb7856b8ec25f597f70317a9c7) - annotate type for MODE function snowflake *(PR [#6447](https://github.com/tobymao/sqlglot/pull/6447) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for MODE function snowflake (#6447) - due to [`cc4c8ab`](https://github.com/tobymao/sqlglot/commit/cc4c8ab43ab71790bc2bb9f8f3c06e34f89f999f) - annotate type for PERCENTILE_CONT in Snowflake *(PR [#6470](https://github.com/tobymao/sqlglot/pull/6470) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for PERCENTILE_CONT in Snowflake (#6470) - due to [`7dbc242`](https://github.com/tobymao/sqlglot/commit/7dbc242a637a8890511cc14f22bce4d425f1f55d) - annotation support for CURRENT_REGION. Return type VARCHAR *(PR [#6473](https://github.com/tobymao/sqlglot/pull/6473) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_REGION. 
Return type VARCHAR (#6473) - due to [`43a6a5c`](https://github.com/tobymao/sqlglot/commit/43a6a5c601421e15a7f94dd489cb4fbcf9d2c8c3) - annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR *(PR [#6475](https://github.com/tobymao/sqlglot/pull/6475) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR (#6475) - due to [`f1f7c6a`](https://github.com/tobymao/sqlglot/commit/f1f7c6ae6b6aa3f6f2251d0f81ee667440ca53d1) - annotation support for CURRENT_ORGANIZATION_USER. *(PR [#6476](https://github.com/tobymao/sqlglot/pull/6476) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ORGANIZATION_USER. (#6476) - due to [`88dfd26`](https://github.com/tobymao/sqlglot/commit/88dfd26b832d13e517fe7c18d2c086885bf4954d) - annotate type for snowflake func TO_BINARY *(PR [#6474](https://github.com/tobymao/sqlglot/pull/6474) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func TO_BINARY (#6474) - due to [`d268203`](https://github.com/tobymao/sqlglot/commit/d268203e1dbae4e3aff863108f6d09a6f8274db5) - annotation support for CURRENT_ROLE_TYPE *(PR [#6479](https://github.com/tobymao/sqlglot/pull/6479) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ROLE_TYPE (#6479) - due to [`fd4431b`](https://github.com/tobymao/sqlglot/commit/fd4431bf9550c03aa761c642a68a21a146fd8548) - annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions *(PR [#6468](https://github.com/tobymao/sqlglot/pull/6468) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions (#6468) - due to [`e6adba7`](https://github.com/tobymao/sqlglot/commit/e6adba76cc2f27633a9d38bfaea3356e71d00a4c) - Add support for coercing STRING literals to 
temporal types *(PR [#6482](https://github.com/tobymao/sqlglot/pull/6482) by [@vchan](https://github.com/vchan))*: Add support for coercing STRING literals to temporal types (#6482) - due to [`68a5e61`](https://github.com/tobymao/sqlglot/commit/68a5e615b24e518cb90c9b80cf25355fcabdb468) - annotate type for REGR_* functions *(PR [#6452](https://github.com/tobymao/sqlglot/pull/6452) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for REGR_* functions (#6452) - due to [`f7458a4`](https://github.com/tobymao/sqlglot/commit/f7458a40d3b09a2e212f6705ac4a77c99714508e) - annotate type for snowflake func TO_BOOLEAN *(PR [#6481](https://github.com/tobymao/sqlglot/pull/6481) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func TO_BOOLEAN (#6481) - due to [`1531a67`](https://github.com/tobymao/sqlglot/commit/1531a67ac7806f3b4582f6cf1ea02342a517de74) - annotate type for VECTOR_INNER_PRODUCT *(PR [#6486](https://github.com/tobymao/sqlglot/pull/6486) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: annotate type for VECTOR_INNER_PRODUCT (#6486) - due to [`df4c1d3`](https://github.com/tobymao/sqlglot/commit/df4c1d37ff77151a74b5de3d119c7e03f5db85f4) - REGEXP_EXTRACT position arg overflow *(PR [#6458](https://github.com/tobymao/sqlglot/pull/6458) by [@treysp](https://github.com/treysp))*: REGEXP_EXTRACT position arg overflow (#6458) - due to [`f6b2b3b`](https://github.com/tobymao/sqlglot/commit/f6b2b3bc6e1c95340149be65d80ef7e177b28d82) - support padside argument for BIT[OR|AND|XOR] *(PR [#6487](https://github.com/tobymao/sqlglot/pull/6487) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support padside argument for BIT[OR|AND|XOR] (#6487) - due to [`5a49c3f`](https://github.com/tobymao/sqlglot/commit/5a49c3f7a7619ad9e711ff2cd9e85b8606969b36) - support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions *(PR 
[#6463](https://github.com/tobymao/sqlglot/pull/6463) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions (#6463) - due to [`ef130f1`](https://github.com/tobymao/sqlglot/commit/ef130f1b944b4be835d4a6831fec9a333a825a34) - Annotated type for ARRAY_CONSTRUCT_COMPACT [#6496](https://github.com/tobymao/sqlglot/pull/6496) *(commit by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotated type for ARRAY_CONSTRUCT_COMPACT #6496 - due to [`1b6076b`](https://github.com/tobymao/sqlglot/commit/1b6076bd5a64b044f52f5366244ba0746aca75e1) - wrap connectives generated due to transpiling LIKE ANY closes [#6493](https://github.com/tobymao/sqlglot/pull/6493) *(commit by [@georgesittas](https://github.com/georgesittas))*: wrap connectives generated due to transpiling LIKE ANY closes #6493 - due to [`36ad534`](https://github.com/tobymao/sqlglot/commit/36ad534b14eabe9ee197017f5087e8e5190f8526) - qualified select list with "LOCAL" *(PR [#6450](https://github.com/tobymao/sqlglot/pull/6450) by [@nnamdi16](https://github.com/nnamdi16))*: qualified select list with "LOCAL" (#6450) - due to [`36cf0bf`](https://github.com/tobymao/sqlglot/commit/36cf0bf6671f622344afee52d7aafe30f19ecf9a) - annotation support for CURRENT_ROLE. *(PR [#6478](https://github.com/tobymao/sqlglot/pull/6478) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ROLE. 
(#6478) - due to [`cbba04c`](https://github.com/tobymao/sqlglot/commit/cbba04cb292fe8b3fd38c87d9ccb624cdcb52843) - support comma-separated syntax for OVERLAY function *(PR [#6497](https://github.com/tobymao/sqlglot/pull/6497) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: support comma-separated syntax for OVERLAY function (#6497) - due to [`dc8f26a`](https://github.com/tobymao/sqlglot/commit/dc8f26a3a5e023a0e54caa345b129fb1b4fe805f) - bq annotate type for NULL *(PR [#6491](https://github.com/tobymao/sqlglot/pull/6491) by [@geooo109](https://github.com/geooo109))*: bq annotate type for NULL (#6491) - due to [`52aceaa`](https://github.com/tobymao/sqlglot/commit/52aceaaa887dddb35f8ede5c2d9577fdeee35c48) - annotate `HavingMax` by `this` *(PR [#6499](https://github.com/tobymao/sqlglot/pull/6499) by [@georgesittas](https://github.com/georgesittas))*: annotate `HavingMax` by `this` (#6499) - due to [`c97a81d`](https://github.com/tobymao/sqlglot/commit/c97a81d68a1584fad48475725665a7678fcad9d1) - annotate TO_HEX(MD5(...)) in BigQuery *(PR [#6500](https://github.com/tobymao/sqlglot/pull/6500) by [@georgesittas](https://github.com/georgesittas))*: annotate TO_HEX(MD5(...)) in BigQuery (#6500) - due to [`a5797a1`](https://github.com/tobymao/sqlglot/commit/a5797a1c867c4ade71ae4ddf93232576993cf5bc) - handle named arguments and non-integer scale input for ROUND *(PR [#6495](https://github.com/tobymao/sqlglot/pull/6495) by [@toriwei](https://github.com/toriwei))*: handle named arguments and non-integer scale input for ROUND (#6495) - due to [`3224235`](https://github.com/tobymao/sqlglot/commit/3224235c1b7a80511af11f7dbffe608a747a3df0) - make CTE builder produce AST consistent with parser closes [#6503](https://github.com/tobymao/sqlglot/pull/6503) *(commit by [@georgesittas](https://github.com/georgesittas))*: make CTE builder produce AST consistent with parser closes #6503 - due to 
[`8b5298a`](https://github.com/tobymao/sqlglot/commit/8b5298a6578af80fd9676eb222422862d5468859) - Transpile BQ's WEEK based `DATE_DIFF` *(PR [#6507](https://github.com/tobymao/sqlglot/pull/6507) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile BQ's WEEK based `DATE_DIFF` (#6507) - due to [`9454a18`](https://github.com/tobymao/sqlglot/commit/9454a18cca41a510e61522f6b785d646980e2100) - uppercase join method, side, kind for consistency fixes [#6510](https://github.com/tobymao/sqlglot/pull/6510) *(PR [#6511](https://github.com/tobymao/sqlglot/pull/6511) by [@georgesittas](https://github.com/georgesittas))*: uppercase join method, side, kind for consistency fixes #6510 (#6511) - due to [`41b776b`](https://github.com/tobymao/sqlglot/commit/41b776bdc6936f18accd9f7308b55acd383bb596) - added support for current_catalog *(PR [#6492](https://github.com/tobymao/sqlglot/pull/6492) by [@AbhishekASLK](https://github.com/AbhishekASLK))*: added support for current_catalog (#6492) - due to [`dd19bea`](https://github.com/tobymao/sqlglot/commit/dd19beae95f077cfd8b6e315eca7ff212817b250) - annotation support for CURRENT_ACCOUNT *(PR [#6512](https://github.com/tobymao/sqlglot/pull/6512) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_ACCOUNT (#6512) - due to [`2e8105e`](https://github.com/tobymao/sqlglot/commit/2e8105eebaec25fc8f94f1e68951198660f404e1) - Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP *(PR [#6488](https://github.com/tobymao/sqlglot/pull/6488) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP (#6488) - due to [`cfb02c1`](https://github.com/tobymao/sqlglot/commit/cfb02c1aa676e801b2d13a84467b4904cd834ffe) - annotation support for CURRENT_ACCOUNT_NAME *(PR [#6513](https://github.com/tobymao/sqlglot/pull/6513) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support 
for CURRENT_ACCOUNT_NAME (#6513) - due to [`1004e31`](https://github.com/tobymao/sqlglot/commit/1004e31cce62cce2e2afb7eab85ed8bdecaede3b) - annotation support for CURRENT_AVAILABLE_ROLES *(PR [#6514](https://github.com/tobymao/sqlglot/pull/6514) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_AVAILABLE_ROLES (#6514) - due to [`ff201fe`](https://github.com/tobymao/sqlglot/commit/ff201febd27937a97674dd091928456dde733254) - annotation support for CURRENT_CLIENT *(PR [#6515](https://github.com/tobymao/sqlglot/pull/6515) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_CLIENT (#6515) - due to [`d777a9c`](https://github.com/tobymao/sqlglot/commit/d777a9c0feef15ac036f7b413112de4d7cc8bea4) - annotation support for CURRENT_IP_ADDRESS *(PR [#6518](https://github.com/tobymao/sqlglot/pull/6518) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_IP_ADDRESS (#6518) - due to [`c296061`](https://github.com/tobymao/sqlglot/commit/c2960615a3bd279b7c5f775d5b93ae12aa27a3b8) - Transpilation of TO_BINARY from snowflake to duckdb *(PR [#6504](https://github.com/tobymao/sqlglot/pull/6504) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Transpilation of TO_BINARY from snowflake to duckdb (#6504) - due to [`7a70164`](https://github.com/tobymao/sqlglot/commit/7a70164d8cf361cf4c0a7d5789bb51676f772959) - transpile Snowflake's `RANDSTR` function *(PR [#6502](https://github.com/tobymao/sqlglot/pull/6502) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: transpile Snowflake's `RANDSTR` function (#6502) - due to [`a26d419`](https://github.com/tobymao/sqlglot/commit/a26d4191e5468e39eafdf7a981e7b890d438b2c9) - annotation support for CURRENT_DATABASE *(PR [#6516](https://github.com/tobymao/sqlglot/pull/6516) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for 
CURRENT_DATABASE (#6516) - due to [`0acdf7f`](https://github.com/tobymao/sqlglot/commit/0acdf7fc783f2722536ec24dcf8600957febf7ca) - annotation support for CURRENT_SCHEMAS *(PR [#6519](https://github.com/tobymao/sqlglot/pull/6519) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SCHEMAS (#6519) - due to [`43cce89`](https://github.com/tobymao/sqlglot/commit/43cce895da80d21abc89d40de5d7fddd68871bf0) - annotation support for CURRENT_SECONDARY_ROLES *(PR [#6520](https://github.com/tobymao/sqlglot/pull/6520) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SECONDARY_ROLES (#6520) - due to [`c21b4b1`](https://github.com/tobymao/sqlglot/commit/c21b4b1134b368ee5144339b59e70ddcc54f3dbc) - annotation support for CURRENT_SESSION *(PR [#6521](https://github.com/tobymao/sqlglot/pull/6521) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_SESSION (#6521) - due to [`57a83c0`](https://github.com/tobymao/sqlglot/commit/57a83c018dace690f7bb363c25ee6bde33c3d60f) - annotation support for CURRENT_STATEMENT *(PR [#6522](https://github.com/tobymao/sqlglot/pull/6522) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_STATEMENT (#6522) - due to [`4b240e4`](https://github.com/tobymao/sqlglot/commit/4b240e40a8809a6eea2a279370a884f4a7b03dfa) - annotation support for CURRENT_VERSION *(PR [#6524](https://github.com/tobymao/sqlglot/pull/6524) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_VERSION (#6524) - due to [`c1a831f`](https://github.com/tobymao/sqlglot/commit/c1a831f5bf662ab8d8e07dc2bb949f2adcbe7d7c) - annotation support for CURRENT_TRANSACTION *(PR [#6523](https://github.com/tobymao/sqlglot/pull/6523) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_TRANSACTION (#6523) - due to 
[`2e162b0`](https://github.com/tobymao/sqlglot/commit/2e162b0d34066e7aa7edac3156739bcd31a634fc) - annotation support for CURRENT_WAREHOUSE *(PR [#6525](https://github.com/tobymao/sqlglot/pull/6525) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for CURRENT_WAREHOUSE (#6525) - due to [`9d06859`](https://github.com/tobymao/sqlglot/commit/9d0685923209c04747fa6fa2b35ee2e516453abc) - annotate bigquery ARRAY when arg contains set operations *(PR [#6517](https://github.com/tobymao/sqlglot/pull/6517) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate bigquery ARRAY when arg contains set operations (#6517) - due to [`2fd14ed`](https://github.com/tobymao/sqlglot/commit/2fd14ed32b3793444405005fb98342222b4d7956) - query schema directly when type annotation fails for processing UNNEST source *(PR [#6451](https://github.com/tobymao/sqlglot/pull/6451) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: query schema directly when type annotation fails for processing UNNEST source (#6451) - due to [`41a9e88`](https://github.com/tobymao/sqlglot/commit/41a9e88bb9800205df0b3e10a1976699dc4fe4f9) - Add support to transpile binary args for bitwise operators *(PR [#6508](https://github.com/tobymao/sqlglot/pull/6508) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support to transpile binary args for bitwise operators (#6508) - due to [`06c7ffb`](https://github.com/tobymao/sqlglot/commit/06c7ffbe14985a4da35a97d47322021e79525adf) - cleanup bitwise operator fixes *(commit by [@georgesittas](https://github.com/georgesittas))*: cleanup bitwise operator fixes ### :sparkles: New Features - [`ebe718a`](https://github.com/tobymao/sqlglot/commit/ebe718a72d5b5871a8d6e67754ff50e873d55b41) - **duckdb**: Add support for format elements used in date/time functions like FORMAT_DATETIME *(PR [#6428](https://github.com/tobymao/sqlglot/pull/6428) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c111f64`](https://github.com/tobymao/sqlglot/commit/c111f643d61064280024b4cc5c0fc250581fbe55) - **snowflake**: annotation support for APPROX_PERCENTILE_ACCUMULATE *(PR [#6455](https://github.com/tobymao/sqlglot/pull/6455) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`a7d211e`](https://github.com/tobymao/sqlglot/commit/a7d211e6fdce968c64b050c77e026cc23fdc07e5) - **duckdb**: transpile DECFLOAT type to DECIMAL(38, 5) *(PR [#6462](https://github.com/tobymao/sqlglot/pull/6462) by [@toriwei](https://github.com/toriwei))* - [`94d46b8`](https://github.com/tobymao/sqlglot/commit/94d46b8eafd5abe252407d2bbe306ca579a29b20) - **snowflake**: annotation support for APPROX_PERCENTILE_ESTIMATE. Return type DOUBLE *(PR [#6461](https://github.com/tobymao/sqlglot/pull/6461) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2ac30b0`](https://github.com/tobymao/sqlglot/commit/2ac30b08bd663bbaf00ae075c4db0c3d27ab6640) - **snowflake**: annotation support for APPROX_PERCENTILE_COMBINE *(PR [#6460](https://github.com/tobymao/sqlglot/pull/6460) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d44bda3`](https://github.com/tobymao/sqlglot/commit/d44bda376c06956947a09a9f279cce886a63b981) - **optimizer**: Annotate type for ZIPF *(PR [#6453](https://github.com/tobymao/sqlglot/pull/6453) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`34dbd47`](https://github.com/tobymao/sqlglot/commit/34dbd478957c1796998d0b263f63c8ce1db7a320) - **optimizer**: Annotate type for XMLGET *(PR [#6457](https://github.com/tobymao/sqlglot/pull/6457) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ff3f0f9`](https://github.com/tobymao/sqlglot/commit/ff3f0f998674f5b2741c3f6cadbe24fa8fb607ad) - **databricks**: add support for ?:: operator *(PR [#6469](https://github.com/tobymao/sqlglot/pull/6469) by 
[@AbhishekASLK](https://github.com/AbhishekASLK))* - [`0d211f2`](https://github.com/tobymao/sqlglot/commit/0d211f2b36167cfb7856b8ec25f597f70317a9c7) - **snowflake**: annotate type for MODE function snowflake *(PR [#6447](https://github.com/tobymao/sqlglot/pull/6447) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`cc4c8ab`](https://github.com/tobymao/sqlglot/commit/cc4c8ab43ab71790bc2bb9f8f3c06e34f89f999f) - **snowflake**: annotate type for PERCENTILE_CONT in Snowflake *(PR [#6470](https://github.com/tobymao/sqlglot/pull/6470) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7dbc242`](https://github.com/tobymao/sqlglot/commit/7dbc242a637a8890511cc14f22bce4d425f1f55d) - **snowflake**: annotation support for CURRENT_REGION. Return type VARCHAR *(PR [#6473](https://github.com/tobymao/sqlglot/pull/6473) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`43a6a5c`](https://github.com/tobymao/sqlglot/commit/43a6a5c601421e15a7f94dd489cb4fbcf9d2c8c3) - **snowflake**: annotation support for CURRENT_ORGANIZATION_NAME. Return type VARCHAR *(PR [#6475](https://github.com/tobymao/sqlglot/pull/6475) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`f1f7c6a`](https://github.com/tobymao/sqlglot/commit/f1f7c6ae6b6aa3f6f2251d0f81ee667440ca53d1) - **snowflake**: annotation support for CURRENT_ORGANIZATION_USER. 
*(PR [#6476](https://github.com/tobymao/sqlglot/pull/6476) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d268203`](https://github.com/tobymao/sqlglot/commit/d268203e1dbae4e3aff863108f6d09a6f8274db5) - **snowflake**: annotation support for CURRENT_ROLE_TYPE *(PR [#6479](https://github.com/tobymao/sqlglot/pull/6479) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fd4431b`](https://github.com/tobymao/sqlglot/commit/fd4431bf9550c03aa761c642a68a21a146fd8548) - **snowflake**: annotate type for VECTOR_L1_DISTANCE, VECTOR_L2_DISTANCE, VECTOR_COSINE_SIMILARITY functions *(PR [#6468](https://github.com/tobymao/sqlglot/pull/6468) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`e6adba7`](https://github.com/tobymao/sqlglot/commit/e6adba76cc2f27633a9d38bfaea3356e71d00a4c) - **BigQuery**: Add support for coercing STRING literals to temporal types *(PR [#6482](https://github.com/tobymao/sqlglot/pull/6482) by [@vchan](https://github.com/vchan))* - [`68a5e61`](https://github.com/tobymao/sqlglot/commit/68a5e615b24e518cb90c9b80cf25355fcabdb468) - **snowflake**: annotate type for REGR_* functions *(PR [#6452](https://github.com/tobymao/sqlglot/pull/6452) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`f7458a4`](https://github.com/tobymao/sqlglot/commit/f7458a40d3b09a2e212f6705ac4a77c99714508e) - **optimizer**: annotate type for snowflake func TO_BOOLEAN *(PR [#6481](https://github.com/tobymao/sqlglot/pull/6481) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`1531a67`](https://github.com/tobymao/sqlglot/commit/1531a67ac7806f3b4582f6cf1ea02342a517de74) - **snowflake**: annotate type for VECTOR_INNER_PRODUCT *(PR [#6486](https://github.com/tobymao/sqlglot/pull/6486) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`f6b2b3b`](https://github.com/tobymao/sqlglot/commit/f6b2b3bc6e1c95340149be65d80ef7e177b28d82) - **snowflake**: support padside argument 
for BIT[OR|AND|XOR] *(PR [#6487](https://github.com/tobymao/sqlglot/pull/6487) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`e891397`](https://github.com/tobymao/sqlglot/commit/e89139714aefd8a6481a90d9753c81910c9f88e9) - **BigQuery**: Add support for the NET.HOST function *(PR [#6480](https://github.com/tobymao/sqlglot/pull/6480) by [@vchan](https://github.com/vchan))* - [`2cc67cd`](https://github.com/tobymao/sqlglot/commit/2cc67cd7386914043a9cb4eb322fb1fa9af15c8b) - **singlestore**: support dcolonqmark *(PR [#6485](https://github.com/tobymao/sqlglot/pull/6485) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`7d485c7`](https://github.com/tobymao/sqlglot/commit/7d485c7cffe7b6d0113cfcfcf0736de0383bd380) - **duckdb**: Add transpilation support for the negative integer args for BITNOT *(PR [#6490](https://github.com/tobymao/sqlglot/pull/6490) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ef130f1`](https://github.com/tobymao/sqlglot/commit/ef130f1b944b4be835d4a6831fec9a333a825a34) - **snowflake**: Annotated type for ARRAY_CONSTRUCT_COMPACT [#6496](https://github.com/tobymao/sqlglot/pull/6496) *(commit by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`36cf0bf`](https://github.com/tobymao/sqlglot/commit/36cf0bf6671f622344afee52d7aafe30f19ecf9a) - **snowflake**: annotation support for CURRENT_ROLE. 
*(PR [#6478](https://github.com/tobymao/sqlglot/pull/6478) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cbba04c`](https://github.com/tobymao/sqlglot/commit/cbba04cb292fe8b3fd38c87d9ccb624cdcb52843) - **databricks**: support comma-separated syntax for OVERLAY function *(PR [#6497](https://github.com/tobymao/sqlglot/pull/6497) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - [`dc8f26a`](https://github.com/tobymao/sqlglot/commit/dc8f26a3a5e023a0e54caa345b129fb1b4fe805f) - **optimizer**: bq annotate type for NULL *(PR [#6491](https://github.com/tobymao/sqlglot/pull/6491) by [@geooo109](https://github.com/geooo109))* - [`c97a81d`](https://github.com/tobymao/sqlglot/commit/c97a81d68a1584fad48475725665a7678fcad9d1) - **optimizer**: annotate TO_HEX(MD5(...)) in BigQuery *(PR [#6500](https://github.com/tobymao/sqlglot/pull/6500) by [@georgesittas](https://github.com/georgesittas))* - [`a5797a1`](https://github.com/tobymao/sqlglot/commit/a5797a1c867c4ade71ae4ddf93232576993cf5bc) - **duckdb**: handle named arguments and non-integer scale input for ROUND *(PR [#6495](https://github.com/tobymao/sqlglot/pull/6495) by [@toriwei](https://github.com/toriwei))* - [`8b5298a`](https://github.com/tobymao/sqlglot/commit/8b5298a6578af80fd9676eb222422862d5468859) - **duckdb**: Transpile BQ's WEEK based `DATE_DIFF` *(PR [#6507](https://github.com/tobymao/sqlglot/pull/6507) by [@VaggelisD](https://github.com/VaggelisD))* - [`2c013a5`](https://github.com/tobymao/sqlglot/commit/2c013a5cc8e37cde8a8f9443e0397191ce82f0f5) - **exasol**: qualify bare stars to facilitate transpilation *(PR [#6431](https://github.com/tobymao/sqlglot/pull/6431) by [@nnamdi16](https://github.com/nnamdi16))* - [`41b776b`](https://github.com/tobymao/sqlglot/commit/41b776bdc6936f18accd9f7308b55acd383bb596) - **postgres,trino,duckdb**: added support for current_catalog *(PR [#6492](https://github.com/tobymao/sqlglot/pull/6492) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`dd19bea`](https://github.com/tobymao/sqlglot/commit/dd19beae95f077cfd8b6e315eca7ff212817b250) - **snowflake**: annotation support for CURRENT_ACCOUNT *(PR [#6512](https://github.com/tobymao/sqlglot/pull/6512) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2e8105e`](https://github.com/tobymao/sqlglot/commit/2e8105eebaec25fc8f94f1e68951198660f404e1) - **snowflake**: Annotate type for VAR_POP, VAR_SAMP, DuckDB consistency fix for VAR_SAMP *(PR [#6488](https://github.com/tobymao/sqlglot/pull/6488) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`cfb02c1`](https://github.com/tobymao/sqlglot/commit/cfb02c1aa676e801b2d13a84467b4904cd834ffe) - **snowflake**: annotation support for CURRENT_ACCOUNT_NAME *(PR [#6513](https://github.com/tobymao/sqlglot/pull/6513) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`1004e31`](https://github.com/tobymao/sqlglot/commit/1004e31cce62cce2e2afb7eab85ed8bdecaede3b) - **snowflake**: annotation support for CURRENT_AVAILABLE_ROLES *(PR [#6514](https://github.com/tobymao/sqlglot/pull/6514) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`ff201fe`](https://github.com/tobymao/sqlglot/commit/ff201febd27937a97674dd091928456dde733254) - **snowflake**: annotation support for CURRENT_CLIENT *(PR [#6515](https://github.com/tobymao/sqlglot/pull/6515) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d777a9c`](https://github.com/tobymao/sqlglot/commit/d777a9c0feef15ac036f7b413112de4d7cc8bea4) - **snowflake**: annotation support for CURRENT_IP_ADDRESS *(PR [#6518](https://github.com/tobymao/sqlglot/pull/6518) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c296061`](https://github.com/tobymao/sqlglot/commit/c2960615a3bd279b7c5f775d5b93ae12aa27a3b8) - **snowflake**: Transpilation of TO_BINARY from snowflake to duckdb *(PR [#6504](https://github.com/tobymao/sqlglot/pull/6504) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7a70164`](https://github.com/tobymao/sqlglot/commit/7a70164d8cf361cf4c0a7d5789bb51676f772959) - **duckdb**: transpile Snowflake's `RANDSTR` function *(PR [#6502](https://github.com/tobymao/sqlglot/pull/6502) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`a26d419`](https://github.com/tobymao/sqlglot/commit/a26d4191e5468e39eafdf7a981e7b890d438b2c9) - **snowflake**: annotation support for CURRENT_DATABASE *(PR [#6516](https://github.com/tobymao/sqlglot/pull/6516) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`0acdf7f`](https://github.com/tobymao/sqlglot/commit/0acdf7fc783f2722536ec24dcf8600957febf7ca) - **snowflake**: annotation support for CURRENT_SCHEMAS *(PR [#6519](https://github.com/tobymao/sqlglot/pull/6519) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`43cce89`](https://github.com/tobymao/sqlglot/commit/43cce895da80d21abc89d40de5d7fddd68871bf0) - **snowflake**: annotation support for CURRENT_SECONDARY_ROLES *(PR [#6520](https://github.com/tobymao/sqlglot/pull/6520) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c21b4b1`](https://github.com/tobymao/sqlglot/commit/c21b4b1134b368ee5144339b59e70ddcc54f3dbc) - **snowflake**: annotation support for CURRENT_SESSION *(PR [#6521](https://github.com/tobymao/sqlglot/pull/6521) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`57a83c0`](https://github.com/tobymao/sqlglot/commit/57a83c018dace690f7bb363c25ee6bde33c3d60f) - **snowflake**: annotation support for CURRENT_STATEMENT *(PR [#6522](https://github.com/tobymao/sqlglot/pull/6522) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4b240e4`](https://github.com/tobymao/sqlglot/commit/4b240e40a8809a6eea2a279370a884f4a7b03dfa) - **snowflake**: annotation support for CURRENT_VERSION *(PR [#6524](https://github.com/tobymao/sqlglot/pull/6524) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`c1a831f`](https://github.com/tobymao/sqlglot/commit/c1a831f5bf662ab8d8e07dc2bb949f2adcbe7d7c) - **snowflake**: annotation support for CURRENT_TRANSACTION *(PR [#6523](https://github.com/tobymao/sqlglot/pull/6523) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`2e162b0`](https://github.com/tobymao/sqlglot/commit/2e162b0d34066e7aa7edac3156739bcd31a634fc) - **snowflake**: annotation support for CURRENT_WAREHOUSE *(PR [#6525](https://github.com/tobymao/sqlglot/pull/6525) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`18e9814`](https://github.com/tobymao/sqlglot/commit/18e98145906eaa5b769af49cf46b58a1d9448aee) - **snowflake**: support DAYOFWEEK_ISO date part *(PR [#6531](https://github.com/tobymao/sqlglot/pull/6531) by [@toriwei](https://github.com/toriwei))* - [`ee5e7b9`](https://github.com/tobymao/sqlglot/commit/ee5e7b931ca745a000dc8a720b56aee7b44186b2) - Automatically trigger integration tests scoped to modified dialects *(PR [#6505](https://github.com/tobymao/sqlglot/pull/6505) by [@erindru](https://github.com/erindru))* - [`e60634f`](https://github.com/tobymao/sqlglot/commit/e60634f0e1c396b54ad357132606286bd21d3e36) - **clickhouse**: Add support for quantilesExactExclusive agg func *(PR [#6535](https://github.com/tobymao/sqlglot/pull/6535) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#6533](https://github.com/tobymao/sqlglot/issues/6533) opened by [@vargasj-ms](https://github.com/vargasj-ms)* - [`41a9e88`](https://github.com/tobymao/sqlglot/commit/41a9e88bb9800205df0b3e10a1976699dc4fe4f9) - **duckdb**: Add support to transpile binary args for bitwise operators *(PR [#6508](https://github.com/tobymao/sqlglot/pull/6508) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - 
[`7021d54`](https://github.com/tobymao/sqlglot/commit/7021d54ecf0ceab3c3606642cbfca8e080cc8613) - **tsql**: CEILING generation *(PR [#6477](https://github.com/tobymao/sqlglot/pull/6477) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6472](https://github.com/tobymao/sqlglot/issues/6472) opened by [@ricky-ho](https://github.com/ricky-ho)* - [`df4c1d3`](https://github.com/tobymao/sqlglot/commit/df4c1d37ff77151a74b5de3d119c7e03f5db85f4) - REGEXP_EXTRACT position arg overflow *(PR [#6458](https://github.com/tobymao/sqlglot/pull/6458) by [@treysp](https://github.com/treysp))* - :arrow_lower_right: *fixes issue [#6442](https://github.com/tobymao/sqlglot/issues/6442) opened by [@erindru](https://github.com/erindru)* - [`5a49c3f`](https://github.com/tobymao/sqlglot/commit/5a49c3f7a7619ad9e711ff2cd9e85b8606969b36) - **optimizer**: support ORDER / LIMIT expressions for BigQuery ARRAY_AGG / STRING_AGG functions *(PR [#6463](https://github.com/tobymao/sqlglot/pull/6463) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1b6076b`](https://github.com/tobymao/sqlglot/commit/1b6076bd5a64b044f52f5366244ba0746aca75e1) - wrap connectives generated due to transpiling LIKE ANY closes [#6493](https://github.com/tobymao/sqlglot/pull/6493) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`36ad534`](https://github.com/tobymao/sqlglot/commit/36ad534b14eabe9ee197017f5087e8e5190f8526) - **exasol**: qualified select list with "LOCAL" *(PR [#6450](https://github.com/tobymao/sqlglot/pull/6450) by [@nnamdi16](https://github.com/nnamdi16))* - [`52aceaa`](https://github.com/tobymao/sqlglot/commit/52aceaaa887dddb35f8ede5c2d9577fdeee35c48) - **optimizer**: annotate `HavingMax` by `this` *(PR [#6499](https://github.com/tobymao/sqlglot/pull/6499) by [@georgesittas](https://github.com/georgesittas))* - [`ce5487e`](https://github.com/tobymao/sqlglot/commit/ce5487ef2ec0a3de8fa79b9febf41236c05c04cc) - sources doesn't 
store columns, clean up this old code *(commit by [@tobymao](https://github.com/tobymao))* - [`3224235`](https://github.com/tobymao/sqlglot/commit/3224235c1b7a80511af11f7dbffe608a747a3df0) - make CTE builder produce AST consistent with parser closes [#6503](https://github.com/tobymao/sqlglot/pull/6503) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`9454a18`](https://github.com/tobymao/sqlglot/commit/9454a18cca41a510e61522f6b785d646980e2100) - uppercase join method, side, kind for consistency fixes [#6510](https://github.com/tobymao/sqlglot/pull/6510) *(PR [#6511](https://github.com/tobymao/sqlglot/pull/6511) by [@georgesittas](https://github.com/georgesittas))* - [`a6ec4b6`](https://github.com/tobymao/sqlglot/commit/a6ec4b688891691b26ab874a3401e370c0b8d574) - reorder join mark check in eliminate_join_marks *(PR [#6528](https://github.com/tobymao/sqlglot/pull/6528) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6527](https://github.com/tobymao/sqlglot/issues/6527) opened by [@snovik75](https://github.com/snovik75)* - [`9d06859`](https://github.com/tobymao/sqlglot/commit/9d0685923209c04747fa6fa2b35ee2e516453abc) - **optimizer**: annotate bigquery ARRAY when arg contains set operations *(PR [#6517](https://github.com/tobymao/sqlglot/pull/6517) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`241073d`](https://github.com/tobymao/sqlglot/commit/241073d886e0b4ad7b2252a8c8c394e717ef700a) - on_qualify type *(commit by [@tobymao](https://github.com/tobymao))* - [`2fd14ed`](https://github.com/tobymao/sqlglot/commit/2fd14ed32b3793444405005fb98342222b4d7956) - **optimizer**: query schema directly when type annotation fails for processing UNNEST source *(PR [#6451](https://github.com/tobymao/sqlglot/pull/6451) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* ### :recycle: Refactors - 
[`2d380e7`](https://github.com/tobymao/sqlglot/commit/2d380e72c9e3b842a8fe57c191f494c8872c00ee) - add test to make sure callback doesn't trigger ctes *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`4a061e2`](https://github.com/tobymao/sqlglot/commit/4a061e26b638c9acb0c8a77d9347914b35082bb3) - **optimizer**: Include BIGDECIMAL in numeric precedence *(PR [#6456](https://github.com/tobymao/sqlglot/pull/6456) by [@vchan](https://github.com/vchan))* - [`f305305`](https://github.com/tobymao/sqlglot/commit/f305305e5cf3ef45afba822542aebeb944c00e0b) - **optimizer**: Annotate types for BigQuery's AVG function *(PR [#6459](https://github.com/tobymao/sqlglot/pull/6459) by [@vchan](https://github.com/vchan))* - [`910349f`](https://github.com/tobymao/sqlglot/commit/910349f3c30af59ce1820e48cae0cbb77539877d) - **optimizer**: Annotate types for BigQuery's SAFE_DIVIDE function *(PR [#6464](https://github.com/tobymao/sqlglot/pull/6464) by [@vchan](https://github.com/vchan))* - [`5e75621`](https://github.com/tobymao/sqlglot/commit/5e75621e90defd50076383485f6a4689a8c551ac) - **optimizer**: annotate type for snowflake func ARRAY_UNIQUE_AGG *(PR [#6465](https://github.com/tobymao/sqlglot/pull/6465) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`4d77500`](https://github.com/tobymao/sqlglot/commit/4d775007d2ceb997ff33721def768493c95f98a5) - **optimizer**: add tests for snowflake CAST function *(PR [#6471](https://github.com/tobymao/sqlglot/pull/6471) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`88dfd26`](https://github.com/tobymao/sqlglot/commit/88dfd26b832d13e517fe7c18d2c086885bf4954d) - **optimizer**: annotate type for snowflake func TO_BINARY *(PR [#6474](https://github.com/tobymao/sqlglot/pull/6474) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`483318b`](https://github.com/tobymao/sqlglot/commit/483318bc25e4ee4fa2731be1a0aea02858872ab5) - clean up TO_BINARY tests *(commit by 
[@geooo109](https://github.com/geooo109))* - [`bff7084`](https://github.com/tobymao/sqlglot/commit/bff70841d0bfbede6ea0fae2e7b37d68735a53d8) - remove duckdb TO_BINARY 2 arg test *(commit by [@geooo109](https://github.com/geooo109))* - [`80591f9`](https://github.com/tobymao/sqlglot/commit/80591f9513dff9160884e4bbbd48d9c26cf8f253) - starrocks TO_BINARY tests *(commit by [@geooo109](https://github.com/geooo109))* - [`01e5a05`](https://github.com/tobymao/sqlglot/commit/01e5a050c76f728ef542f0127209e2cd1c5f5558) - **exasol**: implementing the last day function in exasol sql dialect *(PR [#6483](https://github.com/tobymao/sqlglot/pull/6483) by [@nnamdi16](https://github.com/nnamdi16))* - [`aacc981`](https://github.com/tobymao/sqlglot/commit/aacc98105fb381c17a80ee011f107157279312d7) - **duckdb**: tests for MAX_BY and MIN_BY *(PR [#6489](https://github.com/tobymao/sqlglot/pull/6489) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`06c7ffb`](https://github.com/tobymao/sqlglot/commit/06c7ffbe14985a4da35a97d47322021e79525adf) - cleanup bitwise operator fixes *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v28.1.0] - 2025-12-02 ### :boom: BREAKING CHANGES - due to [`e4d1a4f`](https://github.com/tobymao/sqlglot/commit/e4d1a4fcd6741d679c5444bf023077d2aaa8f980) - map date/timestamp `TRUNC` to `DATE_TRUNC` *(PR [#6328](https://github.com/tobymao/sqlglot/pull/6328) by [@nnamdi16](https://github.com/nnamdi16))*: map date/timestamp `TRUNC` to `DATE_TRUNC` (#6328) - due to [`e1b6558`](https://github.com/tobymao/sqlglot/commit/e1b6558cb1a860bbd695f25b66e52064b57c0a84) - handle all datepart alternatives *(PR [#6324](https://github.com/tobymao/sqlglot/pull/6324) by [@lBilali](https://github.com/lBilali))*: handle all datepart alternatives (#6324) - due to [`06daa47`](https://github.com/tobymao/sqlglot/commit/06daa47dedebac672548e1db230b89f5c9eae84e) - update annotated type of ARRAY_AGG to untyped array *(PR 
[#6347](https://github.com/tobymao/sqlglot/pull/6347) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: update annotated type of ARRAY_AGG to untyped array (#6347) - due to [`7484c06`](https://github.com/tobymao/sqlglot/commit/7484c06be4534cd22dee14da542d5e29ff2c13a2) - Support rounding mode argument for ROUND function *(PR [#6350](https://github.com/tobymao/sqlglot/pull/6350) by [@vchan](https://github.com/vchan))*: Support rounding mode argument for ROUND function (#6350) - due to [`c495a40`](https://github.com/tobymao/sqlglot/commit/c495a40ee4c1a69b14892e8455ae1bd2ceb5ea4f) - annotate type for MINHASH *(PR [#6355](https://github.com/tobymao/sqlglot/pull/6355) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for MINHASH (#6355) - due to [`b1f9a97`](https://github.com/tobymao/sqlglot/commit/b1f9a976be3c0bcd895bef5bcdb95a013eeb28b7) - annotate type for APPROXIMATE_SIMILARITY *(PR [#6360](https://github.com/tobymao/sqlglot/pull/6360) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for APPROXIMATE_SIMILARITY (#6360) - due to [`3aafca7`](https://github.com/tobymao/sqlglot/commit/3aafca74546b932cea93ed830c021f347ae03ded) - annotate type for MINHASH_COMBINE *(PR [#6362](https://github.com/tobymao/sqlglot/pull/6362) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for MINHASH_COMBINE (#6362) - due to [`df13a65`](https://github.com/tobymao/sqlglot/commit/df13a655646bd2ef5d8b4613670bb5fe48845b73) - unnest deep stuff *(PR [#6366](https://github.com/tobymao/sqlglot/pull/6366) by [@tobymao](https://github.com/tobymao))*: unnest deep stuff (#6366) - due to [`d4c2256`](https://github.com/tobymao/sqlglot/commit/d4c2256fb493ed2f16c29694ae5c31517123d419) - at time zone precedence *(PR [#6383](https://github.com/tobymao/sqlglot/pull/6383) by [@geooo109](https://github.com/geooo109))*: at time zone precedence (#6383) - due to 
[`4fb4d08`](https://github.com/tobymao/sqlglot/commit/4fb4d08ef8896bda434d4f89c21c669c6146fd02) - properly support table alias in the `INSERT` DML *(PR [#6374](https://github.com/tobymao/sqlglot/pull/6374) by [@snovik75](https://github.com/snovik75))*: properly support table alias in the `INSERT` DML (#6374) - due to [`bf07abd`](https://github.com/tobymao/sqlglot/commit/bf07abd4ee9eb0f5510cb7d1f232bdcaea88941e) - annotation support for APPROX_TOP_K_COMBINE *(PR [#6378](https://github.com/tobymao/sqlglot/pull/6378) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_TOP_K_COMBINE (#6378) - due to [`50348ac`](https://github.com/tobymao/sqlglot/commit/50348ac31f784aa97bd09d5d6c6613fbd68402ee) - support order by clause for mysql delete statement *(PR [#6381](https://github.com/tobymao/sqlglot/pull/6381) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support order by clause for mysql delete statement (#6381) - due to [`21d3859`](https://github.com/tobymao/sqlglot/commit/21d38590fec6cb55a1a03aeb2621bd9fca677496) - Disable STRING_AGG sep canonicalization *(PR [#6395](https://github.com/tobymao/sqlglot/pull/6395) by [@VaggelisD](https://github.com/VaggelisD))*: Disable STRING_AGG sep canonicalization (#6395) - due to [`95727f6`](https://github.com/tobymao/sqlglot/commit/95727f60d601796b34c850dee9366d79f6e4a24b) - canonicalize table aliases *(PR [#6369](https://github.com/tobymao/sqlglot/pull/6369) by [@georgesittas](https://github.com/georgesittas))*: canonicalize table aliases (#6369) - due to [`c7cb098`](https://github.com/tobymao/sqlglot/commit/c7cb0983a0fa463c43d2c4ee925816e9a1628c79) - Fix underscore separator with scientific notation *(PR [#6401](https://github.com/tobymao/sqlglot/pull/6401) by [@VaggelisD](https://github.com/VaggelisD))*: Fix underscore separator with scientific notation (#6401) - due to 
[`bb4eda1`](https://github.com/tobymao/sqlglot/commit/bb4eda1beb68b92de9ab014a63c67797a07df2fa) - support transpiling SHA1 from BigQuery to DuckDB *(PR [#6404](https://github.com/tobymao/sqlglot/pull/6404) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpiling SHA1 from BigQuery to DuckDB (#6404) - due to [`d038ad7`](https://github.com/tobymao/sqlglot/commit/d038ad7f036a140f3eae4bdde15824437d4e44ee) - support named primary keys for mysql *(PR [#6389](https://github.com/tobymao/sqlglot/pull/6389) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support named primary keys for mysql (#6389) - due to [`05e83b5`](https://github.com/tobymao/sqlglot/commit/05e83b56f1bf9323cfa819a7f1beb542524c1219) - support transpilation of LEAST from BigQuery to DuckDB *(PR [#6415](https://github.com/tobymao/sqlglot/pull/6415) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of LEAST from BigQuery to DuckDB (#6415) - due to [`4f3bb0d`](https://github.com/tobymao/sqlglot/commit/4f3bb0d6714bf89ff72e13e1398d8f01cefafb00) - Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… *(PR [#6414](https://github.com/tobymao/sqlglot/pull/6414) by [@vchan](https://github.com/vchan))*: Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… (#6414) - due to [`8c314a8`](https://github.com/tobymao/sqlglot/commit/8c314a8b457a5c3ed470ac8fcff022fec881c248) - support cte pivot for duckdb *(PR [#6413](https://github.com/tobymao/sqlglot/pull/6413) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: support cte pivot for duckdb (#6413) - due to [`c6b0a63`](https://github.com/tobymao/sqlglot/commit/c6b0a6342a21d79635a26d40001c916d05d47cf7) - change version to be a tuple so that it can be pickled, also simpler *(commit by [@tobymao](https://github.com/tobymao))*: change version to be a tuple so that it can be pickled, also simpler - due to 
[`07d9958`](https://github.com/tobymao/sqlglot/commit/07d99583b4aebdc682bb7604ccdf45bddb89f9c3) - replace direct comparison with dialect properties *(PR [#6398](https://github.com/tobymao/sqlglot/pull/6398) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: replace direct comparison with dialect properties (#6398) - due to [`38472ce`](https://github.com/tobymao/sqlglot/commit/38472ce14bce731ba4c309d515223ae99e2575ac) - transpile bigquery's %x format literal *(PR [#6375](https://github.com/tobymao/sqlglot/pull/6375) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: transpile bigquery's %x format literal (#6375) - due to [`92ee124`](https://github.com/tobymao/sqlglot/commit/92ee1241ea3088d4e63c094404252339c54ad0c1) - postgres qualify GENERATE_SERIES and table projection *(PR [#6373](https://github.com/tobymao/sqlglot/pull/6373) by [@geooo109](https://github.com/geooo109))*: postgres qualify GENERATE_SERIES and table projection (#6373) - due to [`0b9d8ac`](https://github.com/tobymao/sqlglot/commit/0b9d8acbe75457424436e8c0acc047ab66e9fdc0) - Annotate type for snowflake MAX function *(PR [#6422](https://github.com/tobymao/sqlglot/pull/6422) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake MAX function (#6422) - due to [`68e9414`](https://github.com/tobymao/sqlglot/commit/68e9414725a60b2842d870fa222d8466057a94f6) - Annotate type for snowflake MIN function *(PR [#6427](https://github.com/tobymao/sqlglot/pull/6427) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake MIN function (#6427) - due to [`1318de7`](https://github.com/tobymao/sqlglot/commit/1318de77a8aa514ec7eb9f9b8c03228e3f8eb008) - Annotate type for snowflake NORMAL *(PR [#6434](https://github.com/tobymao/sqlglot/pull/6434) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake NORMAL (#6434) - due to 
[`ffbb5c7`](https://github.com/tobymao/sqlglot/commit/ffbb5c7e40aa064ffcd4827e96ea66cfd045118e) - annotate type for HASH_AGG in Snowflake *(PR [#6438](https://github.com/tobymao/sqlglot/pull/6438) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for HASH_AGG in Snowflake (#6438) - due to [`161255f`](https://github.com/tobymao/sqlglot/commit/161255f6c90b9c3ed2074e734f6d074db1d7a6dd) - Add support for `LOCALTIME` function *(PR [#6443](https://github.com/tobymao/sqlglot/pull/6443) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for `LOCALTIME` function (#6443) - due to [`ca329f0`](https://github.com/tobymao/sqlglot/commit/ca329f037a230c315437d830638b514190764c5a) - support transpilation of SHA256 from bigquery to duckdb *(PR [#6421](https://github.com/tobymao/sqlglot/pull/6421) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: support transpilation of SHA256 from bigquery to duckdb (#6421) - due to [`e18ae24`](https://github.com/tobymao/sqlglot/commit/e18ae248423dbbca78a24a60ea0193da2ee7f68c) - Annotate type for snowflake REGR_SLOPE function *(PR [#6425](https://github.com/tobymao/sqlglot/pull/6425) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake REGR_SLOPE function (#6425) - due to [`1d847f0`](https://github.com/tobymao/sqlglot/commit/1d847f0a1f88fce5df340ab646a72c8abbc12a86) - parse & annotate `CHECK_JSON`, `CHECK_XML` *(PR [#6439](https://github.com/tobymao/sqlglot/pull/6439) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: parse & annotate `CHECK_JSON`, `CHECK_XML` (#6439) - due to [`cb3080d`](https://github.com/tobymao/sqlglot/commit/cb3080d4bed18b1bfbbd08380ed60deeefd15530) - annotation support for APPROX_TOP_K_ESTIMATE . Return type ARRAY *(PR [#6445](https://github.com/tobymao/sqlglot/pull/6445) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotation support for APPROX_TOP_K_ESTIMATE . 
Return type ARRAY (#6445) - due to [`313afe5`](https://github.com/tobymao/sqlglot/commit/313afe540aa2cdc4cc179c4852c6ef37362bcb3e) - annotate type for snowflake func ARRAY_UNION_AGG *(PR [#6446](https://github.com/tobymao/sqlglot/pull/6446) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for snowflake func ARRAY_UNION_AGG (#6446) - due to [`cd9f037`](https://github.com/tobymao/sqlglot/commit/cd9f037882eef253e86fdb1d51521e0acd7db3f9) - store pk name if provided *(PR [#6424](https://github.com/tobymao/sqlglot/pull/6424) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: store pk name if provided (#6424) - due to [`65194e4`](https://github.com/tobymao/sqlglot/commit/65194e465489151aa51859a6e3f5672f7d4c5f3b) - Annotate type for snowflake RANDSTR function *(PR [#6436](https://github.com/tobymao/sqlglot/pull/6436) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake RANDSTR function (#6436) - due to [`a56262e`](https://github.com/tobymao/sqlglot/commit/a56262e6b4276baae144855478807c173db77ab9) - Annotate type for snowflake MEDIAN *(PR [#6426](https://github.com/tobymao/sqlglot/pull/6426) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake MEDIAN (#6426) - due to [`2c56567`](https://github.com/tobymao/sqlglot/commit/2c56567755c8a6571d8b7d410c9de943e54df58b) - Annotate type for snowflake SEARCH_IP *(PR [#6440](https://github.com/tobymao/sqlglot/pull/6440) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Annotate type for snowflake SEARCH_IP (#6440) - due to [`ac86568`](https://github.com/tobymao/sqlglot/commit/ac86568a939f692b99813da100297b61fb54e044) - Added decfloat type *(PR [#6444](https://github.com/tobymao/sqlglot/pull/6444) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))*: Added decfloat type (#6444) - due to 
[`b321ca6`](https://github.com/tobymao/sqlglot/commit/b321ca6191fefc88da1a6de83a465886b5754b7a) - bump sqlglotrs to 0.8.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.8.0 ### :sparkles: New Features - [`ca81217`](https://github.com/tobymao/sqlglot/commit/ca812171ab800e3faa73ea1874dd6814c8d6f701) - **duckdb**: Transpile INITCAP with custom delimiters *(PR [#6302](https://github.com/tobymao/sqlglot/pull/6302) by [@treysp](https://github.com/treysp))* - [`7484c06`](https://github.com/tobymao/sqlglot/commit/7484c06be4534cd22dee14da542d5e29ff2c13a2) - **DuckDB**: Support rounding mode argument for ROUND function *(PR [#6350](https://github.com/tobymao/sqlglot/pull/6350) by [@vchan](https://github.com/vchan))* - [`79e314d`](https://github.com/tobymao/sqlglot/commit/79e314df76161319ba8495b95f54603cfef0c08a) - **duckdb**: handle casting BLOB input for TRIM() *(PR [#6353](https://github.com/tobymao/sqlglot/pull/6353) by [@toriwei](https://github.com/toriwei))* - [`c495a40`](https://github.com/tobymao/sqlglot/commit/c495a40ee4c1a69b14892e8455ae1bd2ceb5ea4f) - **optimizer**: annotate type for MINHASH *(PR [#6355](https://github.com/tobymao/sqlglot/pull/6355) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`f16f8a0`](https://github.com/tobymao/sqlglot/commit/f16f8a08072556fd617b5125300262d9bb8c1e48) - improve validate qualify column message closes [#6348](https://github.com/tobymao/sqlglot/pull/6348) *(PR [#6356](https://github.com/tobymao/sqlglot/pull/6356) by [@tobymao](https://github.com/tobymao))* - [`17abe23`](https://github.com/tobymao/sqlglot/commit/17abe231bc4d59912952f266ad4df86ece22c8d2) - make simplify more efficient in number of iterations *(PR [#6351](https://github.com/tobymao/sqlglot/pull/6351) by [@tobymao](https://github.com/tobymao))* - [`b1f9a97`](https://github.com/tobymao/sqlglot/commit/b1f9a976be3c0bcd895bef5bcdb95a013eeb28b7) - **optimizer**: annotate type for APPROXIMATE_SIMILARITY *(PR 
[#6360](https://github.com/tobymao/sqlglot/pull/6360) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`3aafca7`](https://github.com/tobymao/sqlglot/commit/3aafca74546b932cea93ed830c021f347ae03ded) - **optimizer**: annotate type for MINHASH_COMBINE *(PR [#6362](https://github.com/tobymao/sqlglot/pull/6362) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`09a4bd8`](https://github.com/tobymao/sqlglot/commit/09a4bd8870a075e641c6e3e4cee74d73a39e760a) - Trigger integration tests *(PR [#6339](https://github.com/tobymao/sqlglot/pull/6339) by [@erindru](https://github.com/erindru))* - [`7769129`](https://github.com/tobymao/sqlglot/commit/7769129eba7ae5f3594e0061bdb1079fedc5aafd) - bignum and time_ns to duckdb closes [#6379](https://github.com/tobymao/sqlglot/pull/6379) *(commit by [@tobymao](https://github.com/tobymao))* - [`90a3fa9`](https://github.com/tobymao/sqlglot/commit/90a3fa9f6ddf0aa32b41118c59d4facd9fdb3398) - mark IgnoreNulls and RespectNulls as unsupported on postgres and mysql *(PR [#6377](https://github.com/tobymao/sqlglot/pull/6377) by [@NickCrews](https://github.com/NickCrews))* - :arrow_lower_right: *addresses issue [#6376](https://github.com/tobymao/sqlglot/issues/6376) opened by [@NickCrews](https://github.com/NickCrews)* - [`5bb1170`](https://github.com/tobymao/sqlglot/commit/5bb117082caeee719442d783ce6742d027b1492e) - transpile bigquery `greatest` null handling to duckdb *(PR [#6361](https://github.com/tobymao/sqlglot/pull/6361) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`bf07abd`](https://github.com/tobymao/sqlglot/commit/bf07abd4ee9eb0f5510cb7d1f232bdcaea88941e) - **snowflake**: annotation support for APPROX_TOP_K_COMBINE *(PR [#6378](https://github.com/tobymao/sqlglot/pull/6378) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`01890eb`](https://github.com/tobymao/sqlglot/commit/01890eb16d6624de4f26b7d8eadf850df6f2a042) - **trino**: support refresh 
materialized view statement closes [#6387](https://github.com/tobymao/sqlglot/pull/6387) *(PR [#6388](https://github.com/tobymao/sqlglot/pull/6388) by [@georgesittas](https://github.com/georgesittas))* - [`e4ea6cc`](https://github.com/tobymao/sqlglot/commit/e4ea6ccf08c0ff4063424bf538bc3b22f4b4cfaf) - transpile BQ APPROX_QUANTILES to DuckDB *(PR [#6349](https://github.com/tobymao/sqlglot/pull/6349) by [@treysp](https://github.com/treysp))* - [`95727f6`](https://github.com/tobymao/sqlglot/commit/95727f60d601796b34c850dee9366d79f6e4a24b) - **optimizer**: canonicalize table aliases *(PR [#6369](https://github.com/tobymao/sqlglot/pull/6369) by [@georgesittas](https://github.com/georgesittas))* - [`3b6855b`](https://github.com/tobymao/sqlglot/commit/3b6855b9787111f27225108241fbe4f389443e29) - **mysql**: support ZEROFILL column attribute *(PR [#6400](https://github.com/tobymao/sqlglot/pull/6400) by [@nian0114](https://github.com/nian0114))* - :arrow_lower_right: *addresses issue [#6399](https://github.com/tobymao/sqlglot/issues/6399) opened by [@nian0114](https://github.com/nian0114)* - [`bb4eda1`](https://github.com/tobymao/sqlglot/commit/bb4eda1beb68b92de9ab014a63c67797a07df2fa) - **duckdb**: support transpiling SHA1 from BigQuery to DuckDB *(PR [#6404](https://github.com/tobymao/sqlglot/pull/6404) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`05e83b5`](https://github.com/tobymao/sqlglot/commit/05e83b56f1bf9323cfa819a7f1beb542524c1219) - **duckdb**: support transpilation of LEAST from BigQuery to DuckDB *(PR [#6415](https://github.com/tobymao/sqlglot/pull/6415) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`38472ce`](https://github.com/tobymao/sqlglot/commit/38472ce14bce731ba4c309d515223ae99e2575ac) - **duckdb**: transpile bigquery's %x format literal *(PR [#6375](https://github.com/tobymao/sqlglot/pull/6375) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - 
[`a6e1581`](https://github.com/tobymao/sqlglot/commit/a6e15811cf5643bcc18e1e227fea20922b05c54a) - **DuckDB**: Cast BIGNUMERIC and BIGDECIMAL types to DECIMAL(38, 5) *(PR [#6419](https://github.com/tobymao/sqlglot/pull/6419) by [@vchan](https://github.com/vchan))* - [`0b9d8ac`](https://github.com/tobymao/sqlglot/commit/0b9d8acbe75457424436e8c0acc047ab66e9fdc0) - **snowflake**: Annotate type for snowflake MAX function *(PR [#6422](https://github.com/tobymao/sqlglot/pull/6422) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`68e9414`](https://github.com/tobymao/sqlglot/commit/68e9414725a60b2842d870fa222d8466057a94f6) - **snowflake**: Annotate type for snowflake MIN function *(PR [#6427](https://github.com/tobymao/sqlglot/pull/6427) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`1318de7`](https://github.com/tobymao/sqlglot/commit/1318de77a8aa514ec7eb9f9b8c03228e3f8eb008) - **snowflake**: Annotate type for snowflake NORMAL *(PR [#6434](https://github.com/tobymao/sqlglot/pull/6434) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ffbb5c7`](https://github.com/tobymao/sqlglot/commit/ffbb5c7e40aa064ffcd4827e96ea66cfd045118e) - **snowflake**: annotate type for HASH_AGG in Snowflake *(PR [#6438](https://github.com/tobymao/sqlglot/pull/6438) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`161255f`](https://github.com/tobymao/sqlglot/commit/161255f6c90b9c3ed2074e734f6d074db1d7a6dd) - Add support for `LOCALTIME` function *(PR [#6443](https://github.com/tobymao/sqlglot/pull/6443) by [@VaggelisD](https://github.com/VaggelisD))* - [`ca329f0`](https://github.com/tobymao/sqlglot/commit/ca329f037a230c315437d830638b514190764c5a) - **duckdb**: support transpilation of SHA256 from bigquery to duckdb *(PR [#6421](https://github.com/tobymao/sqlglot/pull/6421) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - 
[`e18ae24`](https://github.com/tobymao/sqlglot/commit/e18ae248423dbbca78a24a60ea0193da2ee7f68c) - **snowflake**: Annotate type for snowflake REGR_SLOPE function *(PR [#6425](https://github.com/tobymao/sqlglot/pull/6425) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`1d847f0`](https://github.com/tobymao/sqlglot/commit/1d847f0a1f88fce5df340ab646a72c8abbc12a86) - **snowflake**: parse & annotate `CHECK_JSON`, `CHECK_XML` *(PR [#6439](https://github.com/tobymao/sqlglot/pull/6439) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`6843812`](https://github.com/tobymao/sqlglot/commit/68438129ceeea70f801e0ae728c51c19291fc7d8) - add correlation id to remote workflow trigger *(PR [#6441](https://github.com/tobymao/sqlglot/pull/6441) by [@erindru](https://github.com/erindru))* - [`cb3080d`](https://github.com/tobymao/sqlglot/commit/cb3080d4bed18b1bfbbd08380ed60deeefd15530) - **snowflake**: annotation support for APPROX_TOP_K_ESTIMATE. Return type ARRAY *(PR [#6445](https://github.com/tobymao/sqlglot/pull/6445) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cd9f037`](https://github.com/tobymao/sqlglot/commit/cd9f037882eef253e86fdb1d51521e0acd7db3f9) - **optimizer**: store pk name if provided *(PR [#6424](https://github.com/tobymao/sqlglot/pull/6424) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`65194e4`](https://github.com/tobymao/sqlglot/commit/65194e465489151aa51859a6e3f5672f7d4c5f3b) - **snowflake**: Annotate type for snowflake RANDSTR function *(PR [#6436](https://github.com/tobymao/sqlglot/pull/6436) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`351d783`](https://github.com/tobymao/sqlglot/commit/351d7834915e02a9f4949f9925437e2731f3a8b4) - add support for LOCALTIMESTAMP *(PR [#6448](https://github.com/tobymao/sqlglot/pull/6448) by [@AbhishekASLK](https://github.com/AbhishekASLK))* - 
[`a56262e`](https://github.com/tobymao/sqlglot/commit/a56262e6b4276baae144855478807c173db77ab9) - **snowflake**: Annotate type for snowflake MEDIAN *(PR [#6426](https://github.com/tobymao/sqlglot/pull/6426) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`2c56567`](https://github.com/tobymao/sqlglot/commit/2c56567755c8a6571d8b7d410c9de943e54df58b) - **snowflake**: Annotate type for snowflake SEARCH_IP *(PR [#6440](https://github.com/tobymao/sqlglot/pull/6440) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* - [`ac86568`](https://github.com/tobymao/sqlglot/commit/ac86568a939f692b99813da100297b61fb54e044) - **snowflake**: Added decfloat type *(PR [#6444](https://github.com/tobymao/sqlglot/pull/6444) by [@fivetran-kwoodbeck](https://github.com/fivetran-kwoodbeck))* ### :bug: Bug Fixes - [`0f79f2a`](https://github.com/tobymao/sqlglot/commit/0f79f2a55c4ba14d4a5fcfd01a0a727271992b8c) - **snowflake**: MAX_BY and MIN_BY with count should return plain `ARRAY` *(PR [#6343](https://github.com/tobymao/sqlglot/pull/6343) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`e1b6558`](https://github.com/tobymao/sqlglot/commit/e1b6558cb1a860bbd695f25b66e52064b57c0a84) - **tsql**: handle all datepart alternatives *(PR [#6324](https://github.com/tobymao/sqlglot/pull/6324) by [@lBilali](https://github.com/lBilali))* - [`06daa47`](https://github.com/tobymao/sqlglot/commit/06daa47dedebac672548e1db230b89f5c9eae84e) - **optimizer**: update annotated type of ARRAY_AGG to untyped array *(PR [#6347](https://github.com/tobymao/sqlglot/pull/6347) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`826db4d`](https://github.com/tobymao/sqlglot/commit/826db4d3c413941e3b0b31e1f907fabd017bd461) - **redshift**: properly parse default IAM_ROLE and AVRO/JSON formats in COPY *(PR [#6346](https://github.com/tobymao/sqlglot/pull/6346) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue 
[#6345](https://github.com/tobymao/sqlglot/issues/6345) opened by [@zachary-povey](https://github.com/zachary-povey)* - [`c367bac`](https://github.com/tobymao/sqlglot/commit/c367bac878a3c17773009b54b9836e7b9a5b84fe) - **duckdb**: Support update without set in DuckDB merge when matched *(PR [#6357](https://github.com/tobymao/sqlglot/pull/6357) by [@themisvaltinos](https://github.com/themisvaltinos))* - [`df13a65`](https://github.com/tobymao/sqlglot/commit/df13a655646bd2ef5d8b4613670bb5fe48845b73) - unnest deep stuff *(PR [#6366](https://github.com/tobymao/sqlglot/pull/6366) by [@tobymao](https://github.com/tobymao))* - [`20e33fd`](https://github.com/tobymao/sqlglot/commit/20e33fd0d1bc1899727d023411e604f1ea9347b8) - **duckdb**: regexp_extract_all closes [#6380](https://github.com/tobymao/sqlglot/pull/6380) *(commit by [@tobymao](https://github.com/tobymao))* - [`d4c2256`](https://github.com/tobymao/sqlglot/commit/d4c2256fb493ed2f16c29694ae5c31517123d419) - **parser**: at time zone precedence *(PR [#6383](https://github.com/tobymao/sqlglot/pull/6383) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6359](https://github.com/tobymao/sqlglot/issues/6359) opened by [@parth-wisdom](https://github.com/parth-wisdom)* - [`4fb4d08`](https://github.com/tobymao/sqlglot/commit/4fb4d08ef8896bda434d4f89c21c669c6146fd02) - **oracle**: properly support table alias in the `INSERT` DML *(PR [#6374](https://github.com/tobymao/sqlglot/pull/6374) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6371](https://github.com/tobymao/sqlglot/issues/6371) opened by [@snovik75](https://github.com/snovik75)* - [`2169f5b`](https://github.com/tobymao/sqlglot/commit/2169f5b8f30b6c8be1635bb5648a1abf636e49a6) - **parser**: support SET with := *(PR [#6385](https://github.com/tobymao/sqlglot/pull/6385) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6384](https://github.com/tobymao/sqlglot/issues/6384) 
opened by [@AndyVW77](https://github.com/AndyVW77)* - [`50348ac`](https://github.com/tobymao/sqlglot/commit/50348ac31f784aa97bd09d5d6c6613fbd68402ee) - **mysql**: support order by clause for mysql delete statement *(PR [#6381](https://github.com/tobymao/sqlglot/pull/6381) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6372](https://github.com/tobymao/sqlglot/issues/6372) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`21d3859`](https://github.com/tobymao/sqlglot/commit/21d38590fec6cb55a1a03aeb2621bd9fca677496) - **bigquery**: Disable STRING_AGG sep canonicalization *(PR [#6395](https://github.com/tobymao/sqlglot/pull/6395) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6392](https://github.com/tobymao/sqlglot/issues/6392) opened by [@erindru](https://github.com/erindru)* - [`67f499d`](https://github.com/tobymao/sqlglot/commit/67f499dd497efdf4f3fc49dd75e49a77e036ee63) - **duckdb**: Make exp.DateFromParts more lenient *(PR [#6397](https://github.com/tobymao/sqlglot/pull/6397) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6394](https://github.com/tobymao/sqlglot/issues/6394) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`39f8c37`](https://github.com/tobymao/sqlglot/commit/39f8c37aca755d97e1e41f232042d1c649e58908) - **parser**: support FROM-syntax with joins *(PR [#6402](https://github.com/tobymao/sqlglot/pull/6402) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6396](https://github.com/tobymao/sqlglot/issues/6396) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`9ddae4d`](https://github.com/tobymao/sqlglot/commit/9ddae4d56d1e3a15fc3b4b76ce3b3040683c220f) - **duckdb**: support IN with no paren *(PR [#6409](https://github.com/tobymao/sqlglot/pull/6409) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue 
[#6407](https://github.com/tobymao/sqlglot/issues/6407) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`c7cb098`](https://github.com/tobymao/sqlglot/commit/c7cb0983a0fa463c43d2c4ee925816e9a1628c79) - **tokenizer**: Fix underscore separator with scientific notation *(PR [#6401](https://github.com/tobymao/sqlglot/pull/6401) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6393](https://github.com/tobymao/sqlglot/issues/6393) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`f5635d2`](https://github.com/tobymao/sqlglot/commit/f5635d2cc2a5612d6403bbf508b545f2a4e8f773) - **duckdb**: splice with col named after type closes [#6411](https://github.com/tobymao/sqlglot/pull/6411) *(commit by [@tobymao](https://github.com/tobymao))* - [`097d865`](https://github.com/tobymao/sqlglot/commit/097d865554d9ba2e226962fa71778ae0a6c596cb) - **duckdb**: pivot using cast closes [#6410](https://github.com/tobymao/sqlglot/pull/6410) *(commit by [@tobymao](https://github.com/tobymao))* - [`d038ad7`](https://github.com/tobymao/sqlglot/commit/d038ad7f036a140f3eae4bdde15824437d4e44ee) - **mysql**: support named primary keys for mysql *(PR [#6389](https://github.com/tobymao/sqlglot/pull/6389) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6382](https://github.com/tobymao/sqlglot/issues/6382) opened by [@AndyVW77](https://github.com/AndyVW77)* - [`4f3bb0d`](https://github.com/tobymao/sqlglot/commit/4f3bb0d6714bf89ff72e13e1398d8f01cefafb00) - **DuckDB**: Correct transpilation of BigQuery's JSON_EXTRACT_SCALAR… *(PR [#6414](https://github.com/tobymao/sqlglot/pull/6414) by [@vchan](https://github.com/vchan))* - [`e2f306f`](https://github.com/tobymao/sqlglot/commit/e2f306f1893a3f565cbbf7857ffd9795850aba7b) - interval column ops closes [#6416](https://github.com/tobymao/sqlglot/pull/6416) *(commit by [@tobymao](https://github.com/tobymao))* - 
[`8c314a8`](https://github.com/tobymao/sqlglot/commit/8c314a8b457a5c3ed470ac8fcff022fec881c248) - **duckdb**: support cte pivot for duckdb *(PR [#6413](https://github.com/tobymao/sqlglot/pull/6413) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - :arrow_lower_right: *fixes issue [#6405](https://github.com/tobymao/sqlglot/issues/6405) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`92ee124`](https://github.com/tobymao/sqlglot/commit/92ee1241ea3088d4e63c094404252339c54ad0c1) - **optimizer**: postgres qualify GENERATE_SERIES and table projection *(PR [#6373](https://github.com/tobymao/sqlglot/pull/6373) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6358](https://github.com/tobymao/sqlglot/issues/6358) opened by [@metahexane](https://github.com/metahexane)* ### :recycle: Refactors - [`e4d1a4f`](https://github.com/tobymao/sqlglot/commit/e4d1a4fcd6741d679c5444bf023077d2aaa8f980) - **exasol**: map date/timestamp `TRUNC` to `DATE_TRUNC` *(PR [#6328](https://github.com/tobymao/sqlglot/pull/6328) by [@nnamdi16](https://github.com/nnamdi16))* - [`c6b0a63`](https://github.com/tobymao/sqlglot/commit/c6b0a6342a21d79635a26d40001c916d05d47cf7) - change version to be a tuple so that it can be pickled, also simpler *(commit by [@tobymao](https://github.com/tobymao))* - [`625654a`](https://github.com/tobymao/sqlglot/commit/625654a9623cc5407bfde922c29f32a8ee905a3b) - move resolver to own file *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`487d218`](https://github.com/tobymao/sqlglot/commit/487d218a6fcad4e28c65c6df55435ba218826186) - iterative annotate types *(PR [#6342](https://github.com/tobymao/sqlglot/pull/6342) by [@geooo109](https://github.com/geooo109))* - [`8201062`](https://github.com/tobymao/sqlglot/commit/8201062ac41b85e5a89aa8e1c5973852f105c66e) - clean up derived table traversal in table qualification *(PR 
[#6363](https://github.com/tobymao/sqlglot/pull/6363) by [@georgesittas](https://github.com/georgesittas))* - [`6b7084d`](https://github.com/tobymao/sqlglot/commit/6b7084d0c9f4735432afc12509c77c286cc50513) - **optimizer**: refactor costly scope walking loop in qualify tables *(PR [#6364](https://github.com/tobymao/sqlglot/pull/6364) by [@georgesittas](https://github.com/georgesittas))* - [`0319241`](https://github.com/tobymao/sqlglot/commit/0319241162bbe6d278a626100eac73999b250968) - **mysql,postgres**: tests for unsupported IGNORE/RESPECT NULLS *(PR [#6386](https://github.com/tobymao/sqlglot/pull/6386) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#6376](https://github.com/tobymao/sqlglot/issues/6376) opened by [@NickCrews](https://github.com/NickCrews)* - [`11354cc`](https://github.com/tobymao/sqlglot/commit/11354cc85d116cd24c28114a437111965ba828a9) - Make integration test workflow more robust *(PR [#6403](https://github.com/tobymao/sqlglot/pull/6403) by [@erindru](https://github.com/erindru))* - [`f758cea`](https://github.com/tobymao/sqlglot/commit/f758cea0e9fca5850895a730c554c17b488d29ca) - **exasol**: transformed rank function, ignoring parameters *(PR [#6408](https://github.com/tobymao/sqlglot/pull/6408) by [@nnamdi16](https://github.com/nnamdi16))* - [`07d9958`](https://github.com/tobymao/sqlglot/commit/07d99583b4aebdc682bb7604ccdf45bddb89f9c3) - **optimizer**: replace direct comparison with dialect properties *(PR [#6398](https://github.com/tobymao/sqlglot/pull/6398) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`137549e`](https://github.com/tobymao/sqlglot/commit/137549e5e803416d46e13e9a8123cef9b53d349a) - **exasol**: transform substring_index using substr and instr *(PR [#6406](https://github.com/tobymao/sqlglot/pull/6406) by [@nnamdi16](https://github.com/nnamdi16))* - [`78f1824`](https://github.com/tobymao/sqlglot/commit/78f1824c790f523845cbda488ecf4c43a92ac0f0) - **exasol**: transform 
substring_index using substr and instr *(PR [#6406](https://github.com/tobymao/sqlglot/pull/6406) by [@nnamdi16](https://github.com/nnamdi16))* - [`39cc555`](https://github.com/tobymao/sqlglot/commit/39cc55586ed76a4a583e6db22a9ee51e09bff92e) - **snowflake**: annotate type for COUNT *(PR [#6437](https://github.com/tobymao/sqlglot/pull/6437) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`61f39ba`](https://github.com/tobymao/sqlglot/commit/61f39bab9a0668c338e8c1b5e0fa953f22c0a886) - **optimizer**: improve error message for ambiguous columns *(PR [#6423](https://github.com/tobymao/sqlglot/pull/6423) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`313afe5`](https://github.com/tobymao/sqlglot/commit/313afe540aa2cdc4cc179c4852c6ef37362bcb3e) - **optimizer**: annotate type for snowflake func ARRAY_UNION_AGG *(PR [#6446](https://github.com/tobymao/sqlglot/pull/6446) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`b321ca6`](https://github.com/tobymao/sqlglot/commit/b321ca6191fefc88da1a6de83a465886b5754b7a) - bump sqlglotrs to 0.8.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v28.0.0] - 2025-11-17 ### :boom: BREAKING CHANGES - due to [`39d8e19`](https://github.com/tobymao/sqlglot/commit/39d8e19419c2adbb80465be414d1cc3bbc6d007b) - include VARIABLE kind in SET transpilation to DuckDB *(PR [#6201](https://github.com/tobymao/sqlglot/pull/6201) by [@toriwei](https://github.com/toriwei))*: include VARIABLE kind in SET transpilation to DuckDB (#6201) - due to [`e7ddad1`](https://github.com/tobymao/sqlglot/commit/e7ddad10b5edf9b801d2151e3e5fca448754df0d) - ensure `NULL` coerces into any type *(PR [#6211](https://github.com/tobymao/sqlglot/pull/6211) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: ensure `NULL` coerces into any type (#6211) - due to [`0037266`](https://github.com/tobymao/sqlglot/commit/00372664bf6acf2b0fff9ad4b206b597ef5378f7) - annotate types for 
GETBIT *(PR [#6219](https://github.com/tobymao/sqlglot/pull/6219) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for GETBIT (#6219) - due to [`a5458ce`](https://github.com/tobymao/sqlglot/commit/a5458ceca3bc239fb611791e38020632dd0824c8) - add type annotation for DECODE function support *(PR [#6199](https://github.com/tobymao/sqlglot/pull/6199) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for DECODE function support (#6199) - due to [`417f1e8`](https://github.com/tobymao/sqlglot/commit/417f1e8ee50fb8f4377fad261660ffbd7444a429) - annotate types for BITNOT *(PR [#6234](https://github.com/tobymao/sqlglot/pull/6234) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITNOT (#6234) - due to [`fe8ab40`](https://github.com/tobymao/sqlglot/commit/fe8ab40e8e0559201e0b1896a6f1a8fb6b5b932d) - 1st-class parsing support for BITAND, BIT_AND, BIT_NOT *(PR [#6243](https://github.com/tobymao/sqlglot/pull/6243) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: 1st-class parsing support for BITAND, BIT_AND, BIT_NOT (#6243) - due to [`5ae3c47`](https://github.com/tobymao/sqlglot/commit/5ae3c47b1c6993b87341472c08714f4a0f738168) - add type annotation for GROUPING() function *(PR [#6244](https://github.com/tobymao/sqlglot/pull/6244) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for GROUPING() function (#6244) - due to [`4133265`](https://github.com/tobymao/sqlglot/commit/413326514507ef06537dcc3d4b80a3fcbcd26f66) - parse `has` function into an `ArrayContains` expression *(PR [#6245](https://github.com/tobymao/sqlglot/pull/6245) by [@joeyutong](https://github.com/joeyutong))*: parse `has` function into an `ArrayContains` expression (#6245) - due to [`cdd45b9`](https://github.com/tobymao/sqlglot/commit/cdd45b949fd1eefb147053424279b56b8effcbcf) - annotate types for GROUPING_ID function. 
*(PR [#6249](https://github.com/tobymao/sqlglot/pull/6249) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate types for GROUPING_ID function. (#6249) - due to [`080ff3b`](https://github.com/tobymao/sqlglot/commit/080ff3bd93b36291d5bb0092d722f8307f0ae082) - annotate types for BITAND_AGG *(PR [#6248](https://github.com/tobymao/sqlglot/pull/6248) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITAND_AGG (#6248) - due to [`87a818a`](https://github.com/tobymao/sqlglot/commit/87a818a899f61a675c22c697f468b3f6f7e2787f) - annotate types for BITOR_AGG *(PR [#6251](https://github.com/tobymao/sqlglot/pull/6251) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITOR_AGG (#6251) - due to [`4c4189b`](https://github.com/tobymao/sqlglot/commit/4c4189b4083d272a6e678d83b5c567a2e9c0d672) - Transpile CONCAT function to double pipe operators when source … *(PR [#6241](https://github.com/tobymao/sqlglot/pull/6241) by [@vchan](https://github.com/vchan))*: Transpile CONCAT function to double pipe operators when source … (#6241) - due to [`a1b884d`](https://github.com/tobymao/sqlglot/commit/a1b884dc9ddfd2185de48cc9451a39f152879d39) - annotate types for BITXOR_AGG *(PR [#6253](https://github.com/tobymao/sqlglot/pull/6253) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BITXOR_AGG (#6253) - due to [`fc78d20`](https://github.com/tobymao/sqlglot/commit/fc78d2016d8f7d20c094df791f746de323cd3639) - Unwrap subqueries without modifiers *(PR [#6247](https://github.com/tobymao/sqlglot/pull/6247) by [@VaggelisD](https://github.com/VaggelisD))*: Unwrap subqueries without modifiers (#6247) - due to [`ad2ad23`](https://github.com/tobymao/sqlglot/commit/ad2ad234b5a508040dce4f3920439be052742573) - add missing return type mapping for MAX_BY and MAX_BY function *(PR [#6250](https://github.com/tobymao/sqlglot/pull/6250) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add missing return type mapping for MAX_BY and MAX_BY function (#6250) - due to [`39c1d81`](https://github.com/tobymao/sqlglot/commit/39c1d81174f2390b6b0c9dd14c0e550ad452a1df) - annotate types for BOOLXOR_AGG *(PR [#6261](https://github.com/tobymao/sqlglot/pull/6261) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BOOLXOR_AGG (#6261) - due to [`71590d2`](https://github.com/tobymao/sqlglot/commit/71590d22cdb05594e2173a1500f763dc1a32a81d) - add type annotation for SKEW function. *(PR [#6262](https://github.com/tobymao/sqlglot/pull/6262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for SKEW function. (#6262) - due to [`5fd366d`](https://github.com/tobymao/sqlglot/commit/5fd366d9e6f7b3f1eb7a9cf41975cf13ce890ffe) - annotate types for OBJECT_AGG *(PR [#6265](https://github.com/tobymao/sqlglot/pull/6265) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for OBJECT_AGG (#6265) - due to [`00abc39`](https://github.com/tobymao/sqlglot/commit/00abc393c9042e839457c5a6582e95cdb74356f3) - handle casting for bytestrings *(PR [#6252](https://github.com/tobymao/sqlglot/pull/6252) by [@toriwei](https://github.com/toriwei))*: handle casting for bytestrings (#6252) - due to [`3dae0fb`](https://github.com/tobymao/sqlglot/commit/3dae0fbb528762e5d5fd446350d42e9c841e2959) - Support position and occurrence args for REGEXP_EXTRACT *(PR [#6266](https://github.com/tobymao/sqlglot/pull/6266) by [@vchan](https://github.com/vchan))*: Support position and occurrence args for REGEXP_EXTRACT (#6266) - due to [`ddea61d`](https://github.com/tobymao/sqlglot/commit/ddea61d83f6699c97cc7b25aabe01a138138bdb1) - simplify connector complements only for non-null operands *(PR [#6214](https://github.com/tobymao/sqlglot/pull/6214) by [@geooo109](https://github.com/geooo109))*: simplify connector complements only for non-null 
operands (#6214) - due to [`771732d`](https://github.com/tobymao/sqlglot/commit/771732d81459cc576f11eccc49794f33e62d14af) - annotate types for REGR_AVGY *(PR [#6271](https://github.com/tobymao/sqlglot/pull/6271) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_AVGY (#6271) - due to [`8470be0`](https://github.com/tobymao/sqlglot/commit/8470be00731a4d79518a533a5f7ba884fa2f047e) - add type annotation for BITMAP_COUNT function. *(PR [#6274](https://github.com/tobymao/sqlglot/pull/6274) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for BITMAP_COUNT function. (#6274) - due to [`98f25f9`](https://github.com/tobymao/sqlglot/commit/98f25f92cc1175ac7b2118a5a342db82adade13a) - support splitBy function *(PR [#6278](https://github.com/tobymao/sqlglot/pull/6278) by [@joeyutong](https://github.com/joeyutong))*: support splitBy function (#6278) - due to [`fabbf05`](https://github.com/tobymao/sqlglot/commit/fabbf057aba88f30205767d8c339727de45991c8) - Add support for shorthand struct array literals in duckDB. *(PR [#6233](https://github.com/tobymao/sqlglot/pull/6233) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add support for shorthand struct array literals in duckDB. 
(#6233) - due to [`c02b64c`](https://github.com/tobymao/sqlglot/commit/c02b64c3524dd074c2108baaca668ab2607ac843) - Handle pseudocolumns differently than columns *(PR [#6273](https://github.com/tobymao/sqlglot/pull/6273) by [@VaggelisD](https://github.com/VaggelisD))*: Handle pseudocolumns differently than columns (#6273) - due to [`05c5181`](https://github.com/tobymao/sqlglot/commit/05c5181b36a7ada32b96fc91bdfbf73b38a1a408) - refactor `Connector` simplification to factor in types *(PR [#6152](https://github.com/tobymao/sqlglot/pull/6152) by [@geooo109](https://github.com/geooo109))*: refactor `Connector` simplification to factor in types (#6152) - due to [`9c1a222`](https://github.com/tobymao/sqlglot/commit/9c1a2221b0327ba6848542c7b906e92f25a05bea) - add type annotation for BITMAP_CONSTRUCT_AGG function. *(PR [#6285](https://github.com/tobymao/sqlglot/pull/6285) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add type annotation for BITMAP_CONSTRUCT_AGG function. (#6285) - due to [`cb0bcff`](https://github.com/tobymao/sqlglot/commit/cb0bcff310e9acdf806fc98e99cb9938b747c771) - cast UUID() output to varchar when source dialect UUID() returns string *(PR [#6284](https://github.com/tobymao/sqlglot/pull/6284) by [@toriwei](https://github.com/toriwei))*: cast UUID() output to varchar when source dialect UUID() returns string (#6284) - due to [`358105d`](https://github.com/tobymao/sqlglot/commit/358105d1296c7425e071ccf3189a31a02c00c923) - type annotation for BITMAP_BIT_POSITION function *(PR [#6301](https://github.com/tobymao/sqlglot/pull/6301) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type annotation for BITMAP_BIT_POSITION function (#6301) - due to [`4ee7a50`](https://github.com/tobymao/sqlglot/commit/4ee7a500cc460b6f6a1ed103a12dca72e6d01c18) - type inference for BITMAP_OR_AGG *(PR [#6297](https://github.com/tobymao/sqlglot/pull/6297) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type inference 
for BITMAP_OR_AGG (#6297) - due to [`fcd537d`](https://github.com/tobymao/sqlglot/commit/fcd537de2c993ad0bd18acd84dbae354165f7d3f) - conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. Tests added all dialects that support BITMAP_BUCKET_NUMBER *(PR [#6299](https://github.com/tobymao/sqlglot/pull/6299) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. Tests added all dialects that support BITMAP_BUCKET_NUMBER (#6299) - due to [`3dffd59`](https://github.com/tobymao/sqlglot/commit/3dffd598496a9f2d94caec9d7f3dcb9791c94019) - annotate types for PERCENTILE_DISC and WithinGroup *(PR [#6300](https://github.com/tobymao/sqlglot/pull/6300) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for PERCENTILE_DISC and WithinGroup (#6300) - due to [`f9287f7`](https://github.com/tobymao/sqlglot/commit/f9287f7d596a6d8a1e1cd2c48978a4dec77a96cb) - robust deduplication of connectors *(PR [#6296](https://github.com/tobymao/sqlglot/pull/6296) by [@geooo109](https://github.com/geooo109))*: robust deduplication of connectors (#6296) - due to [`ea0ea79`](https://github.com/tobymao/sqlglot/commit/ea0ea79c1c611b62c79f82f744fe0c98803598a3) - Parse `LIKE` functions *(PR [#6314](https://github.com/tobymao/sqlglot/pull/6314) by [@VaggelisD](https://github.com/VaggelisD))*: Parse `LIKE` functions (#6314) - due to [`e903883`](https://github.com/tobymao/sqlglot/commit/e90388328fcf5b8061c99e325b87d5beb0046ffc) - type annotation for APPROX_TOP_K_ACCUMULATE functio… *(PR [#6309](https://github.com/tobymao/sqlglot/pull/6309) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: type annotation for APPROX_TOP_K_ACCUMULATE functio… (#6309) - due to [`d3fefad`](https://github.com/tobymao/sqlglot/commit/d3fefad80d25ff5a6dd02426667ff0ea8478a1b2) - support `DATEDIFF_BIG` *(PR [#6323](https://github.com/tobymao/sqlglot/pull/6323) by 
[@lBilali](https://github.com/lBilali))*: support `DATEDIFF_BIG` (#6323) - due to [`21d1468`](https://github.com/tobymao/sqlglot/commit/21d1468377b9c8ad48c6cca1ae3b3744a807c29e) - annotate type for APPROX_TOP_K *(PR [#6286](https://github.com/tobymao/sqlglot/pull/6286) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for APPROX_TOP_K (#6286) - due to [`85ddcc5`](https://github.com/tobymao/sqlglot/commit/85ddcc5eca22ac726582de454f2f12b9d4877634) - Do not normalize JSON fields in dot notation *(PR [#6320](https://github.com/tobymao/sqlglot/pull/6320) by [@VaggelisD](https://github.com/VaggelisD))*: Do not normalize JSON fields in dot notation (#6320) - due to [`933e981`](https://github.com/tobymao/sqlglot/commit/933e98102fb39d24ae0350da13337d981287130a) - more robust NULL reduction *(PR [#6327](https://github.com/tobymao/sqlglot/pull/6327) by [@geooo109](https://github.com/geooo109))*: more robust NULL reduction (#6327) ### :sparkles: New Features - [`39d8e19`](https://github.com/tobymao/sqlglot/commit/39d8e19419c2adbb80465be414d1cc3bbc6d007b) - **snowflake**: include VARIABLE kind in SET transpilation to DuckDB *(PR [#6201](https://github.com/tobymao/sqlglot/pull/6201) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *addresses issue [#6177](https://github.com/tobymao/sqlglot/issues/6177) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`0037266`](https://github.com/tobymao/sqlglot/commit/00372664bf6acf2b0fff9ad4b206b597ef5378f7) - **snowflake**: annotate types for GETBIT *(PR [#6219](https://github.com/tobymao/sqlglot/pull/6219) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`a5458ce`](https://github.com/tobymao/sqlglot/commit/a5458ceca3bc239fb611791e38020632dd0824c8) - **snowflake**: add type annotation for DECODE function support *(PR [#6199](https://github.com/tobymao/sqlglot/pull/6199) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - 
[`a9d0f63`](https://github.com/tobymao/sqlglot/commit/a9d0f6333c38ffb0b5afc3c213ac7bf008d98ad6) - **DuckDB**: Transpile unix_millis to epoch_ms *(PR [#6224](https://github.com/tobymao/sqlglot/pull/6224) by [@vchan](https://github.com/vchan))* - [`238f705`](https://github.com/tobymao/sqlglot/commit/238f705940751f09464ee0f8260186f3f8124374) - **DuckDB**: Transpile unix_seconds to epoch *(PR [#6225](https://github.com/tobymao/sqlglot/pull/6225) by [@vchan](https://github.com/vchan))* - [`c8b0129`](https://github.com/tobymao/sqlglot/commit/c8b0129380df389be6ff22cafb4251181e919d23) - **exasol**: support bracket-delimited identifiers *(PR [#6231](https://github.com/tobymao/sqlglot/pull/6231) by [@JoepvandenHoven-Bluemine](https://github.com/JoepvandenHoven-Bluemine))* - [`417f1e8`](https://github.com/tobymao/sqlglot/commit/417f1e8ee50fb8f4377fad261660ffbd7444a429) - **snowflake**: annotate types for BITNOT *(PR [#6234](https://github.com/tobymao/sqlglot/pull/6234) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fe8ab40`](https://github.com/tobymao/sqlglot/commit/fe8ab40e8e0559201e0b1896a6f1a8fb6b5b932d) - **snowflake**: 1st-class parsing support for BITAND, BIT_AND, BIT_NOT *(PR [#6243](https://github.com/tobymao/sqlglot/pull/6243) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`5ae3c47`](https://github.com/tobymao/sqlglot/commit/5ae3c47b1c6993b87341472c08714f4a0f738168) - **snowflake**: add type annotation for GROUPING() function *(PR [#6244](https://github.com/tobymao/sqlglot/pull/6244) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4133265`](https://github.com/tobymao/sqlglot/commit/413326514507ef06537dcc3d4b80a3fcbcd26f66) - **clickhouse**: parse `has` function into an `ArrayContains` expression *(PR [#6245](https://github.com/tobymao/sqlglot/pull/6245) by [@joeyutong](https://github.com/joeyutong))* - 
[`b722aa2`](https://github.com/tobymao/sqlglot/commit/b722aa2d4b65c698921066426838f080a31bdc35) - **duckdb**: cast LOWER() result to BLOB if input is bytes *(PR [#6218](https://github.com/tobymao/sqlglot/pull/6218) by [@toriwei](https://github.com/toriwei))* - [`cdd45b9`](https://github.com/tobymao/sqlglot/commit/cdd45b949fd1eefb147053424279b56b8effcbcf) - **optimizer**: annotate types for GROUPING_ID function. *(PR [#6249](https://github.com/tobymao/sqlglot/pull/6249) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`080ff3b`](https://github.com/tobymao/sqlglot/commit/080ff3bd93b36291d5bb0092d722f8307f0ae082) - **snowflake**: annotate types for BITAND_AGG *(PR [#6248](https://github.com/tobymao/sqlglot/pull/6248) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`87a818a`](https://github.com/tobymao/sqlglot/commit/87a818a899f61a675c22c697f468b3f6f7e2787f) - **snowflake**: annotate types for BITOR_AGG *(PR [#6251](https://github.com/tobymao/sqlglot/pull/6251) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`a1b884d`](https://github.com/tobymao/sqlglot/commit/a1b884dc9ddfd2185de48cc9451a39f152879d39) - **snowflake**: annotate types for BITXOR_AGG *(PR [#6253](https://github.com/tobymao/sqlglot/pull/6253) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`71d93b1`](https://github.com/tobymao/sqlglot/commit/71d93b181d2aa3a77a022820446d6fec0133291f) - **duckdb**: implement casting to blob for UPPER() and move to helper method *(PR [#6254](https://github.com/tobymao/sqlglot/pull/6254) by [@toriwei](https://github.com/toriwei))* - [`ad2ad23`](https://github.com/tobymao/sqlglot/commit/ad2ad234b5a508040dce4f3920439be052742573) - **snowflake**: add missing return type mapping for MAX_BY and MAX_BY function *(PR [#6250](https://github.com/tobymao/sqlglot/pull/6250) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - 
[`39c1d81`](https://github.com/tobymao/sqlglot/commit/39c1d81174f2390b6b0c9dd14c0e550ad452a1df) - **snowflake**: annotate types for BOOLXOR_AGG *(PR [#6261](https://github.com/tobymao/sqlglot/pull/6261) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`71590d2`](https://github.com/tobymao/sqlglot/commit/71590d22cdb05594e2173a1500f763dc1a32a81d) - **snowflake**: add type annotation for SKEW function. *(PR [#6262](https://github.com/tobymao/sqlglot/pull/6262) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`5fd366d`](https://github.com/tobymao/sqlglot/commit/5fd366d9e6f7b3f1eb7a9cf41975cf13ce890ffe) - **snowflake**: annotate types for OBJECT_AGG *(PR [#6265](https://github.com/tobymao/sqlglot/pull/6265) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`3dae0fb`](https://github.com/tobymao/sqlglot/commit/3dae0fbb528762e5d5fd446350d42e9c841e2959) - **duckdb**: Support position and occurrence args for REGEXP_EXTRACT *(PR [#6266](https://github.com/tobymao/sqlglot/pull/6266) by [@vchan](https://github.com/vchan))* - [`dba0414`](https://github.com/tobymao/sqlglot/commit/dba04145c4bcda8c55890b4d7173dd6c0a64c37e) - **clickhouse**: Parse toStartOfxxx into exp.TimestampTrunc *(PR [#6268](https://github.com/tobymao/sqlglot/pull/6268) by [@joeyutong](https://github.com/joeyutong))* - [`d959ad0`](https://github.com/tobymao/sqlglot/commit/d959ad02140d692483a63b67d69d2a5d49954ea3) - transpile DuckDB exclusive end RANGE to SEQUENCE *(PR [#6270](https://github.com/tobymao/sqlglot/pull/6270) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6267](https://github.com/tobymao/sqlglot/issues/6267) opened by [@joeyutong](https://github.com/joeyutong)* - [`771732d`](https://github.com/tobymao/sqlglot/commit/771732d81459cc576f11eccc49794f33e62d14af) - **snowflake**: annotate types for REGR_AVGY *(PR [#6271](https://github.com/tobymao/sqlglot/pull/6271) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`8470be0`](https://github.com/tobymao/sqlglot/commit/8470be00731a4d79518a533a5f7ba884fa2f047e) - **snowflake**: add type annotation for BITMAP_COUNT function. *(PR [#6274](https://github.com/tobymao/sqlglot/pull/6274) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`98f25f9`](https://github.com/tobymao/sqlglot/commit/98f25f92cc1175ac7b2118a5a342db82adade13a) - **clickhouse**: support splitBy function *(PR [#6278](https://github.com/tobymao/sqlglot/pull/6278) by [@joeyutong](https://github.com/joeyutong))* - [`fabbf05`](https://github.com/tobymao/sqlglot/commit/fabbf057aba88f30205767d8c339727de45991c8) - **duckDB**: Add support for shorthand struct array literals in duckDB. *(PR [#6233](https://github.com/tobymao/sqlglot/pull/6233) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`a909fde`](https://github.com/tobymao/sqlglot/commit/a909fde068919823dc4cccc2655af48e4290137a) - **duckdb**: Add support for CREATE MACRO *(PR [#6292](https://github.com/tobymao/sqlglot/pull/6292) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#6290](https://github.com/tobymao/sqlglot/issues/6290) opened by [@francescomucio](https://github.com/francescomucio)* - [`11989be`](https://github.com/tobymao/sqlglot/commit/11989be34153ccdedeab3ab18ccf735f86e8b822) - add more expressions with positional meta *(PR [#6289](https://github.com/tobymao/sqlglot/pull/6289) by [@tobymao](https://github.com/tobymao))* - [`87651a6`](https://github.com/tobymao/sqlglot/commit/87651a671db2fe6162f06e2dcdef0b98e229bea5) - semantic facts closes [#6287](https://github.com/tobymao/sqlglot/pull/6287) *(PR [#6288](https://github.com/tobymao/sqlglot/pull/6288) by [@tobymao](https://github.com/tobymao))* - [`9c1a222`](https://github.com/tobymao/sqlglot/commit/9c1a2221b0327ba6848542c7b906e92f25a05bea) - **snowflake**: add type annotation for 
BITMAP_CONSTRUCT_AGG function. *(PR [#6285](https://github.com/tobymao/sqlglot/pull/6285) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`358105d`](https://github.com/tobymao/sqlglot/commit/358105d1296c7425e071ccf3189a31a02c00c923) - **snowflake**: type annotation for BITMAP_BIT_POSITION function *(PR [#6301](https://github.com/tobymao/sqlglot/pull/6301) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4ee7a50`](https://github.com/tobymao/sqlglot/commit/4ee7a500cc460b6f6a1ed103a12dca72e6d01c18) - **snowflake**: type inference for BITMAP_OR_AGG *(PR [#6297](https://github.com/tobymao/sqlglot/pull/6297) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fcd537d`](https://github.com/tobymao/sqlglot/commit/fcd537de2c993ad0bd18acd84dbae354165f7d3f) - **snowflake**: conflict resolution. type annotation for BITMAP_BUCKET_NUMBER function. Tests added all dialects that support BITMAP_BUCKET_NUMBER *(PR [#6299](https://github.com/tobymao/sqlglot/pull/6299) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`3dffd59`](https://github.com/tobymao/sqlglot/commit/3dffd598496a9f2d94caec9d7f3dcb9791c94019) - **snowflake**: annotate types for PERCENTILE_DISC and WithinGroup *(PR [#6300](https://github.com/tobymao/sqlglot/pull/6300) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`6ce3cd7`](https://github.com/tobymao/sqlglot/commit/6ce3cd7de958d9f3773579ab22ae6cbbcb56ceb0) - **sqlite**: support binary `MATCH` operator closes [#6305](https://github.com/tobymao/sqlglot/pull/6305) *(PR [#6306](https://github.com/tobymao/sqlglot/pull/6306) by [@georgesittas](https://github.com/georgesittas))* - [`e903883`](https://github.com/tobymao/sqlglot/commit/e90388328fcf5b8061c99e325b87d5beb0046ffc) - **snowflake**: type annotation for APPROX_TOP_K_ACCUMULATE functio… *(PR [#6309](https://github.com/tobymao/sqlglot/pull/6309) by 
[@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`afc0242`](https://github.com/tobymao/sqlglot/commit/afc0242c564f8de53e11865c2fba43fb36df0694) - **duckDB**: Cast inputs (BLOB → VARCHAR) for duckDB STARTS_WITH *(PR [#6240](https://github.com/tobymao/sqlglot/pull/6240) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`d170bbd`](https://github.com/tobymao/sqlglot/commit/d170bbde800a0308aaf8c81e59152c65be312155) - **duckdb**: transpile bigquery's `BYTES` variant of `REPLACE` *(PR [#6312](https://github.com/tobymao/sqlglot/pull/6312) by [@toriwei](https://github.com/toriwei))* - [`d3fefad`](https://github.com/tobymao/sqlglot/commit/d3fefad80d25ff5a6dd02426667ff0ea8478a1b2) - **tsql**: support `DATEDIFF_BIG` *(PR [#6323](https://github.com/tobymao/sqlglot/pull/6323) by [@lBilali](https://github.com/lBilali))* - [`21d1468`](https://github.com/tobymao/sqlglot/commit/21d1468377b9c8ad48c6cca1ae3b3744a807c29e) - **optimizer**: annotate type for APPROX_TOP_K *(PR [#6286](https://github.com/tobymao/sqlglot/pull/6286) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`93b4039`](https://github.com/tobymao/sqlglot/commit/93b4039f957f3eefbaaed2cb147bfa8c8c2a304e) - **duckdb**: preserve time zone and timestamp in DATE_TRUNC() *(PR [#6318](https://github.com/tobymao/sqlglot/pull/6318) by [@toriwei](https://github.com/toriwei))* - [`b71990f`](https://github.com/tobymao/sqlglot/commit/b71990f528d55c845f5771bfc4c5f6098eb97ad7) - **duckdb**: Add transpilation support for ANY_VALUE function with HAVING MAX and MIN clauses *(PR [#6325](https://github.com/tobymao/sqlglot/pull/6325) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`64c0d55`](https://github.com/tobymao/sqlglot/commit/64c0d554207ad40bcd6a93c20d15020752a5929d) - **sqlite**: support indexed table clause closes [#6331](https://github.com/tobymao/sqlglot/pull/6331) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`6725217`](https://github.com/tobymao/sqlglot/commit/6725217d4058b5202006576bdf6ff4ec7230a9b9) - **sqlite**: support `NOT NULL` operator closes [#6334](https://github.com/tobymao/sqlglot/pull/6334) closes [#6335](https://github.com/tobymao/sqlglot/pull/6335) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`e7ddad1`](https://github.com/tobymao/sqlglot/commit/e7ddad10b5edf9b801d2151e3e5fca448754df0d) - **optimizer**: ensure `NULL` coerces into any type *(PR [#6211](https://github.com/tobymao/sqlglot/pull/6211) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`4c4189b`](https://github.com/tobymao/sqlglot/commit/4c4189b4083d272a6e678d83b5c567a2e9c0d672) - Transpile CONCAT function to double pipe operators when source … *(PR [#6241](https://github.com/tobymao/sqlglot/pull/6241) by [@vchan](https://github.com/vchan))* - [`fc78d20`](https://github.com/tobymao/sqlglot/commit/fc78d2016d8f7d20c094df791f746de323cd3639) - **parser**: Unwrap subqueries without modifiers *(PR [#6247](https://github.com/tobymao/sqlglot/pull/6247) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6237](https://github.com/tobymao/sqlglot/issues/6237) opened by [@preet-sheth](https://github.com/preet-sheth)* - [`7ad4c17`](https://github.com/tobymao/sqlglot/commit/7ad4c177fbf8dda78aa8de1ca112f606b2fd5456) - **databricks**: Support table names in FROM STREAM *(PR [#6259](https://github.com/tobymao/sqlglot/pull/6259) by [@roveo](https://github.com/roveo))* - [`00abc39`](https://github.com/tobymao/sqlglot/commit/00abc393c9042e839457c5a6582e95cdb74356f3) - **generator**: handle casting for bytestrings *(PR [#6252](https://github.com/tobymao/sqlglot/pull/6252) by [@toriwei](https://github.com/toriwei))* - [`bcf2eac`](https://github.com/tobymao/sqlglot/commit/bcf2eace0baf1d85047841f36cb5c0082c61b29c) - **duckdb**: map int8 to bigint instead of tinyint fixes 
[#6269](https://github.com/tobymao/sqlglot/pull/6269) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ddea61d`](https://github.com/tobymao/sqlglot/commit/ddea61d83f6699c97cc7b25aabe01a138138bdb1) - **optimizer**: simplify connector complements only for non-null operands *(PR [#6214](https://github.com/tobymao/sqlglot/pull/6214) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6213](https://github.com/tobymao/sqlglot/issues/6213) opened by [@geooo109](https://github.com/geooo109)* - [`e17320e`](https://github.com/tobymao/sqlglot/commit/e17320ee3bdd0ef541d616c447b4973d12780dae) - Handle edge cases for DuckDB RANGE to Spark SEQUENCE transpilation *(PR [#6276](https://github.com/tobymao/sqlglot/pull/6276) by [@joeyutong](https://github.com/joeyutong))* - [`33b6218`](https://github.com/tobymao/sqlglot/commit/33b62183a15cdedf0b1ebd96fcb856afbe8879a0) - sqlsecurityproperty parseerror *(PR [#6280](https://github.com/tobymao/sqlglot/pull/6280) by [@ds-cbo](https://github.com/ds-cbo))* - :arrow_lower_right: *fixes issue [#6279](https://github.com/tobymao/sqlglot/issues/6279) opened by [@ds-cbo](https://github.com/ds-cbo)* - [`c02b64c`](https://github.com/tobymao/sqlglot/commit/c02b64c3524dd074c2108baaca668ab2607ac843) - **optimizer**: Handle pseudocolumns differently than columns *(PR [#6273](https://github.com/tobymao/sqlglot/pull/6273) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6256](https://github.com/tobymao/sqlglot/issues/6256) opened by [@azilya](https://github.com/azilya)* - [`05c5181`](https://github.com/tobymao/sqlglot/commit/05c5181b36a7ada32b96fc91bdfbf73b38a1a408) - **optimizer**: refactor `Connector` simplification to factor in types *(PR [#6152](https://github.com/tobymao/sqlglot/pull/6152) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6137](https://github.com/tobymao/sqlglot/issues/6137) opened by
[@dllggyx](https://github.com/dllggyx)* - [`cb0bcff`](https://github.com/tobymao/sqlglot/commit/cb0bcff310e9acdf806fc98e99cb9938b747c771) - **duckdb**: cast UUID() output to varchar when source dialect UUID() returns string *(PR [#6284](https://github.com/tobymao/sqlglot/pull/6284) by [@toriwei](https://github.com/toriwei))* - [`f9287f7`](https://github.com/tobymao/sqlglot/commit/f9287f7d596a6d8a1e1cd2c48978a4dec77a96cb) - **optimizer**: robust deduplication of connectors *(PR [#6296](https://github.com/tobymao/sqlglot/pull/6296) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6182](https://github.com/tobymao/sqlglot/issues/6182) opened by [@dllggyx](https://github.com/dllggyx)* - [`ea0ea79`](https://github.com/tobymao/sqlglot/commit/ea0ea79c1c611b62c79f82f744fe0c98803598a3) - **clickhouse**: Parse `LIKE` functions *(PR [#6314](https://github.com/tobymao/sqlglot/pull/6314) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6313](https://github.com/tobymao/sqlglot/issues/6313) opened by [@CainYang](https://github.com/CainYang)* - [`bbd4c90`](https://github.com/tobymao/sqlglot/commit/bbd4c901a9550beb363758e6be1e1877d4e56f2c) - **sqlite**: support IS with identifier as RHS *(PR [#6316](https://github.com/tobymao/sqlglot/pull/6316) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6315](https://github.com/tobymao/sqlglot/issues/6315) opened by [@VLDB2026](https://github.com/VLDB2026)* - [`65d213a`](https://github.com/tobymao/sqlglot/commit/65d213a7662962d4226368590508fbf61675c055) - **dialect**: fix typo from millenium to millennium [#6321](https://github.com/tobymao/sqlglot/pull/6321) *(commit by [@lBilali](https://github.com/lBilali))* - [`c9d1615`](https://github.com/tobymao/sqlglot/commit/c9d16150a408a41daf704d2d0b0ebfce57425b81) - **tsql**: map iso_week with the correct python directive from strftime *(PR [#6322](https://github.com/tobymao/sqlglot/pull/6322) by 
[@lBilali](https://github.com/lBilali))* - [`85ddcc5`](https://github.com/tobymao/sqlglot/commit/85ddcc5eca22ac726582de454f2f12b9d4877634) - **bigquery**: Do not normalize JSON fields in dot notation *(PR [#6320](https://github.com/tobymao/sqlglot/pull/6320) by [@VaggelisD](https://github.com/VaggelisD))* - [`933e981`](https://github.com/tobymao/sqlglot/commit/933e98102fb39d24ae0350da13337d981287130a) - **optimizer**: more robust NULL reduction *(PR [#6327](https://github.com/tobymao/sqlglot/pull/6327) by [@geooo109](https://github.com/geooo109))* - [`e1c6d57`](https://github.com/tobymao/sqlglot/commit/e1c6d5716f80eb24b6d0a9c93e187a8c9f05e555) - **parser**: improve between .. preceding .. following parser fixes [#6332](https://github.com/tobymao/sqlglot/pull/6332) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`65706e8`](https://github.com/tobymao/sqlglot/commit/65706e8c7edeb7de674d427718eac181df206dc9) - avoid full traversal for pushdown_cte_alias_columns *(commit by [@tobymao](https://github.com/tobymao))* - [`c81258e`](https://github.com/tobymao/sqlglot/commit/c81258e9c26f637f6f8520051c159685c8b1cb7e) - **parser**: allow using OVER token as unquoted identifier *(PR [#6338](https://github.com/tobymao/sqlglot/pull/6338) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6337](https://github.com/tobymao/sqlglot/issues/6337) opened by [@VLDB2026](https://github.com/VLDB2026)* - [`73abfac`](https://github.com/tobymao/sqlglot/commit/73abfac4cec27350754c942be71175fa7bdfd1d0) - **redshift**: do not inherit postgres `ROUND` generator closes [#6340](https://github.com/tobymao/sqlglot/pull/6340) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`9c98fc2`](https://github.com/tobymao/sqlglot/commit/9c98fc2b39fef2bd052b60ba4e15a4b93fd66c00) - **optimizer**: avoid extra copy in simplify *(commit by [@geooo109](https://github.com/geooo109))* - 
[`43985fb`](https://github.com/tobymao/sqlglot/commit/43985fbcb9edea088119951c5c245a9606cf92ae) - **snowflake**: remove redundant tests for ANY_VALUE *(commit by [@geooo109](https://github.com/geooo109))* - [`bf7b032`](https://github.com/tobymao/sqlglot/commit/bf7b032baae0c0fd112054a7bed6fa2f56f32890) - clean up struct name inheritance *(PR [#6295](https://github.com/tobymao/sqlglot/pull/6295) by [@georgesittas](https://github.com/georgesittas))* - [`49e0f43`](https://github.com/tobymao/sqlglot/commit/49e0f43ba19739575987f2e9c52c2061a6f59717) - extra test for spark approx_top_k_accumulate *(commit by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`d7be4a5`](https://github.com/tobymao/sqlglot/commit/d7be4a5da3dca6bcc44230b2a176c8b17b81c46e) - **optimizer**: add annotation test for COALESCE *(PR [#6210](https://github.com/tobymao/sqlglot/pull/6210) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`8aa7356`](https://github.com/tobymao/sqlglot/commit/8aa7356ab8adee26193086754ca1a1805957d944) - **optimizer**: add annotation tests for IFF *(PR [#6215](https://github.com/tobymao/sqlglot/pull/6215) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`160a1b9`](https://github.com/tobymao/sqlglot/commit/160a1b90f4ce39a2fce6f7f0e9e854d974fed053) - **optimizer**: mixed type annotation test for sf IFNULL *(commit by [@geooo109](https://github.com/geooo109))* - [`893ad2a`](https://github.com/tobymao/sqlglot/commit/893ad2a5b1a28339ccc65c85ac813506e6ad56f1) - **optimizer**: add annotation tests for NULLIF *(PR [#6221](https://github.com/tobymao/sqlglot/pull/6221) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`78d7733`](https://github.com/tobymao/sqlglot/commit/78d77335819d1796fa3989ef072d3f8fd4b83559) - remove redundant or term for unknown in annotate_types *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`b202f3a`](https://github.com/tobymao/sqlglot/commit/b202f3ad64e88a47e52c45e32c9e4faae6c8ac45) - **optimizer**: add test for BITXOR *(PR [#6223](https://github.com/tobymao/sqlglot/pull/6223) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`b20f2e8`](https://github.com/tobymao/sqlglot/commit/b20f2e88d86038f1a98f4b97b5a2ae0b86652e33) - **optimizer**: add test for BITSHIFTLEFT *(PR [#6227](https://github.com/tobymao/sqlglot/pull/6227) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7f93e85`](https://github.com/tobymao/sqlglot/commit/7f93e8551b00cc32014236a07c8794bd7a3a2b91) - **optimizer**: add annotation tests for BITSHIFTRIGHT *(PR [#6228](https://github.com/tobymao/sqlglot/pull/6228) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fcf017c`](https://github.com/tobymao/sqlglot/commit/fcf017cfb95923fea8ae5669340713a326f4f306) - rename `EXPRESSION_SPEC` to `EXPRESSION_METADATA` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`55bc9e4`](https://github.com/tobymao/sqlglot/commit/55bc9e4019f8ef8d7e571256d7b0e07b30d9240c) - remove predicate/connector/not from typing metadata *(commit by [@georgesittas](https://github.com/georgesittas))* - [`349ab29`](https://github.com/tobymao/sqlglot/commit/349ab29aa84fb087388b6a1494fea70273a4a560) - **optimizer**: add annotation test for BOOLAND_OR *(PR [#6260](https://github.com/tobymao/sqlglot/pull/6260) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`75ec424`](https://github.com/tobymao/sqlglot/commit/75ec424667b95462bb1750a251a5096da0d5161b) - **optimizer**: add annotation test for BOOLAND_AGG *(PR [#6257](https://github.com/tobymao/sqlglot/pull/6257) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`bb574aa`](https://github.com/tobymao/sqlglot/commit/bb574aa0cf0a8c0b92f9af7ef3dfddb7de725a8b) - **optimizer**: add annotation test for ARRAY_AGG *(PR [#6264](https://github.com/tobymao/sqlglot/pull/6264) 
by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`a95c5cc`](https://github.com/tobymao/sqlglot/commit/a95c5ccf411dc4d28ef9c19fb03bd8a3615d7c4b) - **optimizer**: add nonnull clickhouse column test case *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6d6c689`](https://github.com/tobymao/sqlglot/commit/6d6c68915ca699da7cb707675aece963df97f80b) - **optimizer**: add annotation tests for ANY_VALUE *(PR [#6275](https://github.com/tobymao/sqlglot/pull/6275) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`2459f88`](https://github.com/tobymao/sqlglot/commit/2459f8832ae398aa1381025724a4286f7f5e3e9d) - Follow up of 6280 *(PR [#6281](https://github.com/tobymao/sqlglot/pull/6281) by [@VaggelisD](https://github.com/VaggelisD))* - [`a7d33d0`](https://github.com/tobymao/sqlglot/commit/a7d33d0e190fc5c9f23a1ab43082ac017d20fd18) - **optimizer**: add annotation tests for APPROX_PERCENTILE *(PR [#6283](https://github.com/tobymao/sqlglot/pull/6283) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`1b2d139`](https://github.com/tobymao/sqlglot/commit/1b2d139d3338c7053dee333914323236a2d15d97) - **optimizer**: add type annotation tests with window for sf APPROX_PERCENTILE *(commit by [@geooo109](https://github.com/geooo109))* - [`d059648`](https://github.com/tobymao/sqlglot/commit/d05964851c99553ba06e318bbbda39f9851120db) - **optimizer**: add annotation tests for APPROX_COUNT_DISTINCT *(PR [#6282](https://github.com/tobymao/sqlglot/pull/6282) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`6bd59ac`](https://github.com/tobymao/sqlglot/commit/6bd59acf2288da5bfe6151c5adf6f2a63792dc1e) - Follow up of PR 6288 *(PR [#6293](https://github.com/tobymao/sqlglot/pull/6293) by [@VaggelisD](https://github.com/VaggelisD))* - [`546fd2a`](https://github.com/tobymao/sqlglot/commit/546fd2a2588f7b385bdbb9e39490bd6a422283ca) - Remove dead line in qualify_columns *(PR 
[#6304](https://github.com/tobymao/sqlglot/pull/6304) by [@VaggelisD](https://github.com/VaggelisD))* - [`ac7ac19`](https://github.com/tobymao/sqlglot/commit/ac7ac198a3b915e63ba8a055e9a0193c3dd3e26a) - **exasol**: Implement ODBC date time literals in Exasol Sqlglot *(PR [#6311](https://github.com/tobymao/sqlglot/pull/6311) by [@nnamdi16](https://github.com/nnamdi16))* - [`8d1d25c`](https://github.com/tobymao/sqlglot/commit/8d1d25c6de7ad03c50e3efe892d16d16329d8ee9) - **exasol**: Implement local qualifier for-aliases, in GROUP BY, WHERE AND HAVING clause in exasol dialect *(PR [#6277](https://github.com/tobymao/sqlglot/pull/6277) by [@nnamdi16](https://github.com/nnamdi16))* - [`509b0aa`](https://github.com/tobymao/sqlglot/commit/509b0aaada0e27542864771ba14777d398b6cee0) - **exasol**: Implement day_of_week function *(PR [#6319](https://github.com/tobymao/sqlglot/pull/6319) by [@nnamdi16](https://github.com/nnamdi16))* ## [v27.29.0] - 2025-10-29 ### :boom: BREAKING CHANGES - due to [`5242cdd`](https://github.com/tobymao/sqlglot/commit/5242cddf487e367e7f543ca19d9bccae858f36ac) - annotate type for bq LENGTH *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq LENGTH - due to [`0fc6dbf`](https://github.com/tobymao/sqlglot/commit/0fc6dbf2e7b611fa0977e3c3e61be1cc84bcf4a9) - add GREATEST_IGNORE_NULLS function support *(PR [#6161](https://github.com/tobymao/sqlglot/pull/6161) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: add GREATEST_IGNORE_NULLS function support (#6161) - due to [`d382a31`](https://github.com/tobymao/sqlglot/commit/d382a3106d5ce2e9b75527aacd4a37d1f8e16d18) - simplify double negation only if the inner expr is BOOLEAN *(PR [#6151](https://github.com/tobymao/sqlglot/pull/6151) by [@geooo109](https://github.com/geooo109))*: simplify double negation only if the inner expr is BOOLEAN (#6151) - due to [`bcf6c89`](https://github.com/tobymao/sqlglot/commit/bcf6c89a47abd3c2c4383d1c908f892b6619b6fa) - add type 
annotation tests for snowflake BOOLAND *(PR [#6153](https://github.com/tobymao/sqlglot/pull/6153) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: add type annotation tests for snowflake BOOLAND (#6153) - due to [`52d1eec`](https://github.com/tobymao/sqlglot/commit/52d1eecaad505703e8b22dcfe8954652f57985b6) - Annotate type for snowflake TIMESTAMP_FROM_PARTS function *(PR [#6139](https://github.com/tobymao/sqlglot/pull/6139) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIMESTAMP_FROM_PARTS function (#6139) - due to [`8651fe6`](https://github.com/tobymao/sqlglot/commit/8651fe6526dea865c0d54d6d53086359a7835d32) - annotate types for BOOLOR *(PR [#6159](https://github.com/tobymao/sqlglot/pull/6159) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for BOOLOR (#6159) - due to [`812ba9a`](https://github.com/tobymao/sqlglot/commit/812ba9abad8247df81c8f8b514336c8766292112) - Annotate type for snowflake date parts functions *(PR [#6158](https://github.com/tobymao/sqlglot/pull/6158) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: Annotate type for snowflake date parts functions (#6158) - due to [`9f8c123`](https://github.com/tobymao/sqlglot/commit/9f8c123ae44249e274334d0aa551ac33814f2b32) - make qualify table callback more generic *(PR [#6171](https://github.com/tobymao/sqlglot/pull/6171) by [@tobymao](https://github.com/tobymao))*: make qualify table callback more generic (#6171) - due to [`74b4e7c`](https://github.com/tobymao/sqlglot/commit/74b4e7c311e9d4ff39ce2e4d91940eced96aa32f) - fix type annotation for Snowflake BOOLOR and BOOLAND *(PR [#6169](https://github.com/tobymao/sqlglot/pull/6169) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: fix type annotation for Snowflake BOOLOR and BOOLAND (#6169) - due to [`ef87520`](https://github.com/tobymao/sqlglot/commit/ef875204596b8529f3358025c7a61d757a999bdc) - 
Transpile `REGEXP_REPLACE` with 'g' option *(PR [#6174](https://github.com/tobymao/sqlglot/pull/6174) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile `REGEXP_REPLACE` with 'g' option (#6174) - due to [`93071e2`](https://github.com/tobymao/sqlglot/commit/93071e255406f62ea83dd89a3be4871b7edfb3fe) - Fix simplify_parens from removing negated *(PR [#6194](https://github.com/tobymao/sqlglot/pull/6194) by [@VaggelisD](https://github.com/VaggelisD))*: Fix simplify_parens from removing negated (#6194) - due to [`e90168a`](https://github.com/tobymao/sqlglot/commit/e90168a6829b85534edcecec7d0df2a8b1b56fc4) - annotate type for Snowflake's `IS_NULL_VALUE` function *(PR [#6186](https://github.com/tobymao/sqlglot/pull/6186) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate type for Snowflake's `IS_NULL_VALUE` function (#6186) - due to [`c93b535`](https://github.com/tobymao/sqlglot/commit/c93b5354827282c806899c36b11e7a7598e96e38) - annotate type for LEAST_IGNORE_NULLS *(PR [#6196](https://github.com/tobymao/sqlglot/pull/6196) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: annotate type for LEAST_IGNORE_NULLS (#6196) - due to [`f60c71f`](https://github.com/tobymao/sqlglot/commit/f60c71fb03db91bfe90430d032ac16f4945d5dff) - annotate types for REGR_VALX *(PR [#6198](https://github.com/tobymao/sqlglot/pull/6198) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_VALX (#6198) - due to [`b82c571`](https://github.com/tobymao/sqlglot/commit/b82c57131707297abe174539023b9cb62b7cd6c7) - annotate types for REGR_VALY *(PR [#6206](https://github.com/tobymao/sqlglot/pull/6206) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate types for REGR_VALY (#6206) ### :sparkles: New Features - [`5242cdd`](https://github.com/tobymao/sqlglot/commit/5242cddf487e367e7f543ca19d9bccae858f36ac) - **optimizer**: annotate type for bq LENGTH *(commit by 
[@geooo109](https://github.com/geooo109))* - [`0fc6dbf`](https://github.com/tobymao/sqlglot/commit/0fc6dbf2e7b611fa0977e3c3e61be1cc84bcf4a9) - **snowflake**: add GREATEST_IGNORE_NULLS function support *(PR [#6161](https://github.com/tobymao/sqlglot/pull/6161) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`54ecadc`](https://github.com/tobymao/sqlglot/commit/54ecadc57b8f1e87fd2a2ba35a5366d75231ea85) - **duckdb**: support `KV_METADATA` in `COPY` statement closes [#6165](https://github.com/tobymao/sqlglot/pull/6165) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e241964`](https://github.com/tobymao/sqlglot/commit/e2419642a4966a4da194147aa488793eae152af4) - **duckdb**: support `USING` condition for `MERGE` closes [#6162](https://github.com/tobymao/sqlglot/pull/6162) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bcf6c89`](https://github.com/tobymao/sqlglot/commit/bcf6c89a47abd3c2c4383d1c908f892b6619b6fa) - **optimizer**: add type annotation tests for snowflake BOOLAND *(PR [#6153](https://github.com/tobymao/sqlglot/pull/6153) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`52d1eec`](https://github.com/tobymao/sqlglot/commit/52d1eecaad505703e8b22dcfe8954652f57985b6) - **optimizer**: Annotate type for snowflake TIMESTAMP_FROM_PARTS function *(PR [#6139](https://github.com/tobymao/sqlglot/pull/6139) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8651fe6`](https://github.com/tobymao/sqlglot/commit/8651fe6526dea865c0d54d6d53086359a7835d32) - **optimizer**: annotate types for BOOLOR *(PR [#6159](https://github.com/tobymao/sqlglot/pull/6159) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`812ba9a`](https://github.com/tobymao/sqlglot/commit/812ba9abad8247df81c8f8b514336c8766292112) - **optimizer**: Annotate type for snowflake date parts functions *(PR [#6158](https://github.com/tobymao/sqlglot/pull/6158) by 
[@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`9f8c123`](https://github.com/tobymao/sqlglot/commit/9f8c123ae44249e274334d0aa551ac33814f2b32) - make qualify table callback more generic *(PR [#6171](https://github.com/tobymao/sqlglot/pull/6171) by [@tobymao](https://github.com/tobymao))* - [`74b4e7c`](https://github.com/tobymao/sqlglot/commit/74b4e7c311e9d4ff39ce2e4d91940eced96aa32f) - **optimizer**: fix type annotation for Snowflake BOOLOR and BOOLAND *(PR [#6169](https://github.com/tobymao/sqlglot/pull/6169) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`e90168a`](https://github.com/tobymao/sqlglot/commit/e90168a6829b85534edcecec7d0df2a8b1b56fc4) - **optimizer**: annotate type for Snowflake's `IS_NULL_VALUE` function *(PR [#6186](https://github.com/tobymao/sqlglot/pull/6186) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`cea2595`](https://github.com/tobymao/sqlglot/commit/cea25952c98e70f2a4c35e675fe7ee4df0af02cd) - **duckdb**: Transpile DATE function from BQ->DuckDB *(PR [#6178](https://github.com/tobymao/sqlglot/pull/6178) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`00aaa47`](https://github.com/tobymao/sqlglot/commit/00aaa47feff1cf9e69320074c35d9adfc8538026) - **duckDB**: Transpile BigQuery's CURRENT_DATE (Conversion) function to DuckDB *(PR [#6189](https://github.com/tobymao/sqlglot/pull/6189) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c93b535`](https://github.com/tobymao/sqlglot/commit/c93b5354827282c806899c36b11e7a7598e96e38) - **snowflake**: annotate type for LEAST_IGNORE_NULLS *(PR [#6196](https://github.com/tobymao/sqlglot/pull/6196) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`d2162fb`](https://github.com/tobymao/sqlglot/commit/d2162fbece0747b8ee42fa1f78e26baa0c944d41) - check same ref on Expression.__eq__ *(PR 
[#6200](https://github.com/tobymao/sqlglot/pull/6200) by [@georgesittas](https://github.com/georgesittas))* - [`f60c71f`](https://github.com/tobymao/sqlglot/commit/f60c71fb03db91bfe90430d032ac16f4945d5dff) - **optimizer**: annotate types for REGR_VALX *(PR [#6198](https://github.com/tobymao/sqlglot/pull/6198) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`42e0ae4`](https://github.com/tobymao/sqlglot/commit/42e0ae43b3531bf6c593bcac2ece2ab1d969e5e1) - **duckdb**: transpile BigQuery function TIMESTAMP_SUB to DuckDB *(PR [#6202](https://github.com/tobymao/sqlglot/pull/6202) by [@toriwei](https://github.com/toriwei))* - [`b82c571`](https://github.com/tobymao/sqlglot/commit/b82c57131707297abe174539023b9cb62b7cd6c7) - **snowflake**: annotate types for REGR_VALY *(PR [#6206](https://github.com/tobymao/sqlglot/pull/6206) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* ### :bug: Bug Fixes - [`3acf796`](https://github.com/tobymao/sqlglot/commit/3acf7965105a098fea6336df0c304d94acbd05ec) - **duckdb**: Allow ESCAPE NULL *(PR [#6164](https://github.com/tobymao/sqlglot/pull/6164) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6160](https://github.com/tobymao/sqlglot/issues/6160) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`f7f1fca`](https://github.com/tobymao/sqlglot/commit/f7f1fca39a75df16ebb93f038e6277a25b8be6b9) - **duckdb**: Support positional index in list comprehension *(PR [#6163](https://github.com/tobymao/sqlglot/pull/6163) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6156](https://github.com/tobymao/sqlglot/issues/6156) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`d382a31`](https://github.com/tobymao/sqlglot/commit/d382a3106d5ce2e9b75527aacd4a37d1f8e16d18) - **optimizer**: simplify double negation only if the inner expr is BOOLEAN *(PR [#6151](https://github.com/tobymao/sqlglot/pull/6151) by 
[@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6129](https://github.com/tobymao/sqlglot/issues/6129) opened by [@dllggyx](https://github.com/dllggyx)* - [`dfe6b3c`](https://github.com/tobymao/sqlglot/commit/dfe6b3c8e6db40e22e626e2d56e9a7008dd75c32) - **optimizer**: Disambiguate JOIN ON columns during qualify *(PR [#6155](https://github.com/tobymao/sqlglot/pull/6155) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6132](https://github.com/tobymao/sqlglot/issues/6132) opened by [@Fosly](https://github.com/Fosly)* - [`f267ece`](https://github.com/tobymao/sqlglot/commit/f267ecea92b0751f6b35a4ad0c70fe6754e49038) - normalize before qualifying tables *(PR [#6176](https://github.com/tobymao/sqlglot/pull/6176) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6167](https://github.com/tobymao/sqlglot/issues/6167) opened by [@schelip](https://github.com/schelip)* - [`ef87520`](https://github.com/tobymao/sqlglot/commit/ef875204596b8529f3358025c7a61d757a999bdc) - **postgres, duckdb**: Transpile `REGEXP_REPLACE` with 'g' option *(PR [#6174](https://github.com/tobymao/sqlglot/pull/6174) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6170](https://github.com/tobymao/sqlglot/issues/6170) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`51a8d70`](https://github.com/tobymao/sqlglot/commit/51a8d700a9602278d1e98425af0fa87d02c739fe) - **parser**: allow LIMIT % OFFSET *(PR [#6184](https://github.com/tobymao/sqlglot/pull/6184) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *fixes issue [#6166](https://github.com/tobymao/sqlglot/issues/6166) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`8bf0a9f`](https://github.com/tobymao/sqlglot/commit/8bf0a9fe8e167984dc2e7b43d52d3850e063da3f) - **duckdb**: Cast literal arg to timestamp for epoch_us function *(PR 
[#6190](https://github.com/tobymao/sqlglot/pull/6190) by [@vchan](https://github.com/vchan))* - [`93071e2`](https://github.com/tobymao/sqlglot/commit/93071e255406f62ea83dd89a3be4871b7edfb3fe) - **optimizer**: Fix simplify_parens from removing negated *(PR [#6194](https://github.com/tobymao/sqlglot/pull/6194) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6179](https://github.com/tobymao/sqlglot/issues/6179) opened by [@dllggyx](https://github.com/dllggyx)* - [`2ac3a03`](https://github.com/tobymao/sqlglot/commit/2ac3a03409d9239d0cf7fb265843d7837a0a3fcd) - **lineage**: correct star detection and add join star tests *(PR [#6185](https://github.com/tobymao/sqlglot/pull/6185) by [@lancewl](https://github.com/lancewl))* - [`c9ae2eb`](https://github.com/tobymao/sqlglot/commit/c9ae2ebdb86abdb767f2fcb00da0b6277b4aea45) - **duckdb**: transpile BigQuery TIMESTAMP_ADD to duckdb *(PR [#6188](https://github.com/tobymao/sqlglot/pull/6188) by [@toriwei](https://github.com/toriwei))* - [`ba0e17a`](https://github.com/tobymao/sqlglot/commit/ba0e17a25af417e24162bfab49c3074454a5c1a8) - **snowflake**: Transpile `ARRAY_CONCAT_AGG` to `ARRAY_FLATTEN(ARRAY_AGG(...))` *(PR [#6192](https://github.com/tobymao/sqlglot/pull/6192) by [@ozadari](https://github.com/ozadari))* - [`730e4cc`](https://github.com/tobymao/sqlglot/commit/730e4cc5b77bff9135667193cc0a65c24cdfb6b5) - **trino**: Allow 2nd arg for FIRST/LAST functions *(PR [#6205](https://github.com/tobymao/sqlglot/pull/6205) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6204](https://github.com/tobymao/sqlglot/issues/6204) opened by [@Harmuth94](https://github.com/Harmuth94)* ### :recycle: Refactors - [`6d775fd`](https://github.com/tobymao/sqlglot/commit/6d775fdb6091cb866c27c0f1141514b23d689284) - snowflake GREATEST type checks *(commit by [@geooo109](https://github.com/geooo109))* - 
[`e797fb1`](https://github.com/tobymao/sqlglot/commit/e797fb105f7fa4e7bd42698eda71037cae9fd155) - update `LIKE` operator when using functional syntax with spark dialect *(PR [#6173](https://github.com/tobymao/sqlglot/pull/6173) by [@themattmorris](https://github.com/themattmorris))* - :arrow_lower_right: *addresses issue [#6172](https://github.com/tobymao/sqlglot/issues/6172) opened by [@themattmorris](https://github.com/themattmorris)* ### :wrench: Chores - [`aca106c`](https://github.com/tobymao/sqlglot/commit/aca106c660b8aaf229065ec5c5a4a80d10e8daf6) - **optimizer**: add type annotation tests for snowflake GREATEST *(PR [#6157](https://github.com/tobymao/sqlglot/pull/6157) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`f763604`](https://github.com/tobymao/sqlglot/commit/f7636041d7b796545ed923ffd4803521f05fa7ea) - add `IS [NOT]` tests *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1ab5854`](https://github.com/tobymao/sqlglot/commit/1ab5854216da591e6036ac103239ac0280e09c3d) - **optimizer**: add snowflake test for [NOT] IN *(PR [#6180](https://github.com/tobymao/sqlglot/pull/6180) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`64939ce`](https://github.com/tobymao/sqlglot/commit/64939ce9926f4740387a151311e918e807bfa681) - **optimizer**: add annotation tests for ZEROIFNULL *(PR [#6187](https://github.com/tobymao/sqlglot/pull/6187) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`4b6bcdd`](https://github.com/tobymao/sqlglot/commit/4b6bcdd4dc297bd42ad000ffda98d14110565dc9) - **optimizer**: Add tests for snowflake's `NULLIFZERO` *(PR [#6197](https://github.com/tobymao/sqlglot/pull/6197) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`ef68075`](https://github.com/tobymao/sqlglot/commit/ef680756c33da180ed2f21fb6113a0123db341c9) - **optimizer**: add annotation tests for NVL2 *(PR [#6208](https://github.com/tobymao/sqlglot/pull/6208) by 
[@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`7f550f2`](https://github.com/tobymao/sqlglot/commit/7f550f22da40d8c1cfc8afb183d6e4dbd50241ea) - **optimizer**: add annotation tests for NVL *(PR [#6207](https://github.com/tobymao/sqlglot/pull/6207) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* ## [v27.28.0] - 2025-10-21 ### :boom: BREAKING CHANGES - due to [`2238ac2`](https://github.com/tobymao/sqlglot/commit/2238ac27478bd272ba39928bbec1075c4191ee1b) - transpile timestamp literals in datediff fixes [#6083](https://github.com/tobymao/sqlglot/pull/6083) *(PR [#6086](https://github.com/tobymao/sqlglot/pull/6086) by [@georgesittas](https://github.com/georgesittas))*: transpile timestamp literals in datediff fixes #6083 (#6086) - due to [`c49ba0e`](https://github.com/tobymao/sqlglot/commit/c49ba0eee21f7776703d2a26c6641b4a32a1cff7) - Annotate type for snowflake WIDTH_BUCKET function *(PR [#6078](https://github.com/tobymao/sqlglot/pull/6078) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake WIDTH_BUCKET function (#6078) - due to [`fbc1f13`](https://github.com/tobymao/sqlglot/commit/fbc1f1335eecaaaab4fc93ddbb74611a4df0aea7) - annotate type for Snowflake CONVERT_TIMEZONE function *(PR [#6076](https://github.com/tobymao/sqlglot/pull/6076) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CONVERT_TIMEZONE function (#6076) - due to [`70e977c`](https://github.com/tobymao/sqlglot/commit/70e977c5edfb495529d38a9096cb40762a9b5d7b) - annotate type for Snowflake DATE_TRUNC function *(PR [#6080](https://github.com/tobymao/sqlglot/pull/6080) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATE_TRUNC function (#6080) - due to [`e9cf146`](https://github.com/tobymao/sqlglot/commit/e9cf146a4a6cd78f6a59c195e7ec12240b836e5e) - annotate type for Snowflake DATE_PART 
function *(PR [#6079](https://github.com/tobymao/sqlglot/pull/6079) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATE_PART function (#6079) - due to [`5109890`](https://github.com/tobymao/sqlglot/commit/510989043d18baa17502a971262462814a2eb5be) - VALUES with ORDER BY/LIMIT/OFFSET *(PR [#6094](https://github.com/tobymao/sqlglot/pull/6094) by [@geooo109](https://github.com/geooo109))*: VALUES with ORDER BY/LIMIT/OFFSET (#6094) - due to [`6fe5824`](https://github.com/tobymao/sqlglot/commit/6fe58247888c326093618657fb027e482d82d107) - Annotate type for hour, minute, second functions *(PR [#6100](https://github.com/tobymao/sqlglot/pull/6100) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for hour, minute, second functions (#6100) - due to [`a4d07a0`](https://github.com/tobymao/sqlglot/commit/a4d07a07eefbdaf88d30df2310a9533afdc75a82) - Annotate type for snowflake EXTRACT function *(PR [#6099](https://github.com/tobymao/sqlglot/pull/6099) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake EXTRACT function (#6099) - due to [`483770b`](https://github.com/tobymao/sqlglot/commit/483770b816fab14b7eb7222974ed2c99045302a7) - Annotate type for snowflake TIME_SLICE function *(PR [#6098](https://github.com/tobymao/sqlglot/pull/6098) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIME_SLICE function (#6098) - due to [`06f40f9`](https://github.com/tobymao/sqlglot/commit/06f40f900ce693ba4203514e422cba8cda0dbb07) - don't simplify x XOR x due to NULL semantics *(PR [#6115](https://github.com/tobymao/sqlglot/pull/6115) by [@geooo109](https://github.com/geooo109))*: don't simplify x XOR x due to NULL semantics (#6115) - due to [`c286cee`](https://github.com/tobymao/sqlglot/commit/c286cee54ab93e1fd0b3be658f7e767e3e00afe9) - Annotate type 
for snowflake MONTHNAME function *(PR [#6116](https://github.com/tobymao/sqlglot/pull/6116) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake MONTHNAME function (#6116) - due to [`1a34788`](https://github.com/tobymao/sqlglot/commit/1a34788025bdd8a018c4bb9214f72152e68bdd14) - Annotate type for snowflake PREVIOUS_DAY function *(PR [#6117](https://github.com/tobymao/sqlglot/pull/6117) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake PREVIOUS_DAY function (#6117) - due to [`533faf8`](https://github.com/tobymao/sqlglot/commit/533faf87b6df351070b565dd1fe9ce4e13b6c46e) - transpile duckdb `READ_PARQUET` to `parquet.` closes [#6122](https://github.com/tobymao/sqlglot/pull/6122) *(commit by [@georgesittas](https://github.com/georgesittas))*: transpile duckdb `READ_PARQUET` to `parquet.` closes #6122 - due to [`cd4e557`](https://github.com/tobymao/sqlglot/commit/cd4e557658b1384f36c9a1ef9da5a09b893229b1) - Annotate type for snowflake RANDOM function *(PR [#6124](https://github.com/tobymao/sqlglot/pull/6124) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: Annotate type for snowflake RANDOM function (#6124) - due to [`fe63d84`](https://github.com/tobymao/sqlglot/commit/fe63d84f1bd365b22221f348d79c0546aa3118b0) - annotate type for Snowflake MONTHS_BETWEEN function *(PR [#6120](https://github.com/tobymao/sqlglot/pull/6120) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))*: annotate type for Snowflake MONTHS_BETWEEN function (#6120) - due to [`598d09b`](https://github.com/tobymao/sqlglot/commit/598d09b036d938c90a44955d67175ea868090ba2) - annotate type for Snowflake DATEADD function *(PR [#6089](https://github.com/tobymao/sqlglot/pull/6089) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DATEADD function (#6089) - due to 
[`b98bcee`](https://github.com/tobymao/sqlglot/commit/b98bcee148ba426816e166dbfa9ba8e0979aae21) - Annotate type for snowflake next_day function *(PR [#6125](https://github.com/tobymao/sqlglot/pull/6125) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))*: Annotate type for snowflake next_day function (#6125) - due to [`e2129c6`](https://github.com/tobymao/sqlglot/commit/e2129c6766ca1f10ff6663bec98be984abb33c91) - Do not consider BIT_COUNT an aggregate function *(PR [#6135](https://github.com/tobymao/sqlglot/pull/6135) by [@VaggelisD](https://github.com/VaggelisD))*: Do not consider BIT_COUNT an aggregate function (#6135) - due to [`d136414`](https://github.com/tobymao/sqlglot/commit/d136414e520270ac9ab2fd8e9df4691d269b3af0) - avoid simplifying AND with NULL *(PR [#6148](https://github.com/tobymao/sqlglot/pull/6148) by [@geooo109](https://github.com/geooo109))*: avoid simplifying AND with NULL (#6148) - due to [`3a334f3`](https://github.com/tobymao/sqlglot/commit/3a334f376b9766b6b99fdf195ae763bb44976ec4) - annotate type for boolnot snowflake function *(PR [#6141](https://github.com/tobymao/sqlglot/pull/6141) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))*: annotate type for boolnot snowflake function (#6141) - due to [`99949cc`](https://github.com/tobymao/sqlglot/commit/99949ccd3ff81b524edeae437d874b86250dbb5b) - avoid needlessly copying in lineage *(PR [#6150](https://github.com/tobymao/sqlglot/pull/6150) by [@georgesittas](https://github.com/georgesittas))*: avoid needlessly copying in lineage (#6150) - due to [`4e36f9d`](https://github.com/tobymao/sqlglot/commit/4e36f9dd6a854b378c9bbf6b2e9811045affc63d) - Annotate type for snowflake TIMEADD function *(PR [#6134](https://github.com/tobymao/sqlglot/pull/6134) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TIMEADD function (#6134) ### :sparkles: New Features - 
[`c49ba0e`](https://github.com/tobymao/sqlglot/commit/c49ba0eee21f7776703d2a26c6641b4a32a1cff7) - **optimizer**: Annotate type for snowflake WIDTH_BUCKET function *(PR [#6078](https://github.com/tobymao/sqlglot/pull/6078) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fbc1f13`](https://github.com/tobymao/sqlglot/commit/fbc1f1335eecaaaab4fc93ddbb74611a4df0aea7) - **optimizer**: annotate type for Snowflake CONVERT_TIMEZONE function *(PR [#6076](https://github.com/tobymao/sqlglot/pull/6076) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`70e977c`](https://github.com/tobymao/sqlglot/commit/70e977c5edfb495529d38a9096cb40762a9b5d7b) - **optimizer**: annotate type for Snowflake DATE_TRUNC function *(PR [#6080](https://github.com/tobymao/sqlglot/pull/6080) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`e9cf146`](https://github.com/tobymao/sqlglot/commit/e9cf146a4a6cd78f6a59c195e7ec12240b836e5e) - **optimizer**: annotate type for Snowflake DATE_PART function *(PR [#6079](https://github.com/tobymao/sqlglot/pull/6079) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`cdf3b1b`](https://github.com/tobymao/sqlglot/commit/cdf3b1b34dc044064d0a5ba7ff22723b8ae33e5d) - **optimizer**: Annotate type for snowflake add_months function *(PR [#6097](https://github.com/tobymao/sqlglot/pull/6097) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6fe5824`](https://github.com/tobymao/sqlglot/commit/6fe58247888c326093618657fb027e482d82d107) - **optimizer**: Annotate type for hour, minute, second functions *(PR [#6100](https://github.com/tobymao/sqlglot/pull/6100) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`483770b`](https://github.com/tobymao/sqlglot/commit/483770b816fab14b7eb7222974ed2c99045302a7) - **optimizer**: Annotate type for snowflake 
TIME_SLICE function *(PR [#6098](https://github.com/tobymao/sqlglot/pull/6098) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`071a995`](https://github.com/tobymao/sqlglot/commit/071a9954aad220c1e13ba7a6714a083058a8e03f) - **tsql**: add support for iso_week on DATEPART *(PR [#6111](https://github.com/tobymao/sqlglot/pull/6111) by [@lBilali](https://github.com/lBilali))* - :arrow_lower_right: *addresses issue [#6110](https://github.com/tobymao/sqlglot/issues/6110) opened by [@lBilali](https://github.com/lBilali)* - [`c286cee`](https://github.com/tobymao/sqlglot/commit/c286cee54ab93e1fd0b3be658f7e767e3e00afe9) - **optimizer**: Annotate type for snowflake MONTHNAME function *(PR [#6116](https://github.com/tobymao/sqlglot/pull/6116) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1a34788`](https://github.com/tobymao/sqlglot/commit/1a34788025bdd8a018c4bb9214f72152e68bdd14) - **optimizer**: Annotate type for snowflake PREVIOUS_DAY function *(PR [#6117](https://github.com/tobymao/sqlglot/pull/6117) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`533faf8`](https://github.com/tobymao/sqlglot/commit/533faf87b6df351070b565dd1fe9ce4e13b6c46e) - **spark**: transpile duckdb `READ_PARQUET` to `parquet.` closes [#6122](https://github.com/tobymao/sqlglot/pull/6122) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`cd4e557`](https://github.com/tobymao/sqlglot/commit/cd4e557658b1384f36c9a1ef9da5a09b893229b1) - **optimizer**: Annotate type for snowflake RANDOM function *(PR [#6124](https://github.com/tobymao/sqlglot/pull/6124) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`fe63d84`](https://github.com/tobymao/sqlglot/commit/fe63d84f1bd365b22221f348d79c0546aa3118b0) - **optimizer**: annotate type for Snowflake MONTHS_BETWEEN function *(PR [#6120](https://github.com/tobymao/sqlglot/pull/6120) by 
[@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - [`7cb7598`](https://github.com/tobymao/sqlglot/commit/7cb7598e13260aa45c851dc620b4994ddfa089fe) - **optimizer**: Annotate type for snowflake TIME_FROM_PARTS function *(PR [#6119](https://github.com/tobymao/sqlglot/pull/6119) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`598d09b`](https://github.com/tobymao/sqlglot/commit/598d09b036d938c90a44955d67175ea868090ba2) - **optimizer**: annotate type for Snowflake DATEADD function *(PR [#6089](https://github.com/tobymao/sqlglot/pull/6089) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`b98bcee`](https://github.com/tobymao/sqlglot/commit/b98bcee148ba426816e166dbfa9ba8e0979aae21) - **optimizer**: Annotate type for snowflake next_day function *(PR [#6125](https://github.com/tobymao/sqlglot/pull/6125) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* - [`fe1927f`](https://github.com/tobymao/sqlglot/commit/fe1927f28600e2d8863a4e7f06e6a21bf6ff7f9c) - **duckdb**: Transpile unix_micros to epoch_us *(PR [#6127](https://github.com/tobymao/sqlglot/pull/6127) by [@vchan](https://github.com/vchan))* - [`a531f10`](https://github.com/tobymao/sqlglot/commit/a531f107235c29ac6a7e627a323f00b8ecf7023d) - **duckdb**: transpile TimeSub *(PR [#6142](https://github.com/tobymao/sqlglot/pull/6142) by [@toriwei](https://github.com/toriwei))* - [`b1a9dff`](https://github.com/tobymao/sqlglot/commit/b1a9dfff52a0ffbb0b7c8bfedb0a90e245b97851) - make qualify faster by only resetting scope when needed *(PR [#6081](https://github.com/tobymao/sqlglot/pull/6081) by [@tobymao](https://github.com/tobymao))* - [`3a334f3`](https://github.com/tobymao/sqlglot/commit/3a334f376b9766b6b99fdf195ae763bb44976ec4) - **optimizer**: annotate type for boolnot snowflake function *(PR [#6141](https://github.com/tobymao/sqlglot/pull/6141) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - 
[`4e36f9d`](https://github.com/tobymao/sqlglot/commit/4e36f9dd6a854b378c9bbf6b2e9811045affc63d) - **optimizer**: Annotate type for snowflake TIMEADD function *(PR [#6134](https://github.com/tobymao/sqlglot/pull/6134) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - [`2238ac2`](https://github.com/tobymao/sqlglot/commit/2238ac27478bd272ba39928bbec1075c4191ee1b) - **duckdb**: transpile timestamp literals in datediff fixes [#6083](https://github.com/tobymao/sqlglot/pull/6083) *(PR [#6086](https://github.com/tobymao/sqlglot/pull/6086) by [@georgesittas](https://github.com/georgesittas))* - [`bef541c`](https://github.com/tobymao/sqlglot/commit/bef541cec36f8c4295f815c3f5cd22491738901b) - **parser**: query mods and set ops in FROM-first syntax *(PR [#6092](https://github.com/tobymao/sqlglot/pull/6092) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6088](https://github.com/tobymao/sqlglot/issues/6088) opened by [@denis-komarov](https://github.com/denis-komarov)* - :arrow_lower_right: *fixes issue [#6091](https://github.com/tobymao/sqlglot/issues/6091) opened by [@denis-komarov](https://github.com/denis-komarov)* - :arrow_lower_right: *fixes issue [#6093](https://github.com/tobymao/sqlglot/issues/6093) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`5109890`](https://github.com/tobymao/sqlglot/commit/510989043d18baa17502a971262462814a2eb5be) - **parser**: VALUES with ORDER BY/LIMIT/OFFSET *(PR [#6094](https://github.com/tobymao/sqlglot/pull/6094) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6087](https://github.com/tobymao/sqlglot/issues/6087) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`4b062c8`](https://github.com/tobymao/sqlglot/commit/4b062c850bd9867be0d622f3f526762fa2b72302) - consume more syntax for cubes/rollups fixes [#6101](https://github.com/tobymao/sqlglot/pull/6101) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`f00866a`](https://github.com/tobymao/sqlglot/commit/f00866aeb8b7f51e27173c688225fe16d777eb1a) - **duckdb**: 1 arg FORMAT func *(PR [#6109](https://github.com/tobymao/sqlglot/pull/6109) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6108](https://github.com/tobymao/sqlglot/issues/6108) opened by [@erindru](https://github.com/erindru)* - [`77dfd5a`](https://github.com/tobymao/sqlglot/commit/77dfd5a41bb9ce5450e0f6b7a78c953c8ade14d5) - lineage does not modify sql input if expression *(PR [#6113](https://github.com/tobymao/sqlglot/pull/6113) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#6112](https://github.com/tobymao/sqlglot/issues/6112) opened by [@snovik75](https://github.com/snovik75)* - [`06f40f9`](https://github.com/tobymao/sqlglot/commit/06f40f900ce693ba4203514e422cba8cda0dbb07) - **optimizer**: don't simplify x XOR x due to NULL semantics *(PR [#6115](https://github.com/tobymao/sqlglot/pull/6115) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6104](https://github.com/tobymao/sqlglot/issues/6104) opened by [@dllggyx](https://github.com/dllggyx)* - [`03e2dff`](https://github.com/tobymao/sqlglot/commit/03e2dff9b074dc228cf3854ff1f4357e091aa9b3) - allow parsing `analyze` as an identifier fixes [#6123](https://github.com/tobymao/sqlglot/pull/6123) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8744431`](https://github.com/tobymao/sqlglot/commit/874443148c8ec2a773dfaca5da10d3587a49de3e) - transpile bigquery DATETIME_DIFF to duckdb *(PR [#6126](https://github.com/tobymao/sqlglot/pull/6126) by [@toriwei](https://github.com/toriwei))* - :arrow_lower_right: *fixes issue [#6107](https://github.com/tobymao/sqlglot/issues/6107) opened by [@izeigerman](https://github.com/izeigerman)* - [`b94e81b`](https://github.com/tobymao/sqlglot/commit/b94e81b42b89c75625b2da779c0f53777d9b6b48) - 
**optimizer**: avoid removing string literals from WHERE clause *(PR [#6131](https://github.com/tobymao/sqlglot/pull/6131) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6128](https://github.com/tobymao/sqlglot/issues/6128) opened by [@dllggyx](https://github.com/dllggyx)* - [`e2129c6`](https://github.com/tobymao/sqlglot/commit/e2129c6766ca1f10ff6663bec98be984abb33c91) - **optimizer**: Do not consider BIT_COUNT an aggregate function *(PR [#6135](https://github.com/tobymao/sqlglot/pull/6135) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6130](https://github.com/tobymao/sqlglot/issues/6130) opened by [@dllggyx](https://github.com/dllggyx)* - [`03bfeed`](https://github.com/tobymao/sqlglot/commit/03bfeed56c5c2f143ce2e1be38d519f902d19961) - **starrocks**: disable IS TRUE/FALSE syntax support *(PR [#6145](https://github.com/tobymao/sqlglot/pull/6145) by [@petrikoro](https://github.com/petrikoro))* - :arrow_lower_right: *fixes issue [#6144](https://github.com/tobymao/sqlglot/issues/6144) opened by [@petrikoro](https://github.com/petrikoro)* - [`d136414`](https://github.com/tobymao/sqlglot/commit/d136414e520270ac9ab2fd8e9df4691d269b3af0) - **optimizer**: avoid simplifying AND with NULL *(PR [#6148](https://github.com/tobymao/sqlglot/pull/6148) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6136](https://github.com/tobymao/sqlglot/issues/6136) opened by [@dllggyx](https://github.com/dllggyx)* - [`1fd9991`](https://github.com/tobymao/sqlglot/commit/1fd99911a60f0543fbc79221a8c6a6f232ed0a2a) - **clickhouse**: NOT + IN precedence *(PR [#6149](https://github.com/tobymao/sqlglot/pull/6149) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6143](https://github.com/tobymao/sqlglot/issues/6143) opened by [@mlipiev](https://github.com/mlipiev)* ### :recycle: Refactors - 
[`58dbce3`](https://github.com/tobymao/sqlglot/commit/58dbce30da5ab94af82247ab8a7eb85200d9b8af) - bq static type annotators *(PR [#6103](https://github.com/tobymao/sqlglot/pull/6103) by [@geooo109](https://github.com/geooo109))* - [`c970235`](https://github.com/tobymao/sqlglot/commit/c97023549623fe5974d6bff57e64339eff74187e) - clean up MONTHNAME test *(commit by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`d36ba87`](https://github.com/tobymao/sqlglot/commit/d36ba8774a2a4b53c122e3b78086ce0f09e77244) - **optimizer**: add tests for Snowflake DATE_FROM_PARTS function *(PR [#6077](https://github.com/tobymao/sqlglot/pull/6077) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`2bc05cf`](https://github.com/tobymao/sqlglot/commit/2bc05cf3bd53b874a1505c747e38f8a6a1dbf8c7) - **optimizer**: add tests for Snowflake DATEDIFF function *(PR [#6090](https://github.com/tobymao/sqlglot/pull/6090) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a4d07a0`](https://github.com/tobymao/sqlglot/commit/a4d07a07eefbdaf88d30df2310a9533afdc75a82) - **optimizer**: Annotate type for snowflake EXTRACT function *(PR [#6099](https://github.com/tobymao/sqlglot/pull/6099) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ab1da2e`](https://github.com/tobymao/sqlglot/commit/ab1da2e54a83e29d708047d4b3f8abcc1094229d) - **optimizer**: add type annotation tests for snowflake LAST_DAY function *(PR [#6105](https://github.com/tobymao/sqlglot/pull/6105) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`4e24c0a`](https://github.com/tobymao/sqlglot/commit/4e24c0ad92e7071a1f1537886173e29999b46f72) - **optimizer**: add type annotation tests for snowflake TIMESTAMPDIFF function *(PR [#6138](https://github.com/tobymao/sqlglot/pull/6138) by [@fivetran-MichaelLee](https://github.com/fivetran-MichaelLee))* - 
[`ae8571f`](https://github.com/tobymao/sqlglot/commit/ae8571fdec71587188e45fe087e1967f5ba641bc) - **optimizer**: add type annotation tests for snowflake TIMEDIFF *(PR [#6140](https://github.com/tobymao/sqlglot/pull/6140) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`3059320`](https://github.com/tobymao/sqlglot/commit/30593202b30001933f05747937975013754b75fa) - copy by default in `lineage` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99949cc`](https://github.com/tobymao/sqlglot/commit/99949ccd3ff81b524edeae437d874b86250dbb5b) - avoid needlessly copying in lineage *(PR [#6150](https://github.com/tobymao/sqlglot/pull/6150) by [@georgesittas](https://github.com/georgesittas))* - [`e7756d8`](https://github.com/tobymao/sqlglot/commit/e7756d8e9f347bfba3f861463890bf57e532cc54) - **optimizer**: add annotation tests for snowflake's BOOLXOR *(PR [#6154](https://github.com/tobymao/sqlglot/pull/6154) by [@fivetran-felixhuang](https://github.com/fivetran-felixhuang))* - [`72e43e3`](https://github.com/tobymao/sqlglot/commit/72e43e3ea08f9dce5a32654060a56f2ee31bea8f) - **optimizer**: add type annotation tests for snowflake's TIMESTAMPADD function *(PR [#6146](https://github.com/tobymao/sqlglot/pull/6146) by [@fivetran-ashashankar](https://github.com/fivetran-ashashankar))* ## [v27.27.0] - 2025-10-13 ### :boom: BREAKING CHANGES - due to [`c67276d`](https://github.com/tobymao/sqlglot/commit/c67276d5be970252e14d1817d8498fc9985222d9) - Annotate type for snowflake RADIANS function. *(PR [#6064](https://github.com/tobymao/sqlglot/pull/6064) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake RADIANS function. (#6064) ### :sparkles: New Features - [`c67276d`](https://github.com/tobymao/sqlglot/commit/c67276d5be970252e14d1817d8498fc9985222d9) - **optimizer**: Annotate type for snowflake RADIANS function. 
*(PR [#6064](https://github.com/tobymao/sqlglot/pull/6064) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :wrench: Chores - [`dab2a3f`](https://github.com/tobymao/sqlglot/commit/dab2a3fbdb8a523f05319eb34a1fd34534272206) - bump sqlglotrs version to 0.7.3 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.26.0] - 2025-10-10 ### :boom: BREAKING CHANGES - due to [`9060f60`](https://github.com/tobymao/sqlglot/commit/9060f603818db863b7570a2c3c50c3eb88155e76) - Annotate type for snowflake ATAN2 function. *(PR [#6060](https://github.com/tobymao/sqlglot/pull/6060) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake ATAN2 function. (#6060) - due to [`b3eb2e4`](https://github.com/tobymao/sqlglot/commit/b3eb2e4ca6177ee61b27675e8ec8b4815587df31) - annotate type for Snowflake SINH function *(PR [#6052](https://github.com/tobymao/sqlglot/pull/6052) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake SINH function (#6052) - due to [`157d2fa`](https://github.com/tobymao/sqlglot/commit/157d2fa06ab110ebc760aa7567d7fda801a5ced9) - annotate type for Snowflake CEIL function *(PR [#6051](https://github.com/tobymao/sqlglot/pull/6051) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CEIL function (#6051) - due to [`e7833de`](https://github.com/tobymao/sqlglot/commit/e7833de9744a4aa69d244285e7f6f7281af178ba) - support DELETE with USING and multiple VALUES *(PR [#6072](https://github.com/tobymao/sqlglot/pull/6072) by [@geooo109](https://github.com/geooo109))*: support DELETE with USING and multiple VALUES (#6072) - due to [`354140d`](https://github.com/tobymao/sqlglot/commit/354140d0a279f317439bdb247e1ab9578f9a035d) - Annotate type for snowflake TANH and ATAN functions *(PR [#6069](https://github.com/tobymao/sqlglot/pull/6069) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TANH and ATAN functions (#6069) ### :sparkles: New Features - [`9060f60`](https://github.com/tobymao/sqlglot/commit/9060f603818db863b7570a2c3c50c3eb88155e76) - **optimizer**: Annotate type for snowflake ATAN2 function. *(PR [#6060](https://github.com/tobymao/sqlglot/pull/6060) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b3eb2e4`](https://github.com/tobymao/sqlglot/commit/b3eb2e4ca6177ee61b27675e8ec8b4815587df31) - **optimizer**: annotate type for Snowflake SINH function *(PR [#6052](https://github.com/tobymao/sqlglot/pull/6052) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`440b960`](https://github.com/tobymao/sqlglot/commit/440b960529801674fa23708212485fda95749699) - **duckdb**: support `USING KEY (...)` in recursive DuckDB CTEs *(PR [#6068](https://github.com/tobymao/sqlglot/pull/6068) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6066](https://github.com/tobymao/sqlglot/issues/6066) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`157d2fa`](https://github.com/tobymao/sqlglot/commit/157d2fa06ab110ebc760aa7567d7fda801a5ced9) - **optimizer**: annotate type for Snowflake CEIL function *(PR [#6051](https://github.com/tobymao/sqlglot/pull/6051) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`eb6d6e7`](https://github.com/tobymao/sqlglot/commit/eb6d6e7ccde37456ab56ad976e7d95cea23c14e3) - **duckdb**: support `DEFAULT VALUES` clause in `INSERT` DML *(PR [#6067](https://github.com/tobymao/sqlglot/pull/6067) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6065](https://github.com/tobymao/sqlglot/issues/6065) opened by [@denis-komarov](https://github.com/denis-komarov)* - 
[`354140d`](https://github.com/tobymao/sqlglot/commit/354140d0a279f317439bdb247e1ab9578f9a035d) - **optimizer**: Annotate type for snowflake TANH and ATAN functions *(PR [#6069](https://github.com/tobymao/sqlglot/pull/6069) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c94e3e0`](https://github.com/tobymao/sqlglot/commit/c94e3e0e4e20bd76d4cf630123d2c05a0e3044c3) - add ColumnDef expression parser *(PR [#6075](https://github.com/tobymao/sqlglot/pull/6075) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`2c7cc29`](https://github.com/tobymao/sqlglot/commit/2c7cc29a329dcbaaa90a6f857d2383d2967ea6cc) - **duckdb**: Transform exp.HexString to BLOB in hex notation *(PR [#6045](https://github.com/tobymao/sqlglot/pull/6045) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6035](https://github.com/tobymao/sqlglot/issues/6035) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`e7833de`](https://github.com/tobymao/sqlglot/commit/e7833de9744a4aa69d244285e7f6f7281af178ba) - **parser**: support DELETE with USING and multiple VALUES *(PR [#6072](https://github.com/tobymao/sqlglot/pull/6072) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6070](https://github.com/tobymao/sqlglot/issues/6070) opened by [@denis-komarov](https://github.com/denis-komarov)* ### :recycle: Refactors - [`2c9d15c`](https://github.com/tobymao/sqlglot/commit/2c9d15c92da25c8456b2463c69aa56c8ec47c453) - replace direct arg manipulation *(PR [#6073](https://github.com/tobymao/sqlglot/pull/6073) by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`75b8d16`](https://github.com/tobymao/sqlglot/commit/75b8d16e41b677ea7e150c89d713795073aae6e3) - remove docs from main branch *(PR [#6057](https://github.com/tobymao/sqlglot/pull/6057) by [@georgesittas](https://github.com/georgesittas))* - 
[`cfa2493`](https://github.com/tobymao/sqlglot/commit/cfa249328eef31ab0e0688dcc03521da3343ce47) - **optimizer**: Annotate type for snowflake SQUARE function *(PR [#6059](https://github.com/tobymao/sqlglot/pull/6059) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`e26c394`](https://github.com/tobymao/sqlglot/commit/e26c3949beb7f73020fcd099237dbe31a4db8d84) - **optimizer**: Annotate type for snowflake POW function *(PR [#6058](https://github.com/tobymao/sqlglot/pull/6058) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`7d303ad`](https://github.com/tobymao/sqlglot/commit/7d303adc5efe9d51eb62aeab80bfa4f844e1911d) - include Python 3.14 in the testing matrix *(PR [#6074](https://github.com/tobymao/sqlglot/pull/6074) by [@georgesittas](https://github.com/georgesittas))* ## [v27.25.0] - 2025-10-09 ### :boom: BREAKING CHANGES - due to [`6f31b86`](https://github.com/tobymao/sqlglot/commit/6f31b86599258afe156aa3d9ccc42389cac37021) - Annotate type for snowflake FLOOR function *(PR [#6030](https://github.com/tobymao/sqlglot/pull/6030) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake FLOOR function (#6030) - due to [`cecab2f`](https://github.com/tobymao/sqlglot/commit/cecab2fd66d578ddc765b5fd0e7b155971280a0c) - annotate type for Snowflake ATANH function *(PR [#6054](https://github.com/tobymao/sqlglot/pull/6054) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake ATANH function (#6054) - due to [`08339a9`](https://github.com/tobymao/sqlglot/commit/08339a902138211f67cfb009d2576b22ea8d8e42) - annotate type for Snowflake FACTORIAL function *(PR [#6053](https://github.com/tobymao/sqlglot/pull/6053) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake FACTORIAL function (#6053) ### :sparkles: New Features - 
[`6f31b86`](https://github.com/tobymao/sqlglot/commit/6f31b86599258afe156aa3d9ccc42389cac37021) - **optimizer**: Annotate type for snowflake FLOOR function *(PR [#6030](https://github.com/tobymao/sqlglot/pull/6030) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b7463d5`](https://github.com/tobymao/sqlglot/commit/b7463d5b0a1e286498d7ccfd9a07ef7edfa80bb2) - **optimizer**: Annotate type for snowflake ASIN function. *(PR [#6049](https://github.com/tobymao/sqlglot/pull/6049) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fe959a5`](https://github.com/tobymao/sqlglot/commit/fe959a5598508526ed5910a4c62372116b5d3c30) - **optimizer**: Annotate type for snowflake CBRT function *(PR [#6050](https://github.com/tobymao/sqlglot/pull/6050) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`cecab2f`](https://github.com/tobymao/sqlglot/commit/cecab2fd66d578ddc765b5fd0e7b155971280a0c) - **optimizer**: annotate type for Snowflake ATANH function *(PR [#6054](https://github.com/tobymao/sqlglot/pull/6054) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`08339a9`](https://github.com/tobymao/sqlglot/commit/08339a902138211f67cfb009d2576b22ea8d8e42) - **optimizer**: annotate type for Snowflake FACTORIAL function *(PR [#6053](https://github.com/tobymao/sqlglot/pull/6053) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* ### :bug: Bug Fixes - [`3bb6bb3`](https://github.com/tobymao/sqlglot/commit/3bb6bb3e5193ed53c803c3786a1791f15cd2f89a) - **parser**: support :: cast operator after IS NULL/IS NOT NULL *(PR [#6056](https://github.com/tobymao/sqlglot/pull/6056) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6055](https://github.com/tobymao/sqlglot/issues/6055) opened by [@vchan](https://github.com/vchan)* ### :wrench: Chores - 
[`15030a3`](https://github.com/tobymao/sqlglot/commit/15030a3996d005d79f27408a68d17f94c98aec68) - **optimizer**: Add tests for snowflake LN and LOG functions *(PR [#6048](https://github.com/tobymao/sqlglot/pull/6048) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`2ae8dbd`](https://github.com/tobymao/sqlglot/commit/2ae8dbd4d1b43bb27647144c32b2a781ff3edbeb) - push docs to `api-docs` branch instead of main *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.22.2] - 2025-10-08 ### :wrench: Chores - [`9ab3a96`](https://github.com/tobymao/sqlglot/commit/9ab3a96a853639224c80a9daff4674187a1a84ef) - bump sqlglotrs to 0.7.2 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.22.1] - 2025-10-08 ### :boom: BREAKING CHANGES - due to [`7ac01c2`](https://github.com/tobymao/sqlglot/commit/7ac01c2ae9bc4375efb63c60e3221e85088fdd1f) - bump sqlglotrs to 0.7.1 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.7.1 ### :wrench: Chores - [`7ac01c2`](https://github.com/tobymao/sqlglot/commit/7ac01c2ae9bc4375efb63c60e3221e85088fdd1f) - bump sqlglotrs to 0.7.1 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.22.0] - 2025-10-08 ### :boom: BREAKING CHANGES - due to [`6beb917`](https://github.com/tobymao/sqlglot/commit/6beb9172dffd0aaea46b75477485060737e774b9) - Annotate type for snowflake ROUND function *(PR [#6032](https://github.com/tobymao/sqlglot/pull/6032) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake ROUND function (#6032) - due to [`0939d69`](https://github.com/tobymao/sqlglot/commit/0939d69223a860581b1c30cc2f762294946b93f3) - move odbc date literal handling in t-sql closes [#6037](https://github.com/tobymao/sqlglot/pull/6037) *(PR [#6044](https://github.com/tobymao/sqlglot/pull/6044) by [@georgesittas](https://github.com/georgesittas))*: move odbc date literal handling in t-sql 
closes #6037 (#6044) - due to [`56c8b3b`](https://github.com/tobymao/sqlglot/commit/56c8b3bbff7451b9049e1a168716bb41222a86ed) - Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark *(PR [#6004](https://github.com/tobymao/sqlglot/pull/6004) by [@tsamaras](https://github.com/tsamaras))*: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark (#6004) ### :sparkles: New Features - [`6beb917`](https://github.com/tobymao/sqlglot/commit/6beb9172dffd0aaea46b75477485060737e774b9) - **optimizer**: Annotate type for snowflake ROUND function *(PR [#6032](https://github.com/tobymao/sqlglot/pull/6032) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8e03ad9`](https://github.com/tobymao/sqlglot/commit/8e03ad9dd087ebc72bf58cb6383607c0ce2e8f8f) - **optimizer**: Annotate type for snowflake MOD function *(PR [#6031](https://github.com/tobymao/sqlglot/pull/6031) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`15b3fac`](https://github.com/tobymao/sqlglot/commit/15b3fac3dd5efd4c347ac40055f07a9be5906802) - **mysql**: support `FOR ORDINALITY` clause in `COLUMN` expression *(PR [#6046](https://github.com/tobymao/sqlglot/pull/6046) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#6039](https://github.com/tobymao/sqlglot/issues/6039) opened by [@jdddog](https://github.com/jdddog)* - [`56c8b3b`](https://github.com/tobymao/sqlglot/commit/56c8b3bbff7451b9049e1a168716bb41222a86ed) - **hive,spark**: Support CHANGE COLUMN statements in Hive and CHANGE/ALTER COLUMN statements in Spark *(PR [#6004](https://github.com/tobymao/sqlglot/pull/6004) by [@tsamaras](https://github.com/tsamaras))* ### :bug: Bug Fixes - [`6a6ca92`](https://github.com/tobymao/sqlglot/commit/6a6ca927c4e6e06f5cb38ad1153a8b556999ef90) - **parser**: Allow nested GROUPING SETS *(PR [#6041](https://github.com/tobymao/sqlglot/pull/6041) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6038](https://github.com/tobymao/sqlglot/issues/6038) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`41baeaa`](https://github.com/tobymao/sqlglot/commit/41baeaa1530c5419c945409133e3b7caa5250ec7) - **optimizer**: more robust CROSS JOIN substitution and JOIN reordering *(PR [#6021](https://github.com/tobymao/sqlglot/pull/6021) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6009](https://github.com/tobymao/sqlglot/issues/6009) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`e2f299f`](https://github.com/tobymao/sqlglot/commit/e2f299f5ad18d75a394e55bd1ee59ed243d77e54) - allow subqueries to have modifiers closes [#6014](https://github.com/tobymao/sqlglot/pull/6014) *(PR [#6034](https://github.com/tobymao/sqlglot/pull/6034) by [@tobymao](https://github.com/tobymao))* - [`0d65266`](https://github.com/tobymao/sqlglot/commit/0d6526693f8e7dda9b7c180d31c364bde91afc72) - parse lambda for arg_min/max arguments closes [#6036](https://github.com/tobymao/sqlglot/pull/6036) *(PR [#6042](https://github.com/tobymao/sqlglot/pull/6042) by [@georgesittas](https://github.com/georgesittas))* - [`0939d69`](https://github.com/tobymao/sqlglot/commit/0939d69223a860581b1c30cc2f762294946b93f3) - move odbc date literal handling in t-sql closes [#6037](https://github.com/tobymao/sqlglot/pull/6037) *(PR [#6044](https://github.com/tobymao/sqlglot/pull/6044) by [@georgesittas](https://github.com/georgesittas))* - [`65848e5`](https://github.com/tobymao/sqlglot/commit/65848e5a3e4c1cb26e6ca4deb7819a282838c3c2) - **tsql**: UPDATE with OPTIONS *(PR [#6043](https://github.com/tobymao/sqlglot/pull/6043) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6033](https://github.com/tobymao/sqlglot/issues/6033) opened by [@ligfx](https://github.com/ligfx)* ### :recycle: Refactors - 
[`8f00c80`](https://github.com/tobymao/sqlglot/commit/8f00c804a67209a5eca1fcb28aeb95941c58e583) - _parse_in expr len check *(commit by [@geooo109](https://github.com/geooo109))* ## [v27.21.0] - 2025-10-07 ### :boom: BREAKING CHANGES - due to [`3c7b5c0`](https://github.com/tobymao/sqlglot/commit/3c7b5c0e2dc071b7b9f6da308ba58a3a43da93dc) - Annotate type for snowflake SOUNDEX_P123 function *(PR [#5987](https://github.com/tobymao/sqlglot/pull/5987) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake SOUNDEX_P123 function (#5987) - due to [`f25e42e`](https://github.com/tobymao/sqlglot/commit/f25e42e3f5b3b7b671bd724ba7b09a9b07d13995) - annotate type for Snowflake REGEXP_INSTR function *(PR [#5978](https://github.com/tobymao/sqlglot/pull/5978) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_INSTR function (#5978) - due to [`13cb26e`](https://github.com/tobymao/sqlglot/commit/13cb26e2f29373538d60a8124ddebf95fd22a8d8) - annotate type for Snowflake REGEXP_SUBSTR_ALL function *(PR [#5979](https://github.com/tobymao/sqlglot/pull/5979) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_SUBSTR_ALL function (#5979) - due to [`4ce683e`](https://github.com/tobymao/sqlglot/commit/4ce683eb8ac5716a334cbd7625438b9f89623c7a) - Annotate type for snowflake UNICODE function *(PR [#5993](https://github.com/tobymao/sqlglot/pull/5993) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake UNICODE function (#5993) - due to [`c7657fb`](https://github.com/tobymao/sqlglot/commit/c7657fbd27a4350c424ef65947471ab9ec086831) - remove `unalias_group_by` transformation since it is unsafe *(PR [#5997](https://github.com/tobymao/sqlglot/pull/5997) by [@georgesittas](https://github.com/georgesittas))*: remove `unalias_group_by` transformation 
since it is unsafe (#5997) - due to [`587196c`](https://github.com/tobymao/sqlglot/commit/587196c9c2d122f73f9deb7e87c2831f27f6ed02) - Annotate type for snowflake STRTOK_TO_ARRAY function *(PR [#5994](https://github.com/tobymao/sqlglot/pull/5994) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake STRTOK_TO_ARRAY function (#5994) - due to [`bced710`](https://github.com/tobymao/sqlglot/commit/bced71084ffb3a8f7a11db843777f05b68f367da) - Annotate type for snowflake STRTOK function. *(PR [#5991](https://github.com/tobymao/sqlglot/pull/5991) by [@georgesittas](https://github.com/georgesittas))*: Annotate type for snowflake STRTOK function. (#5991) - due to [`be1cdc8`](https://github.com/tobymao/sqlglot/commit/be1cdc81b511d462b710b50941d5c2770d901e91) - Fix roundtrip of ~ operator *(PR [#6017](https://github.com/tobymao/sqlglot/pull/6017) by [@VaggelisD](https://github.com/VaggelisD))*: Fix roundtrip of ~ operator (#6017) - due to [`74a13f2`](https://github.com/tobymao/sqlglot/commit/74a13f2a548b9cd41061e835cb3cd9dd2a5a9fb3) - Annotate type for snowflake DIV0 and DIVNULL functions *(PR [#6008](https://github.com/tobymao/sqlglot/pull/6008) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DIV0 and DIVNULL functions (#6008) - due to [`fec2b31`](https://github.com/tobymao/sqlglot/commit/fec2b31956f2debdad7c53744a577894cd8d747c) - Annotate type for snowflake SEARCH function *(PR [#5985](https://github.com/tobymao/sqlglot/pull/5985) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake SEARCH function (#5985) - due to [`27a76cd`](https://github.com/tobymao/sqlglot/commit/27a76cdfe4212f16f945521eb3997580eacf1d61) - Annotate type for snowflake COT, SIN and TAN functions *(PR [#6022](https://github.com/tobymao/sqlglot/pull/6022) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake COT, SIN and TAN functions (#6022) - due to [`0911276`](https://github.com/tobymao/sqlglot/commit/091127663ab4cb94b02be5aa40c6a46dd7f89243) - annotate type for Snowflake EXP function *(PR [#6007](https://github.com/tobymao/sqlglot/pull/6007) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake EXP function (#6007) - due to [`a96d50e`](https://github.com/tobymao/sqlglot/commit/a96d50e14bed5e87ff2dce9c545e0c48897b64d6) - annotate type for Snowflake COSH function *(PR [#6006](https://github.com/tobymao/sqlglot/pull/6006) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COSH function (#6006) - due to [`4df58e0`](https://github.com/tobymao/sqlglot/commit/4df58e0f0b8985590fb29a8ab6ba0ced987ac5b9) - annotate type for Snowflake DEGREES function *(PR [#6027](https://github.com/tobymao/sqlglot/pull/6027) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake DEGREES function (#6027) - due to [`db71a20`](https://github.com/tobymao/sqlglot/commit/db71a2023aaeca2ffda782ae7b91fdee356c402e) - annotate type for Snowflake COS function *(PR [#6028](https://github.com/tobymao/sqlglot/pull/6028) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COS function (#6028) - due to [`5dd2ed3`](https://github.com/tobymao/sqlglot/commit/5dd2ed3c69cf9e8c3e327297e0cc932f0954e108) - bump sqlglotrs to 0.7.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.7.0 ### :sparkles: New Features - [`3c7b5c0`](https://github.com/tobymao/sqlglot/commit/3c7b5c0e2dc071b7b9f6da308ba58a3a43da93dc) - **optimizer**: Annotate type for snowflake SOUNDEX_P123 function *(PR [#5987](https://github.com/tobymao/sqlglot/pull/5987) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`475c09b`](https://github.com/tobymao/sqlglot/commit/475c09bd27179db4d186638645698dd4ad6553cd) - **optimizer**: Annotate type for snowflake TRANSLATE function *(PR [#5992](https://github.com/tobymao/sqlglot/pull/5992) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`f25e42e`](https://github.com/tobymao/sqlglot/commit/f25e42e3f5b3b7b671bd724ba7b09a9b07d13995) - **optimizer**: annotate type for Snowflake REGEXP_INSTR function *(PR [#5978](https://github.com/tobymao/sqlglot/pull/5978) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`13cb26e`](https://github.com/tobymao/sqlglot/commit/13cb26e2f29373538d60a8124ddebf95fd22a8d8) - **optimizer**: annotate type for Snowflake REGEXP_SUBSTR_ALL function *(PR [#5979](https://github.com/tobymao/sqlglot/pull/5979) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`4ce683e`](https://github.com/tobymao/sqlglot/commit/4ce683eb8ac5716a334cbd7625438b9f89623c7a) - **optimizer**: Annotate type for snowflake UNICODE function *(PR [#5993](https://github.com/tobymao/sqlglot/pull/5993) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`587196c`](https://github.com/tobymao/sqlglot/commit/587196c9c2d122f73f9deb7e87c2831f27f6ed02) - **optimizer**: Annotate type for snowflake STRTOK_TO_ARRAY function *(PR [#5994](https://github.com/tobymao/sqlglot/pull/5994) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`bced710`](https://github.com/tobymao/sqlglot/commit/bced71084ffb3a8f7a11db843777f05b68f367da) - **optimizer**: Annotate type for snowflake STRTOK function. 
*(PR [#5991](https://github.com/tobymao/sqlglot/pull/5991) by [@georgesittas](https://github.com/georgesittas))* - [`74a13f2`](https://github.com/tobymao/sqlglot/commit/74a13f2a548b9cd41061e835cb3cd9dd2a5a9fb3) - **optimizer**: Annotate type for snowflake DIV0 and DIVNULL functions *(PR [#6008](https://github.com/tobymao/sqlglot/pull/6008) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fec2b31`](https://github.com/tobymao/sqlglot/commit/fec2b31956f2debdad7c53744a577894cd8d747c) - **optimizer**: Annotate type for snowflake SEARCH function *(PR [#5985](https://github.com/tobymao/sqlglot/pull/5985) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`27a76cd`](https://github.com/tobymao/sqlglot/commit/27a76cdfe4212f16f945521eb3997580eacf1d61) - **optimizer**: Annotate type for snowflake COT, SIN and TAN functions *(PR [#6022](https://github.com/tobymao/sqlglot/pull/6022) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`8b48f7b`](https://github.com/tobymao/sqlglot/commit/8b48f7b985342cfcc45bc2b94540a1a2bf5995c4) - **optimizer**: Annotate type for snowflake SIGN and ABS functions *(PR [#6025](https://github.com/tobymao/sqlglot/pull/6025) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0911276`](https://github.com/tobymao/sqlglot/commit/091127663ab4cb94b02be5aa40c6a46dd7f89243) - **optimizer**: annotate type for Snowflake EXP function *(PR [#6007](https://github.com/tobymao/sqlglot/pull/6007) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a96d50e`](https://github.com/tobymao/sqlglot/commit/a96d50e14bed5e87ff2dce9c545e0c48897b64d6) - **optimizer**: annotate type for Snowflake COSH function *(PR [#6006](https://github.com/tobymao/sqlglot/pull/6006) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - 
[`4df58e0`](https://github.com/tobymao/sqlglot/commit/4df58e0f0b8985590fb29a8ab6ba0ced987ac5b9) - **optimizer**: annotate type for Snowflake DEGREES function *(PR [#6027](https://github.com/tobymao/sqlglot/pull/6027) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`db71a20`](https://github.com/tobymao/sqlglot/commit/db71a2023aaeca2ffda782ae7b91fdee356c402e) - **optimizer**: annotate type for Snowflake COS function *(PR [#6028](https://github.com/tobymao/sqlglot/pull/6028) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* ### :bug: Bug Fixes - [`51b1bb1`](https://github.com/tobymao/sqlglot/commit/51b1bb178fa952edc13b2cbc6f624d30b0bde798) - move `WATERMARK` logic to risingwave fixes [#5989](https://github.com/tobymao/sqlglot/pull/5989) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`033ddf0`](https://github.com/tobymao/sqlglot/commit/033ddf04da895f1f5d38aff5361b2ae0793fefea) - **optimizer**: convert INNER JOINs to LEFT JOINs when merging LEFT JOIN subqueries *(PR [#5980](https://github.com/tobymao/sqlglot/pull/5980) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5969](https://github.com/tobymao/sqlglot/issues/5969) opened by [@karta0807913](https://github.com/karta0807913)* - [`c7657fb`](https://github.com/tobymao/sqlglot/commit/c7657fbd27a4350c424ef65947471ab9ec086831) - remove `unalias_group_by` transformation since it is unsafe *(PR [#5997](https://github.com/tobymao/sqlglot/pull/5997) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5995](https://github.com/tobymao/sqlglot/issues/5995) opened by [@capricornsky0119](https://github.com/capricornsky0119)* - [`b6f9694`](https://github.com/tobymao/sqlglot/commit/b6f9694c535cdd1403a63036cc246fda4e6d4d22) - **optimizer**: avoid merging subquery with JOIN when outer query uses JOIN *(PR [#5999](https://github.com/tobymao/sqlglot/pull/5999) by 
[@geooo109](https://github.com/geooo109))* - [`23fd7b9`](https://github.com/tobymao/sqlglot/commit/23fd7b9116541b96e5d89389e862c6004e92d109) - respect multi-part Column units instead of converting to Var *(PR [#6005](https://github.com/tobymao/sqlglot/pull/6005) by [@georgesittas](https://github.com/georgesittas))* - [`be1cdc8`](https://github.com/tobymao/sqlglot/commit/be1cdc81b511d462b710b50941d5c2770d901e91) - **duckdb**: Fix roundtrip of ~ operator *(PR [#6017](https://github.com/tobymao/sqlglot/pull/6017) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6016](https://github.com/tobymao/sqlglot/issues/6016) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`27c278f`](https://github.com/tobymao/sqlglot/commit/27c278f562f5ce98a1a4d31f8e66f148a1f42236) - **parser**: Allow LIMIT with % percentage *(PR [#6019](https://github.com/tobymao/sqlglot/pull/6019) by [@VaggelisD](https://github.com/VaggelisD))* - [`39bf3f8`](https://github.com/tobymao/sqlglot/commit/39bf3f893389663796cdd799ef0f1e684f315a01) - **parser**: Allow CUBE & ROLLUP inside GROUPING SETS *(PR [#6018](https://github.com/tobymao/sqlglot/pull/6018) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6015](https://github.com/tobymao/sqlglot/issues/6015) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`ba7ad34`](https://github.com/tobymao/sqlglot/commit/ba7ad341d5ee1298b8fe54be11ca6252c1a44c99) - **duckdb**: Parse ROW type as STRUCT *(PR [#6020](https://github.com/tobymao/sqlglot/pull/6020) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#6012](https://github.com/tobymao/sqlglot/issues/6012) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`718d6bb`](https://github.com/tobymao/sqlglot/commit/718d6bbf7f40e5b3e99563e2f1ac9eadeff57c3d) - handle unicode heredoc tags & Rust grapheme clusters properly *(PR [#6024](https://github.com/tobymao/sqlglot/pull/6024) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#6010](https://github.com/tobymao/sqlglot/issues/6010) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`c8cfb9d`](https://github.com/tobymao/sqlglot/commit/c8cfb9db2e789be2dc7f8a154082a9210b736502) - **snowflake**: transpile ARRAY_CONTAINS with VARIANT CAST *(PR [#6029](https://github.com/tobymao/sqlglot/pull/6029) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#6026](https://github.com/tobymao/sqlglot/issues/6026) opened by [@Birkman](https://github.com/Birkman)* ### :wrench: Chores - [`1b1c6f8`](https://github.com/tobymao/sqlglot/commit/1b1c6f8d418371d49f0d3511baf3c5e35dd3ef42) - coerce type for EXTRACT canonicalization *(PR [#5998](https://github.com/tobymao/sqlglot/pull/5998) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5996](https://github.com/tobymao/sqlglot/issues/5996) opened by [@snovik75](https://github.com/snovik75)* - [`f00ae73`](https://github.com/tobymao/sqlglot/commit/f00ae735c8f185b4c6c132373c9fa9bbe58e37b7) - **optimizer**: Annotate type for sqrt function *(PR [#6003](https://github.com/tobymao/sqlglot/pull/6003) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ac97f14`](https://github.com/tobymao/sqlglot/commit/ac97f14ee1a576a276018f6c9ae1237ecf9ceda7) - simplify `SEARCH` Snowflake instantiation *(commit by [@georgesittas](https://github.com/georgesittas))* - [`5dd2ed3`](https://github.com/tobymao/sqlglot/commit/5dd2ed3c69cf9e8c3e327297e0cc932f0954e108) - bump sqlglotrs to 0.7.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.20.0] - 2025-09-30 ### :boom: BREAKING CHANGES - due to [`13a30df`](https://github.com/tobymao/sqlglot/commit/13a30dfa37096df5bfc2c31538325c40a49f7917) - Annotate type for snowflake TRY_BASE64_DECODE_BINARY function *(PR [#5972](https://github.com/tobymao/sqlglot/pull/5972) by 
[@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_BASE64_DECODE_BINARY function (#5972) - due to [`1f5fdd7`](https://github.com/tobymao/sqlglot/commit/1f5fdd799c047de167a4572f7ac26b7ad92167f2) - Annotate type for snowflake TRY_BASE64_DECODE_STRING function *(PR [#5974](https://github.com/tobymao/sqlglot/pull/5974) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_BASE64_DECODE_STRING function (#5974) - due to [`324e82f`](https://github.com/tobymao/sqlglot/commit/324e82fe1fb11722f91341010602a743b151e055) - Annotate type for snowflake TRY_HEX_DECODE_BINARY function *(PR [#5975](https://github.com/tobymao/sqlglot/pull/5975) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_HEX_DECODE_BINARY function (#5975) - due to [`6caf99d`](https://github.com/tobymao/sqlglot/commit/6caf99d556a3357ffaa6c294a9babcd30dd5fac5) - Annotate type for snowflake TRY_HEX_DECODE_STRING function *(PR [#5976](https://github.com/tobymao/sqlglot/pull/5976) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake TRY_HEX_DECODE_STRING function (#5976) - due to [`73186a8`](https://github.com/tobymao/sqlglot/commit/73186a812ce422c108ee81b3de11da6ee9a9e902) - annotate type for Snowflake REGEXP_COUNT function *(PR [#5963](https://github.com/tobymao/sqlglot/pull/5963) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_COUNT function (#5963) - due to [`c3bdb3c`](https://github.com/tobymao/sqlglot/commit/c3bdb3cd1af1809ed82be0ae40744d9fffc8ce18) - array start index is 1, support array_flatten, fixes [#5983](https://github.com/tobymao/sqlglot/pull/5983) *(commit by [@georgesittas](https://github.com/georgesittas))*: array start index is 1, support array_flatten, fixes #5983 
- due to [`244fb48`](https://github.com/tobymao/sqlglot/commit/244fb48fc9c4776f427c08b825d139b1c172fd26) - annotate type for Snowflake SPLIT_PART function *(PR [#5988](https://github.com/tobymao/sqlglot/pull/5988) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake SPLIT_PART function (#5988) - due to [`0d772e0`](https://github.com/tobymao/sqlglot/commit/0d772e0b9d687b24d49203c05d7a90cc1dce02d5) - add ast node for `DIRECTORY` source *(PR [#5990](https://github.com/tobymao/sqlglot/pull/5990) by [@georgesittas](https://github.com/georgesittas))*: add ast node for `DIRECTORY` source (#5990) ### :sparkles: New Features - [`13a30df`](https://github.com/tobymao/sqlglot/commit/13a30dfa37096df5bfc2c31538325c40a49f7917) - **optimizer**: Annotate type for snowflake TRY_BASE64_DECODE_BINARY function *(PR [#5972](https://github.com/tobymao/sqlglot/pull/5972) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1f5fdd7`](https://github.com/tobymao/sqlglot/commit/1f5fdd799c047de167a4572f7ac26b7ad92167f2) - **optimizer**: Annotate type for snowflake TRY_BASE64_DECODE_STRING function *(PR [#5974](https://github.com/tobymao/sqlglot/pull/5974) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`324e82f`](https://github.com/tobymao/sqlglot/commit/324e82fe1fb11722f91341010602a743b151e055) - **optimizer**: Annotate type for snowflake TRY_HEX_DECODE_BINARY function *(PR [#5975](https://github.com/tobymao/sqlglot/pull/5975) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6caf99d`](https://github.com/tobymao/sqlglot/commit/6caf99d556a3357ffaa6c294a9babcd30dd5fac5) - **optimizer**: Annotate type for snowflake TRY_HEX_DECODE_STRING function *(PR [#5976](https://github.com/tobymao/sqlglot/pull/5976) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - 
[`73186a8`](https://github.com/tobymao/sqlglot/commit/73186a812ce422c108ee81b3de11da6ee9a9e902) - **optimizer**: annotate type for Snowflake REGEXP_COUNT function *(PR [#5963](https://github.com/tobymao/sqlglot/pull/5963) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`6124de7`](https://github.com/tobymao/sqlglot/commit/6124de76fa6d6725e844cd37e09ebfe99469b0ec) - **optimizer**: Annotate type for snowflake SOUNDEX function *(PR [#5986](https://github.com/tobymao/sqlglot/pull/5986) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`244fb48`](https://github.com/tobymao/sqlglot/commit/244fb48fc9c4776f427c08b825d139b1c172fd26) - **optimizer**: annotate type for Snowflake SPLIT_PART function *(PR [#5988](https://github.com/tobymao/sqlglot/pull/5988) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`0d772e0`](https://github.com/tobymao/sqlglot/commit/0d772e0b9d687b24d49203c05d7a90cc1dce02d5) - **snowflake**: add ast node for `DIRECTORY` source *(PR [#5990](https://github.com/tobymao/sqlglot/pull/5990) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`7a3744f`](https://github.com/tobymao/sqlglot/commit/7a3744f203b93211e5dd97e6730b6bf59d6d96e0) - **sqlite**: support `RANGE CURRENT ROW` in window spec *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c3bdb3c`](https://github.com/tobymao/sqlglot/commit/c3bdb3cd1af1809ed82be0ae40744d9fffc8ce18) - **starrocks**: array start index is 1, support array_flatten, fixes [#5983](https://github.com/tobymao/sqlglot/pull/5983) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`d425ba2`](https://github.com/tobymao/sqlglot/commit/d425ba26b96b368801f8f486fa375cd75105993d) - make hash and eq non recursive *(PR [#5966](https://github.com/tobymao/sqlglot/pull/5966) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - 
[`345c6a1`](https://github.com/tobymao/sqlglot/commit/345c6a153481a22d6df1b12ef1863e2133688fdf) - add uv support to Makefile *(PR [#5973](https://github.com/tobymao/sqlglot/pull/5973) by [@eakmanrq](https://github.com/eakmanrq))* ## [v27.19.0] - 2025-09-26 ### :boom: BREAKING CHANGES - due to [`68473ac`](https://github.com/tobymao/sqlglot/commit/68473ac3ec8dc76512dc76819892a1b0324c7ddc) - Annotate type for snowflake PARSE_URL function *(PR [#5962](https://github.com/tobymao/sqlglot/pull/5962) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake PARSE_URL function (#5962) - due to [`b015a9d`](https://github.com/tobymao/sqlglot/commit/b015a9d944d0a87069a7750ad74953c399d7da34) - annotate type for Snowflake REGEXP_INSTR function *(commit by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake REGEXP_INSTR function - due to [`1f29ba7`](https://github.com/tobymao/sqlglot/commit/1f29ba710f4213beb1a2f993244d7d824f3536ce) - annotate type for Snowflake PARSE_IP function *(PR [#5961](https://github.com/tobymao/sqlglot/pull/5961) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake PARSE_IP function (#5961) - due to [`bf45d5d`](https://github.com/tobymao/sqlglot/commit/bf45d5d3cb0c0f380824019eb32ec29049268a61) - annotate types for Snowflake RTRIMMED_LENGTH function *(PR [#5968](https://github.com/tobymao/sqlglot/pull/5968) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake RTRIMMED_LENGTH function (#5968) - due to [`13caa69`](https://github.com/tobymao/sqlglot/commit/13caa6991f003ad7abb590073451e591b6fd888c) - Annotate type for snowflake POSITION function *(PR [#5964](https://github.com/tobymao/sqlglot/pull/5964) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake POSITION 
function (#5964) ### :sparkles: New Features - [`88e4e4c`](https://github.com/tobymao/sqlglot/commit/88e4e4c55f3a113127eb3c82c0be46c29bcf15ab) - **optimizer**: Annotate type for OCTET_LENGTH function *(PR [#5960](https://github.com/tobymao/sqlglot/pull/5960) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`68473ac`](https://github.com/tobymao/sqlglot/commit/68473ac3ec8dc76512dc76819892a1b0324c7ddc) - **optimizer**: Annotate type for snowflake PARSE_URL function *(PR [#5962](https://github.com/tobymao/sqlglot/pull/5962) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`b015a9d`](https://github.com/tobymao/sqlglot/commit/b015a9d944d0a87069a7750ad74953c399d7da34) - **optimizer**: annotate type for Snowflake REGEXP_INSTR function *(commit by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1f29ba7`](https://github.com/tobymao/sqlglot/commit/1f29ba710f4213beb1a2f993244d7d824f3536ce) - **optimizer**: annotate type for Snowflake PARSE_IP function *(PR [#5961](https://github.com/tobymao/sqlglot/pull/5961) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`bf45d5d`](https://github.com/tobymao/sqlglot/commit/bf45d5d3cb0c0f380824019eb32ec29049268a61) - **optimizer**: annotate types for Snowflake RTRIMMED_LENGTH function *(PR [#5968](https://github.com/tobymao/sqlglot/pull/5968) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`13caa69`](https://github.com/tobymao/sqlglot/commit/13caa6991f003ad7abb590073451e591b6fd888c) - **optimizer**: Annotate type for snowflake POSITION function *(PR [#5964](https://github.com/tobymao/sqlglot/pull/5964) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`1471306`](https://github.com/tobymao/sqlglot/commit/1471306ed317830c294e3654075f55424d14bf5a) - support parse into grant principal and privilege *(PR 
[#5971](https://github.com/tobymao/sqlglot/pull/5971) by [@eakmanrq](https://github.com/eakmanrq))* ### :bug: Bug Fixes - [`5432976`](https://github.com/tobymao/sqlglot/commit/543297680755344185e0f306843bc4909f4f75ed) - **bigquery**: allow GRANT as an id var *(PR [#5965](https://github.com/tobymao/sqlglot/pull/5965) by [@treysp](https://github.com/treysp))* ### :wrench: Chores - [`1514bc6`](https://github.com/tobymao/sqlglot/commit/1514bc640ec129a96aedd9e89bfd5d61e832d6b1) - **optimizer**: add type inference tests for Snowflake RPAD function *(PR [#5967](https://github.com/tobymao/sqlglot/pull/5967) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`050b89d`](https://github.com/tobymao/sqlglot/commit/050b89deb9be842f2ddd07c78ea201ec4eae4779) - **optimizer**: Annotate type for snowflake regexp function *(PR [#5970](https://github.com/tobymao/sqlglot/pull/5970) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ## [v27.18.0] - 2025-09-25 ### :boom: BREAKING CHANGES - due to [`7f13eaf`](https://github.com/tobymao/sqlglot/commit/7f13eaf7769a3381a56c9209af590835be2f95cd) - Annotate type for snowflake DECOMPRESS_BINARY function *(PR [#5945](https://github.com/tobymao/sqlglot/pull/5945) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DECOMPRESS_BINARY function (#5945) - due to [`be12b29`](https://github.com/tobymao/sqlglot/commit/be12b29b5a7bd6d6e09dbd8c17086bd77c19abc0) - Annotate type for snowflake DECOMPRESS_STRING function *(PR [#5947](https://github.com/tobymao/sqlglot/pull/5947) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake DECOMPRESS_STRING function (#5947) - due to [`1573fef`](https://github.com/tobymao/sqlglot/commit/1573fefac27b5b1215e3d458f8ccf1b9dadbb772) - annotate types for Snowflake JAROWINKLER_SIMILARITY function *(PR 
[#5950](https://github.com/tobymao/sqlglot/pull/5950) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake JAROWINKLER_SIMILARITY function (#5950) - due to [`883c6ab`](https://github.com/tobymao/sqlglot/commit/883c6abe589865f478d95604e8d670e57afd04af) - annotate type for Snowflake COLLATION function *(PR [#5939](https://github.com/tobymao/sqlglot/pull/5939) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COLLATION function (#5939) ### :sparkles: New Features - [`7f13eaf`](https://github.com/tobymao/sqlglot/commit/7f13eaf7769a3381a56c9209af590835be2f95cd) - **optimizer**: Annotate type for snowflake DECOMPRESS_BINARY function *(PR [#5945](https://github.com/tobymao/sqlglot/pull/5945) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`be12b29`](https://github.com/tobymao/sqlglot/commit/be12b29b5a7bd6d6e09dbd8c17086bd77c19abc0) - **optimizer**: Annotate type for snowflake DECOMPRESS_STRING function *(PR [#5947](https://github.com/tobymao/sqlglot/pull/5947) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`a55fce5`](https://github.com/tobymao/sqlglot/commit/a55fce5310a50af132c5d06bb299fe3f025442c4) - **optimizer**: Annotate type for snowflake LPAD function *(PR [#5948](https://github.com/tobymao/sqlglot/pull/5948) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`05e07aa`](https://github.com/tobymao/sqlglot/commit/05e07aa740d7977a6b42ec15ae4fa9c2168a15f5) - **optimizer**: annotate type for Snowflake INSERT function *(PR [#5942](https://github.com/tobymao/sqlglot/pull/5942) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`6268e10`](https://github.com/tobymao/sqlglot/commit/6268e107a947badaa00508544f5389412806ecd0) - **solr**: initial dialect implementation *(PR 
[#5946](https://github.com/tobymao/sqlglot/pull/5946) by [@aadel](https://github.com/aadel))* - [`1573fef`](https://github.com/tobymao/sqlglot/commit/1573fefac27b5b1215e3d458f8ccf1b9dadbb772) - **optimizer**: annotate types for Snowflake JAROWINKLER_SIMILARITY function *(PR [#5950](https://github.com/tobymao/sqlglot/pull/5950) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`883c6ab`](https://github.com/tobymao/sqlglot/commit/883c6abe589865f478d95604e8d670e57afd04af) - **optimizer**: annotate type for Snowflake COLLATION function *(PR [#5939](https://github.com/tobymao/sqlglot/pull/5939) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`627c18d`](https://github.com/tobymao/sqlglot/commit/627c18d7da6bf644bc14c0f17963dea0be20604a) - **mysql**: add valid INTERVAL units *(PR [#5951](https://github.com/tobymao/sqlglot/pull/5951) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`3846d4d`](https://github.com/tobymao/sqlglot/commit/3846d4dcdf8cbf8e90b2661083a567ab0547ad3c) - **solr**: properly support OR alternative operator *(commit by [@georgesittas](https://github.com/georgesittas))* - [`df428d5`](https://github.com/tobymao/sqlglot/commit/df428d516113a47ae50d04cd50a250830589c072) - **parser**: interval identifier followed by END *(PR [#5944](https://github.com/tobymao/sqlglot/pull/5944) by [@geooo109](https://github.com/geooo109))* - [`e178d16`](https://github.com/tobymao/sqlglot/commit/e178d1674a71e6f35a6acfa8f4a317f0fe2e4516) - **duckdb**: UNNEST as table *(PR [#5953](https://github.com/tobymao/sqlglot/pull/5953) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5952](https://github.com/tobymao/sqlglot/issues/5952) opened by [@denis-komarov](https://github.com/denis-komarov)* - [`24feb8e`](https://github.com/tobymao/sqlglot/commit/24feb8ee0bc43f3f14fd768c9a0d986355becea2) - **parser**: parse `UPDATE` clauses in any order *(PR 
[#5958](https://github.com/tobymao/sqlglot/pull/5958) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5956](https://github.com/tobymao/sqlglot/issues/5956) opened by [@sfc-gh-clathrope](https://github.com/sfc-gh-clathrope)* - [`980f99a`](https://github.com/tobymao/sqlglot/commit/980f99a4cc0613012a189ee5636af37ec736040c) - **snowflake**: properly generate inferred `STRUCT` data types *(PR [#5954](https://github.com/tobymao/sqlglot/pull/5954) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`c18aaf8`](https://github.com/tobymao/sqlglot/commit/c18aaf80fd7375e89dfc8863da619d84f3257353) - cleanup *(commit by [@tobymao](https://github.com/tobymao))* ## [v27.17.0] - 2025-09-23 ### :boom: BREAKING CHANGES - due to [`f4ad258`](https://github.com/tobymao/sqlglot/commit/f4ad25882951de4e4442dfd5189a56d5a1c5e630) - Annotate types for Snowflake BASE64_DECODE_BINARY function *(PR [#5917](https://github.com/tobymao/sqlglot/pull/5917) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate types for Snowflake BASE64_DECODE_BINARY function (#5917) - due to [`6d0e3f8`](https://github.com/tobymao/sqlglot/commit/6d0e3f8dcae7ed1a7659ece69b1f94cec5e7300e) - Add parser support to ilike like function versions. *(PR [#5915](https://github.com/tobymao/sqlglot/pull/5915) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add parser support to ilike like function versions. 
(#5915) - due to [`22c7ed7`](https://github.com/tobymao/sqlglot/commit/22c7ed7734b41ca544bb67bcc1ca4151f6d5f05f) - parse tuple *(PR [#5920](https://github.com/tobymao/sqlglot/pull/5920) by [@geooo109](https://github.com/geooo109))*: parse tuple (#5920) - due to [`fc5624e`](https://github.com/tobymao/sqlglot/commit/fc5624eca43d2855ac350c92d85b184a6893d5ca) - annotate types for Snowflake ASCII function *(PR [#5926](https://github.com/tobymao/sqlglot/pull/5926) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake ASCII function (#5926) - due to [`4e81690`](https://github.com/tobymao/sqlglot/commit/4e8169045edcaa28ae43abeb07370df63846fbfd) - annotate type for Snowflake COLLATE function *(PR [#5931](https://github.com/tobymao/sqlglot/pull/5931) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake COLLATE function (#5931) - due to [`f07d35d`](https://github.com/tobymao/sqlglot/commit/f07d35d29104c6203efaab738118d1903614b83c) - annotate type for Snowflake CHR function *(PR [#5929](https://github.com/tobymao/sqlglot/pull/5929) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake CHR function (#5929) - due to [`f8c0ee4`](https://github.com/tobymao/sqlglot/commit/f8c0ee4d3c1a4d4a92b897d1cc85f9904c8e566b) - Add function and annotate snowflake hex decode string and binary functions *(PR [#5928](https://github.com/tobymao/sqlglot/pull/5928) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Add function and annotate snowflake hex decode string and binary functions (#5928) - due to [`66f9501`](https://github.com/tobymao/sqlglot/commit/66f9501d76d087798bad93e578273ab2a45e2575) - annotate types for Snowflake BIT_LENGTH function *(PR [#5927](https://github.com/tobymao/sqlglot/pull/5927) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: 
annotate types for Snowflake BIT_LENGTH function (#5927) - due to [`7878437`](https://github.com/tobymao/sqlglot/commit/78784370712df65a2e1e79a1c2b441131ed7222a) - annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` *(PR [#5922](https://github.com/tobymao/sqlglot/pull/5922) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` (#5922) - due to [`9bcad04`](https://github.com/tobymao/sqlglot/commit/9bcad040bd51dd03821c68eea1a73534fc7a81b7) - Annotate type for HEX ENCODE function. *(PR [#5936](https://github.com/tobymao/sqlglot/pull/5936) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for HEX ENCODE function. (#5936) - due to [`590928f`](https://github.com/tobymao/sqlglot/commit/590928f4637306e8cf3f1302d5dd5d5dbc76e7e0) - annotate type for Snowflake INITCAP function *(PR [#5941](https://github.com/tobymao/sqlglot/pull/5941) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake INITCAP function (#5941) - due to [`ac04de1`](https://github.com/tobymao/sqlglot/commit/ac04de1944c7a976406581b489b3cf9b11dafb77) - annotate type for Snowflake EDITDISTANCE function *(PR [#5940](https://github.com/tobymao/sqlglot/pull/5940) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for Snowflake EDITDISTANCE function (#5940) - due to [`9e28af8`](https://github.com/tobymao/sqlglot/commit/9e28af8a52ced951ecf7f4e85a6305e20a13de1f) - Annotate type for snowflake COMPRESS function *(PR [#5938](https://github.com/tobymao/sqlglot/pull/5938) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate type for snowflake COMPRESS function (#5938) ### :sparkles: New Features - [`f4ad258`](https://github.com/tobymao/sqlglot/commit/f4ad25882951de4e4442dfd5189a56d5a1c5e630) - **optimizer**: Annotate types 
for Snowflake BASE64_DECODE_BINARY function *(PR [#5917](https://github.com/tobymao/sqlglot/pull/5917) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`6d0e3f8`](https://github.com/tobymao/sqlglot/commit/6d0e3f8dcae7ed1a7659ece69b1f94cec5e7300e) - **optimizer**: Add parser support to ilike like function versions. *(PR [#5915](https://github.com/tobymao/sqlglot/pull/5915) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fc5624e`](https://github.com/tobymao/sqlglot/commit/fc5624eca43d2855ac350c92d85b184a6893d5ca) - **optimizer**: annotate types for Snowflake ASCII function *(PR [#5926](https://github.com/tobymao/sqlglot/pull/5926) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`4e81690`](https://github.com/tobymao/sqlglot/commit/4e8169045edcaa28ae43abeb07370df63846fbfd) - **optimizer**: annotate type for Snowflake COLLATE function *(PR [#5931](https://github.com/tobymao/sqlglot/pull/5931) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f07d35d`](https://github.com/tobymao/sqlglot/commit/f07d35d29104c6203efaab738118d1903614b83c) - **optimizer**: annotate type for Snowflake CHR function *(PR [#5929](https://github.com/tobymao/sqlglot/pull/5929) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f8c0ee4`](https://github.com/tobymao/sqlglot/commit/f8c0ee4d3c1a4d4a92b897d1cc85f9904c8e566b) - **optimizer**: Add function and annotate snowflake hex decode string and binary functions *(PR [#5928](https://github.com/tobymao/sqlglot/pull/5928) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`66f9501`](https://github.com/tobymao/sqlglot/commit/66f9501d76d087798bad93e578273ab2a45e2575) - **optimizer**: annotate types for Snowflake BIT_LENGTH function *(PR [#5927](https://github.com/tobymao/sqlglot/pull/5927) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f4c810e`](https://github.com/tobymao/sqlglot/commit/f4c810e043d9379e94efb185e368e27ad9c15715) - transpile Trino `FORMAT` to DuckDB and Snowflake, closes [#5933](https://github.com/tobymao/sqlglot/pull/5933) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7878437`](https://github.com/tobymao/sqlglot/commit/78784370712df65a2e1e79a1c2b441131ed7222a) - **optimizer**: annotate snowflake's `BASE64_DECODE_STRING`, `BASE64_ENCODE` *(PR [#5922](https://github.com/tobymao/sqlglot/pull/5922) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`9bcad04`](https://github.com/tobymao/sqlglot/commit/9bcad040bd51dd03821c68eea1a73534fc7a81b7) - **optimizer**: Annotate type for HEX ENCODE function. *(PR [#5936](https://github.com/tobymao/sqlglot/pull/5936) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`590928f`](https://github.com/tobymao/sqlglot/commit/590928f4637306e8cf3f1302d5dd5d5dbc76e7e0) - **optimizer**: annotate type for Snowflake INITCAP function *(PR [#5941](https://github.com/tobymao/sqlglot/pull/5941) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`ac04de1`](https://github.com/tobymao/sqlglot/commit/ac04de1944c7a976406581b489b3cf9b11dafb77) - **optimizer**: annotate type for Snowflake EDITDISTANCE function *(PR [#5940](https://github.com/tobymao/sqlglot/pull/5940) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`a385990`](https://github.com/tobymao/sqlglot/commit/a38599080932a8b54a169df8b7a69650cb47b6bc) - **parser**: support wrapped aggregate functions *(PR [#5943](https://github.com/tobymao/sqlglot/pull/5943) by [@geooo109](https://github.com/geooo109))* - [`9e28af8`](https://github.com/tobymao/sqlglot/commit/9e28af8a52ced951ecf7f4e85a6305e20a13de1f) - **optimizer**: Annotate type for snowflake COMPRESS function 
*(PR [#5938](https://github.com/tobymao/sqlglot/pull/5938) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - [`6807a32`](https://github.com/tobymao/sqlglot/commit/6807a32cccf984dc13a30b815750b2c41374b845) - escape byte string delimiters *(PR [#5916](https://github.com/tobymao/sqlglot/pull/5916) by [@georgesittas](https://github.com/georgesittas))* - [`22c7ed7`](https://github.com/tobymao/sqlglot/commit/22c7ed7734b41ca544bb67bcc1ca4151f6d5f05f) - **clickhouse**: parse tuple *(PR [#5920](https://github.com/tobymao/sqlglot/pull/5920) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5913](https://github.com/tobymao/sqlglot/issues/5913) opened by [@tiagoskaneta](https://github.com/tiagoskaneta)* - [`223160b`](https://github.com/tobymao/sqlglot/commit/223160bd7914d51e9ec1abb8d0f1053e13a65c98) - **parser**: NULLABLE as an identifier *(PR [#5921](https://github.com/tobymao/sqlglot/pull/5921) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5919](https://github.com/tobymao/sqlglot/issues/5919) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`42cfc79`](https://github.com/tobymao/sqlglot/commit/42cfc79ce120dee83084e2bb6b8bbd19f45bf06f) - **snowflake**: parse DAYOFWEEKISO *(PR [#5925](https://github.com/tobymao/sqlglot/pull/5925) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5924](https://github.com/tobymao/sqlglot/issues/5924) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`0be2cb4`](https://github.com/tobymao/sqlglot/commit/0be2cb448ee1a5ac020ac47e9944875c30e42632) - **postgres**: support `DISTINCT` qualifier in `JSON_AGG` fixes [#5935](https://github.com/tobymao/sqlglot/pull/5935) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e34b2e1`](https://github.com/tobymao/sqlglot/commit/e34b2e14d1f87d095955765173a5e17fc9985220) - allow grouping set parser to consume more 
syntax fixes [#5937](https://github.com/tobymao/sqlglot/pull/5937) *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.16.3] - 2025-09-18 ### :bug: Bug Fixes - [`d127051`](https://github.com/tobymao/sqlglot/commit/d1270517c3e124ca59caf29e4506eb3848f7452e) - precedence issue with column operator parsing *(PR [#5914](https://github.com/tobymao/sqlglot/pull/5914) by [@georgesittas](https://github.com/georgesittas))* ## [v27.16.2] - 2025-09-18 ### :wrench: Chores - [`837890c`](https://github.com/tobymao/sqlglot/commit/837890c7e8bcc3695541bbe32fd8088eee70fea3) - handle badly formed binary expressions gracefully in type inference *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.16.1] - 2025-09-18 ### :bug: Bug Fixes - [`0e256b3`](https://github.com/tobymao/sqlglot/commit/0e256b3f864bc2d026817bd08e89ee89f44ad256) - edge case with parsing `interval` as identifier *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.16.0] - 2025-09-18 ### :boom: BREAKING CHANGES - due to [`5a973e9`](https://github.com/tobymao/sqlglot/commit/5a973e9a88fa7f522a9bf91dc60fb0f6effef53d) - annotate types for Snowflake AI_CLASSIFY function *(PR [#5909](https://github.com/tobymao/sqlglot/pull/5909) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_CLASSIFY function (#5909) - due to [`2d0d908`](https://github.com/tobymao/sqlglot/commit/2d0d908b5bbc32ff3bc92eb1ae9fc6e5ac3409bc) - produce TableAlias instead of Alias for USING in merge builder *(PR [#5911](https://github.com/tobymao/sqlglot/pull/5911) by [@georgesittas](https://github.com/georgesittas))*: produce TableAlias instead of Alias for USING in merge builder (#5911) ### :sparkles: New Features - [`5a973e9`](https://github.com/tobymao/sqlglot/commit/5a973e9a88fa7f522a9bf91dc60fb0f6effef53d) - **optimizer**: annotate types for Snowflake AI_CLASSIFY function *(PR [#5909](https://github.com/tobymao/sqlglot/pull/5909) by 
[@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* ### :bug: Bug Fixes - [`2d0d908`](https://github.com/tobymao/sqlglot/commit/2d0d908b5bbc32ff3bc92eb1ae9fc6e5ac3409bc) - produce TableAlias instead of Alias for USING in merge builder *(PR [#5911](https://github.com/tobymao/sqlglot/pull/5911) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5910](https://github.com/tobymao/sqlglot/issues/5910) opened by [@deepyaman](https://github.com/deepyaman)* ### :wrench: Chores - [`e8974e7`](https://github.com/tobymao/sqlglot/commit/e8974e70d9956ce7a5cb119ba465660f5f172a17) - **optimizer**: Add tests for snowflake likeall, likeany and ilikeany functions *(PR [#5908](https://github.com/tobymao/sqlglot/pull/5908) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ## [v27.15.3] - 2025-09-17 ### :bug: Bug Fixes - [`bd3e965`](https://github.com/tobymao/sqlglot/commit/bd3e9655aa72ffef8a9e0221205fa2c3915ef58b) - allow `lock` to be used as an identifier *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.15.2] - 2025-09-17 ### :sparkles: New Features - [`d5cf114`](https://github.com/tobymao/sqlglot/commit/d5cf1149932850a91cb5f1ebecda2652616729ef) - **duckdb**: support INSTALL *(PR [#5904](https://github.com/tobymao/sqlglot/pull/5904) by [@geooo109](https://github.com/geooo109))* - [`73e05bb`](https://github.com/tobymao/sqlglot/commit/73e05bb15bb86e4a07cc09bf02028a6cf7fa1e6f) - **snowflake**: properly generate `BITNOT` *(PR [#5906](https://github.com/tobymao/sqlglot/pull/5906) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`16f317c`](https://github.com/tobymao/sqlglot/commit/16f317c04f7c0a398c38b461e05f4d4c30baf98b) - **snowflake**: add support for `!` syntax *(PR [#5907](https://github.com/tobymao/sqlglot/pull/5907) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - 
[`095b2ac`](https://github.com/tobymao/sqlglot/commit/095b2ac3af230eff86d9bc1b0fd3a0a2095f151c) - clean up duckdb INSTALL tests *(commit by [@geooo109](https://github.com/geooo109))* ## [v27.15.1] - 2025-09-17 ### :sparkles: New Features - [`1ee026d`](https://github.com/tobymao/sqlglot/commit/1ee026d22d4f6c3613c1809a6738cdea846c48a9) - **postgres**: support `SUBSTRING(value FOR length FROM start)` variant *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.15.0] - 2025-09-17 ### :boom: BREAKING CHANGES - due to [`96ae7a3`](https://github.com/tobymao/sqlglot/commit/96ae7a3bcbf9de1932150baa0bd704d4ce05c9f7) - Annotate and add tests for snowflake REPEAT and SPLIT functions *(PR [#5875](https://github.com/tobymao/sqlglot/pull/5875) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate and add tests for snowflake REPEAT and SPLIT functions (#5875) - due to [`f2d3bf7`](https://github.com/tobymao/sqlglot/commit/f2d3bf74e804e5a5e2ac6ca94210ba04df07e7f3) - annotate types for Snowflake UUID_STRING function *(PR [#5881](https://github.com/tobymao/sqlglot/pull/5881) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake UUID_STRING function (#5881) - due to [`ec80ff3`](https://github.com/tobymao/sqlglot/commit/ec80ff34957c3e3f80c44175383b06cf72988a68) - make dump a list instead of a nested dict to avoid all recursion errors *(PR [#5885](https://github.com/tobymao/sqlglot/pull/5885) by [@tobymao](https://github.com/tobymao))*: make dump a list instead of a nested dict to avoid all recursion errors (#5885) - due to [`2fdaccd`](https://github.com/tobymao/sqlglot/commit/2fdaccd1a9045bda3d529025a4706c397b8a836f) - annotate types for Snowflake SHA1, SHA2 functions *(PR [#5884](https://github.com/tobymao/sqlglot/pull/5884) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake SHA1, SHA2 functions (#5884) - 
due to [`faba309`](https://github.com/tobymao/sqlglot/commit/faba30905390e5efaf0ba9a05aab9ac2724b1b85) - annotate types for Snowflake AI_AGG function *(PR [#5894](https://github.com/tobymao/sqlglot/pull/5894) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_AGG function (#5894) - due to [`304bec5`](https://github.com/tobymao/sqlglot/commit/304bec5f7342501ad28ea4cd0a4b9aa092f2192f) - Annotate snowflake MD5 functions *(PR [#5883](https://github.com/tobymao/sqlglot/pull/5883) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate snowflake MD5 functions (#5883) - due to [`c0180ec`](https://github.com/tobymao/sqlglot/commit/c0180ec163a43836fed754efcb6f26ad37cdae50) - annotate types for Snowflake AI_SUMMARIZE_AGG function *(PR [#5902](https://github.com/tobymao/sqlglot/pull/5902) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake AI_SUMMARIZE_AGG function (#5902) - due to [`f5409df`](https://github.com/tobymao/sqlglot/commit/f5409df64ed6069880669878db687e4b98c3e280) - use column name in struct type annotation *(PR [#5903](https://github.com/tobymao/sqlglot/pull/5903) by [@georgesittas](https://github.com/georgesittas))*: use column name in struct type annotation (#5903) ### :sparkles: New Features - [`cd818ba`](https://github.com/tobymao/sqlglot/commit/cd818bad51e93ec349b97675e4c1f5bd7c4c1522) - **singlestore**: Fixed generation/parsing of computed columns *(PR [#5878](https://github.com/tobymao/sqlglot/pull/5878) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5d1f241`](https://github.com/tobymao/sqlglot/commit/5d1f241209197419111e9eda37fb6f2a5ec2bc4b) - **tsql**: support JSON_ARRAYAGG *(PR [#5879](https://github.com/tobymao/sqlglot/pull/5879) by [@geooo109](https://github.com/geooo109))* - [`96ae7a3`](https://github.com/tobymao/sqlglot/commit/96ae7a3bcbf9de1932150baa0bd704d4ce05c9f7) - 
**optimizer**: Annotate and add tests for snowflake REPEAT and SPLIT functions *(PR [#5875](https://github.com/tobymao/sqlglot/pull/5875) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0fe6a25`](https://github.com/tobymao/sqlglot/commit/0fe6a25e366dcbc5a4a0878b285d147a6aa00412) - **postgres**: support JSON_AGG *(PR [#5880](https://github.com/tobymao/sqlglot/pull/5880) by [@geooo109](https://github.com/geooo109))* - [`854eeeb`](https://github.com/tobymao/sqlglot/commit/854eeeb5b25954cc26b91135d58eb8370271f1de) - **optimizer**: annotate types for Snowflake REGEXP_LIKE, REGEXP_REPLACE, REGEXP_SUBSTR functions *(PR [#5876](https://github.com/tobymao/sqlglot/pull/5876) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`f2d3bf7`](https://github.com/tobymao/sqlglot/commit/f2d3bf74e804e5a5e2ac6ca94210ba04df07e7f3) - **optimizer**: annotate types for Snowflake UUID_STRING function *(PR [#5881](https://github.com/tobymao/sqlglot/pull/5881) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`5b9463a`](https://github.com/tobymao/sqlglot/commit/5b9463ad11a49c821585985c35394ebb30e827dd) - **mysql**: add support for binary `MOD` operator fixes [#5887](https://github.com/tobymao/sqlglot/pull/5887) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d24eabc`](https://github.com/tobymao/sqlglot/commit/d24eabcbe30dc0f7c2dbae346e429efef58b5680) - **bigquery**: Add support for ML.GENERATE_TEXT_EMBEDDING(...) 
*(PR [#5891](https://github.com/tobymao/sqlglot/pull/5891) by [@VaggelisD](https://github.com/VaggelisD))* - [`950a3fa`](https://github.com/tobymao/sqlglot/commit/950a3fa6d6307f7713f40117655da2f9710ebfa9) - **mysql**: SOUNDS LIKE, SUBSTR *(PR [#5886](https://github.com/tobymao/sqlglot/pull/5886) by [@vuvova](https://github.com/vuvova))* - [`688afc5`](https://github.com/tobymao/sqlglot/commit/688afc55ab08588636eba92893c603ca68e43e6e) - **singlestore**: Fixed generation of exp.National *(PR [#5890](https://github.com/tobymao/sqlglot/pull/5890) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`c77147e`](https://github.com/tobymao/sqlglot/commit/c77147ebaafa6942f80af75dd6c2d7a62a7e6fe2) - **parser**: Extend support for `IS UNKNOWN` across all dialects *(PR [#5888](https://github.com/tobymao/sqlglot/pull/5888) by [@VaggelisD](https://github.com/VaggelisD))* - [`ec80ff3`](https://github.com/tobymao/sqlglot/commit/ec80ff34957c3e3f80c44175383b06cf72988a68) - make dump a list instead of a nested dict to avoid all recursion errors *(PR [#5885](https://github.com/tobymao/sqlglot/pull/5885) by [@tobymao](https://github.com/tobymao))* - [`2fdaccd`](https://github.com/tobymao/sqlglot/commit/2fdaccd1a9045bda3d529025a4706c397b8a836f) - **optimizer**: annotate types for Snowflake SHA1, SHA2 functions *(PR [#5884](https://github.com/tobymao/sqlglot/pull/5884) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`faba309`](https://github.com/tobymao/sqlglot/commit/faba30905390e5efaf0ba9a05aab9ac2724b1b85) - **optimizer**: annotate types for Snowflake AI_AGG function *(PR [#5894](https://github.com/tobymao/sqlglot/pull/5894) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`dd27844`](https://github.com/tobymao/sqlglot/commit/dd2784435c7bdd2ceaaaaa359fcd112ad1f8190c) - **snowflake**: transpile `BYTE_LENGTH` *(PR [#5899](https://github.com/tobymao/sqlglot/pull/5899) by [@ozadari](https://github.com/ozadari))* 
- [`304bec5`](https://github.com/tobymao/sqlglot/commit/304bec5f7342501ad28ea4cd0a4b9aa092f2192f) - **optimizer**: Annotate snowflake MD5 functions *(PR [#5883](https://github.com/tobymao/sqlglot/pull/5883) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ec3006d`](https://github.com/tobymao/sqlglot/commit/ec3006d815951fdc1a80d6722ce6f1176417d595) - **optimizer**: Add tests for snowflake NOT ILIKE and NOT LIKE *(PR [#5901](https://github.com/tobymao/sqlglot/pull/5901) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c0180ec`](https://github.com/tobymao/sqlglot/commit/c0180ec163a43836fed754efcb6f26ad37cdae50) - **optimizer**: annotate types for Snowflake AI_SUMMARIZE_AGG function *(PR [#5902](https://github.com/tobymao/sqlglot/pull/5902) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* ### :bug: Bug Fixes - [`1d9e357`](https://github.com/tobymao/sqlglot/commit/1d9e357fb7549635ca25c6c42299880d7864e074) - **optimizer**: expand columns on the LHS of recursive CTEs *(PR [#5872](https://github.com/tobymao/sqlglot/pull/5872) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5814](https://github.com/tobymao/sqlglot/issues/5814) opened by [@suresh-summation](https://github.com/suresh-summation)* - [`7fcc52a`](https://github.com/tobymao/sqlglot/commit/7fcc52a22241c480c22b3e6f843e7a210c75a0ec) - **parser**: Require an explicit alias in EXCLUDE/RENAME/REPLACE star ops *(PR [#5892](https://github.com/tobymao/sqlglot/pull/5892) by [@VaggelisD](https://github.com/VaggelisD))* - [`5fdcc65`](https://github.com/tobymao/sqlglot/commit/5fdcc651277ba4e86e11d0c5952a56e40299a998) - **snowflake**: parse OCTET_LENGTH *(PR [#5900](https://github.com/tobymao/sqlglot/pull/5900) by [@geooo109](https://github.com/geooo109))* - [`f5409df`](https://github.com/tobymao/sqlglot/commit/f5409df64ed6069880669878db687e4b98c3e280) - **optimizer**: use 
column name in struct type annotation *(PR [#5903](https://github.com/tobymao/sqlglot/pull/5903) by [@georgesittas](https://github.com/georgesittas))* - [`74886d8`](https://github.com/tobymao/sqlglot/commit/74886d82f70c9317af51c77b322e67a6aa260a5e) - **snowflake**: transpile BQ UNNEST with alias *(PR [#5897](https://github.com/tobymao/sqlglot/pull/5897) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5895](https://github.com/tobymao/sqlglot/issues/5895) opened by [@YuvalOmerRep](https://github.com/YuvalOmerRep)* ## [v27.14.0] - 2025-09-11 ### :boom: BREAKING CHANGES - due to [`9c8a600`](https://github.com/tobymao/sqlglot/commit/9c8a6001f41816035f391d046eb9692d6f13cefc) - correct parsing of TO_VARCHAR *(PR [#5840](https://github.com/tobymao/sqlglot/pull/5840) by [@geooo109](https://github.com/geooo109))*: correct parsing of TO_VARCHAR (#5840) - due to [`1e9aef1`](https://github.com/tobymao/sqlglot/commit/1e9aef1bb20f4dc5e9c03d59cb3165c235c11ce1) - convert NULL annotations to UNKNOWN *(PR [#5842](https://github.com/tobymao/sqlglot/pull/5842) by [@georgesittas](https://github.com/georgesittas))*: convert NULL annotations to UNKNOWN (#5842) - due to [`44c9e70`](https://github.com/tobymao/sqlglot/commit/44c9e70bd8c9421035eb0e87e4286061ec5d2fa8) - add tests for snowflake STARTSWITH function *(PR [#5847](https://github.com/tobymao/sqlglot/pull/5847) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: add tests for snowflake STARTSWITH function (#5847) - due to [`0779c2d`](https://github.com/tobymao/sqlglot/commit/0779c2d4e8ce0228592de6882763940783fa5e87) - support BIT_X aggregates again for duckdb, postgres *(PR [#5851](https://github.com/tobymao/sqlglot/pull/5851) by [@georgesittas](https://github.com/georgesittas))*: support BIT_X aggregates again for duckdb, postgres (#5851) - due to [`c50d6e3`](https://github.com/tobymao/sqlglot/commit/c50d6e3c7b96f00d27c34a02c8e0dced21e6c373) - annotate type for 
snowflake LEFT, RIGHT and SUBSTRING functions *(PR [#5849](https://github.com/tobymao/sqlglot/pull/5849) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate type for snowflake LEFT, RIGHT and SUBSTRING functions (#5849) - due to [`e441e16`](https://github.com/tobymao/sqlglot/commit/e441e16991626c2da2d38bc9c3a2b408e3f773bd) - make dump/pickling non-recursive to avoid hitting stack limits *(PR [#5850](https://github.com/tobymao/sqlglot/pull/5850) by [@tobymao](https://github.com/tobymao))*: make dump/pickling non-recursive to avoid hitting stack limits (#5850) - due to [`b128339`](https://github.com/tobymao/sqlglot/commit/b12833977e2a395712481cf11e293fdbd70fd4ce) - annotate and add tests for snowflake LENGTH and LOWER functions *(PR [#5856](https://github.com/tobymao/sqlglot/pull/5856) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate and add tests for snowflake LENGTH and LOWER functions (#5856) - due to [`134957a`](https://github.com/tobymao/sqlglot/commit/134957af11c55a4ab16f58d0725d6bb8ab23eb28) - annotate types for Snowflake TRIM function *(PR [#5811](https://github.com/tobymao/sqlglot/pull/5811) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate types for Snowflake TRIM function (#5811) - due to [`d3cd6bf`](https://github.com/tobymao/sqlglot/commit/d3cd6bf6e5fbaa490868ee3cd2cc99dd5e40a396) - Annotate and add tests for snowflake REPLACE and SPACE functions *(PR [#5871](https://github.com/tobymao/sqlglot/pull/5871) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Annotate and add tests for snowflake REPLACE and SPACE functions (#5871) ### :sparkles: New Features - [`a398fb4`](https://github.com/tobymao/sqlglot/commit/a398fb4df28c868f4cfc34530044b9d7b78e2e90) - **singlestore**: Splitted truncation of multiple tables into several queries *(PR 
[#5839](https://github.com/tobymao/sqlglot/pull/5839) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cd27c96`](https://github.com/tobymao/sqlglot/commit/cd27c96fe85aba5f54116f38649edd8db064a5e6) - **snowflake**: transpile `TO_HEX` from bigquery *(PR [#5838](https://github.com/tobymao/sqlglot/pull/5838) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`d2e4ab7`](https://github.com/tobymao/sqlglot/commit/d2e4ab7df41ae3601e9b66e1338db3d851729339) - **snowflake**: add tests for endswith function *(PR [#5846](https://github.com/tobymao/sqlglot/pull/5846) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`c50d6e3`](https://github.com/tobymao/sqlglot/commit/c50d6e3c7b96f00d27c34a02c8e0dced21e6c373) - **optimizer**: annotate type for snowflake LEFT, RIGHT and SUBSTRING functions *(PR [#5849](https://github.com/tobymao/sqlglot/pull/5849) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ca6c8f7`](https://github.com/tobymao/sqlglot/commit/ca6c8f753ba8458544439e20671f0981c98d168d) - **singlestore**: Improved parsing/generation of exp.Show *(PR [#5853](https://github.com/tobymao/sqlglot/pull/5853) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`722eceb`](https://github.com/tobymao/sqlglot/commit/722ecebfa43aa5948031edd1828b6482a241d9ef) - **snowflake**: MD5Digest transpiling to MD5_BINARY *(PR [#5855](https://github.com/tobymao/sqlglot/pull/5855) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`b128339`](https://github.com/tobymao/sqlglot/commit/b12833977e2a395712481cf11e293fdbd70fd4ce) - **optimizer**: annotate and add tests for snowflake LENGTH and LOWER functions *(PR [#5856](https://github.com/tobymao/sqlglot/pull/5856) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`134957a`](https://github.com/tobymao/sqlglot/commit/134957af11c55a4ab16f58d0725d6bb8ab23eb28) - **optimizer**: annotate types for 
Snowflake TRIM function *(PR [#5811](https://github.com/tobymao/sqlglot/pull/5811) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`0475dae`](https://github.com/tobymao/sqlglot/commit/0475dae21231b85407bf778fd9f1abaecdeb68de) - **singlestore**: Marked several exp.Describe args as unsupported *(PR [#5861](https://github.com/tobymao/sqlglot/pull/5861) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7a07b41`](https://github.com/tobymao/sqlglot/commit/7a07b41b2357149adc6afb50bb98e37e6a3175f1) - **optimizer**: Add tests for snowflake LTRIM and RTRIM functions *(PR [#5857](https://github.com/tobymao/sqlglot/pull/5857) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`fb90666`](https://github.com/tobymao/sqlglot/commit/fb90666ff3e710d70815a68defde3dc85aeef7b3) - **singlestore**: Added collate handling to exp.AlterColumn *(PR [#5864](https://github.com/tobymao/sqlglot/pull/5864) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2f27692`](https://github.com/tobymao/sqlglot/commit/2f276929d6b6f788eb5b3ee0b1a8a8c108833474) - **snowflake**: JSONFormat transpiling to TO_JSON *(PR [#5860](https://github.com/tobymao/sqlglot/pull/5860) by [@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`487c811`](https://github.com/tobymao/sqlglot/commit/487c8119cbfaf2783f5f17ec90c8e69e4432a4fa) - **singlestore**: Fixed parsing/generation of exp.RenameColumn *(PR [#5865](https://github.com/tobymao/sqlglot/pull/5865) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`76cf4d8`](https://github.com/tobymao/sqlglot/commit/76cf4d892a6d011a2e0020fb1ea82518d4f49e71) - **bigquery**: add support for ML.TRANSLATE func *(PR [#5859](https://github.com/tobymao/sqlglot/pull/5859) by [@geooo109](https://github.com/geooo109))* - [`a899eb1`](https://github.com/tobymao/sqlglot/commit/a899eb188d5e354d3ed56d1e7c32861eecf3e906) - **singlestore**: Fixed parsing and generation of VECTOR type *(PR 
[#5854](https://github.com/tobymao/sqlglot/pull/5854) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`0acf076`](https://github.com/tobymao/sqlglot/commit/0acf0769773061fca3ec03125a5d43a4aa9c8e4b) - **postgres**: Support `?|` JSONB operator *(PR [#5866](https://github.com/tobymao/sqlglot/pull/5866) by [@VaggelisD](https://github.com/VaggelisD))* - [`bd4b278`](https://github.com/tobymao/sqlglot/commit/bd4b2780c32ee52d25b6539d7b4479b6a7f80d18) - **optimizer**: annotate types for Snowflake UPPER function *(PR [#5812](https://github.com/tobymao/sqlglot/pull/5812) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`edab189`](https://github.com/tobymao/sqlglot/commit/edab1890e2c790b737be4995a31667448eff148e) - **postgres**: Support ?& JSONB operator *(PR [#5867](https://github.com/tobymao/sqlglot/pull/5867) by [@VaggelisD](https://github.com/VaggelisD))* - [`960ec06`](https://github.com/tobymao/sqlglot/commit/960ec069eb275b7b8cc6705dbbb1143159f06237) - **postgres**: Support #- JSONB operator *(PR [#5868](https://github.com/tobymao/sqlglot/pull/5868) by [@VaggelisD](https://github.com/VaggelisD))* - [`d3cd6bf`](https://github.com/tobymao/sqlglot/commit/d3cd6bf6e5fbaa490868ee3cd2cc99dd5e40a396) - **optimizer**: Annotate and add tests for snowflake REPLACE and SPACE functions *(PR [#5871](https://github.com/tobymao/sqlglot/pull/5871) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`ba22531`](https://github.com/tobymao/sqlglot/commit/ba2253113ea5a7c76c8df7ec9b6faf37da698fa4) - **bigquery**: Add support for ML.FORECAST(...) 
*(PR [#5873](https://github.com/tobymao/sqlglot/pull/5873) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`9c8a600`](https://github.com/tobymao/sqlglot/commit/9c8a6001f41816035f391d046eb9692d6f13cefc) - **snowflake**: correct parsing of TO_VARCHAR *(PR [#5840](https://github.com/tobymao/sqlglot/pull/5840) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5837](https://github.com/tobymao/sqlglot/issues/5837) opened by [@ultrabear](https://github.com/ultrabear)* - [`f3d07fd`](https://github.com/tobymao/sqlglot/commit/f3d07fd8a106b034f64bb100291671c0fe39a106) - **snowflake**: Enable parsing of COPY INTO without files list *(PR [#5841](https://github.com/tobymao/sqlglot/pull/5841) by [@whummer](https://github.com/whummer))* - [`0ffb1fa`](https://github.com/tobymao/sqlglot/commit/0ffb1faac3b32aad845306eed0e000ff0d055554) - **duckdb**: transpile joins without ON/USING to CROSS JOIN *(PR [#5804](https://github.com/tobymao/sqlglot/pull/5804) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5795](https://github.com/tobymao/sqlglot/issues/5795) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`1e9aef1`](https://github.com/tobymao/sqlglot/commit/1e9aef1bb20f4dc5e9c03d59cb3165c235c11ce1) - **optimizer**: convert NULL annotations to UNKNOWN *(PR [#5842](https://github.com/tobymao/sqlglot/pull/5842) by [@georgesittas](https://github.com/georgesittas))* - [`bbcf0d4`](https://github.com/tobymao/sqlglot/commit/bbcf0d4404ea014f08319c44313719b4377adcdb) - **duckdb**: support trailing commas before `FOR` in pivot, fixes [#5843](https://github.com/tobymao/sqlglot/pull/5843) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ad8a408`](https://github.com/tobymao/sqlglot/commit/ad8a408a4e3e26e32472fc55c67b44687992ae47) - **parser**: more robust nested pipe syntax *(PR [#5845](https://github.com/tobymao/sqlglot/pull/5845) by [@geooo109](https://github.com/geooo109))* - 
[`44c9e70`](https://github.com/tobymao/sqlglot/commit/44c9e70bd8c9421035eb0e87e4286061ec5d2fa8) - **optimizer**: add tests for snowflake STARTSWITH function *(PR [#5847](https://github.com/tobymao/sqlglot/pull/5847) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`0779c2d`](https://github.com/tobymao/sqlglot/commit/0779c2d4e8ce0228592de6882763940783fa5e87) - support BIT_X aggregates again for duckdb, postgres *(PR [#5851](https://github.com/tobymao/sqlglot/pull/5851) by [@georgesittas](https://github.com/georgesittas))* - [`d131aab`](https://github.com/tobymao/sqlglot/commit/d131aab6815bf77d444a763d9bb4028d8f0e742d) - **redshift**: convert FETCH clauses to LIMIT for Redshift dialect *(PR [#5848](https://github.com/tobymao/sqlglot/pull/5848) by [@tomasmontielp](https://github.com/tomasmontielp))* - [`b22c4ec`](https://github.com/tobymao/sqlglot/commit/b22c4ecf4c032d89ca737f01d614102aa9c2b1ed) - **fabric**: UUID to UNIQUEIDENTIFIER *(PR [#5863](https://github.com/tobymao/sqlglot/pull/5863) by [@fresioAS](https://github.com/fresioAS))* - [`03d4f49`](https://github.com/tobymao/sqlglot/commit/03d4f49d92cd034d37074359b8c2cf96c5c3f5cf) - **clickhouse**: arrays are 1-indexed *(PR [#5862](https://github.com/tobymao/sqlglot/pull/5862) by [@joeyutong](https://github.com/joeyutong))* ### :recycle: Refactors - [`e441e16`](https://github.com/tobymao/sqlglot/commit/e441e16991626c2da2d38bc9c3a2b408e3f773bd) - make dump/pickling non-recursive to avoid hitting stack limits *(PR [#5850](https://github.com/tobymao/sqlglot/pull/5850) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`b244f30`](https://github.com/tobymao/sqlglot/commit/b244f30524846bd08d03a73410ae9b4674254ecd) - move `exp.Contains` to `BOOLEAN` entry in `TYPE_TO_EXPRESSIONS` *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.13.2] - 2025-09-08 ### :bug: Bug Fixes - 
[`5e7979f`](https://github.com/tobymao/sqlglot/commit/5e7979f3cf5f7996e198ddd81069d49a4a3b9391) - select session *(PR [#5836](https://github.com/tobymao/sqlglot/pull/5836) by [@tobymao](https://github.com/tobymao))* ## [v27.13.1] - 2025-09-08 ### :bug: Bug Fixes - [`f3d55c0`](https://github.com/tobymao/sqlglot/commit/f3d55c05c8411c9871f8ca4d23f726f976c9236b) - remove always token *(PR [#5832](https://github.com/tobymao/sqlglot/pull/5832) by [@tobymao](https://github.com/tobymao))* - [`1724775`](https://github.com/tobymao/sqlglot/commit/1724775429f66c2768864c8f96ace861eaa435fd) - support types() with no args *(PR [#5833](https://github.com/tobymao/sqlglot/pull/5833) by [@tobymao](https://github.com/tobymao))* - [`31c82c6`](https://github.com/tobymao/sqlglot/commit/31c82c6d6cd402e59cb59a94daafd22410eae0f6) - support `case.*` *(PR [#5835](https://github.com/tobymao/sqlglot/pull/5835) by [@georgesittas](https://github.com/georgesittas))* - [`c00f73b`](https://github.com/tobymao/sqlglot/commit/c00f73bac2530a62c25093c60bf02d0a4231bb0b) - window spec no and only exclude *(PR [#5834](https://github.com/tobymao/sqlglot/pull/5834) by [@tobymao](https://github.com/tobymao))* ## [v27.13.0] - 2025-09-08 ### :boom: BREAKING CHANGES - due to [`3726b33`](https://github.com/tobymao/sqlglot/commit/3726b33bb6b4ab286617f510e96e1fbd27c429f3) - support nulls_first arg for array_sort *(PR [#5802](https://github.com/tobymao/sqlglot/pull/5802) by [@treysp](https://github.com/treysp))*: support nulls_first arg for array_sort (#5802) - due to [`cf1d1e3`](https://github.com/tobymao/sqlglot/commit/cf1d1e3e0ef9e6cd1b1c6128c63ddf06c30f1339) - annotate type for snowflake's REVERSE function *(PR [#5803](https://github.com/tobymao/sqlglot/pull/5803) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))*: annotate type for snowflake's REVERSE function (#5803) - due to [`ad0b407`](https://github.com/tobymao/sqlglot/commit/ad0b407098e1611d4fc0e1f0916511337b9aefdb) - Mark 
'BEGIN' as TokenType.BEGIN for transactions *(PR [#5826](https://github.com/tobymao/sqlglot/pull/5826) by [@VaggelisD](https://github.com/VaggelisD))*: Mark 'BEGIN' as TokenType.BEGIN for transactions (#5826) - due to [`0198282`](https://github.com/tobymao/sqlglot/commit/0198282a82bbf3e81476e164718d63fd1210acdc) - Update tests for concat string function *(PR [#5809](https://github.com/tobymao/sqlglot/pull/5809) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: Update tests for concat string function (#5809) - due to [`db2c430`](https://github.com/tobymao/sqlglot/commit/db2c4303237a1244070c359245c398a724df6de2) - annotate the "contains" function *(PR [#5829](https://github.com/tobymao/sqlglot/pull/5829) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))*: annotate the "contains" function (#5829) ### :sparkles: New Features - [`cf1d1e3`](https://github.com/tobymao/sqlglot/commit/cf1d1e3e0ef9e6cd1b1c6128c63ddf06c30f1339) - **optimizer**: annotate type for snowflake's REVERSE function *(PR [#5803](https://github.com/tobymao/sqlglot/pull/5803) by [@fivetran-BradfordPaskewitz](https://github.com/fivetran-BradfordPaskewitz))* - [`1d07c52`](https://github.com/tobymao/sqlglot/commit/1d07c52badb2e392e6895cbb275d2224789366c9) - **SingleStore**: Implemented generation of CURRENT_DATETIME *(PR [#5816](https://github.com/tobymao/sqlglot/pull/5816) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cad4fd0`](https://github.com/tobymao/sqlglot/commit/cad4fd0c5b0ec90e693fa6883af0ab287b921019) - **singlestore**: Added handling of exp.JSONObject *(PR [#5817](https://github.com/tobymao/sqlglot/pull/5817) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e3cb076`](https://github.com/tobymao/sqlglot/commit/e3cb0766bd5c3ccb31ea52cfc76201f548798dc1) - **singlestore**: Implemented generation of exp.StandardHash *(PR [#5823](https://github.com/tobymao/sqlglot/pull/5823) by 
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`0198282`](https://github.com/tobymao/sqlglot/commit/0198282a82bbf3e81476e164718d63fd1210acdc) - **optimizer**: : Update tests for concat string function *(PR [#5809](https://github.com/tobymao/sqlglot/pull/5809) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* - [`4e8a436`](https://github.com/tobymao/sqlglot/commit/4e8a436c16f487a72bd1ac2432bcb1c46599d901) - **singlestore**: Added generation of exp.JSONExists *(PR [#5820](https://github.com/tobymao/sqlglot/pull/5820) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`82bea49`](https://github.com/tobymao/sqlglot/commit/82bea49978ae459492b5127a2a52049826e2fd06) - **singlestore**: Refactored parsing of JSON_BUILD_OBJECT *(PR [#5828](https://github.com/tobymao/sqlglot/pull/5828) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`f7d38c3`](https://github.com/tobymao/sqlglot/commit/f7d38c3a10c505346f04e39a2712d60b4c96370f) - **singlestore**: Implemented generation of exp.Stuff *(PR [#5825](https://github.com/tobymao/sqlglot/pull/5825) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`030a5b5`](https://github.com/tobymao/sqlglot/commit/030a5b5ea03ecee869b07cfd27f4ea044732822e) - **singlestore**: Added generation of exp.JSONBExists *(PR [#5821](https://github.com/tobymao/sqlglot/pull/5821) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e58fef1`](https://github.com/tobymao/sqlglot/commit/e58fef1d6dc654a3b36461bcbea21c99cdc96477) - **singlestore**: Implemented parsing and generation of exp.MatchAgainst *(PR [#5822](https://github.com/tobymao/sqlglot/pull/5822) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e94f530`](https://github.com/tobymao/sqlglot/commit/e94f530af0e0cdad995b4c8dc5ed86953490d37f) - **singlestore**: Added handling of exp.JSONArray *(PR [#5818](https://github.com/tobymao/sqlglot/pull/5818) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`1c42ef4`](https://github.com/tobymao/sqlglot/commit/1c42ef4374aeab8a1ee9848892d7f8c4511c7f04) - **singlestore**: Fixed parsing/generation of exp.JSONArrayAgg *(PR [#5819](https://github.com/tobymao/sqlglot/pull/5819) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`67219f0`](https://github.com/tobymao/sqlglot/commit/67219f0606231514f430e146e2fdb99e796f718b) - **singlestore**: Added support of UTC_TIMESTAMP and CURRENT_TIMESTAMP *(PR [#5808](https://github.com/tobymao/sqlglot/pull/5808) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`db2c430`](https://github.com/tobymao/sqlglot/commit/db2c4303237a1244070c359245c398a724df6de2) - **optimizer**: annoate the "contains" function *(PR [#5829](https://github.com/tobymao/sqlglot/pull/5829) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ### :bug: Bug Fixes - [`3726b33`](https://github.com/tobymao/sqlglot/commit/3726b33bb6b4ab286617f510e96e1fbd27c429f3) - **snowflake**: support nulls_first arg for array_sort *(PR [#5802](https://github.com/tobymao/sqlglot/pull/5802) by [@treysp](https://github.com/treysp))* - [`3408de0`](https://github.com/tobymao/sqlglot/commit/3408de09e50d2510c1a6f511dc2dec357059044f) - parsing quoted built-in data types *(PR [#5810](https://github.com/tobymao/sqlglot/pull/5810) by [@treysp](https://github.com/treysp))* - [`ad0b407`](https://github.com/tobymao/sqlglot/commit/ad0b407098e1611d4fc0e1f0916511337b9aefdb) - **postgres**: Mark 'BEGIN' as TokenType.BEGIN for transactions *(PR [#5826](https://github.com/tobymao/sqlglot/pull/5826) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5815](https://github.com/tobymao/sqlglot/issues/5815) opened by [@karakanb](https://github.com/karakanb)* - [`e1a1b5b`](https://github.com/tobymao/sqlglot/commit/e1a1b5befefb0ca30ac1310cecb82a44f6089034) - **snowflake**: transpile BigQuery's `&` to `BITAND` *(PR [#5827](https://github.com/tobymao/sqlglot/pull/5827) by 
[@YuvalOmerRep](https://github.com/YuvalOmerRep))* - [`32d0278`](https://github.com/tobymao/sqlglot/commit/32d027827eaa7aa0cd9faf2ac1f84739f914050f) - parse and generation of BITWISE AGG funcs across dialects *(PR [#5831](https://github.com/tobymao/sqlglot/pull/5831) by [@geooo109](https://github.com/geooo109))* - [`5f39a83`](https://github.com/tobymao/sqlglot/commit/5f39a83f1ff957aca57eb4745f83c296436acaac) - **bigquery**: properly generate `LIMIT` for `STRING_AGG` *(PR [#5830](https://github.com/tobymao/sqlglot/pull/5830) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`164fec1`](https://github.com/tobymao/sqlglot/commit/164fec1b36e3c7df41e2e5a5ad6b226fc5f76305) - **optimizer**: test type annotation for snowflake CHARINDEX function *(PR [#5805](https://github.com/tobymao/sqlglot/pull/5805) by [@fivetran-amrutabhimsenayachit](https://github.com/fivetran-amrutabhimsenayachit))* ## [v27.12.0] - 2025-09-04 ### :boom: BREAKING CHANGES - due to [`1c551d5`](https://github.com/tobymao/sqlglot/commit/1c551d5ed3315e314013c1f063deabd9d8613e5d) - parse and annotate type for bq TO_JSON *(PR [#5768](https://github.com/tobymao/sqlglot/pull/5768) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq TO_JSON (#5768) - due to [`1707f2d`](https://github.com/tobymao/sqlglot/commit/1707f2d7f9d3b58e8c216db638f8e572f9fe6f13) - annotate type for ABS *(PR [#5770](https://github.com/tobymao/sqlglot/pull/5770) by [@geooo109](https://github.com/geooo109))*: annotate type for ABS (#5770) - due to [`69acc51`](https://github.com/tobymao/sqlglot/commit/69acc5142b2d4f0b30832c350aa49f16d1adabef) - annotate type for bq IS_INF, IS_NAN *(PR [#5771](https://github.com/tobymao/sqlglot/pull/5771) by [@geooo109](https://github.com/geooo109))*: annotate type for bq IS_INF, IS_NAN (#5771) - due to [`0da2076`](https://github.com/tobymao/sqlglot/commit/0da207652331920416b29e2cc67bdc3c3f964466) - annotate type for bq CBRT *(PR 
[#5772](https://github.com/tobymao/sqlglot/pull/5772) by [@geooo109](https://github.com/geooo109))*: annotate type for bq CBRT (#5772) - due to [`a4968cb`](https://github.com/tobymao/sqlglot/commit/a4968cb5693670c1a2e9cd2c86404dd90fd76160) - annotate type for bq RAND *(PR [#5774](https://github.com/tobymao/sqlglot/pull/5774) by [@geooo109](https://github.com/geooo109))*: annotate type for bq RAND (#5774) - due to [`3e63350`](https://github.com/tobymao/sqlglot/commit/3e63350bd1d58b510cecd1a573d27be3fd2565ce) - parse and annotate type for bq ACOS *(PR [#5776](https://github.com/tobymao/sqlglot/pull/5776) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ACOS (#5776) - due to [`2be9d01`](https://github.com/tobymao/sqlglot/commit/2be9d01830c778186dc274c94c6db0dd6c4116d1) - parse and annotate type for bq ACOSH *(PR [#5779](https://github.com/tobymao/sqlglot/pull/5779) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ACOSH (#5779) - due to [`b77d3da`](https://github.com/tobymao/sqlglot/commit/b77d3da8f2548858d2b9d8590fcde83e1ec62b8a) - remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake *(PR [#5766](https://github.com/tobymao/sqlglot/pull/5766) by [@treysp](https://github.com/treysp))*: remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake (#5766) - due to [`7da2f31`](https://github.com/tobymao/sqlglot/commit/7da2f31d6613f16585e98c3fa1f592c617ae40c9) - parse and annotate type for bq ASIN/H *(PR [#5783](https://github.com/tobymao/sqlglot/pull/5783) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ASIN/H (#5783) - due to [`341ea83`](https://github.com/tobymao/sqlglot/commit/341ea83a07c707fdbf565b8d9ef4b9b6341ed1d5) - parse and annotate type for bq ATAN/H/2 *(PR [#5784](https://github.com/tobymao/sqlglot/pull/5784) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq ATAN/H/2 (#5784) - due to 
[`aa360cb`](https://github.com/tobymao/sqlglot/commit/aa360cb0e204aa056557ff8b15aa2d4f678430e6) - use regexp_like as it exists *(PR [#5781](https://github.com/tobymao/sqlglot/pull/5781) by [@jasonthomassql](https://github.com/jasonthomassql))*: use regexp_like as it exists (#5781) - due to [`c2a1ad4`](https://github.com/tobymao/sqlglot/commit/c2a1ad4050771401a5b26bcadd90060e4527fbff) - parse and annotate type for bq COT/H *(PR [#5786](https://github.com/tobymao/sqlglot/pull/5786) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq COT/H (#5786) - due to [`316ae91`](https://github.com/tobymao/sqlglot/commit/316ae913d8b1a63f3071ebb1b826328108d74cef) - Added handling of UTC_DATE and exp.CurrentDate *(PR [#5785](https://github.com/tobymao/sqlglot/pull/5785) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))*: Added handling of UTC_DATE and exp.CurrentDate (#5785) - due to [`2c6d237`](https://github.com/tobymao/sqlglot/commit/2c6d23742ea9fcc2b9c784315d3d5364e360fea5) - parse and annotate type for bq CSC/H *(PR [#5787](https://github.com/tobymao/sqlglot/pull/5787) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CSC/H (#5787) - due to [`8a35076`](https://github.com/tobymao/sqlglot/commit/8a350763c2337f6910a5f0e19af387ba488fcb70) - parse and annotate type for bq SEC/H *(PR [#5788](https://github.com/tobymao/sqlglot/pull/5788) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SEC/H (#5788) - due to [`79901cb`](https://github.com/tobymao/sqlglot/commit/79901cb506737ae1932fa44a705858d2597ee587) - parse and annotate type for bq SIN\H *(PR [#5790](https://github.com/tobymao/sqlglot/pull/5790) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SIN\H (#5790) - due to [`74fb547`](https://github.com/tobymao/sqlglot/commit/74fb5476def1b389da425885db56bd6592fd7f78) - parse and annotate type for bq RANGE_BUCKET *(PR [#5793](https://github.com/tobymao/sqlglot/pull/5793) 
by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq RANGE_BUCKET (#5793) - due to [`eca65e8`](https://github.com/tobymao/sqlglot/commit/eca65e8b79f65850b014a4cb7913ba4a5861dbe9) - parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE *(PR [#5792](https://github.com/tobymao/sqlglot/pull/5792) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE (#5792) - due to [`a180d3f`](https://github.com/tobymao/sqlglot/commit/a180d3f2f9f3938611027269028c03274aa1889c) - parse and annotate type for bq SAFE math funcs *(PR [#5797](https://github.com/tobymao/sqlglot/pull/5797) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SAFE math funcs (#5797) - due to [`fc7ad7a`](https://github.com/tobymao/sqlglot/commit/fc7ad7a4d953424b56542eacfe1835f5789921c7) - parse ALTER SESSION *(PR [#5734](https://github.com/tobymao/sqlglot/pull/5734) by [@tekumara](https://github.com/tekumara))*: parse ALTER SESSION (#5734) - due to [`8ec1a6c`](https://github.com/tobymao/sqlglot/commit/8ec1a6cf5a8edc2d834c713ce0fd8d87237f11ed) - annotate type for bq STRING_AGG *(PR [#5798](https://github.com/tobymao/sqlglot/pull/5798) by [@geooo109](https://github.com/geooo109))*: annotate type for bq STRING_AGG (#5798) - due to [`dd97bfa`](https://github.com/tobymao/sqlglot/commit/dd97bfa1dc2f86b727c55b06b3c54b18c02e360d) - annotate type for bq DATETIME_TRUNC *(PR [#5799](https://github.com/tobymao/sqlglot/pull/5799) by [@geooo109](https://github.com/geooo109))*: annotate type for bq DATETIME_TRUNC (#5799) - due to [`d3e9dda`](https://github.com/tobymao/sqlglot/commit/d3e9dda183695dd1e4a9832a6671bccc6db561a0) - annotate type for bq GENERATE_UUID *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq GENERATE_UUID ### :sparkles: New Features - [`1c551d5`](https://github.com/tobymao/sqlglot/commit/1c551d5ed3315e314013c1f063deabd9d8613e5d) - **optimizer**: parse and annotate type for bq 
TO_JSON *(PR [#5768](https://github.com/tobymao/sqlglot/pull/5768) by [@geooo109](https://github.com/geooo109))* - [`a024d48`](https://github.com/tobymao/sqlglot/commit/a024d48fedd049796329050a1f51822dd1388695) - **singlestore**: Added generation of exp.TsOrDsDiff *(PR [#5769](https://github.com/tobymao/sqlglot/pull/5769) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`1707f2d`](https://github.com/tobymao/sqlglot/commit/1707f2d7f9d3b58e8c216db638f8e572f9fe6f13) - **optimizer**: annotate type for ABS *(PR [#5770](https://github.com/tobymao/sqlglot/pull/5770) by [@geooo109](https://github.com/geooo109))* - [`69acc51`](https://github.com/tobymao/sqlglot/commit/69acc5142b2d4f0b30832c350aa49f16d1adabef) - **optimizer**: annotate type for bq IS_INF, IS_NAN *(PR [#5771](https://github.com/tobymao/sqlglot/pull/5771) by [@geooo109](https://github.com/geooo109))* - [`0da2076`](https://github.com/tobymao/sqlglot/commit/0da207652331920416b29e2cc67bdc3c3f964466) - **optimizer**: annotate type for bq CBRT *(PR [#5772](https://github.com/tobymao/sqlglot/pull/5772) by [@geooo109](https://github.com/geooo109))* - [`a4968cb`](https://github.com/tobymao/sqlglot/commit/a4968cb5693670c1a2e9cd2c86404dd90fd76160) - **optimizer**: annotate type for bq RAND *(PR [#5774](https://github.com/tobymao/sqlglot/pull/5774) by [@geooo109](https://github.com/geooo109))* - [`dd7781a`](https://github.com/tobymao/sqlglot/commit/dd7781a15b842a5826714958ed7af9024903cd1e) - **singlestore**: Fixed generation of exp.Collate *(PR [#5775](https://github.com/tobymao/sqlglot/pull/5775) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`fb684cb`](https://github.com/tobymao/sqlglot/commit/fb684cbdb6178ddc441f598cc1a6e914291cd00e) - **singlestore**: Fixed generation of exp.RegexpILike *(PR [#5777](https://github.com/tobymao/sqlglot/pull/5777) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`3e63350`](https://github.com/tobymao/sqlglot/commit/3e63350bd1d58b510cecd1a573d27be3fd2565ce) - **optimizer**: parse and annotate type for bq ACOS *(PR [#5776](https://github.com/tobymao/sqlglot/pull/5776) by [@geooo109](https://github.com/geooo109))* - [`8705a78`](https://github.com/tobymao/sqlglot/commit/8705a787df034b4cecb4ba95e9599772c5561ba9) - **singlestore**: Fixed generation of exp.CastToStrType *(PR [#5778](https://github.com/tobymao/sqlglot/pull/5778) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e3c35ad`](https://github.com/tobymao/sqlglot/commit/e3c35ade797f46549cc803e1acd8816041713a10) - **singlestore**: Fixed generation of exp.UnicodeString *(PR [#5773](https://github.com/tobymao/sqlglot/pull/5773) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2be9d01`](https://github.com/tobymao/sqlglot/commit/2be9d01830c778186dc274c94c6db0dd6c4116d1) - **optimizer**: parse and annotate type for bq ACOSH *(PR [#5779](https://github.com/tobymao/sqlglot/pull/5779) by [@geooo109](https://github.com/geooo109))* - [`7da2f31`](https://github.com/tobymao/sqlglot/commit/7da2f31d6613f16585e98c3fa1f592c617ae40c9) - **optimizer**: parse and annotate type for bq ASIN/H *(PR [#5783](https://github.com/tobymao/sqlglot/pull/5783) by [@geooo109](https://github.com/geooo109))* - [`341ea83`](https://github.com/tobymao/sqlglot/commit/341ea83a07c707fdbf565b8d9ef4b9b6341ed1d5) - **optimizer**: parse and annotate type for bq ATAN/H/2 *(PR [#5784](https://github.com/tobymao/sqlglot/pull/5784) by [@geooo109](https://github.com/geooo109))* - [`be54a45`](https://github.com/tobymao/sqlglot/commit/be54a458413ce3be6c321e5f4feb3e5df5ee6d08) - **singlestore**: Implemented generation of exp.Cbrt *(PR [#5782](https://github.com/tobymao/sqlglot/pull/5782) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`aa360cb`](https://github.com/tobymao/sqlglot/commit/aa360cb0e204aa056557ff8b15aa2d4f678430e6) - **databricks**: use regexp_like as it exists *(PR 
[#5781](https://github.com/tobymao/sqlglot/pull/5781) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`c2a1ad4`](https://github.com/tobymao/sqlglot/commit/c2a1ad4050771401a5b26bcadd90060e4527fbff) - **optimizer**: parse and annotate type for bq COT/H *(PR [#5786](https://github.com/tobymao/sqlglot/pull/5786) by [@geooo109](https://github.com/geooo109))* - [`316ae91`](https://github.com/tobymao/sqlglot/commit/316ae913d8b1a63f3071ebb1b826328108d74cef) - **singlestore**: Added handling of UTC_DATE and exp.CurrentDate *(PR [#5785](https://github.com/tobymao/sqlglot/pull/5785) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`2c6d237`](https://github.com/tobymao/sqlglot/commit/2c6d23742ea9fcc2b9c784315d3d5364e360fea5) - **optimizer**: parse and annotate type for bq CSC/H *(PR [#5787](https://github.com/tobymao/sqlglot/pull/5787) by [@geooo109](https://github.com/geooo109))* - [`8a35076`](https://github.com/tobymao/sqlglot/commit/8a350763c2337f6910a5f0e19af387ba488fcb70) - **optimizer**: parse and annotate type for bq SEC/H *(PR [#5788](https://github.com/tobymao/sqlglot/pull/5788) by [@geooo109](https://github.com/geooo109))* - [`566bfb2`](https://github.com/tobymao/sqlglot/commit/566bfb2a64a64b74da63b3a89d68caf702ab6522) - **singlestore**: Added support of UTC_TIME and CURRENT_TIME *(PR [#5789](https://github.com/tobymao/sqlglot/pull/5789) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`79901cb`](https://github.com/tobymao/sqlglot/commit/79901cb506737ae1932fa44a705858d2597ee587) - **optimizer**: parse and annotate type for bq SIN/H *(PR [#5790](https://github.com/tobymao/sqlglot/pull/5790) by [@geooo109](https://github.com/geooo109))* - [`74fb547`](https://github.com/tobymao/sqlglot/commit/74fb5476def1b389da425885db56bd6592fd7f78) - **optimizer**: parse and annotate type for bq RANGE_BUCKET *(PR [#5793](https://github.com/tobymao/sqlglot/pull/5793) by [@geooo109](https://github.com/geooo109))* - 
[`eca65e8`](https://github.com/tobymao/sqlglot/commit/eca65e8b79f65850b014a4cb7913ba4a5861dbe9) - **optimizer**: parse and annotate type for bq COSINE/EUCLIDEAN_DISTANCE *(PR [#5792](https://github.com/tobymao/sqlglot/pull/5792) by [@geooo109](https://github.com/geooo109))* - [`a180d3f`](https://github.com/tobymao/sqlglot/commit/a180d3f2f9f3938611027269028c03274aa1889c) - **optimizer**: parse and annotate type for bq SAFE math funcs *(PR [#5797](https://github.com/tobymao/sqlglot/pull/5797) by [@geooo109](https://github.com/geooo109))* - [`fc7ad7a`](https://github.com/tobymao/sqlglot/commit/fc7ad7a4d953424b56542eacfe1835f5789921c7) - **snowflake**: parse ALTER SESSION *(PR [#5734](https://github.com/tobymao/sqlglot/pull/5734) by [@tekumara](https://github.com/tekumara))* - [`8ec1a6c`](https://github.com/tobymao/sqlglot/commit/8ec1a6cf5a8edc2d834c713ce0fd8d87237f11ed) - **optimizer**: annotate type for bq STRING_AGG *(PR [#5798](https://github.com/tobymao/sqlglot/pull/5798) by [@geooo109](https://github.com/geooo109))* - [`dd97bfa`](https://github.com/tobymao/sqlglot/commit/dd97bfa1dc2f86b727c55b06b3c54b18c02e360d) - **optimizer**: annotate type for bq DATETIME_TRUNC *(PR [#5799](https://github.com/tobymao/sqlglot/pull/5799) by [@geooo109](https://github.com/geooo109))* - [`d3e9dda`](https://github.com/tobymao/sqlglot/commit/d3e9dda183695dd1e4a9832a6671bccc6db561a0) - **optimizer**: annotate type for bq GENERATE_UUID *(commit by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`d8f6a37`](https://github.com/tobymao/sqlglot/commit/d8f6a376ba1fcca48e4a65923dd7a319ce6cfb91) - **optimizer**: allow aliased negative integer literal as group by column *(PR [#5791](https://github.com/tobymao/sqlglot/pull/5791) by [@treysp](https://github.com/treysp))* - [`1259576`](https://github.com/tobymao/sqlglot/commit/1259576283f1d45abb70ec40c60e500214a27b6f) - **hive**: DATE_SUB to DATE_ADD use parens if needed *(PR [#5796](https://github.com/tobymao/sqlglot/pull/5796) 
by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5794](https://github.com/tobymao/sqlglot/issues/5794) opened by [@mingelchan](https://github.com/mingelchan)* - [`b0516b4`](https://github.com/tobymao/sqlglot/commit/b0516b4bc9cf2bba2cb57e6bb79ff09b5e2244e3) - **optimizer**: Do not qualify columns if a projection conflicts with a source *(PR [#5780](https://github.com/tobymao/sqlglot/pull/5780) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5262](https://github.com/TobikoData/sqlmesh/issues/5262) opened by [@mChlopek](https://github.com/mChlopek)* - [`8af0d40`](https://github.com/tobymao/sqlglot/commit/8af0d40055450f71b7e36e576f4a9a1104bc02b2) - **parser**: address edge case where `values` is used as an identifier *(PR [#5801](https://github.com/tobymao/sqlglot/pull/5801) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`b77d3da`](https://github.com/tobymao/sqlglot/commit/b77d3da8f2548858d2b9d8590fcde83e1ec62b8a) - remove `"EXCLUDE" -> TokenType.EXCEPT` in DuckDB, Snowflake *(PR [#5766](https://github.com/tobymao/sqlglot/pull/5766) by [@treysp](https://github.com/treysp))* - [`005564a`](https://github.com/tobymao/sqlglot/commit/005564ab28cb14be469f09e89b01275d6e25874e) - **snowflake**: refactor logic related to ALTER SESSION *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.11.0] - 2025-09-03 ### :boom: BREAKING CHANGES - due to [`baffd2c`](https://github.com/tobymao/sqlglot/commit/baffd2c0be9657683781f3f8831c47e32dbf68bb) - parse and annotate type for bq REGEXP_INSTR *(PR [#5710](https://github.com/tobymao/sqlglot/pull/5710) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq REGEXP_INSTR (#5710) - due to [`b79eb19`](https://github.com/tobymao/sqlglot/commit/b79eb198cc21203efa82128b357d435338e9133d) - annotate type for bq ROW_NUMBER *(PR [#5716](https://github.com/tobymao/sqlglot/pull/5716) by 
[@geooo109](https://github.com/geooo109))*: annotate type for bq ROW_NUMBER (#5716) - due to [`f709bef`](https://github.com/tobymao/sqlglot/commit/f709bef3af7cd0daa25fe3d58b1753c3e65720ef) - annotate type for bq FIRST_VALUE *(PR [#5718](https://github.com/tobymao/sqlglot/pull/5718) by [@geooo109](https://github.com/geooo109))*: annotate type for bq FIRST_VALUE (#5718) - due to [`15a9061`](https://github.com/tobymao/sqlglot/commit/15a906170e5d5cdaa207ec7607edfdd7d4a8b774) - annotate type for bq PERCENTILE_DISC *(PR [#5722](https://github.com/tobymao/sqlglot/pull/5722) by [@geooo109](https://github.com/geooo109))*: annotate type for bq PERCENTILE_DISC (#5722) - due to [`7d49609`](https://github.com/tobymao/sqlglot/commit/7d4960963f0ef70b96f5b969bb008d2742e833ea) - annotate type for bq NTH_VALUE *(PR [#5720](https://github.com/tobymao/sqlglot/pull/5720) by [@geooo109](https://github.com/geooo109))*: annotate type for bq NTH_VALUE (#5720) - due to [`d41acf1`](https://github.com/tobymao/sqlglot/commit/d41acf11221bee30a5ae089cbac9b158ed3dd515) - annotate type for bq LEAD *(PR [#5719](https://github.com/tobymao/sqlglot/pull/5719) by [@geooo109](https://github.com/geooo109))*: annotate type for bq LEAD (#5719) - due to [`ff12130`](https://github.com/tobymao/sqlglot/commit/ff12130c23a215917f20fda7d50322f1cb7de599) - annotate type for bq PERCENTILE_CONT *(PR [#5729](https://github.com/tobymao/sqlglot/pull/5729) by [@geooo109](https://github.com/geooo109))*: annotate type for bq PERCENTILE_CONT (#5729) - due to [`fdb8a0a`](https://github.com/tobymao/sqlglot/commit/fdb8a0a6d0d74194255f313bd934db7fc1ce0d3f) - parse and annotate type for bq FORMAT *(PR [#5715](https://github.com/tobymao/sqlglot/pull/5715) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq FORMAT (#5715) - due to [`012bdd3`](https://github.com/tobymao/sqlglot/commit/012bdd3c8aeff180f85354ffd403fc1aa5815dcf) - parse and annotate type for bq CUME_DIST *(PR 
[#5735](https://github.com/tobymao/sqlglot/pull/5735) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CUME_DIST (#5735) - due to [`b99eaeb`](https://github.com/tobymao/sqlglot/commit/b99eaeb0c6eb3dc613e76d205e02632bd6af353b) - parse and annotate type for bq DENSE_RANK *(PR [#5736](https://github.com/tobymao/sqlglot/pull/5736) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq DENSE_RANK (#5736) - due to [`bb95c73`](https://github.com/tobymao/sqlglot/commit/bb95c7312c942ef987955f01e060604d60e32e83) - parse and annotate type for bq RANK *(PR [#5738](https://github.com/tobymao/sqlglot/pull/5738) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq RANK (#5738) - due to [`8713c08`](https://github.com/tobymao/sqlglot/commit/8713c082b0aa8454a5773fc2a85e08a132dc6ce3) - parse and annotate type for bq PERCENT_RANK *(PR [#5739](https://github.com/tobymao/sqlglot/pull/5739) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq PERCENT_RANK (#5739) - due to [`9ce4e31`](https://github.com/tobymao/sqlglot/commit/9ce4e31aecbde6ea1f227a7166c0f3dc9e302a66) - annotate type for bq JSON_OBJECT *(PR [#5740](https://github.com/tobymao/sqlglot/pull/5740) by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_OBJECT (#5740) - due to [`d35ec6e`](https://github.com/tobymao/sqlglot/commit/d35ec6e37e21cf3cec848ed55bd73128c4633cd2) - annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY *(PR [#5741](https://github.com/tobymao/sqlglot/pull/5741) by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY (#5741) - due to [`4753642`](https://github.com/tobymao/sqlglot/commit/4753642cfcfb1f192ec4d21a492737b27affef09) - annotate type for bq JSON_EXTRACT_SCALAR *(commit by [@geooo109](https://github.com/geooo109))*: annotate type for bq JSON_EXTRACT_SCALAR - due to 
[`113a530`](https://github.com/tobymao/sqlglot/commit/113a5308d050fd5ceacab4c6188e5eea5dd740b1) - parse and annotate type for bq JSON_ARRAY_APPEND *(PR [#5747](https://github.com/tobymao/sqlglot/pull/5747) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_ARRAY_APPEND (#5747) - due to [`268e2c6`](https://github.com/tobymao/sqlglot/commit/268e2c694d1eb99f1fe64477bc38ed4946bf1c32) - parse and annotate type for bq JSON_ARRAY_INSERT *(PR [#5748](https://github.com/tobymao/sqlglot/pull/5748) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_ARRAY_INSERT (#5748) - due to [`455ec1f`](https://github.com/tobymao/sqlglot/commit/455ec1f4f8aecb5435fa4cb2912bfc21db8dd44d) - parse and annotate type for bq JSON_KEYS *(PR [#5749](https://github.com/tobymao/sqlglot/pull/5749) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_KEYS (#5749) - due to [`59895fa`](https://github.com/tobymao/sqlglot/commit/59895faa23ebe1b27938c37a7b39df87de609844) - parse and annotate type for bq JSON_REMOVE *(PR [#5750](https://github.com/tobymao/sqlglot/pull/5750) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_REMOVE (#5750) - due to [`06d7df7`](https://github.com/tobymao/sqlglot/commit/06d7df7a05f2824cabf48e8d1e8a4ebca8fda496) - parse and annotate type for bq JSON_SET *(PR [#5751](https://github.com/tobymao/sqlglot/pull/5751) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_SET (#5751) - due to [`e72b341`](https://github.com/tobymao/sqlglot/commit/e72b3419c8a367caa0e5e80030979cd94e87a40d) - parse and annotate type for bq JSON_STRIP_NULLS *(PR [#5753](https://github.com/tobymao/sqlglot/pull/5753) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_STRIP_NULLS (#5753) - due to [`5de61a7`](https://github.com/tobymao/sqlglot/commit/5de61a7ab850d4e68fde4d76ee396d30d7bdef33) - parse and annotate type for bq 
JSON_EXTRACT_STRING_ARRAY *(PR [#5758](https://github.com/tobymao/sqlglot/pull/5758) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON_EXTRACT_STRING_ARRAY (#5758) - due to [`36c9393`](https://github.com/tobymao/sqlglot/commit/36c93939575a19bd611269719c39d3d216be8cde) - parse and annotate type for bq JSON LAX funcs *(PR [#5760](https://github.com/tobymao/sqlglot/pull/5760) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq JSON LAX funcs (#5760) - due to [`88862b5`](https://github.com/tobymao/sqlglot/commit/88862b56bc29c8a600b4d0e4693d5846d3a577ff) - annotate type for bq TO_JSON_STRING *(PR [#5762](https://github.com/tobymao/sqlglot/pull/5762) by [@geooo109](https://github.com/geooo109))*: annotate type for bq TO_JSON_STRING (#5762) ### :sparkles: New Features - [`baffd2c`](https://github.com/tobymao/sqlglot/commit/baffd2c0be9657683781f3f8831c47e32dbf68bb) - **optimizer**: parse and annotate type for bq REGEXP_INSTR *(PR [#5710](https://github.com/tobymao/sqlglot/pull/5710) by [@geooo109](https://github.com/geooo109))* - [`b79eb19`](https://github.com/tobymao/sqlglot/commit/b79eb198cc21203efa82128b357d435338e9133d) - **optimizer**: annotate type for bq ROW_NUMBER *(PR [#5716](https://github.com/tobymao/sqlglot/pull/5716) by [@geooo109](https://github.com/geooo109))* - [`f709bef`](https://github.com/tobymao/sqlglot/commit/f709bef3af7cd0daa25fe3d58b1753c3e65720ef) - **optimizer**: annotate type for bq FIRST_VALUE *(PR [#5718](https://github.com/tobymao/sqlglot/pull/5718) by [@geooo109](https://github.com/geooo109))* - [`b9ae9e5`](https://github.com/tobymao/sqlglot/commit/b9ae9e534dee1e32fccbf22cab9bc17fbd920629) - **singlestore**: Implemented generation of exp.TsOrDiToDi *(PR [#5724](https://github.com/tobymao/sqlglot/pull/5724) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9b14fff`](https://github.com/tobymao/sqlglot/commit/9b14fffd2c9404f76a3faced2ec9d6eaac8feb01) - **singlestore**: 
Implemented generation of exp.DateToDi *(PR [#5717](https://github.com/tobymao/sqlglot/pull/5717) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`07d8c23`](https://github.com/tobymao/sqlglot/commit/07d8c2347baba6523310c4d31cddfb0e5c0eddc1) - **singlestore**: Implemented generation of exp.DiToDate *(PR [#5721](https://github.com/tobymao/sqlglot/pull/5721) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`ad34a85`](https://github.com/tobymao/sqlglot/commit/ad34a855a433bc0f51a707cbcb66f8dce667a562) - **singlestore**: Implemented generation of exp.FromTimeZone *(PR [#5723](https://github.com/tobymao/sqlglot/pull/5723) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`29d5e4f`](https://github.com/tobymao/sqlglot/commit/29d5e4f62a799f35c0904a23cedacc6efa95a63b) - **singlestore**: Implemented generation of exp.DatetimeAdd *(PR [#5728](https://github.com/tobymao/sqlglot/pull/5728) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`15a9061`](https://github.com/tobymao/sqlglot/commit/15a906170e5d5cdaa207ec7607edfdd7d4a8b774) - **optimizer**: annotate type for bq PERCENTILE_DISC *(PR [#5722](https://github.com/tobymao/sqlglot/pull/5722) by [@geooo109](https://github.com/geooo109))* - [`7d49609`](https://github.com/tobymao/sqlglot/commit/7d4960963f0ef70b96f5b969bb008d2742e833ea) - **optimizer**: annotate type for bq NTH_VALUE *(PR [#5720](https://github.com/tobymao/sqlglot/pull/5720) by [@geooo109](https://github.com/geooo109))* - [`d41acf1`](https://github.com/tobymao/sqlglot/commit/d41acf11221bee30a5ae089cbac9b158ed3dd515) - **optimizer**: annotate type for bq LEAD *(PR [#5719](https://github.com/tobymao/sqlglot/pull/5719) by [@geooo109](https://github.com/geooo109))* - [`113809a`](https://github.com/tobymao/sqlglot/commit/113809a07efee0f12758bd2571c8515885568466) - **singlestore**: Implemented exp.TimeStrToDate generation *(PR [#5725](https://github.com/tobymao/sqlglot/pull/5725) by 
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`cf63d0d`](https://github.com/tobymao/sqlglot/commit/cf63d0df4c2f58b2cf0c87e2a3a6f63f836a50a1) - **dremio**: add regexp_like and alias regexp_matches *(PR [#5731](https://github.com/tobymao/sqlglot/pull/5731) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`e42160f`](https://github.com/tobymao/sqlglot/commit/e42160f27fa68828898969073f2f4a0014f5e3e9) - **dremio**: support alias repeatstr *(PR [#5730](https://github.com/tobymao/sqlglot/pull/5730) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`ff12130`](https://github.com/tobymao/sqlglot/commit/ff12130c23a215917f20fda7d50322f1cb7de599) - **optimizer**: annotate type for bq PERCENTILE_CONT *(PR [#5729](https://github.com/tobymao/sqlglot/pull/5729) by [@geooo109](https://github.com/geooo109))* - [`fdb8a0a`](https://github.com/tobymao/sqlglot/commit/fdb8a0a6d0d74194255f313bd934db7fc1ce0d3f) - **optimizer**: parse and annotate type for bq FORMAT *(PR [#5715](https://github.com/tobymao/sqlglot/pull/5715) by [@geooo109](https://github.com/geooo109))* - [`e272292`](https://github.com/tobymao/sqlglot/commit/e272292197f2bb81ccfad1de06a95f321f0b565f) - **singlestore**: Implemented generation of exp.Time *(PR [#5727](https://github.com/tobymao/sqlglot/pull/5727) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`012bdd3`](https://github.com/tobymao/sqlglot/commit/012bdd3c8aeff180f85354ffd403fc1aa5815dcf) - **optimizer**: parse and annotate type for bq CUME_DIST *(PR [#5735](https://github.com/tobymao/sqlglot/pull/5735) by [@geooo109](https://github.com/geooo109))* - [`b99eaeb`](https://github.com/tobymao/sqlglot/commit/b99eaeb0c6eb3dc613e76d205e02632bd6af353b) - **optimizer**: parse and annotate type for bq DENSE_RANK *(PR [#5736](https://github.com/tobymao/sqlglot/pull/5736) by [@geooo109](https://github.com/geooo109))* - [`8cf6ef9`](https://github.com/tobymao/sqlglot/commit/8cf6ef92a0f43943efb0fe380f41dc09f43aca85) - 
**optimizer**: parse and annotate_type for bq NTILE *(PR [#5737](https://github.com/tobymao/sqlglot/pull/5737) by [@geooo109](https://github.com/geooo109))* - [`bb95c73`](https://github.com/tobymao/sqlglot/commit/bb95c7312c942ef987955f01e060604d60e32e83) - **optimizer**: parse and annotate type for bq RANK *(PR [#5738](https://github.com/tobymao/sqlglot/pull/5738) by [@geooo109](https://github.com/geooo109))* - [`8713c08`](https://github.com/tobymao/sqlglot/commit/8713c082b0aa8454a5773fc2a85e08a132dc6ce3) - **optimizer**: parse and annotate type for bq PERCENT_RANK *(PR [#5739](https://github.com/tobymao/sqlglot/pull/5739) by [@geooo109](https://github.com/geooo109))* - [`9ce4e31`](https://github.com/tobymao/sqlglot/commit/9ce4e31aecbde6ea1f227a7166c0f3dc9e302a66) - **optimizer**: annotate type for bq JSON_OBJECT *(PR [#5740](https://github.com/tobymao/sqlglot/pull/5740) by [@geooo109](https://github.com/geooo109))* - [`d35ec6e`](https://github.com/tobymao/sqlglot/commit/d35ec6e37e21cf3cec848ed55bd73128c4633cd2) - **optimizer**: annotate type for bq JSON_QUERY/JSON_QUERY_ARRAY *(PR [#5741](https://github.com/tobymao/sqlglot/pull/5741) by [@geooo109](https://github.com/geooo109))* - [`4753642`](https://github.com/tobymao/sqlglot/commit/4753642cfcfb1f192ec4d21a492737b27affef09) - **optimizer**: annotate type for bq JSON_EXTRACT_SCALAR *(commit by [@geooo109](https://github.com/geooo109))* - [`6249dbe`](https://github.com/tobymao/sqlglot/commit/6249dbe4173ad5278adf84452dcf7253a2395b91) - **singlestore**: Added generation of exp.DatetimeDiff *(PR [#5743](https://github.com/tobymao/sqlglot/pull/5743) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`113a530`](https://github.com/tobymao/sqlglot/commit/113a5308d050fd5ceacab4c6188e5eea5dd740b1) - **optimizer**: parse and annotate type for bq JSON_ARRAY_APPEND *(PR [#5747](https://github.com/tobymao/sqlglot/pull/5747) by [@geooo109](https://github.com/geooo109))* - 
[`8603705`](https://github.com/tobymao/sqlglot/commit/8603705a8e5513699adc2499389c67412eee70cb) - **singlestore**: Implemented generation of exp.DatetimeSub *(PR [#5744](https://github.com/tobymao/sqlglot/pull/5744) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7d71c0b`](https://github.com/tobymao/sqlglot/commit/7d71c0bb576f9de3447b4780ab64a3f4d92c6432) - **singlestore**: Fixed generation of exp.DatetimeTrunc and exp.DateTrunc *(PR [#5745](https://github.com/tobymao/sqlglot/pull/5745) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`268e2c6`](https://github.com/tobymao/sqlglot/commit/268e2c694d1eb99f1fe64477bc38ed4946bf1c32) - **optimizer**: parse and annotate type for bq JSON_ARRAY_INSERT *(PR [#5748](https://github.com/tobymao/sqlglot/pull/5748) by [@geooo109](https://github.com/geooo109))* - [`455ec1f`](https://github.com/tobymao/sqlglot/commit/455ec1f4f8aecb5435fa4cb2912bfc21db8dd44d) - **optimizer**: parse and annotate type for bq JSON_KEYS *(PR [#5749](https://github.com/tobymao/sqlglot/pull/5749) by [@geooo109](https://github.com/geooo109))* - [`59895fa`](https://github.com/tobymao/sqlglot/commit/59895faa23ebe1b27938c37a7b39df87de609844) - **optimizer**: parse and annotate type for bq JSON_REMOVE *(PR [#5750](https://github.com/tobymao/sqlglot/pull/5750) by [@geooo109](https://github.com/geooo109))* - [`06d7df7`](https://github.com/tobymao/sqlglot/commit/06d7df7a05f2824cabf48e8d1e8a4ebca8fda496) - **optimizer**: parse and annotate type for bq JSON_SET *(PR [#5751](https://github.com/tobymao/sqlglot/pull/5751) by [@geooo109](https://github.com/geooo109))* - [`7f5079a`](https://github.com/tobymao/sqlglot/commit/7f5079a1b71c4dd28e98b77b5b749e074fce862c) - **singlestore**: Improved generation of exp.DataType *(PR [#5746](https://github.com/tobymao/sqlglot/pull/5746) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`ad9405c`](https://github.com/tobymao/sqlglot/commit/ad9405cd43108ff80d16711f8b33ff57430ed686) - **singlestore**: fixed generation of exp.TimestampTrunc *(PR [#5754](https://github.com/tobymao/sqlglot/pull/5754) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a1852f9`](https://github.com/tobymao/sqlglot/commit/a1852f93fdfe926072c12954c95796d038e15140) - **dremio**: parse date_part *(PR [#5756](https://github.com/tobymao/sqlglot/pull/5756) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`0db1df6`](https://github.com/tobymao/sqlglot/commit/0db1df617ec4f05b1ee6cf1d606272f6e799a9b9) - **singlestore**: Fixed generation of exp.DateDiff *(PR [#5752](https://github.com/tobymao/sqlglot/pull/5752) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e72b341`](https://github.com/tobymao/sqlglot/commit/e72b3419c8a367caa0e5e80030979cd94e87a40d) - **optimizer**: parse and annotate type for bq JSON_STRIP_NULLS *(PR [#5753](https://github.com/tobymao/sqlglot/pull/5753) by [@geooo109](https://github.com/geooo109))* - [`5de61a7`](https://github.com/tobymao/sqlglot/commit/5de61a7ab850d4e68fde4d76ee396d30d7bdef33) - **optimizer**: parse and annotate type for bq JSON_EXTRACT_STRING_ARRAY *(PR [#5758](https://github.com/tobymao/sqlglot/pull/5758) by [@geooo109](https://github.com/geooo109))* - [`36c9393`](https://github.com/tobymao/sqlglot/commit/36c93939575a19bd611269719c39d3d216be8cde) - **optimizer**: parse and annotate type for bq JSON LAX funcs *(PR [#5760](https://github.com/tobymao/sqlglot/pull/5760) by [@geooo109](https://github.com/geooo109))* - [`c443d5c`](https://github.com/tobymao/sqlglot/commit/c443d5caf2d9695856103eebfff21cb215777112) - **dremio**: parse datetype *(PR [#5759](https://github.com/tobymao/sqlglot/pull/5759) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`5172a99`](https://github.com/tobymao/sqlglot/commit/5172a99fc4d5e21a1dbe4509d6d7ab1ccfe8bff7) - **singlestore**: Fixed parsing of columns with table name 
*(PR [#5767](https://github.com/tobymao/sqlglot/pull/5767) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`88862b5`](https://github.com/tobymao/sqlglot/commit/88862b56bc29c8a600b4d0e4693d5846d3a577ff) - **optimizer**: annotate type for bq TO_JSON_STRING *(PR [#5762](https://github.com/tobymao/sqlglot/pull/5762) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`ec93497`](https://github.com/tobymao/sqlglot/commit/ec93497bac82090b88c6e749ec2adc99bbc23a61) - **bigquery**: support commands inside for loops *(PR [#5732](https://github.com/tobymao/sqlglot/pull/5732) by [@treysp](https://github.com/treysp))* - [`85845bb`](https://github.com/tobymao/sqlglot/commit/85845bb941ac9a4ee090a89cd3d3dab4ab5835a7) - **snowflake**: allow exclude as id var *(PR [#5764](https://github.com/tobymao/sqlglot/pull/5764) by [@treysp](https://github.com/treysp))* - [`db2d9cc`](https://github.com/tobymao/sqlglot/commit/db2d9cca9718fb196066dbf60840124917d1f8ac) - **tokenizer**: handle empty hex strings *(PR [#5763](https://github.com/tobymao/sqlglot/pull/5763) by [@paulolieuthier](https://github.com/paulolieuthier))* - :arrow_lower_right: *fixes issue [#5761](https://github.com/tobymao/sqlglot/issues/5761) opened by [@paulolieuthier](https://github.com/paulolieuthier)* - [`982257b`](https://github.com/tobymao/sqlglot/commit/982257b40973cdfc20a8d6dd9a1674cda7eb75c4) - **bigquery**: Crash when ARRAY_CONCAT is called with no expressions *(PR [#5755](https://github.com/tobymao/sqlglot/pull/5755) by [@ozadari](https://github.com/ozadari))* - [`24ca504`](https://github.com/tobymao/sqlglot/commit/24ca504360779c8a20a58accf506eb9600ac9bf8) - **bigquery**: Crash when ARRAY_CONCAT is called with no expressions *(PR [#5755](https://github.com/tobymao/sqlglot/pull/5755) by [@ozadari](https://github.com/ozadari))* ### :wrench: Chores - [`41521e3`](https://github.com/tobymao/sqlglot/commit/41521e31b465acd51ab02b1ac4e5512b98175b7e) - bump sqlglotrs to 0.6.2 *(commit by 
[@georgesittas](https://github.com/georgesittas))* ## [v27.10.0] - 2025-08-28 ### :boom: BREAKING CHANGES - due to [`de2fe15`](https://github.com/tobymao/sqlglot/commit/de2fe1503b5bb003431d1f0c7b9ae87932a6cc1c) - annotate type for bq CONTAINS_SUBSTR *(PR [#5705](https://github.com/tobymao/sqlglot/pull/5705) by [@geooo109](https://github.com/geooo109))*: annotate type for bq CONTAINS_SUBSTR (#5705) - due to [`770888f`](https://github.com/tobymao/sqlglot/commit/770888f4e9a9061329e3c416f968f7dd9639fb81) - annotate type for bq NORMALIZE *(PR [#5711](https://github.com/tobymao/sqlglot/pull/5711) by [@geooo109](https://github.com/geooo109))*: annotate type for bq NORMALIZE (#5711) - due to [`506033f`](https://github.com/tobymao/sqlglot/commit/506033f299f7a4c28f6efd8bf715be5dcf73e929) - parse and annotate type for bq NORMALIZE_AND_CASEFOLD *(PR [#5712](https://github.com/tobymao/sqlglot/pull/5712) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq NORMALIZE_AND_CASEFOLD (#5712) - due to [`848aea1`](https://github.com/tobymao/sqlglot/commit/848aea1dbaaeb580b633796dcca06c28314b9c3e) - parse and annotate type for bq OCTET_LENGTH *(PR [#5713](https://github.com/tobymao/sqlglot/pull/5713) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq OCTET_LENGTH (#5713) - due to [`727bf83`](https://github.com/tobymao/sqlglot/commit/727bf8378f232188d35834d980b035552999ea3b) - add support for REVOKE DDL *(PR [#5703](https://github.com/tobymao/sqlglot/pull/5703) by [@newtonapple](https://github.com/newtonapple))*: add support for REVOKE DDL (#5703) ### :sparkles: New Features - [`f6f8f56`](https://github.com/tobymao/sqlglot/commit/f6f8f56a59d550dfc7dfcab0c3b9a6885c7e758a) - **singlestore**: Fixed parsing/generation of exp.JSONFormat *(PR [#5706](https://github.com/tobymao/sqlglot/pull/5706) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`de2fe15`](https://github.com/tobymao/sqlglot/commit/de2fe1503b5bb003431d1f0c7b9ae87932a6cc1c) - **optimizer**: annotate type for bq CONTAINS_SUBSTR *(PR [#5705](https://github.com/tobymao/sqlglot/pull/5705) by [@geooo109](https://github.com/geooo109))* - [`a78146e`](https://github.com/tobymao/sqlglot/commit/a78146e37bfc972050b4467c39769407061e9bc3) - **singlestore**: Fixed parsing/generation of exp.DateBin *(PR [#5709](https://github.com/tobymao/sqlglot/pull/5709) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`ab0c985`](https://github.com/tobymao/sqlglot/commit/ab0c985424ae9d9340eafd15ecdc9b31bdd8837c) - **singlestore**: Marked exp.Reduce finish argument as unsupported *(PR [#5707](https://github.com/tobymao/sqlglot/pull/5707) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`770888f`](https://github.com/tobymao/sqlglot/commit/770888f4e9a9061329e3c416f968f7dd9639fb81) - **optimizer**: annotate type for bq NORMALIZE *(PR [#5711](https://github.com/tobymao/sqlglot/pull/5711) by [@geooo109](https://github.com/geooo109))* - [`506033f`](https://github.com/tobymao/sqlglot/commit/506033f299f7a4c28f6efd8bf715be5dcf73e929) - **optimizer**: parse and annotate type for bq NORMALIZE_AND_CASEFOLD *(PR [#5712](https://github.com/tobymao/sqlglot/pull/5712) by [@geooo109](https://github.com/geooo109))* - [`848aea1`](https://github.com/tobymao/sqlglot/commit/848aea1dbaaeb580b633796dcca06c28314b9c3e) - **optimizer**: parse and annotate type for bq OCTET_LENGTH *(PR [#5713](https://github.com/tobymao/sqlglot/pull/5713) by [@geooo109](https://github.com/geooo109))* - [`727bf83`](https://github.com/tobymao/sqlglot/commit/727bf8378f232188d35834d980b035552999ea3b) - add support for REVOKE DDL *(PR [#5703](https://github.com/tobymao/sqlglot/pull/5703) by [@newtonapple](https://github.com/newtonapple))* ### :bug: Bug Fixes - [`0427c7b`](https://github.com/tobymao/sqlglot/commit/0427c7b7aa9f8161324085a98c5f531fa35c8b0c) - **optimizer**: qualify columns for 
AggFunc with DISTINCT *(PR [#5708](https://github.com/tobymao/sqlglot/pull/5708) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5698](https://github.com/tobymao/sqlglot/issues/5698) opened by [@georgesittas](https://github.com/georgesittas)* ## [v27.9.0] - 2025-08-27 ### :boom: BREAKING CHANGES - due to [`7b180bd`](https://github.com/tobymao/sqlglot/commit/7b180bdc3da9e39946c22970bd2523f7d8beaf29) - raise if query modifier is specified multiple times *(PR [#5608](https://github.com/tobymao/sqlglot/pull/5608) by [@georgesittas](https://github.com/georgesittas))*: raise if query modifier is specified multiple times (#5608) - due to [`36602a2`](https://github.com/tobymao/sqlglot/commit/36602a2ecc9ffca98e89044d23e40f33c6ed71e4) - parse LIST_FILTER into ArrayFilter closes [#5633](https://github.com/tobymao/sqlglot/pull/5633) *(commit by [@georgesittas](https://github.com/georgesittas))*: parse LIST_FILTER into ArrayFilter closes #5633 - due to [`0188d21`](https://github.com/tobymao/sqlglot/commit/0188d21d443c991a528eb9d220459890b7dca477) - parse LIST_TRANSFORM into Transform closes [#5634](https://github.com/tobymao/sqlglot/pull/5634) *(commit by [@georgesittas](https://github.com/georgesittas))*: parse LIST_TRANSFORM into Transform closes #5634 - due to [`3ab1d44`](https://github.com/tobymao/sqlglot/commit/3ab1d4487279cab3be2d3764e51516c6db21629d) - Wrap CONCAT items with COALESCE less aggressively *(PR [#5641](https://github.com/tobymao/sqlglot/pull/5641) by [@VaggelisD](https://github.com/VaggelisD))*: Wrap CONCAT items with COALESCE less aggressively (#5641) - due to [`af0b299`](https://github.com/tobymao/sqlglot/commit/af0b299561914953b30ab36004e53dcb92d39e1c) - Qualify columns generated by exp.Aliases *(PR [#5647](https://github.com/tobymao/sqlglot/pull/5647) by [@VaggelisD](https://github.com/VaggelisD))*: Qualify columns generated by exp.Aliases (#5647) - due to 
[`53aa8fe`](https://github.com/tobymao/sqlglot/commit/53aa8fe7f188012f765066f32c4179035fff036d) - support alter table with check closes [#5649](https://github.com/tobymao/sqlglot/pull/5649) *(commit by [@georgesittas](https://github.com/georgesittas))*: support alter table with check closes #5649 - due to [`1a60a5a`](https://github.com/tobymao/sqlglot/commit/1a60a5a845c7431d7d3d7ccb71119699316f4b41) - Added parsing/generation of JSON_ARRAY_CONTAINS function *(PR [#5661](https://github.com/tobymao/sqlglot/pull/5661) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))*: Added parsing/generation of JSON_ARRAY_CONTAINS function (#5661) - due to [`e0db0a9`](https://github.com/tobymao/sqlglot/commit/e0db0a95d3cb7614242dbd1b439d408e7e7bd475) - add parse and annotate type for bigquery FARM_FINGERPRINT *(PR [#5667](https://github.com/tobymao/sqlglot/pull/5667) by [@geooo109](https://github.com/geooo109))*: add parse and annotate type for bigquery FARM_FINGERPRINT (#5667) - due to [`56588c7`](https://github.com/tobymao/sqlglot/commit/56588c7e22b4db4f0e44696a460483ca1e549163) - Add support for vector_search function. Move predict to BigQuery dialect. *(PR [#5660](https://github.com/tobymao/sqlglot/pull/5660) by [@rloredo](https://github.com/rloredo))*: Add support for vector_search function. Move predict to BigQuery dialect. 
(#5660) - due to [`a688a0f`](https://github.com/tobymao/sqlglot/commit/a688a0f0d70f87139e531d1419b338b695bec384) - parse and annotate type for bigquery APPROX_TOP_COUNT *(PR [#5670](https://github.com/tobymao/sqlglot/pull/5670) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_TOP_COUNT (#5670) - due to [`3c93fcc`](https://github.com/tobymao/sqlglot/commit/3c93fcce96ec82e78753f6c9dd5fb0e730a82058) - parse and annotate type for bigquery APPROX_TOP_SUM *(PR [#5675](https://github.com/tobymao/sqlglot/pull/5675) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_TOP_SUM (#5675) - due to [`741d45a`](https://github.com/tobymao/sqlglot/commit/741d45a0ca7c1bad67da4393cd10cc9cfa49ea68) - parse and annotate type for bigquery FROM/TO_BASE32 *(PR [#5676](https://github.com/tobymao/sqlglot/pull/5676) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery FROM/TO_BASE32 (#5676) - due to [`9ae045c`](https://github.com/tobymao/sqlglot/commit/9ae045c0405e43b148e3b9261825288ebf09100c) - parse and annotate type for bigquery FROM_HEX *(PR [#5679](https://github.com/tobymao/sqlglot/pull/5679) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery FROM_HEX (#5679) - due to [`5a22a25`](https://github.com/tobymao/sqlglot/commit/5a22a254143978989027f6e7f6163019a34f112a) - annotate type for bigquery TO_HEX *(PR [#5680](https://github.com/tobymao/sqlglot/pull/5680) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TO_HEX (#5680) - due to [`5c1eb2d`](https://github.com/tobymao/sqlglot/commit/5c1eb2df5dd3dcc6ed2c8204cec56b5c3d276f87) - parse and annotate type for bq PARSE_BIG/NUMERIC *(PR [#5690](https://github.com/tobymao/sqlglot/pull/5690) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq PARSE_BIG/NUMERIC (#5690) - due to 
[`311373d`](https://github.com/tobymao/sqlglot/commit/311373d22134de906d1c1cef019541e85e2f7c9f) - parse and annotate type for bq CODE_POINTS_TO_BYTES *(PR [#5686](https://github.com/tobymao/sqlglot/pull/5686) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq CODE_POINTS_TO_BYTES (#5686) - due to [`79d9de1`](https://github.com/tobymao/sqlglot/commit/79d9de1745598f8f3ae2c82c1389dd455c946a09) - parse and annotate type for bq TO_CODE_POINTS *(PR [#5685](https://github.com/tobymao/sqlglot/pull/5685) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq TO_CODE_POINTS (#5685) - due to [`5df3ea9`](https://github.com/tobymao/sqlglot/commit/5df3ea92f59125955124ea1883b777b489db3042) - parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING *(PR [#5681](https://github.com/tobymao/sqlglot/pull/5681) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING (#5681) - due to [`c832746`](https://github.com/tobymao/sqlglot/commit/c832746018fbc2c531d5b2a7c7f8cd5d78e511ff) - parse and annotate type for bigquery APPROX_QUANTILES *(PR [#5678](https://github.com/tobymao/sqlglot/pull/5678) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery APPROX_QUANTILES (#5678) - due to [`99e169e`](https://github.com/tobymao/sqlglot/commit/99e169ea13d5be3712a47f6b55b98a4764a3c24d) - parse and annotate type for bq BOOL *(PR [#5697](https://github.com/tobymao/sqlglot/pull/5697) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq BOOL (#5697) - due to [`3f31770`](https://github.com/tobymao/sqlglot/commit/3f31770c793f464fcac1ce2b8dfa03d4b7f0231c) - parse and annotate type for bq FLOAT64 *(PR [#5700](https://github.com/tobymao/sqlglot/pull/5700) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bq FLOAT64 (#5700) ### :sparkles: New Features - 
[`02e60e7`](https://github.com/tobymao/sqlglot/commit/02e60e73fc0c2dae815aa225be247a17ccdf4b82) - **singlestore**: desugarize DAYNAME into DATE_FORMAT *(PR [#5610](https://github.com/tobymao/sqlglot/pull/5610) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`7b180bd`](https://github.com/tobymao/sqlglot/commit/7b180bdc3da9e39946c22970bd2523f7d8beaf29) - **parser**: raise if query modifier is specified multiple times *(PR [#5608](https://github.com/tobymao/sqlglot/pull/5608) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5604](https://github.com/tobymao/sqlglot/issues/5604) opened by [@bricct](https://github.com/bricct)* - [`442eafc`](https://github.com/tobymao/sqlglot/commit/442eafcb00a2650930bd6023aa9a5febfebbe796) - **singlestore**: Added parsing of HOUR function *(PR [#5612](https://github.com/tobymao/sqlglot/pull/5612) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5320359`](https://github.com/tobymao/sqlglot/commit/532035978605efd1d43de75aafca750e2894c0b9) - **singlestore**: Added parsing of MICROSECOND function *(PR [#5619](https://github.com/tobymao/sqlglot/pull/5619) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`db1db97`](https://github.com/tobymao/sqlglot/commit/db1db9732352187629df853ad937ebaf4abfe487) - **doris**: update exp.UniqueKeyProperty SQL generation logic *(PR [#5613](https://github.com/tobymao/sqlglot/pull/5613) by [@xinge-ji](https://github.com/xinge-ji))* - [`54623a6`](https://github.com/tobymao/sqlglot/commit/54623a6b85432272703f12a197b05ced78529f90) - **singlestore**: Added parsing of MINUTE function *(PR [#5620](https://github.com/tobymao/sqlglot/pull/5620) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`565c9f8`](https://github.com/tobymao/sqlglot/commit/565c9f8c55cfbef5d3a9e1470551f1dc4416825e) - **singlestore**: Added generation of DAYOFWEEK_ISO function *(PR [#5627](https://github.com/tobymao/sqlglot/pull/5627) by 
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`8db916e`](https://github.com/tobymao/sqlglot/commit/8db916e2f2ce241bdff130d626f98df182b48f3e) - **singlestore**: Added parsing of WEEKDAY function *(PR [#5624](https://github.com/tobymao/sqlglot/pull/5624) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`aa6274a`](https://github.com/tobymao/sqlglot/commit/aa6274a0ea647df1251563945635260a6ddd4972) - **singlestore**: Fixed generation of DAY_OF_MONTH function *(PR [#5629](https://github.com/tobymao/sqlglot/pull/5629) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`dee44b8`](https://github.com/tobymao/sqlglot/commit/dee44b8c1d70ca6079867896fb68cad256909dad) - **singlestore**: Added parsing of MONTHNAME function *(PR [#5623](https://github.com/tobymao/sqlglot/pull/5623) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`deebf0c`](https://github.com/tobymao/sqlglot/commit/deebf0c3cc379e28c4ab66b6bb7a9c84c14e88c6) - **singlestore**: Added parsing of SECOND function *(PR [#5621](https://github.com/tobymao/sqlglot/pull/5621) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`12a60b9`](https://github.com/tobymao/sqlglot/commit/12a60b99b6b2b0673b57218c691794deb67aa3a5) - **singlestore**: Removed redundant deletions from TRANSFORMS *(PR [#5632](https://github.com/tobymao/sqlglot/pull/5632) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`36602a2`](https://github.com/tobymao/sqlglot/commit/36602a2ecc9ffca98e89044d23e40f33c6ed71e4) - **duckdb**: parse LIST_FILTER into ArrayFilter closes [#5633](https://github.com/tobymao/sqlglot/pull/5633) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0188d21`](https://github.com/tobymao/sqlglot/commit/0188d21d443c991a528eb9d220459890b7dca477) - **duckdb**: parse LIST_TRANSFORM into Transform closes [#5634](https://github.com/tobymao/sqlglot/pull/5634) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`b117d59`](https://github.com/tobymao/sqlglot/commit/b117d59f3c43f6f44cd0ccdf22717f7bcd990889) - **dremio**: add dremio date_add and date_sub parsing *(PR [#5617](https://github.com/tobymao/sqlglot/pull/5617) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`999b9e7`](https://github.com/tobymao/sqlglot/commit/999b9e793c0819a4d2af6400fc924946d26b3e6f) - **singlestore**: Changed generation of exp.TsOrDsToDate to handle case when format is not provided *(PR [#5639](https://github.com/tobymao/sqlglot/pull/5639) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`b556e97`](https://github.com/tobymao/sqlglot/commit/b556e97f8cfbde21c0a921ac1c01c9e4f2ec2535) - **singlestore**: Marked exp.All as unsupported *(PR [#5640](https://github.com/tobymao/sqlglot/pull/5640) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`c076694`](https://github.com/tobymao/sqlglot/commit/c0766946e6799fb61c38e855fd18812d08a5c251) - **clickhouse**: support custom partition key expressions *(PR [#5645](https://github.com/tobymao/sqlglot/pull/5645) by [@GaliFFun](https://github.com/GaliFFun))* - [`cab62b0`](https://github.com/tobymao/sqlglot/commit/cab62b06ce926e3116a6a45a9c57e4901cd8a281) - **doris**: add support for BUILD and REFRESH properties in materialized view *(PR [#5614](https://github.com/tobymao/sqlglot/pull/5614) by [@xinge-ji](https://github.com/xinge-ji))* - [`af0b299`](https://github.com/tobymao/sqlglot/commit/af0b299561914953b30ab36004e53dcb92d39e1c) - **optimizer**: Qualify columns generated by exp.Aliases *(PR [#5647](https://github.com/tobymao/sqlglot/pull/5647) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5638](https://github.com/tobymao/sqlglot/issues/5638) opened by [@catlynkong](https://github.com/catlynkong)* - [`981e0e7`](https://github.com/tobymao/sqlglot/commit/981e0e70a304665e746158c859bcc81f99384685) - **doris**: add support for PARTITION BY LIST *(PR 
[#5615](https://github.com/tobymao/sqlglot/pull/5615) by [@xinge-ji](https://github.com/xinge-ji))* - [`53aa8fe`](https://github.com/tobymao/sqlglot/commit/53aa8fe7f188012f765066f32c4179035fff036d) - **tsql**: support alter table with check closes [#5649](https://github.com/tobymao/sqlglot/pull/5649) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`23cac6c`](https://github.com/tobymao/sqlglot/commit/23cac6c58099a9ac818ac5d3970a427ca3579cca) - **exasol**: Add support for GROUP_CONCAT and LISTAGG functions *(PR [#5646](https://github.com/tobymao/sqlglot/pull/5646) by [@nnamdi16](https://github.com/nnamdi16))* - [`d087ac8`](https://github.com/tobymao/sqlglot/commit/d087ac89376df5ab16de99c8b67f99060f0a6170) - **bigquery**: Add support for ml.generate_embedding function *(PR [#5652](https://github.com/tobymao/sqlglot/pull/5652) by [@rloredo](https://github.com/rloredo))* - [`e71bcb5`](https://github.com/tobymao/sqlglot/commit/e71bcb51181de63c8ad13004216506529fcf9644) - **dremio**: support array_generate_range *(PR [#5653](https://github.com/tobymao/sqlglot/pull/5653) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`edbd04b`](https://github.com/tobymao/sqlglot/commit/edbd04b6a91b1a6f76e4fa938098ba5ed581ba72) - **singlestore**: Fixed generation of exp.RegexpLike *(PR [#5663](https://github.com/tobymao/sqlglot/pull/5663) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`4992edb`](https://github.com/tobymao/sqlglot/commit/4992edbb79f4922917cc5ce5aa687e6f7da7798c) - **singlestore**: Fixed exp.Xor generation *(PR [#5662](https://github.com/tobymao/sqlglot/pull/5662) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`20de3d3`](https://github.com/tobymao/sqlglot/commit/20de3d37cdae0705c67f80fbacbe024a62f34657) - **singlestore**: Fixed parsing/generation of exp.Hll *(PR [#5664](https://github.com/tobymao/sqlglot/pull/5664) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`1a60a5a`](https://github.com/tobymao/sqlglot/commit/1a60a5a845c7431d7d3d7ccb71119699316f4b41) - **singlestore**: Added parsing/generation of JSON_ARRAY_CONTAINS function *(PR [#5661](https://github.com/tobymao/sqlglot/pull/5661) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`f662dc0`](https://github.com/tobymao/sqlglot/commit/f662dc0b47fd14d00899c14a899756a5ba1fe9da) - **singlestore**: Fixed generation of exp.ApproxDistinct *(PR [#5666](https://github.com/tobymao/sqlglot/pull/5666) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e0db0a9`](https://github.com/tobymao/sqlglot/commit/e0db0a95d3cb7614242dbd1b439d408e7e7bd475) - **optimizer**: add parse and annotate type for bigquery FARM_FINGERPRINT *(PR [#5667](https://github.com/tobymao/sqlglot/pull/5667) by [@geooo109](https://github.com/geooo109))* - [`dcd4ef7`](https://github.com/tobymao/sqlglot/commit/dcd4ef769727ed1227911f2d9a85244d61173003) - **singlestore**: Fixed exp.CountIf generation *(PR [#5668](https://github.com/tobymao/sqlglot/pull/5668) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`e431e85`](https://github.com/tobymao/sqlglot/commit/e431e851c2c5d20f049adbc38e370a64d39c346f) - **singlestore**: Fixed generation of exp.LogicalOr *(PR [#5669](https://github.com/tobymao/sqlglot/pull/5669) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`56588c7`](https://github.com/tobymao/sqlglot/commit/56588c7e22b4db4f0e44696a460483ca1e549163) - **bigquery**: Add support for vector_search function. Move predict to BigQuery dialect. 
*(PR [#5660](https://github.com/tobymao/sqlglot/pull/5660) by [@rloredo](https://github.com/rloredo))* - [`f0d2cc2`](https://github.com/tobymao/sqlglot/commit/f0d2cc2b0f72340172ecd154f632aa6a24c15512) - **singlestore**: Fixed generation of exp.LogicalAnd *(PR [#5671](https://github.com/tobymao/sqlglot/pull/5671) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a688a0f`](https://github.com/tobymao/sqlglot/commit/a688a0f0d70f87139e531d1419b338b695bec384) - **optimizer**: parse and annotate type for bigquery APPROX_TOP_COUNT *(PR [#5670](https://github.com/tobymao/sqlglot/pull/5670) by [@geooo109](https://github.com/geooo109))* - [`fa8d571`](https://github.com/tobymao/sqlglot/commit/fa8d57132b1d21d92eb5de3ba88b41f880e14889) - **singlestore**: Fixed generation/parsing of exp.ApproxQuantile *(PR [#5672](https://github.com/tobymao/sqlglot/pull/5672) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9955ebe`](https://github.com/tobymao/sqlglot/commit/9955ebe90d3421815738ecb643806add755c5df3) - **singlestore**: Fixed parsing/generation of exp.Variance *(PR [#5673](https://github.com/tobymao/sqlglot/pull/5673) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`3c93fcc`](https://github.com/tobymao/sqlglot/commit/3c93fcce96ec82e78753f6c9dd5fb0e730a82058) - **optimizer**: parse and annotate type for bigquery APPROX_TOP_SUM *(PR [#5675](https://github.com/tobymao/sqlglot/pull/5675) by [@geooo109](https://github.com/geooo109))* - [`60cbb9d`](https://github.com/tobymao/sqlglot/commit/60cbb9d0e3c9b5a36c1368c9b5bb05def8ce8658) - **dremio**: add CURRENT_DATE_UTC *(PR [#5674](https://github.com/tobymao/sqlglot/pull/5674) by [@jasonthomassql](https://github.com/jasonthomassql))* - :arrow_lower_right: *addresses issue [#5655](https://github.com/tobymao/sqlglot/issues/5655) opened by [@jasonthomassql](https://github.com/jasonthomassql)* - [`741d45a`](https://github.com/tobymao/sqlglot/commit/741d45a0ca7c1bad67da4393cd10cc9cfa49ea68) - **optimizer**: 
parse and annotate type for bigquery FROM/TO_BASE32 *(PR [#5676](https://github.com/tobymao/sqlglot/pull/5676) by [@geooo109](https://github.com/geooo109))* - [`9ae045c`](https://github.com/tobymao/sqlglot/commit/9ae045c0405e43b148e3b9261825288ebf09100c) - **optimizer**: parse and annotate type for bigquery FROM_HEX *(PR [#5679](https://github.com/tobymao/sqlglot/pull/5679) by [@geooo109](https://github.com/geooo109))* - [`5a22a25`](https://github.com/tobymao/sqlglot/commit/5a22a254143978989027f6e7f6163019a34f112a) - **optimizer**: annotate type for bigquery TO_HEX *(PR [#5680](https://github.com/tobymao/sqlglot/pull/5680) by [@geooo109](https://github.com/geooo109))* - [`d920ac3`](https://github.com/tobymao/sqlglot/commit/d920ac3886ce006d76616bc31884ee2f5c4162bc) - **singlestore**: Fixed parsing/generation of exp.RegexpExtractAll *(PR [#5692](https://github.com/tobymao/sqlglot/pull/5692) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`260c72b`](https://github.com/tobymao/sqlglot/commit/260c72befc0510ebe1d007284c0eef9343de20d7) - **singlestore**: Fixed parsing/generation of exp.Contains *(PR [#5684](https://github.com/tobymao/sqlglot/pull/5684) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`081dc67`](https://github.com/tobymao/sqlglot/commit/081dc673b89d3d8d0709b29e359142297ff64536) - **singlestore**: Fixed generation/parsing of exp.VariancePop *(PR [#5682](https://github.com/tobymao/sqlglot/pull/5682) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`eb538bf`](https://github.com/tobymao/sqlglot/commit/eb538bf225645d0a54d614733e447c13cf91a37a) - **singlestore**: Fixed generation of exp.Chr *(PR [#5683](https://github.com/tobymao/sqlglot/pull/5683) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`32d9dd1`](https://github.com/tobymao/sqlglot/commit/32d9dd1309ce0876114f57993596c4456aa1d50f) - **singlestore**: Fixed exp.MD5Digest generation *(PR [#5688](https://github.com/tobymao/sqlglot/pull/5688) by
[@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`5c1eb2d`](https://github.com/tobymao/sqlglot/commit/5c1eb2df5dd3dcc6ed2c8204cec56b5c3d276f87) - **optimizer**: parse and annotate type for bq PARSE_BIG/NUMERIC *(PR [#5690](https://github.com/tobymao/sqlglot/pull/5690) by [@geooo109](https://github.com/geooo109))* - [`6f88500`](https://github.com/tobymao/sqlglot/commit/6f885007a075339cf20034459571a6ae821c61c0) - **singlestore**: Fixed exp.IsAscii generation *(PR [#5687](https://github.com/tobymao/sqlglot/pull/5687) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`311373d`](https://github.com/tobymao/sqlglot/commit/311373d22134de906d1c1cef019541e85e2f7c9f) - **optimizer**: parse and annotate type for bq CODE_POINTS_TO_BYTES *(PR [#5686](https://github.com/tobymao/sqlglot/pull/5686) by [@geooo109](https://github.com/geooo109))* - [`79d9de1`](https://github.com/tobymao/sqlglot/commit/79d9de1745598f8f3ae2c82c1389dd455c946a09) - **optimizer**: parse and annotate type for bq TO_CODE_POINTS *(PR [#5685](https://github.com/tobymao/sqlglot/pull/5685) by [@geooo109](https://github.com/geooo109))* - [`5df3ea9`](https://github.com/tobymao/sqlglot/commit/5df3ea92f59125955124ea1883b777b489db3042) - **optimizer**: parse and annotate type for bq SAFE_CONVERT_BYTES_TO_STRING *(PR [#5681](https://github.com/tobymao/sqlglot/pull/5681) by [@geooo109](https://github.com/geooo109))* - [`c832746`](https://github.com/tobymao/sqlglot/commit/c832746018fbc2c531d5b2a7c7f8cd5d78e511ff) - **optimizer**: parse and annotate type for bigquery APPROX_QUANTILES *(PR [#5678](https://github.com/tobymao/sqlglot/pull/5678) by [@geooo109](https://github.com/geooo109))* - [`8fa5ae8`](https://github.com/tobymao/sqlglot/commit/8fa5ae8a61c698abaea265b4950390ea3ddfa7e9) - **singlestore**: Fixed generation/parsing of exp.RegexpExtract *(PR [#5691](https://github.com/tobymao/sqlglot/pull/5691) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`d6d409a`](https://github.com/tobymao/sqlglot/commit/d6d409a548042063f80d02dfaf5b61a0096d1d50) - **singlestore**: Fixed generation of exp.Repeat *(PR [#5693](https://github.com/tobymao/sqlglot/pull/5693) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`b7db08b`](https://github.com/tobymao/sqlglot/commit/b7db08b96c7d7d02ec54f26b8749b3d57f021d8b) - **singlestore**: Fixed generation of exp.StartsWith *(PR [#5694](https://github.com/tobymao/sqlglot/pull/5694) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`87b04ef`](https://github.com/tobymao/sqlglot/commit/87b04ef0fc2df5064be9e6b75b264cff0639face) - **singlestore**: Fixed generation of exp.FromBase *(PR [#5695](https://github.com/tobymao/sqlglot/pull/5695) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`9c1d0fd`](https://github.com/tobymao/sqlglot/commit/9c1d0fdac9acd3fb3109ca3d3cae9c9ffaed1a7d) - **duckdb**: transpile array unique aggregation closes [#5689](https://github.com/tobymao/sqlglot/pull/5689) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99e169e`](https://github.com/tobymao/sqlglot/commit/99e169ea13d5be3712a47f6b55b98a4764a3c24d) - **optimizer**: parse and annotate type for bq BOOL *(PR [#5697](https://github.com/tobymao/sqlglot/pull/5697) by [@geooo109](https://github.com/geooo109))* - [`3f31770`](https://github.com/tobymao/sqlglot/commit/3f31770c793f464fcac1ce2b8dfa03d4b7f0231c) - **optimizer**: parse and annotate type for bq FLOAT64 *(PR [#5700](https://github.com/tobymao/sqlglot/pull/5700) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`f1269f5`](https://github.com/tobymao/sqlglot/commit/f1269f5ecfccfee4cdeeda5bfd10eb1c47994fad) - **tsql**: do not attach limit modifier to set operation *(PR [#5609](https://github.com/tobymao/sqlglot/pull/5609) by [@georgesittas](https://github.com/georgesittas))* - [`a6edf8e`](https://github.com/tobymao/sqlglot/commit/a6edf8ee3273a7736ed801ef8dea302613b119da) - **tsql**: Remove ORDER
from set op modifiers too *(PR [#5626](https://github.com/tobymao/sqlglot/pull/5626) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5618](https://github.com/tobymao/sqlglot/issues/5618) opened by [@MQMMMQM](https://github.com/MQMMMQM)* - [`ce5840e`](https://github.com/tobymao/sqlglot/commit/ce5840ed615e162a93cd911ab6207160878fcc64) - **exasol**: update several dialect properties to correctly reflect semantics *(PR [#5642](https://github.com/tobymao/sqlglot/pull/5642) by [@nnamdi16](https://github.com/nnamdi16))* - [`3ab1d44`](https://github.com/tobymao/sqlglot/commit/3ab1d4487279cab3be2d3764e51516c6db21629d) - **generator**: Wrap CONCAT items with COALESCE less aggressively *(PR [#5641](https://github.com/tobymao/sqlglot/pull/5641) by [@VaggelisD](https://github.com/VaggelisD))* - [`045d2f0`](https://github.com/tobymao/sqlglot/commit/045d2f02649b0e6dc178c079e4e0db201ed9bf08) - **duckdb**: Transpile Spark's FIRST(col, TRUE) *(PR [#5644](https://github.com/tobymao/sqlglot/pull/5644) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5643](https://github.com/tobymao/sqlglot/issues/5643) opened by [@michal-clutch](https://github.com/michal-clutch)* ### :wrench: Chores - [`4c04c0c`](https://github.com/tobymao/sqlglot/commit/4c04c0ce859ab8314ed36fb8779f14c0fc2f1094) - use a valid SPDX identifier as license classifier *(PR [#5606](https://github.com/tobymao/sqlglot/pull/5606) by [@ecederstrand](https://github.com/ecederstrand))* - [`249f638`](https://github.com/tobymao/sqlglot/commit/249f638877ddd2a1732d1e6bc859793f3bc0622d) - add table to document dialect support level *(PR [#5628](https://github.com/tobymao/sqlglot/pull/5628) by [@georgesittas](https://github.com/georgesittas))* - [`3357125`](https://github.com/tobymao/sqlglot/commit/33571250d172d64a3e0450738b3ad330e5c0a795) - **doris**: refactor unique key prop generation *(PR [#5625](https://github.com/tobymao/sqlglot/pull/5625) by 
[@georgesittas](https://github.com/georgesittas))* - [`545f1ac`](https://github.com/tobymao/sqlglot/commit/545f1acd76bdc4e537209266984137f6c69ce622) - Clean up of PR5614 *(PR [#5648](https://github.com/tobymao/sqlglot/pull/5648) by [@VaggelisD](https://github.com/VaggelisD))* ## [v27.8.0] - 2025-08-19 ### :boom: BREAKING CHANGES - due to [`2a33339`](https://github.com/tobymao/sqlglot/commit/2a333395cde71936df911488afcff92cae735e11) - annotate type for bigquery REPLACE *(PR [#5572](https://github.com/tobymao/sqlglot/pull/5572) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery REPLACE (#5572) - due to [`1e6f813`](https://github.com/tobymao/sqlglot/commit/1e6f81343de641e588f1a05ce7dc01bed72bd849) - annotate type for bigquery REGEXP_EXTRACT_ALL *(PR [#5573](https://github.com/tobymao/sqlglot/pull/5573) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery REGEXP_EXTRACT_ALL (#5573) - due to [`d0d62ed`](https://github.com/tobymao/sqlglot/commit/d0d62ede6320b3fd0eee04b7073f5708676dc58c) - support `TO_CHAR` with numeric inputs *(PR [#5570](https://github.com/tobymao/sqlglot/pull/5570) by [@jasonthomassql](https://github.com/jasonthomassql))*: support `TO_CHAR` with numeric inputs (#5570) - due to [`7928985`](https://github.com/tobymao/sqlglot/commit/7928985a655c3d0244bc9175a37f502b19a5c5f0) - allow dashes in JSONPath keys *(PR [#5574](https://github.com/tobymao/sqlglot/pull/5574) by [@georgesittas](https://github.com/georgesittas))*: allow dashes in JSONPath keys (#5574) - due to [`eb09e6e`](https://github.com/tobymao/sqlglot/commit/eb09e6e32491a05846488de7b72b1dca0e0a2669) - parse and annotate type for bigquery TRANSLATE *(PR [#5575](https://github.com/tobymao/sqlglot/pull/5575) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery TRANSLATE (#5575) - due to [`f9a522b`](https://github.com/tobymao/sqlglot/commit/f9a522b26cd5d643b8b18fa64d70f2a3f0ff2d2c) - parse and annotate type for bigquery SOUNDEX 
*(PR [#5576](https://github.com/tobymao/sqlglot/pull/5576) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery SOUNDEX (#5576) - due to [`51da41b`](https://github.com/tobymao/sqlglot/commit/51da41b90ce421b154e45add28353ac044640a1c) - annotate type for bigquery MD5 *(PR [#5577](https://github.com/tobymao/sqlglot/pull/5577) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery MD5 (#5577) - due to [`bcf302f`](https://github.com/tobymao/sqlglot/commit/bcf302ff6ad2d0adfc29f708a8b53b5c0e547619) - annotate type for bigquery MIN/MAX BY *(PR [#5579](https://github.com/tobymao/sqlglot/pull/5579) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery MIN/MAX BY (#5579) - due to [`c501d9e`](https://github.com/tobymao/sqlglot/commit/c501d9e6f58e4880e4d23f21f53f72dcb5fdaa8c) - parse and annotate type for bigquery GROUPING *(PR [#5581](https://github.com/tobymao/sqlglot/pull/5581) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery GROUPING (#5581) ### :sparkles: New Features - [`2a33339`](https://github.com/tobymao/sqlglot/commit/2a333395cde71936df911488afcff92cae735e11) - **optimizer**: annotate type for bigquery REPLACE *(PR [#5572](https://github.com/tobymao/sqlglot/pull/5572) by [@geooo109](https://github.com/geooo109))* - [`1e6f813`](https://github.com/tobymao/sqlglot/commit/1e6f81343de641e588f1a05ce7dc01bed72bd849) - **optimizer**: annotate type for bigquery REGEXP_EXTRACT_ALL *(PR [#5573](https://github.com/tobymao/sqlglot/pull/5573) by [@geooo109](https://github.com/geooo109))* - [`eb09e6e`](https://github.com/tobymao/sqlglot/commit/eb09e6e32491a05846488de7b72b1dca0e0a2669) - **optimizer**: parse and annotate type for bigquery TRANSLATE *(PR [#5575](https://github.com/tobymao/sqlglot/pull/5575) by [@geooo109](https://github.com/geooo109))* - [`f9a522b`](https://github.com/tobymao/sqlglot/commit/f9a522b26cd5d643b8b18fa64d70f2a3f0ff2d2c) - **optimizer**: parse and 
annotate type for bigquery SOUNDEX *(PR [#5576](https://github.com/tobymao/sqlglot/pull/5576) by [@geooo109](https://github.com/geooo109))* - [`51da41b`](https://github.com/tobymao/sqlglot/commit/51da41b90ce421b154e45add28353ac044640a1c) - **optimizer**: annotate type for bigquery MD5 *(PR [#5577](https://github.com/tobymao/sqlglot/pull/5577) by [@geooo109](https://github.com/geooo109))* - [`bcf302f`](https://github.com/tobymao/sqlglot/commit/bcf302ff6ad2d0adfc29f708a8b53b5c0e547619) - **optimizer**: annotate type for bigquery MIN/MAX BY *(PR [#5579](https://github.com/tobymao/sqlglot/pull/5579) by [@geooo109](https://github.com/geooo109))* - [`c501d9e`](https://github.com/tobymao/sqlglot/commit/c501d9e6f58e4880e4d23f21f53f72dcb5fdaa8c) - **optimizer**: parse and annotate type for bigquery GROUPING *(PR [#5581](https://github.com/tobymao/sqlglot/pull/5581) by [@geooo109](https://github.com/geooo109))* - [`8612825`](https://github.com/tobymao/sqlglot/commit/86128253f911b733d45b073356e3b8ddf261c22b) - **spark**: generate date/time ops as interval binary ops *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8fda774`](https://github.com/tobymao/sqlglot/commit/8fda774b7a9b0c66948349dfe030d3c122ff6eee) - **singlestore**: Added parsing and generation of JSON_EXTRACT *(PR [#5555](https://github.com/tobymao/sqlglot/pull/5555) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`82cc954`](https://github.com/tobymao/sqlglot/commit/82cc9549a875211a400e5c4e818b05ca48a0a9f4) - **exasol**: map div function to IntDiv in exasol dialect *(PR [#5593](https://github.com/tobymao/sqlglot/pull/5593) by [@nnamdi16](https://github.com/nnamdi16))* - [`eb0fe68`](https://github.com/tobymao/sqlglot/commit/eb0fe68d6b5977053c871badf2f5c1895b3e1c66) - **trino**: add JSON_VALUE function support with RETURNING clause *(PR [#5590](https://github.com/tobymao/sqlglot/pull/5590) by [@rev-rwasilewski](https://github.com/rev-rwasilewski))* - 
[`9e95c11`](https://github.com/tobymao/sqlglot/commit/9e95c115ea0304d9ccb4cb0be8389f5ff5f2a952) - **exasol**: mapped weekofyear to week in Exasol dialect *(PR [#5594](https://github.com/tobymao/sqlglot/pull/5594) by [@nnamdi16](https://github.com/nnamdi16))* - [`8f013c3`](https://github.com/tobymao/sqlglot/commit/8f013c37a412ca5978889c1e47b0c6f7add0715d) - **singlestore**: Fixed parsing of DATE function *(PR [#5601](https://github.com/tobymao/sqlglot/pull/5601) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`a4a299a`](https://github.com/tobymao/sqlglot/commit/a4a299acbaf4461f0c2b470bc4e9e9590515eda7) - transpile `TO_CHAR` from Dremio to Databricks *(PR [#5598](https://github.com/tobymao/sqlglot/pull/5598) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`093f35c`](https://github.com/tobymao/sqlglot/commit/093f35c201c3c22c3a14c6f8de26c06246bdf19c) - **dremio**: handle `DATE_FORMAT`, `TO_DATE`, and `TO_TIMESTAMP` *(PR [#5597](https://github.com/tobymao/sqlglot/pull/5597) by [@jasonthomassql](https://github.com/jasonthomassql))* ### :bug: Bug Fixes - [`d0d62ed`](https://github.com/tobymao/sqlglot/commit/d0d62ede6320b3fd0eee04b7073f5708676dc58c) - **dremio**: support `TO_CHAR` with numeric inputs *(PR [#5570](https://github.com/tobymao/sqlglot/pull/5570) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`7928985`](https://github.com/tobymao/sqlglot/commit/7928985a655c3d0244bc9175a37f502b19a5c5f0) - **bigquery**: allow dashes in JSONPath keys *(PR [#5574](https://github.com/tobymao/sqlglot/pull/5574) by [@georgesittas](https://github.com/georgesittas))* - [`866042d`](https://github.com/tobymao/sqlglot/commit/866042d0268da0cebce042c0868878c0fb39c3d1) - Remove TokenType.APPLY from table alias tokens *(PR [#5592](https://github.com/tobymao/sqlglot/pull/5592) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5591](https://github.com/tobymao/sqlglot/issues/5591) opened by 
[@saadbelgi](https://github.com/saadbelgi)* - [`b485f66`](https://github.com/tobymao/sqlglot/commit/b485f6666fa8625b7da45ef832b5d666fbb707ea) - **dremio**: improve `TO_CHAR` transpilability *(PR [#5580](https://github.com/tobymao/sqlglot/pull/5580) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`81874e9`](https://github.com/tobymao/sqlglot/commit/81874e9c3aafcc2cf8fb443f65146c5b3598b9b3) - handle unknown types in `unit_to_str` *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`173e442`](https://github.com/tobymao/sqlglot/commit/173e4425b692728abffa8542324690823f984303) - refactor JSON_VALUE handling for MySQL and Trino *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.7.0] - 2025-08-13 ### :boom: BREAKING CHANGES - due to [`938f4b6`](https://github.com/tobymao/sqlglot/commit/938f4b6ebc1c0d26bd3c1400883978c79a435189) - annotate type for LAST_DAY *(PR [#5528](https://github.com/tobymao/sqlglot/pull/5528) by [@geooo109](https://github.com/geooo109))*: annotate type for LAST_DAY (#5528) - due to [`7d12dac`](https://github.com/tobymao/sqlglot/commit/7d12dac613ba5119334408f2c52cb270067156d9) - annotate type for bigquery GENERATE_TIMESTAMP_ARRAY *(PR [#5529](https://github.com/tobymao/sqlglot/pull/5529) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery GENERATE_TIMESTAMP_ARRAY (#5529) - due to [`d50ebe2`](https://github.com/tobymao/sqlglot/commit/d50ebe286dd8e2836b9eb2a3406f15976db3aa05) - annotate type for bigquery TIME_TRUNC *(PR [#5530](https://github.com/tobymao/sqlglot/pull/5530) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIME_TRUNC (#5530) - due to [`29748be`](https://github.com/tobymao/sqlglot/commit/29748be7dfc10edc9f29665c98327883dd25c13d) - annotate type for bigquery TIME *(PR [#5531](https://github.com/tobymao/sqlglot/pull/5531) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIME (#5531) - due to 
[`7003b3f`](https://github.com/tobymao/sqlglot/commit/7003b3fa39cd455e3643066364696708d1ac4f38) - parse and annotate type for bigquery DATE_FROM_UNIX_DATE *(PR [#5532](https://github.com/tobymao/sqlglot/pull/5532) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery DATE_FROM_UNIX_DATE (#5532) - due to [`a276ca6`](https://github.com/tobymao/sqlglot/commit/a276ca6fd5f9d47fa8c90fcfa19f9864e7a28f8f) - parse and annotate type for bigquery JUSTIFY funcs *(PR [#5534](https://github.com/tobymao/sqlglot/pull/5534) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery JUSTIFY funcs (#5534) - due to [`374178e`](https://github.com/tobymao/sqlglot/commit/374178e22fe8d2d2275b65fe08e27ef66c611220) - parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS *(PR [#5535](https://github.com/tobymao/sqlglot/pull/5535) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS (#5535) - due to [`1d8d1ab`](https://github.com/tobymao/sqlglot/commit/1d8d1abe459053a135a46525d0a13bb861220927) - annotate type for bigquery DATE_TRUNC *(PR [#5540](https://github.com/tobymao/sqlglot/pull/5540) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery DATE_TRUNC (#5540) - due to [`306ba65`](https://github.com/tobymao/sqlglot/commit/306ba6531839ea2823f5165de7bde01d17560845) - annotate type for bigquery TIMESTAMP_TRUNC *(PR [#5541](https://github.com/tobymao/sqlglot/pull/5541) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery TIMESTAMP_TRUNC (#5541) - due to [`d799c5a`](https://github.com/tobymao/sqlglot/commit/d799c5af23010a67c29edb6d45a40fb24903e1a3) - preserve projection names when merging subqueries *(commit by [@snovik75](https://github.com/snovik75))*: preserve projection names when merging subqueries - due to [`8130bd4`](https://github.com/tobymao/sqlglot/commit/8130bd40815803a6781ee8f20fccd30987516192) - WEEKDAY of WEEK as VAR 
*(PR [#5552](https://github.com/tobymao/sqlglot/pull/5552) by [@geooo109](https://github.com/geooo109))*: WEEKDAY of WEEK as VAR (#5552) - due to [`f3ffe19`](https://github.com/tobymao/sqlglot/commit/f3ffe19ec01533c5f27b9d3a7b6704b83c005118) - annotate type for bigquery format_time *(PR [#5559](https://github.com/tobymao/sqlglot/pull/5559) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery format_time (#5559) - due to [`6872b43`](https://github.com/tobymao/sqlglot/commit/6872b43ba17a39137172fd2fa9f0d059ce595ef9) - use dialect in DataType.build fixes [#5560](https://github.com/tobymao/sqlglot/pull/5560) *(commit by [@georgesittas](https://github.com/georgesittas))*: use dialect in DataType.build fixes #5560 - due to [`3ab3690`](https://github.com/tobymao/sqlglot/commit/3ab369096313b418699b7942b1c513c0c66a5331) - parse and annotate type for bigquery PARSE_DATETIME *(PR [#5558](https://github.com/tobymao/sqlglot/pull/5558) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery PARSE_DATETIME (#5558) - due to [`e5da951`](https://github.com/tobymao/sqlglot/commit/e5da951542eb55691bc43fbbfbec4a30100de038) - parse and annotate type for bigquery PARSE_TIME *(PR [#5561](https://github.com/tobymao/sqlglot/pull/5561) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery PARSE_TIME (#5561) - due to [`798e213`](https://github.com/tobymao/sqlglot/commit/798e213fd10c3b61afbd8cef621546de65fa6f26) - improve transpilability of ANY_VALUE closes [#5563](https://github.com/tobymao/sqlglot/pull/5563) *(commit by [@georgesittas](https://github.com/georgesittas))*: improve transpilability of ANY_VALUE closes #5563 - due to [`8c0cb76`](https://github.com/tobymao/sqlglot/commit/8c0cb764fd825062fb7334032b8eeffbc39627d5) - more robust CREATE SEQUENCE *(PR [#5566](https://github.com/tobymao/sqlglot/pull/5566) by [@geooo109](https://github.com/geooo109))*: more robust CREATE SEQUENCE (#5566) - due to 
[`c7041c7`](https://github.com/tobymao/sqlglot/commit/c7041c71250b17192c2f25fb8f33407324d332c2) - parse and annotate type for bigquery BYTE_LENGTH *(PR [#5568](https://github.com/tobymao/sqlglot/pull/5568) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery BYTE_LENGTH (#5568) - due to [`a6c61c3`](https://github.com/tobymao/sqlglot/commit/a6c61c34f1e168c97dd5c2b8ec071372ba593992) - parse and annotate type for bigquery CODE_POINTS_TO_STRING *(PR [#5569](https://github.com/tobymao/sqlglot/pull/5569) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery CODE_POINTS_TO_STRING (#5569) - due to [`51e0335`](https://github.com/tobymao/sqlglot/commit/51e0335377fe2bc2e2a94a623475791e9dd19fb9) - parse and annotate type for bigquery REVERSE *(PR [#5571](https://github.com/tobymao/sqlglot/pull/5571) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for bigquery REVERSE (#5571) ### :sparkles: New Features - [`1fb90db`](https://github.com/tobymao/sqlglot/commit/1fb90db52b59e6e3a40597c6f611d0476b72025b) - **teradata**: Add support for Teradata set query band expression *(PR [#5519](https://github.com/tobymao/sqlglot/pull/5519) by [@treff7es](https://github.com/treff7es))* - [`a49baaf`](https://github.com/tobymao/sqlglot/commit/a49baaf717cb41abb25ca51ae5adddc8473baa8b) - **doris**: Override table_sql to avoid AS keyword in UPDATE and DELETE statements *(PR [#5517](https://github.com/tobymao/sqlglot/pull/5517) by [@peterylh](https://github.com/peterylh))* - [`75fd6d2`](https://github.com/tobymao/sqlglot/commit/75fd6d21fb7bc8399432e73d10b4837ae62d2ab5) - **exasol**: Add support for date difference functions in Exasol dialect *(PR [#5510](https://github.com/tobymao/sqlglot/pull/5510) by [@nnamdi16](https://github.com/nnamdi16))* - [`2a91bb4`](https://github.com/tobymao/sqlglot/commit/2a91bb4f17c7569a5b409cc07e970e5d68235149) - **teradata**: Add support for Teradata locking select *(PR 
[#5524](https://github.com/tobymao/sqlglot/pull/5524) by [@treff7es](https://github.com/treff7es))* - [`938f4b6`](https://github.com/tobymao/sqlglot/commit/938f4b6ebc1c0d26bd3c1400883978c79a435189) - **optimizer**: annotate type for LAST_DAY *(PR [#5528](https://github.com/tobymao/sqlglot/pull/5528) by [@geooo109](https://github.com/geooo109))* - [`7d12dac`](https://github.com/tobymao/sqlglot/commit/7d12dac613ba5119334408f2c52cb270067156d9) - **optimizer**: annotate type for bigquery GENERATE_TIMESTAMP_ARRAY *(PR [#5529](https://github.com/tobymao/sqlglot/pull/5529) by [@geooo109](https://github.com/geooo109))* - [`d50ebe2`](https://github.com/tobymao/sqlglot/commit/d50ebe286dd8e2836b9eb2a3406f15976db3aa05) - **optimizer**: annotate type for bigquery TIME_TRUNC *(PR [#5530](https://github.com/tobymao/sqlglot/pull/5530) by [@geooo109](https://github.com/geooo109))* - [`29748be`](https://github.com/tobymao/sqlglot/commit/29748be7dfc10edc9f29665c98327883dd25c13d) - **optimizer**: annotate type for bigquery TIME *(PR [#5531](https://github.com/tobymao/sqlglot/pull/5531) by [@geooo109](https://github.com/geooo109))* - [`7003b3f`](https://github.com/tobymao/sqlglot/commit/7003b3fa39cd455e3643066364696708d1ac4f38) - **optimizer**: parse and annotate type for bigquery DATE_FROM_UNIX_DATE *(PR [#5532](https://github.com/tobymao/sqlglot/pull/5532) by [@geooo109](https://github.com/geooo109))* - [`a276ca6`](https://github.com/tobymao/sqlglot/commit/a276ca6fd5f9d47fa8c90fcfa19f9864e7a28f8f) - **optimizer**: parse and annotate type for bigquery JUSTIFY funcs *(PR [#5534](https://github.com/tobymao/sqlglot/pull/5534) by [@geooo109](https://github.com/geooo109))* - [`374178e`](https://github.com/tobymao/sqlglot/commit/374178e22fe8d2d2275b65fe08e27ef66c611220) - **optimizer**: parse and annotate type for bigquery UNIX_MICROS and UNIX_MILLIS *(PR [#5535](https://github.com/tobymao/sqlglot/pull/5535) by [@geooo109](https://github.com/geooo109))* - 
[`1d8d1ab`](https://github.com/tobymao/sqlglot/commit/1d8d1abe459053a135a46525d0a13bb861220927) - **optimizer**: annotate type for bigquery DATE_TRUNC *(PR [#5540](https://github.com/tobymao/sqlglot/pull/5540) by [@geooo109](https://github.com/geooo109))* - [`306ba65`](https://github.com/tobymao/sqlglot/commit/306ba6531839ea2823f5165de7bde01d17560845) - **optimizer**: annotate type for bigquery TIMESTAMP_TRUNC *(PR [#5541](https://github.com/tobymao/sqlglot/pull/5541) by [@geooo109](https://github.com/geooo109))* - [`6a68cca`](https://github.com/tobymao/sqlglot/commit/6a68cca97ad4bdd75c544ada0a5af0fa92ec4664) - **dremio**: support lowercase `TIME_MAPPING` formats *(PR [#5556](https://github.com/tobymao/sqlglot/pull/5556) by [@jasonthomassql](https://github.com/jasonthomassql))* - [`f3ffe19`](https://github.com/tobymao/sqlglot/commit/f3ffe19ec01533c5f27b9d3a7b6704b83c005118) - **optimizer**: annotate type for bigquery format_time *(PR [#5559](https://github.com/tobymao/sqlglot/pull/5559) by [@geooo109](https://github.com/geooo109))* - [`3ab3690`](https://github.com/tobymao/sqlglot/commit/3ab369096313b418699b7942b1c513c0c66a5331) - **optimizer**: parse and annotate type for bigquery PARSE_DATETIME *(PR [#5558](https://github.com/tobymao/sqlglot/pull/5558) by [@geooo109](https://github.com/geooo109))* - [`e5da951`](https://github.com/tobymao/sqlglot/commit/e5da951542eb55691bc43fbbfbec4a30100de038) - **optimizer**: parse and annotate type for bigquery PARSE_TIME *(PR [#5561](https://github.com/tobymao/sqlglot/pull/5561) by [@geooo109](https://github.com/geooo109))* - [`902a0cd`](https://github.com/tobymao/sqlglot/commit/902a0cdfe46f693aa55612d45a2de2def21f0b8c) - **singlestore**: Added parsing/generation of UNIXTIME functions *(PR [#5562](https://github.com/tobymao/sqlglot/pull/5562) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`798e213`](https://github.com/tobymao/sqlglot/commit/798e213fd10c3b61afbd8cef621546de65fa6f26) - **duckdb**: improve 
transpilability of ANY_VALUE closes [#5563](https://github.com/tobymao/sqlglot/pull/5563) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c7041c7`](https://github.com/tobymao/sqlglot/commit/c7041c71250b17192c2f25fb8f33407324d332c2) - **optimizer**: parse and annotate type for bigquery BYTE_LENGTH *(PR [#5568](https://github.com/tobymao/sqlglot/pull/5568) by [@geooo109](https://github.com/geooo109))* - [`a6c61c3`](https://github.com/tobymao/sqlglot/commit/a6c61c34f1e168c97dd5c2b8ec071372ba593992) - **optimizer**: parse and annotate type for bigquery CODE_POINTS_TO_STRING *(PR [#5569](https://github.com/tobymao/sqlglot/pull/5569) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`9020684`](https://github.com/tobymao/sqlglot/commit/9020684a7e984a10fa4775339596ac5a0d6a6d93) - nested natural join performance closes [#5514](https://github.com/tobymao/sqlglot/pull/5514) *(PR [#5515](https://github.com/tobymao/sqlglot/pull/5515) by [@tobymao](https://github.com/tobymao))* - [`394870a`](https://github.com/tobymao/sqlglot/commit/394870a7ee9bb3bc814b7c3847193687f06b432b) - **duckdb**: transpile ADD_MONTHS *(PR [#5523](https://github.com/tobymao/sqlglot/pull/5523) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5505](https://github.com/tobymao/sqlglot/issues/5505) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`249692c`](https://github.com/tobymao/sqlglot/commit/249692c67450a1fe3775e1f35b6f62fdb0a62e1a) - **duckdb**: put guard in AddMonths generator before annotating it *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d799c5a`](https://github.com/tobymao/sqlglot/commit/d799c5af23010a67c29edb6d45a40fb24903e1a3) - **optimizer**: preserve projection names when merging subqueries *(commit by [@snovik75](https://github.com/snovik75))* - [`8130bd4`](https://github.com/tobymao/sqlglot/commit/8130bd40815803a6781ee8f20fccd30987516192) - **parser**: WEEKDAY of WEEK as VAR *(PR 
[#5552](https://github.com/tobymao/sqlglot/pull/5552) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5547](https://github.com/tobymao/sqlglot/issues/5547) opened by [@rloredo](https://github.com/rloredo)* - [`4e1373f`](https://github.com/tobymao/sqlglot/commit/4e1373f301cbea3cb5762fc1430b65deae3f9d04) - **doris**: Rename Table *(PR [#5549](https://github.com/tobymao/sqlglot/pull/5549) by [@xinge-ji](https://github.com/xinge-ji))* - [`16f544d`](https://github.com/tobymao/sqlglot/commit/16f544dc25d5d61277d32f02e4be18c10d16cf9f) - **doris**: fix DATE_TRUNC and partition by *(PR [#5553](https://github.com/tobymao/sqlglot/pull/5553) by [@xinge-ji](https://github.com/xinge-ji))* - [`6295414`](https://github.com/tobymao/sqlglot/commit/6295414fb41401f92993e661b880a0727e74c087) - convert unit to Var instead of choosing default in `unit_to_var` *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6872b43`](https://github.com/tobymao/sqlglot/commit/6872b43ba17a39137172fd2fa9f0d059ce595ef9) - **parser**: use dialect in DataType.build fixes [#5560](https://github.com/tobymao/sqlglot/pull/5560) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6f354d9`](https://github.com/tobymao/sqlglot/commit/6f354d958fb9ca9242b7fc1d2da86af74d57fedc) - **clickhouse**: add ROWS keyword in OFFSET followed by FETCH fixes [#5564](https://github.com/tobymao/sqlglot/pull/5564) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8c0cb76`](https://github.com/tobymao/sqlglot/commit/8c0cb764fd825062fb7334032b8eeffbc39627d5) - **parser**: more robust CREATE SEQUENCE *(PR [#5566](https://github.com/tobymao/sqlglot/pull/5566) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5537](https://github.com/tobymao/sqlglot/issues/5537) opened by [@tekumara](https://github.com/tekumara)* - [`7e9df88`](https://github.com/tobymao/sqlglot/commit/7e9df880bc118d0dbb2dbd6344f805f79af2fe5e) - **doris**: 
CURRENT_DATE *(PR [#5567](https://github.com/tobymao/sqlglot/pull/5567) by [@xinge-ji](https://github.com/xinge-ji))* - [`51e0335`](https://github.com/tobymao/sqlglot/commit/51e0335377fe2bc2e2a94a623475791e9dd19fb9) - **optimizer**: parse and annotate type for bigquery REVERSE *(PR [#5571](https://github.com/tobymao/sqlglot/pull/5571) by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`720f634`](https://github.com/tobymao/sqlglot/commit/720f6343f6144e8986ec6b7e50419c3d7a331f0a) - Fix style on main, refactor exasol tests *(PR [#5527](https://github.com/tobymao/sqlglot/pull/5527) by [@VaggelisD](https://github.com/VaggelisD))* - [`5653501`](https://github.com/tobymao/sqlglot/commit/5653501606f041282b6315c3efa33b9a3baf8d98) - Refactor PR 5517 *(PR [#5526](https://github.com/tobymao/sqlglot/pull/5526) by [@VaggelisD](https://github.com/VaggelisD))* - [`d15dfe3`](https://github.com/tobymao/sqlglot/commit/d15dfe3f0f4444e4999ad65051b2474e62f422b3) - build type using dialect for bigquery *(PR [#5539](https://github.com/tobymao/sqlglot/pull/5539) by [@geooo109](https://github.com/geooo109))* ## [v27.6.0] - 2025-08-01 ### :boom: BREAKING CHANGES - due to [`6b691b3`](https://github.com/tobymao/sqlglot/commit/6b691b33c3528c0377bd8822a3df90de869c6cb1) - Parse and transpile GET(...) extract function *(PR [#5500](https://github.com/tobymao/sqlglot/pull/5500) by [@VaggelisD](https://github.com/VaggelisD))*: Parse and transpile GET(...) extract function (#5500) - due to [`964a275`](https://github.com/tobymao/sqlglot/commit/964a275b42314380de3b301ada9f9756602729f7) - Make `UNION` column qualification recursive *(PR [#5508](https://github.com/tobymao/sqlglot/pull/5508) by [@VaggelisD](https://github.com/VaggelisD))*: Make `UNION` column qualification recursive (#5508) ### :sparkles: New Features - [`6b691b3`](https://github.com/tobymao/sqlglot/commit/6b691b33c3528c0377bd8822a3df90de869c6cb1) - **snowflake**: Parse and transpile GET(...) 
extract function *(PR [#5500](https://github.com/tobymao/sqlglot/pull/5500) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5495](https://github.com/tobymao/sqlglot/issues/5495) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`a2a2f0f`](https://github.com/tobymao/sqlglot/commit/a2a2f0fe910228651c5c39beebcc02172a0b7e94) - **exasol**: Add support for IF, NULLIFZERO, and ZEROIFNULL functions *(PR [#5502](https://github.com/tobymao/sqlglot/pull/5502) by [@nnamdi16](https://github.com/nnamdi16))* - [`2d8ce58`](https://github.com/tobymao/sqlglot/commit/2d8ce587c75f21b188ec4c201936eedac3b051e8) - **singlestore**: Added cast operator *(PR [#5504](https://github.com/tobymao/sqlglot/pull/5504) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`6256348`](https://github.com/tobymao/sqlglot/commit/6256348a28b72ae9052d4244736846af209410b0) - **exasol**: add support for ADD_DAYS function in exasol dialect *(PR [#5507](https://github.com/tobymao/sqlglot/pull/5507) by [@nnamdi16](https://github.com/nnamdi16))* - [`2f40fc5`](https://github.com/tobymao/sqlglot/commit/2f40fc578a840c9276a4c3b91351fb8d95c837fc) - add more pseudocols to bq which are not expanded by star *(PR [#5509](https://github.com/tobymao/sqlglot/pull/5509) by [@z3z1ma](https://github.com/z3z1ma))* ### :bug: Bug Fixes - [`3b52061`](https://github.com/tobymao/sqlglot/commit/3b520611c5a894ddea935d13aadd27c791a8a755) - **exasol**: fix TokenType.TEXT mapping in exasol dialect *(PR [#5506](https://github.com/tobymao/sqlglot/pull/5506) by [@nnamdi16](https://github.com/nnamdi16))* - [`964a275`](https://github.com/tobymao/sqlglot/commit/964a275b42314380de3b301ada9f9756602729f7) - Make `UNION` column qualification recursive *(PR [#5508](https://github.com/tobymao/sqlglot/pull/5508) by [@VaggelisD](https://github.com/VaggelisD))* ## [v27.5.1] - 2025-07-30 ### :bug: Bug Fixes - 
[`caf71d6`](https://github.com/tobymao/sqlglot/commit/caf71d687c0048d2346fddaee58b519e4f2e7945) - `between` builder should not set `symmetric` by default *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.5.0] - 2025-07-30 ### :boom: BREAKING CHANGES - due to [`002286e`](https://github.com/tobymao/sqlglot/commit/002286ee05a608e303a2238a9a74ab963709b5da) - remove AM/PM entries from postgres, oracle `TIME_MAPPING` *(PR [#5491](https://github.com/tobymao/sqlglot/pull/5491) by [@georgesittas](https://github.com/georgesittas))*: remove AM/PM entries from postgres, oracle `TIME_MAPPING` (#5491) - due to [`ad78db6`](https://github.com/tobymao/sqlglot/commit/ad78db6c9002a5bf9188d66f0080dfefd070f77b) - Refactor `LIKE ANY` and support `ALL | SOME` quantifiers *(PR [#5493](https://github.com/tobymao/sqlglot/pull/5493) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor `LIKE ANY` and support `ALL | SOME` quantifiers (#5493) ### :sparkles: New Features - [`8cdd9e8`](https://github.com/tobymao/sqlglot/commit/8cdd9e8715b4cf67c200c723940743ed69bbfd80) - **mysql**: Parse UNIQUE INDEX constraint similar to UNIQUE KEY *(PR [#5489](https://github.com/tobymao/sqlglot/pull/5489) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5479](https://github.com/tobymao/sqlglot/issues/5479) opened by [@nathanchapman](https://github.com/nathanchapman)* - [`787d167`](https://github.com/tobymao/sqlglot/commit/787d167d694b557d6e43ed391f59847a888fa572) - **exasol**: add support for REGEXP_SUBSTR in exasol dialect *(PR [#5487](https://github.com/tobymao/sqlglot/pull/5487) by [@nnamdi16](https://github.com/nnamdi16))* - [`0963f60`](https://github.com/tobymao/sqlglot/commit/0963f60987c267c64f2fcfbde469b8b28911a14b) - **singlestore**: Fixed time formatting *(PR [#5476](https://github.com/tobymao/sqlglot/pull/5476) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - 
[`488d2e4`](https://github.com/tobymao/sqlglot/commit/488d2e4bf9d4eb148356d1fd6c2360bbf77f283c) - **singlestore**: Added RESERVED_KEYWORDS *(PR [#5497](https://github.com/tobymao/sqlglot/pull/5497) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* - [`fad9992`](https://github.com/tobymao/sqlglot/commit/fad9992a00478a964552f72802b95ca3918c4377) - **exasol**: Add support for TRUNC, TRUNCATE and DATE_TRUNC function… *(PR [#5490](https://github.com/tobymao/sqlglot/pull/5490) by [@nnamdi16](https://github.com/nnamdi16))* - [`ad78db6`](https://github.com/tobymao/sqlglot/commit/ad78db6c9002a5bf9188d66f0080dfefd070f77b) - Refactor `LIKE ANY` and support `ALL | SOME` quantifiers *(PR [#5493](https://github.com/tobymao/sqlglot/pull/5493) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5484](https://github.com/tobymao/sqlglot/issues/5484) opened by [@mazum21](https://github.com/mazum21)* - [`a7a6f16`](https://github.com/tobymao/sqlglot/commit/a7a6f167d30ac19383ad15931c26751c66a61976) - **singlestore**: Added Tokenizer *(PR [#5492](https://github.com/tobymao/sqlglot/pull/5492) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* ### :bug: Bug Fixes - [`3982653`](https://github.com/tobymao/sqlglot/commit/3982653e62a42ca1be2bdd8722119e27bd1ba680) - Do not consume BUCKET/TRUNCATE as partitioning keywords *(PR [#5488](https://github.com/tobymao/sqlglot/pull/5488) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5485](https://github.com/tobymao/sqlglot/issues/5485) opened by [@chenkovsky](https://github.com/chenkovsky)* - [`002286e`](https://github.com/tobymao/sqlglot/commit/002286ee05a608e303a2238a9a74ab963709b5da) - remove AM/PM entries from postgres, oracle `TIME_MAPPING` *(PR [#5491](https://github.com/tobymao/sqlglot/pull/5491) by [@georgesittas](https://github.com/georgesittas))* - [`74f278a`](https://github.com/tobymao/sqlglot/commit/74f278a226058e196270042e2a9664b9acded28a) - 
**optimizer**: Fix SEMI/ANTI join handling in optimizer rules *(PR [#5498](https://github.com/tobymao/sqlglot/pull/5498) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5481](https://github.com/tobymao/sqlglot/issues/5481) opened by [@themattmorris](https://github.com/themattmorris)* - [`42633fb`](https://github.com/tobymao/sqlglot/commit/42633fb49b3c04eeea42e061e33ee08e61960cb4) - dont print (A)SYMMETRIC keyword in BETWEEN for postgres subclasses *(PR [#5503](https://github.com/tobymao/sqlglot/pull/5503) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`18b7d0f`](https://github.com/tobymao/sqlglot/commit/18b7d0fe19708d88b224770d844a8f6a74fe2aa7) - fix deprecated 'license' specification format *(PR [#5494](https://github.com/tobymao/sqlglot/pull/5494) by [@loonies](https://github.com/loonies))* ## [v27.4.1] - 2025-07-27 ### :bug: Bug Fixes - [`ba2b3e2`](https://github.com/tobymao/sqlglot/commit/ba2b3e21ca5454402808b68697ea4eb62963d341) - **bigquery**: make exp.Array type inference more robust *(PR [#5483](https://github.com/tobymao/sqlglot/pull/5483) by [@georgesittas](https://github.com/georgesittas))* ## [v27.4.0] - 2025-07-25 ### :boom: BREAKING CHANGES - due to [`4f348bd`](https://github.com/tobymao/sqlglot/commit/4f348bddda21b18841fd2d728fe486e95cdaa549) - store Query schemas in meta dict instead of type attr *(PR [#5480](https://github.com/tobymao/sqlglot/pull/5480) by [@georgesittas](https://github.com/georgesittas))*: store Query schemas in meta dict instead of type attr (#5480) ### :sparkles: New Features - [`7961ece`](https://github.com/tobymao/sqlglot/commit/7961ece058f3771364aad5beedba9484e3a2e27c) - **exasol**: Add support for HASH_SHA1 function *(PR [#5468](https://github.com/tobymao/sqlglot/pull/5468) by [@nnamdi16](https://github.com/nnamdi16))* - [`406815d`](https://github.com/tobymao/sqlglot/commit/406815de21f0fdc9874ff46155d4ee0274aa6337) - **exasol**: support HASH_SHA1 *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`e6f4fc9`](https://github.com/tobymao/sqlglot/commit/e6f4fc9c6d59d96777b2a2ec5dcc360e53639f8d) - **sqlite**: support ATTACH/DETACH DATABASE *(PR [#5469](https://github.com/tobymao/sqlglot/pull/5469) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5459](https://github.com/tobymao/sqlglot/issues/5459) opened by [@mariofox](https://github.com/mariofox)* - [`8aa3498`](https://github.com/tobymao/sqlglot/commit/8aa349890673dccdd4daa0aea6ca5fcb9fdaf46f) - **hive, spark**: Add support for LOCATION in ADD PARTITION *(PR [#5472](https://github.com/tobymao/sqlglot/pull/5472) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5457](https://github.com/tobymao/sqlglot/issues/5457) opened by [@tsamaras](https://github.com/tsamaras)* - [`44adfc0`](https://github.com/tobymao/sqlglot/commit/44adfc0e74da9d1b05a5a8a67b81fb7c67634c70) - **exasol**: add HASH_MD5 functionality to exasol dialect *(PR [#5473](https://github.com/tobymao/sqlglot/pull/5473) by [@nnamdi16](https://github.com/nnamdi16))* - [`05e1c4d`](https://github.com/tobymao/sqlglot/commit/05e1c4dbf795915448173a894a89a33b289a3b5b) - **snowflake**: Transpile BQ's `STRUCT` dot access *(PR [#5471](https://github.com/tobymao/sqlglot/pull/5471) by [@VaggelisD](https://github.com/VaggelisD))* - [`3c5ecdf`](https://github.com/tobymao/sqlglot/commit/3c5ecdf7f27629c01f0f3402e64a9dedf0583851) - **exasol**: Add HASHTYPE_MD5 functions to Exasol dialect *(PR [#5474](https://github.com/tobymao/sqlglot/pull/5474) by [@nnamdi16](https://github.com/nnamdi16))* - [`1d640d2`](https://github.com/tobymao/sqlglot/commit/1d640d2278288b9a39a65b2532a13bc17e06c4e8) - **exasol**: add support for HASH_SHA256 and HASH_SHA512 hashing *(PR [#5475](https://github.com/tobymao/sqlglot/pull/5475) by [@nnamdi16](https://github.com/nnamdi16))* ### :bug: Bug Fixes - 
[`e1819d6`](https://github.com/tobymao/sqlglot/commit/e1819d6451fec0eb3a1f77c90fd8d5c5b0d89889) - only strip kind from joins when it is inner|outer *(PR [#5477](https://github.com/tobymao/sqlglot/pull/5477) by [@themattmorris](https://github.com/themattmorris))* - :arrow_lower_right: *fixes issue [#5470](https://github.com/tobymao/sqlglot/issues/5470) opened by [@themattmorris](https://github.com/themattmorris)* - [`4f348bd`](https://github.com/tobymao/sqlglot/commit/4f348bddda21b18841fd2d728fe486e95cdaa549) - **bigquery**: store Query schemas in meta dict instead of type attr *(PR [#5480](https://github.com/tobymao/sqlglot/pull/5480) by [@georgesittas](https://github.com/georgesittas))* ## [v27.3.1] - 2025-07-24 ### :boom: BREAKING CHANGES - due to [`48703c4`](https://github.com/tobymao/sqlglot/commit/48703c4fadd9f24de151a63d1bfa74f4b8e71133) - temporarily move VARCHAR length inference logic to Fabric *(commit by [@georgesittas](https://github.com/georgesittas))*: temporarily move VARCHAR length inference logic to Fabric ### :sparkles: New Features - [`4cc321c`](https://github.com/tobymao/sqlglot/commit/4cc321cc1995d538ab0c48a7a0a473c31e76ddff) - **singlestore**: Added initial implementation of SingleStore dialect *(PR [#5447](https://github.com/tobymao/sqlglot/pull/5447) by [@AdalbertMemSQL](https://github.com/AdalbertMemSQL))* ### :wrench: Chores - [`48703c4`](https://github.com/tobymao/sqlglot/commit/48703c4fadd9f24de151a63d1bfa74f4b8e71133) - **tsql**: temporarily move VARCHAR length inference logic to Fabric *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.3.0] - 2025-07-24 ### :boom: BREAKING CHANGES - due to [`d7ccb48`](https://github.com/tobymao/sqlglot/commit/d7ccb48e542c49258e31cc4df45f49beebc2e238) - week/quarter support *(PR [#5374](https://github.com/tobymao/sqlglot/pull/5374) by [@eakmanrq](https://github.com/eakmanrq))*: week/quarter support (#5374) - due to 
[`b368fba`](https://github.com/tobymao/sqlglot/commit/b368fba59b606e038d445b2ca2d8436e115af3d6) - parse and annotate type for ASCII *(PR [#5377](https://github.com/tobymao/sqlglot/pull/5377) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for ASCII (#5377) - due to [`7f19b31`](https://github.com/tobymao/sqlglot/commit/7f19b31ebd7981e53a8f8ba343b4f3222fe160c7) - annotate type for UNICODE *(PR [#5381](https://github.com/tobymao/sqlglot/pull/5381) by [@geooo109](https://github.com/geooo109))*: annotate type for UNICODE (#5381) - due to [`9e8d3ab`](https://github.com/tobymao/sqlglot/commit/9e8d3abedcffb1c267ed0e6a8332af3b52105d41) - Preserve struct-column parentheses for RisingWave dialect *(PR [#5376](https://github.com/tobymao/sqlglot/pull/5376) by [@MisterWheatley](https://github.com/MisterWheatley))*: Added dialect as argument to `simplify_parens` function * style: Ran formatter and tests. Fixed type annotation for simplify_parens * Fix: Make dialect in `simplify_parens` optional. Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> * Fix(optimizer): Tweaks to make simple non-nested star expand pass unit test for RW * Fix(optimizer): Added test for deep nested unpacking for BigQuery and RisingWave * style: Ran formatting check * fix: Remove unused function from RisingWave dialect test * docs: updated docstring of new _expand_struct_stars_risingwave internal function * fix: apply suggestions from code review 2 Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> * fix(optimizer,risingwave): Ensure that struct star-expansion to the correct level for RisingWave Updated logic for expanding (struct_col).* expressions in RisingWave to correctly handle the level of nesting. Moved struct expansion tests to tests/fixtures/qualify_columns.sql at the behest of maintainers. 
--------- - due to [`3223e63`](https://github.com/tobymao/sqlglot/commit/3223e6394fdd3f8e48c68bbb940b661ff8e76fd8) - cast datetimeoffset to datetime2 *(PR [#5385](https://github.com/tobymao/sqlglot/pull/5385) by [@mattiasthalen](https://github.com/mattiasthalen))*: cast datetimeoffset to datetime2 (#5385) - due to [`06cea31`](https://github.com/tobymao/sqlglot/commit/06cea310bd9fd3a9a9fa0ba008596e878a430df8) - support KEY related locks *(PR [#5397](https://github.com/tobymao/sqlglot/pull/5397) by [@geooo109](https://github.com/geooo109))*: support KEY related locks (#5397) - due to [`1014a67`](https://github.com/tobymao/sqlglot/commit/1014a6759b0917ef1bf5af0dbbdcca72214a8dea) - remove redundant todate in dayofweek closes [#5398](https://github.com/tobymao/sqlglot/pull/5398) *(PR [#5399](https://github.com/tobymao/sqlglot/pull/5399) by [@tobymao](https://github.com/tobymao))*: remove redundant todate in dayofweek closes #5398 (#5399) - due to [`b2631ae`](https://github.com/tobymao/sqlglot/commit/b2631aec8d1bdb08decb201b6bd2ba5d927bb121) - annotate type for bigquery BIT_AND, BIT_OR, BIT_XOR, BIT_COUNT *(PR [#5405](https://github.com/tobymao/sqlglot/pull/5405) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery BIT_AND, BIT_OR, BIT_XOR, BIT_COUNT (#5405) - due to [`5835b8d`](https://github.com/tobymao/sqlglot/commit/5835b8d6c7fe77d9645691bb88021af137ed0bac) - make bracket parsing aware of duckdb MAP func *(PR [#5423](https://github.com/tobymao/sqlglot/pull/5423) by [@geooo109](https://github.com/geooo109))*: make bracket parsing aware of duckdb MAP func (#5423) - due to [`489dc5c`](https://github.com/tobymao/sqlglot/commit/489dc5c2f7506e0fe4de549384dd0f816e9fd12f) - parse and annotate type support for JSON_ARRAY *(PR [#5424](https://github.com/tobymao/sqlglot/pull/5424) by [@geooo109](https://github.com/geooo109))*: parse and annotate type support for JSON_ARRAY (#5424) - due to 
[`0ed518c`](https://github.com/tobymao/sqlglot/commit/0ed518c67042002ee0af91bee0b9e7093c85f926) - annotate type for bigquery JSON_VALUE *(PR [#5427](https://github.com/tobymao/sqlglot/pull/5427) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery JSON_VALUE (#5427) - due to [`6091617`](https://github.com/tobymao/sqlglot/commit/6091617067c263e3e834e579b37aa1c601b1ddc7) - annotate type for bigquery JSON_VALUE_ARRAY *(PR [#5428](https://github.com/tobymao/sqlglot/pull/5428) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery JSON_VALUE_ARRAY (#5428) - due to [`631c851`](https://github.com/tobymao/sqlglot/commit/631c851cbbfbf55cb66a79c2549aeeb443fcab83) - parse and annotate type support for bigquery JSON_TYPE *(PR [#5430](https://github.com/tobymao/sqlglot/pull/5430) by [@geooo109](https://github.com/geooo109))*: parse and annotate type support for bigquery JSON_TYPE (#5430) - due to [`6268f6f`](https://github.com/tobymao/sqlglot/commit/6268f6f39dda1ca5cf2ad0403e219b49c5c9593a) - add default precision to CHAR/VARCHAR create expressions *(PR [#5434](https://github.com/tobymao/sqlglot/pull/5434) by [@mattiasthalen](https://github.com/mattiasthalen))*: add default precision to CHAR/VARCHAR create expressions (#5434) - due to [`8467bad`](https://github.com/tobymao/sqlglot/commit/8467bad405e27c842c989e71588adc39cf2383fc) - add parsing/generating for BigQuery `DECLARE` *(PR [#5442](https://github.com/tobymao/sqlglot/pull/5442) by [@plaflamme](https://github.com/plaflamme))*: add parsing/generating for BigQuery `DECLARE` (#5442) - due to [`79c5c30`](https://github.com/tobymao/sqlglot/commit/79c5c30f3802c6959376b3b0f3c4d055a30b6b43) - transpile STRING_AGG *(PR [#5449](https://github.com/tobymao/sqlglot/pull/5449) by [@geooo109](https://github.com/geooo109))*: transpile STRING_AGG (#5449) - due to [`190f8ab`](https://github.com/tobymao/sqlglot/commit/190f8abe3d3bbda09e2f945287398d2aa9d6a863) - improve BigQuery `UNNEST` transpilation 
*(PR [#5451](https://github.com/tobymao/sqlglot/pull/5451) by [@georgesittas](https://github.com/georgesittas))*: improve BigQuery `UNNEST` transpilation (#5451) - due to [`3590e75`](https://github.com/tobymao/sqlglot/commit/3590e75c1df2d572e2fea664893dba5565a17e05) - support ? placeholder *(PR [#5455](https://github.com/tobymao/sqlglot/pull/5455) by [@geooo109](https://github.com/geooo109))*: support ? placeholder (#5455) - due to [`cdbf595`](https://github.com/tobymao/sqlglot/commit/cdbf5953171c8d4c8e4a24262f278c6f7d74e057) - Wrap GET_PATH value with PARSE_JSON preemptively *(PR [#5458](https://github.com/tobymao/sqlglot/pull/5458) by [@VaggelisD](https://github.com/VaggelisD))*: Wrap GET_PATH value with PARSE_JSON preemptively (#5458) - due to [`bee82f3`](https://github.com/tobymao/sqlglot/commit/bee82f37ac537780495ff408738d88871208517a) - Remove `UNKNOWN` type from `TRY_CAST` *(PR [#5466](https://github.com/tobymao/sqlglot/pull/5466) by [@VaggelisD](https://github.com/VaggelisD))*: Remove `UNKNOWN` type from `TRY_CAST` (#5466) ### :sparkles: New Features - [`b368fba`](https://github.com/tobymao/sqlglot/commit/b368fba59b606e038d445b2ca2d8436e115af3d6) - **optimizer**: parse and annotate type for ASCII *(PR [#5377](https://github.com/tobymao/sqlglot/pull/5377) by [@geooo109](https://github.com/geooo109))* - [`7f19b31`](https://github.com/tobymao/sqlglot/commit/7f19b31ebd7981e53a8f8ba343b4f3222fe160c7) - **optimizer**: annotate type for UNICODE *(PR [#5381](https://github.com/tobymao/sqlglot/pull/5381) by [@geooo109](https://github.com/geooo109))* - [`f035bf0`](https://github.com/tobymao/sqlglot/commit/f035bf0eb582aa07d4ad79e0ed1958ce0d091ad9) - **dremio**: Add TIME_MAPPING for Dremio dialect *(PR [#5378](https://github.com/tobymao/sqlglot/pull/5378) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`31cfd0f`](https://github.com/tobymao/sqlglot/commit/31cfd0fc3309bc1080b7a2ba8d40b2aba5c098a3) - **exasol**: add to_date and refactored to_char functions 
with respect to time mapping *(PR [#5379](https://github.com/tobymao/sqlglot/pull/5379) by [@nnamdi16](https://github.com/nnamdi16))* - [`bd3776e`](https://github.com/tobymao/sqlglot/commit/bd3776eaa26d40b44c4cebc2f3838b4055653548) - **doris**: add PROPERTIES_LOCATION mapping for Doris dialect *(PR [#5391](https://github.com/tobymao/sqlglot/pull/5391) by [@xinge-ji](https://github.com/xinge-ji))* - [`7eaa67a`](https://github.com/tobymao/sqlglot/commit/7eaa67acb216501046c739f56839418b84f244c0) - **doris**: properly supported PROPERTIES and UNIQUE KEY table prop *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1e78163`](https://github.com/tobymao/sqlglot/commit/1e78163b829e910e7960c79e7ab118c07d1ecdc3) - **duckdb**: support column access via index *(PR [#5395](https://github.com/tobymao/sqlglot/pull/5395) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5392](https://github.com/tobymao/sqlglot/issues/5392) opened by [@tekumara](https://github.com/tekumara)* - [`1014a67`](https://github.com/tobymao/sqlglot/commit/1014a6759b0917ef1bf5af0dbbdcca72214a8dea) - remove redundant todate in dayofweek closes [#5398](https://github.com/tobymao/sqlglot/pull/5398) *(PR [#5399](https://github.com/tobymao/sqlglot/pull/5399) by [@tobymao](https://github.com/tobymao))* - [`be52f78`](https://github.com/tobymao/sqlglot/commit/be52f7866b03e436d103d9201d1a44c6632c643a) - **exasol**: add support for CONVERT_TZ function *(PR [#5401](https://github.com/tobymao/sqlglot/pull/5401) by [@nnamdi16](https://github.com/nnamdi16))* - [`d637161`](https://github.com/tobymao/sqlglot/commit/d637161406faf623418f112162268bedb422213b) - **exasol**: add mapping to TIME_TO_STR in exasol dialect *(PR [#5403](https://github.com/tobymao/sqlglot/pull/5403) by [@nnamdi16](https://github.com/nnamdi16))* - [`b2631ae`](https://github.com/tobymao/sqlglot/commit/b2631aec8d1bdb08decb201b6bd2ba5d927bb121) - **optimizer**: annotate type for bigquery BIT_AND, 
BIT_OR, BIT_XOR, BIT_COUNT *(PR [#5405](https://github.com/tobymao/sqlglot/pull/5405) by [@geooo109](https://github.com/geooo109))* - [`b81ae62`](https://github.com/tobymao/sqlglot/commit/b81ae629bfb27760ddd832402a86dabe4e65072f) - **exasol**: map STR_TO_TIME to TO_DATE and *(PR [#5407](https://github.com/tobymao/sqlglot/pull/5407) by [@nnamdi16](https://github.com/nnamdi16))* - [`c2fb9ab`](https://github.com/tobymao/sqlglot/commit/c2fb9abeb2f077f00278e46efd9573a3806cd218) - add `DateStrToTime` *(PR [#5409](https://github.com/tobymao/sqlglot/pull/5409) by [@betodealmeida](https://github.com/betodealmeida))* - [`a95993a`](https://github.com/tobymao/sqlglot/commit/a95993ae4e8aa99969db059a534819a4f0b62b96) - **snowflake**: improve transpilation of queries with UNNEST sources *(PR [#5408](https://github.com/tobymao/sqlglot/pull/5408) by [@georgesittas](https://github.com/georgesittas))* - [`7b69f54`](https://github.com/tobymao/sqlglot/commit/7b69f545bbcfeb1e1f2f3b7e0b9757cfd675e4a5) - **snowflake**: Support SEMANTIC_VIEW *(PR [#5414](https://github.com/tobymao/sqlglot/pull/5414) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5406](https://github.com/tobymao/sqlglot/issues/5406) opened by [@jkillian](https://github.com/jkillian)* - [`7dba6f6`](https://github.com/tobymao/sqlglot/commit/7dba6f64d9a7945bbdef1b6e014d802014567a1e) - **exasol**: map AT TIME ZONE to CONVERT_TZ *(PR [#5416](https://github.com/tobymao/sqlglot/pull/5416) by [@nnamdi16](https://github.com/nnamdi16))* - [`25f2c1b`](https://github.com/tobymao/sqlglot/commit/25f2c1bb18f9d073b128150566cb27c0c2da0865) - **postgres**: query placeholders *(PR [#5415](https://github.com/tobymao/sqlglot/pull/5415) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5412](https://github.com/tobymao/sqlglot/issues/5412) opened by [@aersam](https://github.com/aersam)* - 
[`c309c87`](https://github.com/tobymao/sqlglot/commit/c309c8763a90bf0bce02e21f4088b38d85556cce) - **doris**: support range partitioning *(PR [#5402](https://github.com/tobymao/sqlglot/pull/5402) by [@xinge-ji](https://github.com/xinge-ji))* - [`394d3a8`](https://github.com/tobymao/sqlglot/commit/394d3a81ef41d3052c0b0d6e48180c344b7db143) - **dremio**: Add support for DATE_ADD and DATE_SUB *(PR [#5411](https://github.com/tobymao/sqlglot/pull/5411) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`9cfac4f`](https://github.com/tobymao/sqlglot/commit/9cfac4fb04ce1fd038c3e8cbdb755cc24c052497) - **doris**: enhance partitioning support *(PR [#5421](https://github.com/tobymao/sqlglot/pull/5421) by [@xinge-ji](https://github.com/xinge-ji))* - [`a018bea`](https://github.com/tobymao/sqlglot/commit/a018bea159261a3ad4ac082f29e30fe1153995b3) - **exasol**: mapped exp.CurrentUser to exasol CURRENT_USER *(PR [#5422](https://github.com/tobymao/sqlglot/pull/5422) by [@nnamdi16](https://github.com/nnamdi16))* - [`489dc5c`](https://github.com/tobymao/sqlglot/commit/489dc5c2f7506e0fe4de549384dd0f816e9fd12f) - **optimizer**: parse and annotate type support for JSON_ARRAY *(PR [#5424](https://github.com/tobymao/sqlglot/pull/5424) by [@geooo109](https://github.com/geooo109))* - [`0ed518c`](https://github.com/tobymao/sqlglot/commit/0ed518c67042002ee0af91bee0b9e7093c85f926) - **optimizer**: annotate type for bigquery JSON_VALUE *(PR [#5427](https://github.com/tobymao/sqlglot/pull/5427) by [@geooo109](https://github.com/geooo109))* - [`6091617`](https://github.com/tobymao/sqlglot/commit/6091617067c263e3e834e579b37aa1c601b1ddc7) - **optimizer**: annotate type for bigquery JSON_VALUE_ARRAY *(PR [#5428](https://github.com/tobymao/sqlglot/pull/5428) by [@geooo109](https://github.com/geooo109))* - [`631c851`](https://github.com/tobymao/sqlglot/commit/631c851cbbfbf55cb66a79c2549aeeb443fcab83) - **optimizer**: parse and annotate type support for bigquery JSON_TYPE *(PR 
[#5430](https://github.com/tobymao/sqlglot/pull/5430) by [@geooo109](https://github.com/geooo109))* - [`732548f`](https://github.com/tobymao/sqlglot/commit/732548ff7a6792cfa38dba8b3b8a73a302532ae7) - **postgresql**: add support for table creation DDL that contains a primary key alongside the INCLUDE keyword *(PR [#5425](https://github.com/tobymao/sqlglot/pull/5425) by [@amosbiras](https://github.com/amosbiras))* - [`9f887f1`](https://github.com/tobymao/sqlglot/commit/9f887f14d20cd493b4a0a4489649fc5b9f2ae7fd) - Add support for BETWEEN flags *(PR [#5435](https://github.com/tobymao/sqlglot/pull/5435) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`edef00a`](https://github.com/tobymao/sqlglot/commit/edef00af9b703ace76871b989d9b94d9c30dcafd) - **duckdb**: Add reset command for duckdb *(PR [#5448](https://github.com/tobymao/sqlglot/pull/5448) by [@themisvaltinos](https://github.com/themisvaltinos))* - [`6268f6f`](https://github.com/tobymao/sqlglot/commit/6268f6f39dda1ca5cf2ad0403e219b49c5c9593a) - **tsql**: add default precision to CHAR/VARCHAR create expressions *(PR [#5434](https://github.com/tobymao/sqlglot/pull/5434) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`bbf26e9`](https://github.com/tobymao/sqlglot/commit/bbf26e9610bee341d4e6df12a031b05ff6b57861) - **mysql**: Add support for SELECT DISTINCTROW *(PR [#5446](https://github.com/tobymao/sqlglot/pull/5446) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5445](https://github.com/tobymao/sqlglot/issues/5445) opened by [@chenweida123](https://github.com/chenweida123)* - [`8467bad`](https://github.com/tobymao/sqlglot/commit/8467bad405e27c842c989e71588adc39cf2383fc) - add parsing/generating for BigQuery `DECLARE` *(PR [#5442](https://github.com/tobymao/sqlglot/pull/5442) by [@plaflamme](https://github.com/plaflamme))* - [`190f8ab`](https://github.com/tobymao/sqlglot/commit/190f8abe3d3bbda09e2f945287398d2aa9d6a863) - improve BigQuery `UNNEST` 
transpilation *(PR [#5451](https://github.com/tobymao/sqlglot/pull/5451) by [@georgesittas](https://github.com/georgesittas))* - [`dbef44d`](https://github.com/tobymao/sqlglot/commit/dbef44db64d8c80e5000c55c981e0de89054e6eb) - **exasol**: mapped STRPOS to INSTR in exasol dialect *(PR [#5454](https://github.com/tobymao/sqlglot/pull/5454) by [@nnamdi16](https://github.com/nnamdi16))* - [`010c34c`](https://github.com/tobymao/sqlglot/commit/010c34c1803df0223cf65263f2fb03b404e5141c) - support `DESC SEMANTIC VIEW` *(PR [#5452](https://github.com/tobymao/sqlglot/pull/5452) by [@betodealmeida](https://github.com/betodealmeida))* - [`9795021`](https://github.com/tobymao/sqlglot/commit/9795021ff35bae17ff5a9ba7c5cdb46a75aab63b) - **exasol**: transformed column comments *(PR [#5464](https://github.com/tobymao/sqlglot/pull/5464) by [@nnamdi16](https://github.com/nnamdi16))* - [`4c5b687`](https://github.com/tobymao/sqlglot/commit/4c5b68746dcede62ca9d1217bd428f50a1731e2c) - **snowflake**: transpile IS (IS can only be used with NULL) *(PR [#5467](https://github.com/tobymao/sqlglot/pull/5467) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`d7ccb48`](https://github.com/tobymao/sqlglot/commit/d7ccb48e542c49258e31cc4df45f49beebc2e238) - **duckdb**: week/quarter support *(PR [#5374](https://github.com/tobymao/sqlglot/pull/5374) by [@eakmanrq](https://github.com/eakmanrq))* - [`252469d`](https://github.com/tobymao/sqlglot/commit/252469d2d0ed221dbb2fde86043506ad15dbe7e5) - **snowflake**: transpile bigquery CURRENT_DATE with timezone *(PR [#5387](https://github.com/tobymao/sqlglot/pull/5387) by [@geooo109](https://github.com/geooo109))* - [`7511853`](https://github.com/tobymao/sqlglot/commit/751185325caf838107ecb4e8f35ad77bf3cc9bf2) - **postgres**: add XML type *(PR [#5396](https://github.com/tobymao/sqlglot/pull/5396) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5393](https://github.com/tobymao/sqlglot/issues/5393) 
opened by [@aersam](https://github.com/aersam)* - [`9e8d3ab`](https://github.com/tobymao/sqlglot/commit/9e8d3abedcffb1c267ed0e6a8332af3b52105d41) - **optimizer**: Preserve struct-column parentheses for RisingWave dialect *(PR [#5376](https://github.com/tobymao/sqlglot/pull/5376) by [@MisterWheatley](https://github.com/MisterWheatley))* - [`3223e63`](https://github.com/tobymao/sqlglot/commit/3223e6394fdd3f8e48c68bbb940b661ff8e76fd8) - **fabric**: cast datetimeoffset to datetime2 *(PR [#5385](https://github.com/tobymao/sqlglot/pull/5385) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`12b49dd`](https://github.com/tobymao/sqlglot/commit/12b49dd800951a48ea8bc0f01d7c35340236f559) - remove equal sign from CREATE TABLE comment (doris, starrocks) *(PR [#5390](https://github.com/tobymao/sqlglot/pull/5390) by [@xinge-ji](https://github.com/xinge-ji))* - [`06cea31`](https://github.com/tobymao/sqlglot/commit/06cea310bd9fd3a9a9fa0ba008596e878a430df8) - **postgres**: support KEY related locks *(PR [#5397](https://github.com/tobymao/sqlglot/pull/5397) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5394](https://github.com/tobymao/sqlglot/issues/5394) opened by [@aurimasandriusaitis](https://github.com/aurimasandriusaitis)* - [`92d93a6`](https://github.com/tobymao/sqlglot/commit/92d93a624b41df8bb4628c1f2d0cbb8c7844c927) - **parser**: do not consume modifier prefixes in group parser, fixes [#5400](https://github.com/tobymao/sqlglot/pull/5400) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ba0c801`](https://github.com/tobymao/sqlglot/commit/ba0c801e3dab8e08d4b5f7f73247ec6cfdc667e5) - **tsql**: change READ_ONLY to READONLY *(PR [#5410](https://github.com/tobymao/sqlglot/pull/5410) by [@CrispinStichartFNSB](https://github.com/CrispinStichartFNSB))* - [`63da895`](https://github.com/tobymao/sqlglot/commit/63da89563fddc13ee7aec06ee36d8a0f74227ee1) - **risingwave**: Fix RisingWave dialect SQL for MAP datatype declaration 
*(PR [#5418](https://github.com/tobymao/sqlglot/pull/5418) by [@MisterWheatley](https://github.com/MisterWheatley))* - [`edacae1`](https://github.com/tobymao/sqlglot/commit/edacae183fe26ea25bffe1bccd335bf57ed34ecb) - **snowflake**: transpile bigquery GENERATE_DATE_ARRAY with column access *(PR [#5388](https://github.com/tobymao/sqlglot/pull/5388) by [@geooo109](https://github.com/geooo109))* - [`5835b8d`](https://github.com/tobymao/sqlglot/commit/5835b8d6c7fe77d9645691bb88021af137ed0bac) - **duckdb**: make bracket parsing aware of duckdb MAP func *(PR [#5423](https://github.com/tobymao/sqlglot/pull/5423) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5417](https://github.com/tobymao/sqlglot/issues/5417) opened by [@MisterWheatley](https://github.com/MisterWheatley)* - [`5c59816`](https://github.com/tobymao/sqlglot/commit/5c59816f5572f8adb1de9c97f0007d19091910ec) - **snowflake**: ALTER TABLE ADD with multiple columns *(PR [#5431](https://github.com/tobymao/sqlglot/pull/5431) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5426](https://github.com/tobymao/sqlglot/issues/5426) opened by [@ca0904](https://github.com/ca0904)* - [`9f860a0`](https://github.com/tobymao/sqlglot/commit/9f860a0ce47f74930efa1afcd86fe7668a40c239) - **snowflake**: ALTER TABLE ADD with IF NOT EXISTS *(PR [#5438](https://github.com/tobymao/sqlglot/pull/5438) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5432](https://github.com/tobymao/sqlglot/issues/5432) opened by [@ca0904](https://github.com/ca0904)* - [`d7b3a26`](https://github.com/tobymao/sqlglot/commit/d7b3a261647e4ce675c84bbf72a33d320099fc01) - **postgres**: transpile duckdb LIST_HAS_ANY and LIST_CONTAINS *(PR [#5440](https://github.com/tobymao/sqlglot/pull/5440) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5437](https://github.com/tobymao/sqlglot/issues/5437) opened by 
[@aersam](https://github.com/aersam)* - [`79c5c30`](https://github.com/tobymao/sqlglot/commit/79c5c30f3802c6959376b3b0f3c4d055a30b6b43) - **spark**: transpile STRING_AGG *(PR [#5449](https://github.com/tobymao/sqlglot/pull/5449) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5441](https://github.com/tobymao/sqlglot/issues/5441) opened by [@dxaen](https://github.com/dxaen)* - [`3590e75`](https://github.com/tobymao/sqlglot/commit/3590e75c1df2d572e2fea664893dba5565a17e05) - **postgres**: support ? placeholder *(PR [#5455](https://github.com/tobymao/sqlglot/pull/5455) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5453](https://github.com/tobymao/sqlglot/issues/5453) opened by [@jkillian](https://github.com/jkillian)* - [`cdbf595`](https://github.com/tobymao/sqlglot/commit/cdbf5953171c8d4c8e4a24262f278c6f7d74e057) - **snowflake**: Wrap GET_PATH value with PARSE_JSON preemptively *(PR [#5458](https://github.com/tobymao/sqlglot/pull/5458) by [@VaggelisD](https://github.com/VaggelisD))* - [`8f16f52`](https://github.com/tobymao/sqlglot/commit/8f16f52859b66e3f8b30fff82f0c1679c7e37a25) - restore default `sql_names` for `DecodeCase` *(PR [#5465](https://github.com/tobymao/sqlglot/pull/5465) by [@georgesittas](https://github.com/georgesittas))* - [`bee82f3`](https://github.com/tobymao/sqlglot/commit/bee82f37ac537780495ff408738d88871208517a) - **snowflake**: Remove `UNKNOWN` type from `TRY_CAST` *(PR [#5466](https://github.com/tobymao/sqlglot/pull/5466) by [@VaggelisD](https://github.com/VaggelisD))* ### :wrench: Chores - [`71b1349`](https://github.com/tobymao/sqlglot/commit/71b1349a26d2b9839899900ef8fdfb1ebc3d68fd) - **postgres, hive**: use ASCII node instead of UNICODE node *(PR [#5380](https://github.com/tobymao/sqlglot/pull/5380) by [@geooo109](https://github.com/geooo109))* - [`a5c2245`](https://github.com/tobymao/sqlglot/commit/a5c2245c3e30f5bc3f410edacf3a077ce99f4a80) - improve error msg for PIVOT 
with missing aggregation *(commit by [@georgesittas](https://github.com/georgesittas))* - [`59fd875`](https://github.com/tobymao/sqlglot/commit/59fd875cd4ee1c44f9ca20f701215ae64d669d60) - Refactor PRIMARY KEY ... INCLUDE handling *(PR [#5433](https://github.com/tobymao/sqlglot/pull/5433) by [@VaggelisD](https://github.com/VaggelisD))* - [`e9bb3e8`](https://github.com/tobymao/sqlglot/commit/e9bb3e8ccb52c76ed77fc5e7d04cf75230b737fa) - Refactor DECLARE statement *(PR [#5450](https://github.com/tobymao/sqlglot/pull/5450) by [@VaggelisD](https://github.com/VaggelisD))* ## [v27.2.0] - 2025-07-22 ### :boom: BREAKING CHANGES - due to [`6268f6f`](https://github.com/tobymao/sqlglot/commit/6268f6f39dda1ca5cf2ad0403e219b49c5c9593a) - add default precision to CHAR/VARCHAR create expressions *(PR [#5434](https://github.com/tobymao/sqlglot/pull/5434) by [@mattiasthalen](https://github.com/mattiasthalen))*: add default precision to CHAR/VARCHAR create expressions (#5434) - due to [`8467bad`](https://github.com/tobymao/sqlglot/commit/8467bad405e27c842c989e71588adc39cf2383fc) - add parsing/generating for BigQuery `DECLARE` *(PR [#5442](https://github.com/tobymao/sqlglot/pull/5442) by [@plaflamme](https://github.com/plaflamme))*: add parsing/generating for BigQuery `DECLARE` (#5442) - due to [`79c5c30`](https://github.com/tobymao/sqlglot/commit/79c5c30f3802c6959376b3b0f3c4d055a30b6b43) - transpile STRING_AGG *(PR [#5449](https://github.com/tobymao/sqlglot/pull/5449) by [@geooo109](https://github.com/geooo109))*: transpile STRING_AGG (#5449) - due to [`190f8ab`](https://github.com/tobymao/sqlglot/commit/190f8abe3d3bbda09e2f945287398d2aa9d6a863) - improve BigQuery `UNNEST` transpilation *(PR [#5451](https://github.com/tobymao/sqlglot/pull/5451) by [@georgesittas](https://github.com/georgesittas))*: improve BigQuery `UNNEST` transpilation (#5451) ### :sparkles: New Features - [`732548f`](https://github.com/tobymao/sqlglot/commit/732548ff7a6792cfa38dba8b3b8a73a302532ae7) - 
**postgresql**: add support for table creation DDL that contains a primary key alongside the INCLUDE keyword *(PR [#5425](https://github.com/tobymao/sqlglot/pull/5425) by [@amosbiras](https://github.com/amosbiras))* - [`9f887f1`](https://github.com/tobymao/sqlglot/commit/9f887f14d20cd493b4a0a4489649fc5b9f2ae7fd) - Add support for BETWEEN flags *(PR [#5435](https://github.com/tobymao/sqlglot/pull/5435) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`edef00a`](https://github.com/tobymao/sqlglot/commit/edef00af9b703ace76871b989d9b94d9c30dcafd) - **duckdb**: Add reset command for duckdb *(PR [#5448](https://github.com/tobymao/sqlglot/pull/5448) by [@themisvaltinos](https://github.com/themisvaltinos))* - [`6268f6f`](https://github.com/tobymao/sqlglot/commit/6268f6f39dda1ca5cf2ad0403e219b49c5c9593a) - **tsql**: add default precision to CHAR/VARCHAR create expressions *(PR [#5434](https://github.com/tobymao/sqlglot/pull/5434) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`bbf26e9`](https://github.com/tobymao/sqlglot/commit/bbf26e9610bee341d4e6df12a031b05ff6b57861) - **mysql**: Add support for SELECT DISTINCTROW *(PR [#5446](https://github.com/tobymao/sqlglot/pull/5446) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5445](https://github.com/tobymao/sqlglot/issues/5445) opened by [@chenweida123](https://github.com/chenweida123)* - [`8467bad`](https://github.com/tobymao/sqlglot/commit/8467bad405e27c842c989e71588adc39cf2383fc) - add parsing/generating for BigQuery `DECLARE` *(PR [#5442](https://github.com/tobymao/sqlglot/pull/5442) by [@plaflamme](https://github.com/plaflamme))* - [`190f8ab`](https://github.com/tobymao/sqlglot/commit/190f8abe3d3bbda09e2f945287398d2aa9d6a863) - improve BigQuery `UNNEST` transpilation *(PR [#5451](https://github.com/tobymao/sqlglot/pull/5451) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - 
[`9f860a0`](https://github.com/tobymao/sqlglot/commit/9f860a0ce47f74930efa1afcd86fe7668a40c239) - **snowflake**: ALTER TABLE ADD with IF NOT EXISTS *(PR [#5438](https://github.com/tobymao/sqlglot/pull/5438) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5432](https://github.com/tobymao/sqlglot/issues/5432) opened by [@ca0904](https://github.com/ca0904)* - [`d7b3a26`](https://github.com/tobymao/sqlglot/commit/d7b3a261647e4ce675c84bbf72a33d320099fc01) - **postgres**: transpile duckdb LIST_HAS_ANY and LIST_CONTAINS *(PR [#5440](https://github.com/tobymao/sqlglot/pull/5440) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5437](https://github.com/tobymao/sqlglot/issues/5437) opened by [@aersam](https://github.com/aersam)* - [`79c5c30`](https://github.com/tobymao/sqlglot/commit/79c5c30f3802c6959376b3b0f3c4d055a30b6b43) - **spark**: transpile STRING_AGG *(PR [#5449](https://github.com/tobymao/sqlglot/pull/5449) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5441](https://github.com/tobymao/sqlglot/issues/5441) opened by [@dxaen](https://github.com/dxaen)* ### :wrench: Chores - [`59fd875`](https://github.com/tobymao/sqlglot/commit/59fd875cd4ee1c44f9ca20f701215ae64d669d60) - Refactor PRIMARY KEY ... 
INCLUDE handling *(PR [#5433](https://github.com/tobymao/sqlglot/pull/5433) by [@VaggelisD](https://github.com/VaggelisD))* - [`e9bb3e8`](https://github.com/tobymao/sqlglot/commit/e9bb3e8ccb52c76ed77fc5e7d04cf75230b737fa) - Refactor DECLARE statement *(PR [#5450](https://github.com/tobymao/sqlglot/pull/5450) by [@VaggelisD](https://github.com/VaggelisD))* ## [v27.1.0] - 2025-07-18 ### :boom: BREAKING CHANGES - due to [`5724538`](https://github.com/tobymao/sqlglot/commit/5724538f278b2178114b88850251afd7c3db0dda) - ARRAY_CONCAT type annotation *(PR [#5293](https://github.com/tobymao/sqlglot/pull/5293) by [@geooo109](https://github.com/geooo109))*: ARRAY_CONCAT type annotation (#5293) - due to [`c103b23`](https://github.com/tobymao/sqlglot/commit/c103b2304dca552ac8cf6733156db8b59d3614f3) - add support for `SUBSTRING_INDEX` *(PR [#5296](https://github.com/tobymao/sqlglot/pull/5296) by [@ankur334](https://github.com/ankur334))*: add support for `SUBSTRING_INDEX` (#5296) - due to [`a7bd823`](https://github.com/tobymao/sqlglot/commit/a7bd8234e0dd02abfe6fa56287e7bda14a549e5a) - annotate type of ARRAY_TO_STRING *(PR [#5301](https://github.com/tobymao/sqlglot/pull/5301) by [@geooo109](https://github.com/geooo109))*: annotate type of ARRAY_TO_STRING (#5301) - due to [`6b42353`](https://github.com/tobymao/sqlglot/commit/6b4235340a2e432015c27b2aeadbdcb930bfa6b0) - annotate type of ARRAY_FIRST, ARRAY_LAST *(PR [#5303](https://github.com/tobymao/sqlglot/pull/5303) by [@geooo109](https://github.com/geooo109))*: annotate type of ARRAY_FIRST, ARRAY_LAST (#5303) - due to [`db9b61e`](https://github.com/tobymao/sqlglot/commit/db9b61e4ecaa0600418eb90f637fb8b06b08c399) - parse, annotate type for ARRAY_REVERSE *(PR [#5306](https://github.com/tobymao/sqlglot/pull/5306) by [@geooo109](https://github.com/geooo109))*: parse, annotate type for ARRAY_REVERSE (#5306) - due to [`5612a6d`](https://github.com/tobymao/sqlglot/commit/5612a6da6dee3545f3600db1e5b87c9450952eba) - add support for SPACE 
*(PR [#5308](https://github.com/tobymao/sqlglot/pull/5308) by [@ankur334](https://github.com/ankur334))*: add support for SPACE (#5308) - due to [`8a2f65d`](https://github.com/tobymao/sqlglot/commit/8a2f65d6b2b68ad5ba45a5aed5e56c4dc0fea6fc) - parse and annotate type for ARRAY_SLICE *(PR [#5312](https://github.com/tobymao/sqlglot/pull/5312) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for ARRAY_SLICE (#5312) - due to [`8d118ea`](https://github.com/tobymao/sqlglot/commit/8d118ead9c15e7b2b4b51b7cf93cab94e61c2625) - route statements to hive/trino depending on their type *(PR [#5314](https://github.com/tobymao/sqlglot/pull/5314) by [@georgesittas](https://github.com/georgesittas))*: route statements to hive/trino depending on their type (#5314) - due to [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))*: add TypeOf / toTypeName support (#5315) - due to [`5a0f589`](https://github.com/tobymao/sqlglot/commit/5a0f589a0fdb6743c3be2f98b74a34780f51332b) - distinguish STORED AS from USING *(PR [#5320](https://github.com/tobymao/sqlglot/pull/5320) by [@geooo109](https://github.com/geooo109))*: distinguish STORED AS from USING (#5320) - due to [`c4ca182`](https://github.com/tobymao/sqlglot/commit/c4ca182ad637b7a22b55d0ecf320c5a09ec5d56c) - annotate type for FROM_BASE64 *(PR [#5329](https://github.com/tobymao/sqlglot/pull/5329) by [@geooo109](https://github.com/geooo109))*: annotate type for FROM_BASE64 (#5329) - due to [`7b72bbe`](https://github.com/tobymao/sqlglot/commit/7b72bbed3a0930e11ce4a0fdd9082de715326ac9) - annotate type for ANY_VALUE *(PR [#5331](https://github.com/tobymao/sqlglot/pull/5331) by [@geooo109](https://github.com/geooo109))*: annotate type for ANY_VALUE (#5331) - due to 
[`c0d57e7`](https://github.com/tobymao/sqlglot/commit/c0d57e747bf5d2bed7ba2007ac2092d5797ee038) - annotate type for CHR *(PR [#5332](https://github.com/tobymao/sqlglot/pull/5332) by [@geooo109](https://github.com/geooo109))*: annotate type for CHR (#5332) - due to [`d65b5c2`](https://github.com/tobymao/sqlglot/commit/d65b5c22c29416007cca0154fd35f1d4b5efc929) - annotate type for COUNTIF *(PR [#5334](https://github.com/tobymao/sqlglot/pull/5334) by [@geooo109](https://github.com/geooo109))*: annotate type for COUNTIF (#5334) - due to [`521b705`](https://github.com/tobymao/sqlglot/commit/521b7053213df8577f609409af2552c2ff4fd8c9) - annotate type for GENERATE_ARRAY *(PR [#5335](https://github.com/tobymao/sqlglot/pull/5335) by [@geooo109](https://github.com/geooo109))*: annotate type for GENERATE_ARRAY (#5335) - due to [`5fb26c5`](https://github.com/tobymao/sqlglot/commit/5fb26c58026018360f36a732394b612a3baac38b) - annotate type for INT64 *(PR [#5339](https://github.com/tobymao/sqlglot/pull/5339) by [@geooo109](https://github.com/geooo109))*: annotate type for INT64 (#5339) - due to [`cff9b55`](https://github.com/tobymao/sqlglot/commit/cff9b55d70a3b85057e6385c93c0814eaa50f40b) - annotate type for LOGICAL_AND and LOGICAL_OR *(PR [#5340](https://github.com/tobymao/sqlglot/pull/5340) by [@geooo109](https://github.com/geooo109))*: annotate type for LOGICAL_AND and LOGICAL_OR (#5340) - due to [`b94a6f9`](https://github.com/tobymao/sqlglot/commit/b94a6f9228aa730296c3152179bfbf3503521063) - annotate type for MAKE_INTERVAL *(PR [#5341](https://github.com/tobymao/sqlglot/pull/5341) by [@geooo109](https://github.com/geooo109))*: annotate type for MAKE_INTERVAL (#5341) - due to [`2c9a7c6`](https://github.com/tobymao/sqlglot/commit/2c9a7c6f0b097a9e8514fc5e2af21c52f145920c) - annotate type for LAST_VALUE *(PR [#5336](https://github.com/tobymao/sqlglot/pull/5336) by [@geooo109](https://github.com/geooo109))*: annotate type for LAST_VALUE (#5336) - due to 
[`d862a28`](https://github.com/tobymao/sqlglot/commit/d862a28b0a30f0c5774351f38a61f195120ad904) - annotate type for TO_BASE64 *(PR [#5342](https://github.com/tobymao/sqlglot/pull/5342) by [@geooo109](https://github.com/geooo109))*: annotate type for TO_BASE64 (#5342) - due to [`85888c1`](https://github.com/tobymao/sqlglot/commit/85888c1b7cbbd0eee179d902a54fbd2a899cc16b) - annotate type for UNIX_DATE *(PR [#5343](https://github.com/tobymao/sqlglot/pull/5343) by [@geooo109](https://github.com/geooo109))*: annotate type for UNIX_DATE (#5343) - due to [`8a214e0`](https://github.com/tobymao/sqlglot/commit/8a214e0859dfb715fcef0dd6b2d6392012b1f3fb) - annotate type for UNIX_SECONDS *(PR [#5344](https://github.com/tobymao/sqlglot/pull/5344) by [@geooo109](https://github.com/geooo109))*: annotate type for UNIX_SECONDS (#5344) - due to [`625cb74`](https://github.com/tobymao/sqlglot/commit/625cb74b69e99ea1a707549366ea960d759848c9) - annotate type for STARTS_WITH *(PR [#5345](https://github.com/tobymao/sqlglot/pull/5345) by [@geooo109](https://github.com/geooo109))*: annotate type for STARTS_WITH (#5345) - due to [`0337c4d`](https://github.com/tobymao/sqlglot/commit/0337c4d46e9e85d951fc9565a47e338106543711) - annotate type for SHA and SHA2 *(PR [#5346](https://github.com/tobymao/sqlglot/pull/5346) by [@geooo109](https://github.com/geooo109))*: annotate type for SHA and SHA2 (#5346) - due to [`cc389fa`](https://github.com/tobymao/sqlglot/commit/cc389facb33f94a0d1f696f2ef9e92f298711894) - annotate type SHA1, SHA256, SHA512 for BigQuery *(PR [#5347](https://github.com/tobymao/sqlglot/pull/5347) by [@geooo109](https://github.com/geooo109))*: annotate type SHA1, SHA256, SHA512 for BigQuery (#5347) - due to [`509b741`](https://github.com/tobymao/sqlglot/commit/509b74173f678842e7550c75c4d8d906c879fb12) - preserve multi-arg DECODE function instead of converting to CASE *(PR [#5352](https://github.com/tobymao/sqlglot/pull/5352) by [@georgesittas](https://github.com/georgesittas))*: 
preserve multi-arg DECODE function instead of converting to CASE (#5352) - due to [`c1d3d61`](https://github.com/tobymao/sqlglot/commit/c1d3d61d00f00d2030107689d8704f7a488a80a7) - annotate type for CORR *(PR [#5364](https://github.com/tobymao/sqlglot/pull/5364) by [@geooo109](https://github.com/geooo109))*: annotate type for CORR (#5364) - due to [`c1e8677`](https://github.com/tobymao/sqlglot/commit/c1e867767a006e774a2c200c10eb85b3fbd8a372) - annotate type for COVAR_POP *(PR [#5365](https://github.com/tobymao/sqlglot/pull/5365) by [@geooo109](https://github.com/geooo109))*: annotate type for COVAR_POP (#5365) - due to [`e110ef4`](https://github.com/tobymao/sqlglot/commit/e110ef4f774e6ab8de6d4c86e5d306ab53fe895b) - annotate type for COVAR_SAMP *(PR [#5367](https://github.com/tobymao/sqlglot/pull/5367) by [@geooo109](https://github.com/geooo109))*: annotate type for COVAR_SAMP (#5367) - due to [`5b59c16`](https://github.com/tobymao/sqlglot/commit/5b59c16528fb1904c64bef0ca6307bb6a95e5a2c) - annotate type for DATETIME *(PR [#5369](https://github.com/tobymao/sqlglot/pull/5369) by [@geooo109](https://github.com/geooo109))*: annotate type for DATETIME (#5369) - due to [`47176ce`](https://github.com/tobymao/sqlglot/commit/47176ce6b9a4c1722f285034b08a6ae782129894) - annotate type for ENDS_WITH *(PR [#5370](https://github.com/tobymao/sqlglot/pull/5370) by [@geooo109](https://github.com/geooo109))*: annotate type for ENDS_WITH (#5370) - due to [`2cce53d`](https://github.com/tobymao/sqlglot/commit/2cce53d59968f0a4bb3e9599ade93b0e6a140c68) - annotate type for LAG *(PR [#5371](https://github.com/tobymao/sqlglot/pull/5371) by [@geooo109](https://github.com/geooo109))*: annotate type for LAG (#5371) - due to [`a3227de`](https://github.com/tobymao/sqlglot/commit/a3227de3fc57d559eb899dec08af01f85b470ce4) - improve transpilation of `ROUND(x, y)` to Postgres *(PR [#5368](https://github.com/tobymao/sqlglot/pull/5368) by [@blecourt-private](https://github.com/blecourt-private))*: 
improve transpilation of `ROUND(x, y)` to Postgres (#5368) - due to [`d7ccb48`](https://github.com/tobymao/sqlglot/commit/d7ccb48e542c49258e31cc4df45f49beebc2e238) - week/quarter support *(PR [#5374](https://github.com/tobymao/sqlglot/pull/5374) by [@eakmanrq](https://github.com/eakmanrq))*: week/quarter support (#5374) - due to [`b368fba`](https://github.com/tobymao/sqlglot/commit/b368fba59b606e038d445b2ca2d8436e115af3d6) - parse and annotate type for ASCII *(PR [#5377](https://github.com/tobymao/sqlglot/pull/5377) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for ASCII (#5377) - due to [`7f19b31`](https://github.com/tobymao/sqlglot/commit/7f19b31ebd7981e53a8f8ba343b4f3222fe160c7) - annotate type for UNICODE *(PR [#5381](https://github.com/tobymao/sqlglot/pull/5381) by [@geooo109](https://github.com/geooo109))*: annotate type for UNICODE (#5381) - due to [`9e8d3ab`](https://github.com/tobymao/sqlglot/commit/9e8d3abedcffb1c267ed0e6a8332af3b52105d41) - Preserve struct-column parentheses for RisingWave dialect *(PR [#5376](https://github.com/tobymao/sqlglot/pull/5376) by [@MisterWheatley](https://github.com/MisterWheatley))*: Added dialect as argument to `simplify_parens` function * style: Ran formatter and tests. Fixed type annotation for simplify_parens * Fix: Make dialect in `simplify_parens` optional. 
Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> * Fix(optimizer): Tweaks to make simple non-nested star expand pass unit test for RW * Fix(optimizer): Added test for deep nested unpacking for BigQuery and RisingWave * style: Ran formatting check * fix: Remove unused function from RisingWave dialect test * docs: updated docstring of new _expand_struct_stars_risingwave internal function * fix: apply suggestions from code review 2 Co-authored-by: Jo <46752250+georgesittas@users.noreply.github.com> * fix(optimizer,risingwave): Ensure that struct star-expansion to the correct level for RisingWave Updated logic for expanding (struct_col).* expressions in RisingWave to correctly handle the level of nesting. Moved struct expansion tests to tests/fixtures/qualify_columns.sql on behest of maintainers. --------- - due to [`3223e63`](https://github.com/tobymao/sqlglot/commit/3223e6394fdd3f8e48c68bbb940b661ff8e76fd8) - cast datetimeoffset to datetime2 *(PR [#5385](https://github.com/tobymao/sqlglot/pull/5385) by [@mattiasthalen](https://github.com/mattiasthalen))*: cast datetimeoffset to datetime2 (#5385) - due to [`06cea31`](https://github.com/tobymao/sqlglot/commit/06cea310bd9fd3a9a9fa0ba008596e878a430df8) - support KEY related locks *(PR [#5397](https://github.com/tobymao/sqlglot/pull/5397) by [@geooo109](https://github.com/geooo109))*: support KEY related locks (#5397) - due to [`1014a67`](https://github.com/tobymao/sqlglot/commit/1014a6759b0917ef1bf5af0dbbdcca72214a8dea) - remove redundant todate in dayofweek closes [#5398](https://github.com/tobymao/sqlglot/pull/5398) *(PR [#5399](https://github.com/tobymao/sqlglot/pull/5399) by [@tobymao](https://github.com/tobymao))*: remove redundant todate in dayofweek closes #5398 (#5399) - due to [`b2631ae`](https://github.com/tobymao/sqlglot/commit/b2631aec8d1bdb08decb201b6bd2ba5d927bb121) - annotate type for bigquery BIT_AND, BIT_OR, BIT_XOR, BIT_COUNT *(PR [#5405](https://github.com/tobymao/sqlglot/pull/5405) 
by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery BIT_AND, BIT_OR, BIT_XOR, BIT_COUNT (#5405) - due to [`5835b8d`](https://github.com/tobymao/sqlglot/commit/5835b8d6c7fe77d9645691bb88021af137ed0bac) - make bracket parsing aware of duckdb MAP func *(PR [#5423](https://github.com/tobymao/sqlglot/pull/5423) by [@geooo109](https://github.com/geooo109))*: make bracket parsing aware of duckdb MAP func (#5423) - due to [`489dc5c`](https://github.com/tobymao/sqlglot/commit/489dc5c2f7506e0fe4de549384dd0f816e9fd12f) - parse and annotate type support for JSON_ARRAY *(PR [#5424](https://github.com/tobymao/sqlglot/pull/5424) by [@geooo109](https://github.com/geooo109))*: parse and annotate type support for JSON_ARRAY (#5424) - due to [`0ed518c`](https://github.com/tobymao/sqlglot/commit/0ed518c67042002ee0af91bee0b9e7093c85f926) - annotate type for bigquery JSON_VALUE *(PR [#5427](https://github.com/tobymao/sqlglot/pull/5427) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery JSON_VALUE (#5427) - due to [`6091617`](https://github.com/tobymao/sqlglot/commit/6091617067c263e3e834e579b37aa1c601b1ddc7) - annotate type for bigquery JSON_VALUE_ARRAY *(PR [#5428](https://github.com/tobymao/sqlglot/pull/5428) by [@geooo109](https://github.com/geooo109))*: annotate type for bigquery JSON_VALUE_ARRAY (#5428) - due to [`631c851`](https://github.com/tobymao/sqlglot/commit/631c851cbbfbf55cb66a79c2549aeeb443fcab83) - parse and annotate type support for bigquery JSON_TYPE *(PR [#5430](https://github.com/tobymao/sqlglot/pull/5430) by [@geooo109](https://github.com/geooo109))*: parse and annotate type support for bigquery JSON_TYPE (#5430) ### :sparkles: New Features - [`ba7bf39`](https://github.com/tobymao/sqlglot/commit/ba7bf39966b519e11cde02a3c1f720598469e616) - **exasol**: implemented BIT_AND function with test *(PR [#5294](https://github.com/tobymao/sqlglot/pull/5294) by [@nnamdi16](https://github.com/nnamdi16))* - 
[`fb4122e`](https://github.com/tobymao/sqlglot/commit/fb4122e80d1995bb87401e9ebe3749078c026a06) - **exasol**: add bitwiseOr function to exasol dialect *(PR [#5297](https://github.com/tobymao/sqlglot/pull/5297) by [@nnamdi16](https://github.com/nnamdi16))* - [`c103b23`](https://github.com/tobymao/sqlglot/commit/c103b2304dca552ac8cf6733156db8b59d3614f3) - add support for `SUBSTRING_INDEX` *(PR [#5296](https://github.com/tobymao/sqlglot/pull/5296) by [@ankur334](https://github.com/ankur334))* - [`4752f3a`](https://github.com/tobymao/sqlglot/commit/4752f3a6b715d8b6968c8f1f05f6ccdfb7351071) - **exasol**: added bit_xor built in exasol function to exasol dialect in sqlglot *(PR [#5298](https://github.com/tobymao/sqlglot/pull/5298) by [@nnamdi16](https://github.com/nnamdi16))* - [`09bd610`](https://github.com/tobymao/sqlglot/commit/09bd6101de21ed86c9fd6df0f63e8bca2666dd81) - **parser**: annotate type of ARRAY_CONCAT_AGG *(PR [#5299](https://github.com/tobymao/sqlglot/pull/5299) by [@geooo109](https://github.com/geooo109))* - [`ad0311a`](https://github.com/tobymao/sqlglot/commit/ad0311a7f8b0b3c5746c29d816b58578a892dd33) - **exasol**: added bit_not exasol built in function. 
*(PR [#5300](https://github.com/tobymao/sqlglot/pull/5300) by [@nnamdi16](https://github.com/nnamdi16))* - [`a7bd823`](https://github.com/tobymao/sqlglot/commit/a7bd8234e0dd02abfe6fa56287e7bda14a549e5a) - **parser**: annotate type of ARRAY_TO_STRING *(PR [#5301](https://github.com/tobymao/sqlglot/pull/5301) by [@geooo109](https://github.com/geooo109))* - [`2aa2182`](https://github.com/tobymao/sqlglot/commit/2aa21820f7d3a26cc4f47c1c757a9b7c97dd0382) - **exasol**: added BIT_LSHIFT built in function to exasol dialect *(PR [#5302](https://github.com/tobymao/sqlglot/pull/5302) by [@nnamdi16](https://github.com/nnamdi16))* - [`c3d9ef2`](https://github.com/tobymao/sqlglot/commit/c3d9ef2cb2d004b57c64af4f3f1bac41f1890737) - **exasol**: added the bit_rshift built in exasol function *(PR [#5304](https://github.com/tobymao/sqlglot/pull/5304) by [@nnamdi16](https://github.com/nnamdi16))* - [`6b42353`](https://github.com/tobymao/sqlglot/commit/6b4235340a2e432015c27b2aeadbdcb930bfa6b0) - **parser**: annotate type of ARRAY_FIRST, ARRAY_LAST *(PR [#5303](https://github.com/tobymao/sqlglot/pull/5303) by [@geooo109](https://github.com/geooo109))* - [`f5b7cc6`](https://github.com/tobymao/sqlglot/commit/f5b7cc6d2f8d73bff4e42e242d3ad3db41d899cc) - **exasol**: added `EVERY` built in function *(PR [#5305](https://github.com/tobymao/sqlglot/pull/5305) by [@nnamdi16](https://github.com/nnamdi16))* - [`d3f04d6`](https://github.com/tobymao/sqlglot/commit/d3f04d6766281ecb7ced9a5e812ab765d7b699be) - add Dremio dialect *(PR [#5277](https://github.com/tobymao/sqlglot/pull/5277) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`3d8e478`](https://github.com/tobymao/sqlglot/commit/3d8e478eac3df6a94c87cd610f96c5f19697a9bf) - **exasol**: added edit_distance built in function to exasol dialect *(PR [#5310](https://github.com/tobymao/sqlglot/pull/5310) by [@nnamdi16](https://github.com/nnamdi16))* - 
[`db9b61e`](https://github.com/tobymao/sqlglot/commit/db9b61e4ecaa0600418eb90f637fb8b06b08c399) - **parser**: parse, annotate type for ARRAY_REVERSE *(PR [#5306](https://github.com/tobymao/sqlglot/pull/5306) by [@geooo109](https://github.com/geooo109))* - [`5612a6d`](https://github.com/tobymao/sqlglot/commit/5612a6da6dee3545f3600db1e5b87c9450952eba) - add support for SPACE *(PR [#5308](https://github.com/tobymao/sqlglot/pull/5308) by [@ankur334](https://github.com/ankur334))* - [`f148c9e`](https://github.com/tobymao/sqlglot/commit/f148c9e64ae0d4df96323271729fa6a6ca68a671) - **duckdb**: Transpile Spark's `exp.PosExplode` *(PR [#5311](https://github.com/tobymao/sqlglot/pull/5311) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5309](https://github.com/tobymao/sqlglot/issues/5309) opened by [@nimrodolev](https://github.com/nimrodolev)* - [`179a278`](https://github.com/tobymao/sqlglot/commit/179a278c7fdbc29105e37f132e6f03e18627f769) - **exasol**: added the regexp_replace function *(PR [#5313](https://github.com/tobymao/sqlglot/pull/5313) by [@nnamdi16](https://github.com/nnamdi16))* - [`8a2f65d`](https://github.com/tobymao/sqlglot/commit/8a2f65d6b2b68ad5ba45a5aed5e56c4dc0fea6fc) - **parser**: parse and annotate type for ARRAY_SLICE *(PR [#5312](https://github.com/tobymao/sqlglot/pull/5312) by [@geooo109](https://github.com/geooo109))* - [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))* - [`950c15d`](https://github.com/tobymao/sqlglot/commit/950c15db5ff64b6f11036f8003db3e5b1fb3afc3) - **exasol**: add var_pop built in function to exasol dialect *(PR [#5328](https://github.com/tobymao/sqlglot/pull/5328) by [@nnamdi16](https://github.com/nnamdi16))* - [`c4ca182`](https://github.com/tobymao/sqlglot/commit/c4ca182ad637b7a22b55d0ecf320c5a09ec5d56c) - 
**optimizer**: annotate type for FROM_BASE64 *(PR [#5329](https://github.com/tobymao/sqlglot/pull/5329) by [@geooo109](https://github.com/geooo109))* - [`0992e99`](https://github.com/tobymao/sqlglot/commit/0992e99f99aeb4ecc97e6918a23b8fd524311ed9) - **exasol**: Add support APPROXIMATE_COUNT_DISTINCT functions in exasol dialect *(PR [#5330](https://github.com/tobymao/sqlglot/pull/5330) by [@nnamdi16](https://github.com/nnamdi16))* - [`7b72bbe`](https://github.com/tobymao/sqlglot/commit/7b72bbed3a0930e11ce4a0fdd9082de715326ac9) - **optimizer**: annotate type for ANY_VALUE *(PR [#5331](https://github.com/tobymao/sqlglot/pull/5331) by [@geooo109](https://github.com/geooo109))* - [`c0d57e7`](https://github.com/tobymao/sqlglot/commit/c0d57e747bf5d2bed7ba2007ac2092d5797ee038) - **optimizer**: annotate type for CHR *(PR [#5332](https://github.com/tobymao/sqlglot/pull/5332) by [@geooo109](https://github.com/geooo109))* - [`d65b5c2`](https://github.com/tobymao/sqlglot/commit/d65b5c22c29416007cca0154fd35f1d4b5efc929) - **optimizer**: annotate type for COUNTIF *(PR [#5334](https://github.com/tobymao/sqlglot/pull/5334) by [@geooo109](https://github.com/geooo109))* - [`521b705`](https://github.com/tobymao/sqlglot/commit/521b7053213df8577f609409af2552c2ff4fd8c9) - **optimizer**: annotate type for GENERATE_ARRAY *(PR [#5335](https://github.com/tobymao/sqlglot/pull/5335) by [@geooo109](https://github.com/geooo109))* - [`5fb26c5`](https://github.com/tobymao/sqlglot/commit/5fb26c58026018360f36a732394b612a3baac38b) - **optimizer**: annotate type for INT64 *(PR [#5339](https://github.com/tobymao/sqlglot/pull/5339) by [@geooo109](https://github.com/geooo109))* - [`cff9b55`](https://github.com/tobymao/sqlglot/commit/cff9b55d70a3b85057e6385c93c0814eaa50f40b) - **optimizer**: annotate type for LOGICAL_AND and LOGICAL_OR *(PR [#5340](https://github.com/tobymao/sqlglot/pull/5340) by [@geooo109](https://github.com/geooo109))* - 
[`b94a6f9`](https://github.com/tobymao/sqlglot/commit/b94a6f9228aa730296c3152179bfbf3503521063) - **optimizer**: annotate type for MAKE_INTERVAL *(PR [#5341](https://github.com/tobymao/sqlglot/pull/5341) by [@geooo109](https://github.com/geooo109))* - [`2c9a7c6`](https://github.com/tobymao/sqlglot/commit/2c9a7c6f0b097a9e8514fc5e2af21c52f145920c) - **optimizer**: annotate type for LAST_VALUE *(PR [#5336](https://github.com/tobymao/sqlglot/pull/5336) by [@geooo109](https://github.com/geooo109))* - [`d862a28`](https://github.com/tobymao/sqlglot/commit/d862a28b0a30f0c5774351f38a61f195120ad904) - **optimizer**: annotate type for TO_BASE64 *(PR [#5342](https://github.com/tobymao/sqlglot/pull/5342) by [@geooo109](https://github.com/geooo109))* - [`85888c1`](https://github.com/tobymao/sqlglot/commit/85888c1b7cbbd0eee179d902a54fbd2a899cc16b) - **optimizer**: annotate type for UNIX_DATE *(PR [#5343](https://github.com/tobymao/sqlglot/pull/5343) by [@geooo109](https://github.com/geooo109))* - [`8a214e0`](https://github.com/tobymao/sqlglot/commit/8a214e0859dfb715fcef0dd6b2d6392012b1f3fb) - **optimizer**: annotate type for UNIX_SECONDS *(PR [#5344](https://github.com/tobymao/sqlglot/pull/5344) by [@geooo109](https://github.com/geooo109))* - [`625cb74`](https://github.com/tobymao/sqlglot/commit/625cb74b69e99ea1a707549366ea960d759848c9) - **optimizer**: annotate type for STARTS_WITH *(PR [#5345](https://github.com/tobymao/sqlglot/pull/5345) by [@geooo109](https://github.com/geooo109))* - [`0337c4d`](https://github.com/tobymao/sqlglot/commit/0337c4d46e9e85d951fc9565a47e338106543711) - **optimizer**: annotate type for SHA and SHA2 *(PR [#5346](https://github.com/tobymao/sqlglot/pull/5346) by [@geooo109](https://github.com/geooo109))* - [`835d9e6`](https://github.com/tobymao/sqlglot/commit/835d9e6c9ffc05de642113b566a1a4eb9cc38470) - add case-insensitive uppercase normalization strategy *(PR [#5349](https://github.com/tobymao/sqlglot/pull/5349) by 
[@georgesittas](https://github.com/georgesittas))* - [`f80493e`](https://github.com/tobymao/sqlglot/commit/f80493efb168f600dc92da439d84e820f303e5aa) - **exasol**: Add TO_CHAR function support in exasol dialect *(PR [#5350](https://github.com/tobymao/sqlglot/pull/5350) by [@nnamdi16](https://github.com/nnamdi16))* - [`cea6a24`](https://github.com/tobymao/sqlglot/commit/cea6a240292d6e31bc73179d433835483e65747a) - **teradata**: add FORMAT phrase parsing *(PR [#5348](https://github.com/tobymao/sqlglot/pull/5348) by [@readjfb](https://github.com/readjfb))* - [`eae64e1`](https://github.com/tobymao/sqlglot/commit/eae64e1629a276bf3885749991869b6c6dea8a8b) - **duckdb**: support new lambda syntax *(PR [#5359](https://github.com/tobymao/sqlglot/pull/5359) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5357](https://github.com/tobymao/sqlglot/issues/5357) opened by [@aersam](https://github.com/aersam)* - [`e77991d`](https://github.com/tobymao/sqlglot/commit/e77991d92fad56014ba2778c71e5e446d4dd090e) - **duckdb**: Add support for SET VARIABLE *(PR [#5360](https://github.com/tobymao/sqlglot/pull/5360) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5356](https://github.com/tobymao/sqlglot/issues/5356) opened by [@aersam](https://github.com/aersam)* - [`c1d3d61`](https://github.com/tobymao/sqlglot/commit/c1d3d61d00f00d2030107689d8704f7a488a80a7) - **optimizer**: annotate type for CORR *(PR [#5364](https://github.com/tobymao/sqlglot/pull/5364) by [@geooo109](https://github.com/geooo109))* - [`c1e8677`](https://github.com/tobymao/sqlglot/commit/c1e867767a006e774a2c200c10eb85b3fbd8a372) - **optimizer**: annotate type for COVAR_POP *(PR [#5365](https://github.com/tobymao/sqlglot/pull/5365) by [@geooo109](https://github.com/geooo109))* - [`e110ef4`](https://github.com/tobymao/sqlglot/commit/e110ef4f774e6ab8de6d4c86e5d306ab53fe895b) - **optimizer**: annotate type for COVAR_SAMP *(PR 
[#5367](https://github.com/tobymao/sqlglot/pull/5367) by [@geooo109](https://github.com/geooo109))* - [`5b59c16`](https://github.com/tobymao/sqlglot/commit/5b59c16528fb1904c64bef0ca6307bb6a95e5a2c) - **optimizer**: annotate type for DATETIME *(PR [#5369](https://github.com/tobymao/sqlglot/pull/5369) by [@geooo109](https://github.com/geooo109))* - [`47176ce`](https://github.com/tobymao/sqlglot/commit/47176ce6b9a4c1722f285034b08a6ae782129894) - **optimizer**: annotate type for ENDS_WITH *(PR [#5370](https://github.com/tobymao/sqlglot/pull/5370) by [@geooo109](https://github.com/geooo109))* - [`1fd757e`](https://github.com/tobymao/sqlglot/commit/1fd757e6279315f00e719974613313a6e43dfe55) - **fabric**: Ensure TIMESTAMPTZ is used with AT TIME ZONE *(PR [#5362](https://github.com/tobymao/sqlglot/pull/5362) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`2cce53d`](https://github.com/tobymao/sqlglot/commit/2cce53d59968f0a4bb3e9599ade93b0e6a140c68) - **optimizer**: annotate type for LAG *(PR [#5371](https://github.com/tobymao/sqlglot/pull/5371) by [@geooo109](https://github.com/geooo109))* - [`a3227de`](https://github.com/tobymao/sqlglot/commit/a3227de3fc57d559eb899dec08af01f85b470ce4) - improve transpilation of `ROUND(x, y)` to Postgres *(PR [#5368](https://github.com/tobymao/sqlglot/pull/5368) by [@blecourt-private](https://github.com/blecourt-private))* - :arrow_lower_right: *addresses issue [#5366](https://github.com/tobymao/sqlglot/issues/5366) opened by [@blecourt-private](https://github.com/blecourt-private)* - [`b368fba`](https://github.com/tobymao/sqlglot/commit/b368fba59b606e038d445b2ca2d8436e115af3d6) - **optimizer**: parse and annotate type for ASCII *(PR [#5377](https://github.com/tobymao/sqlglot/pull/5377) by [@geooo109](https://github.com/geooo109))* - [`7f19b31`](https://github.com/tobymao/sqlglot/commit/7f19b31ebd7981e53a8f8ba343b4f3222fe160c7) - **optimizer**: annotate type for UNICODE *(PR [#5381](https://github.com/tobymao/sqlglot/pull/5381) 
by [@geooo109](https://github.com/geooo109))* - [`f035bf0`](https://github.com/tobymao/sqlglot/commit/f035bf0eb582aa07d4ad79e0ed1958ce0d091ad9) - **dremio**: Add TIME_MAPPING for Dremio dialect *(PR [#5378](https://github.com/tobymao/sqlglot/pull/5378) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`31cfd0f`](https://github.com/tobymao/sqlglot/commit/31cfd0fc3309bc1080b7a2ba8d40b2aba5c098a3) - **exasol**: add to_date and refactored to_char functions with respect to time mapping *(PR [#5379](https://github.com/tobymao/sqlglot/pull/5379) by [@nnamdi16](https://github.com/nnamdi16))* - [`bd3776e`](https://github.com/tobymao/sqlglot/commit/bd3776eaa26d40b44c4cebc2f3838b4055653548) - **doris**: add PROPERTIES_LOCATION mapping for Doris dialect *(PR [#5391](https://github.com/tobymao/sqlglot/pull/5391) by [@xinge-ji](https://github.com/xinge-ji))* - [`7eaa67a`](https://github.com/tobymao/sqlglot/commit/7eaa67acb216501046c739f56839418b84f244c0) - **doris**: properly supported PROPERTIES and UNIQUE KEY table prop *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1e78163`](https://github.com/tobymao/sqlglot/commit/1e78163b829e910e7960c79e7ab118c07d1ecdc3) - **duckdb**: support column access via index *(PR [#5395](https://github.com/tobymao/sqlglot/pull/5395) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5392](https://github.com/tobymao/sqlglot/issues/5392) opened by [@tekumara](https://github.com/tekumara)* - [`1014a67`](https://github.com/tobymao/sqlglot/commit/1014a6759b0917ef1bf5af0dbbdcca72214a8dea) - remove redundant todate in dayofweek closes [#5398](https://github.com/tobymao/sqlglot/pull/5398) *(PR [#5399](https://github.com/tobymao/sqlglot/pull/5399) by [@tobymao](https://github.com/tobymao))* - [`be52f78`](https://github.com/tobymao/sqlglot/commit/be52f7866b03e436d103d9201d1a44c6632c643a) - **exasol**: add support for CONVERT_TZ function *(PR 
[#5401](https://github.com/tobymao/sqlglot/pull/5401) by [@nnamdi16](https://github.com/nnamdi16))* - [`d637161`](https://github.com/tobymao/sqlglot/commit/d637161406faf623418f112162268bedb422213b) - **exasol**: add mapping to TIME_TO_STR in exasol dialect *(PR [#5403](https://github.com/tobymao/sqlglot/pull/5403) by [@nnamdi16](https://github.com/nnamdi16))* - [`b2631ae`](https://github.com/tobymao/sqlglot/commit/b2631aec8d1bdb08decb201b6bd2ba5d927bb121) - **optimizer**: annotate type for bigquery BIT_AND, BIT_OR, BIT_XOR, BIT_COUNT *(PR [#5405](https://github.com/tobymao/sqlglot/pull/5405) by [@geooo109](https://github.com/geooo109))* - [`b81ae62`](https://github.com/tobymao/sqlglot/commit/b81ae629bfb27760ddd832402a86dabe4e65072f) - **exasol**: map STR_TO_TIME to TO_DATE and *(PR [#5407](https://github.com/tobymao/sqlglot/pull/5407) by [@nnamdi16](https://github.com/nnamdi16))* - [`c2fb9ab`](https://github.com/tobymao/sqlglot/commit/c2fb9abeb2f077f00278e46efd9573a3806cd218) - add `DateStrToTime` *(PR [#5409](https://github.com/tobymao/sqlglot/pull/5409) by [@betodealmeida](https://github.com/betodealmeida))* - [`a95993a`](https://github.com/tobymao/sqlglot/commit/a95993ae4e8aa99969db059a534819a4f0b62b96) - **snowflake**: improve transpilation of queries with UNNEST sources *(PR [#5408](https://github.com/tobymao/sqlglot/pull/5408) by [@georgesittas](https://github.com/georgesittas))* - [`7b69f54`](https://github.com/tobymao/sqlglot/commit/7b69f545bbcfeb1e1f2f3b7e0b9757cfd675e4a5) - **snowflake**: Support SEMANTIC_VIEW *(PR [#5414](https://github.com/tobymao/sqlglot/pull/5414) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5406](https://github.com/tobymao/sqlglot/issues/5406) opened by [@jkillian](https://github.com/jkillian)* - [`7dba6f6`](https://github.com/tobymao/sqlglot/commit/7dba6f64d9a7945bbdef1b6e014d802014567a1e) - **exasol**: map AT TIME ZONE to CONVERT_TZ *(PR 
[#5416](https://github.com/tobymao/sqlglot/pull/5416) by [@nnamdi16](https://github.com/nnamdi16))* - [`25f2c1b`](https://github.com/tobymao/sqlglot/commit/25f2c1bb18f9d073b128150566cb27c0c2da0865) - **postgres**: query placeholders *(PR [#5415](https://github.com/tobymao/sqlglot/pull/5415) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5412](https://github.com/tobymao/sqlglot/issues/5412) opened by [@aersam](https://github.com/aersam)* - [`c309c87`](https://github.com/tobymao/sqlglot/commit/c309c8763a90bf0bce02e21f4088b38d85556cce) - **doris**: support range partitioning *(PR [#5402](https://github.com/tobymao/sqlglot/pull/5402) by [@xinge-ji](https://github.com/xinge-ji))* - [`394d3a8`](https://github.com/tobymao/sqlglot/commit/394d3a81ef41d3052c0b0d6e48180c344b7db143) - **dremio**: Add support for DATE_ADD and DATE_SUB *(PR [#5411](https://github.com/tobymao/sqlglot/pull/5411) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`9cfac4f`](https://github.com/tobymao/sqlglot/commit/9cfac4fb04ce1fd038c3e8cbdb755cc24c052497) - **doris**: enhance partitioning support *(PR [#5421](https://github.com/tobymao/sqlglot/pull/5421) by [@xinge-ji](https://github.com/xinge-ji))* - [`a018bea`](https://github.com/tobymao/sqlglot/commit/a018bea159261a3ad4ac082f29e30fe1153995b3) - **exasol**: mapped exp.CurrentUser to exasol CURRENT_USER *(PR [#5422](https://github.com/tobymao/sqlglot/pull/5422) by [@nnamdi16](https://github.com/nnamdi16))* - [`489dc5c`](https://github.com/tobymao/sqlglot/commit/489dc5c2f7506e0fe4de549384dd0f816e9fd12f) - **optimizer**: parse and annotate type support for JSON_ARRAY *(PR [#5424](https://github.com/tobymao/sqlglot/pull/5424) by [@geooo109](https://github.com/geooo109))* - [`0ed518c`](https://github.com/tobymao/sqlglot/commit/0ed518c67042002ee0af91bee0b9e7093c85f926) - **optimizer**: annotate type for bigquery JSON_VALUE *(PR [#5427](https://github.com/tobymao/sqlglot/pull/5427) by 
[@geooo109](https://github.com/geooo109))* - [`6091617`](https://github.com/tobymao/sqlglot/commit/6091617067c263e3e834e579b37aa1c601b1ddc7) - **optimizer**: annotate type for bigquery JSON_VALUE_ARRAY *(PR [#5428](https://github.com/tobymao/sqlglot/pull/5428) by [@geooo109](https://github.com/geooo109))* - [`631c851`](https://github.com/tobymao/sqlglot/commit/631c851cbbfbf55cb66a79c2549aeeb443fcab83) - **optimizer**: parse and annotate type support for bigquery JSON_TYPE *(PR [#5430](https://github.com/tobymao/sqlglot/pull/5430) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`5724538`](https://github.com/tobymao/sqlglot/commit/5724538f278b2178114b88850251afd7c3db0dda) - **bigquery**: ARRAY_CONCAT type annotation *(PR [#5293](https://github.com/tobymao/sqlglot/pull/5293) by [@geooo109](https://github.com/geooo109))* - [`0a6afcd`](https://github.com/tobymao/sqlglot/commit/0a6afcd90c663aaef9b385fc12ccd19dbf6388cc) - use re-entrant lock in dialects/__init__ to avoid deadlocks *(PR [#5322](https://github.com/tobymao/sqlglot/pull/5322) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5321](https://github.com/tobymao/sqlglot/issues/5321) opened by [@jc-5s](https://github.com/jc-5s)* - [`599ca81`](https://github.com/tobymao/sqlglot/commit/599ca8101f48805098cbdf808ac2923a8246066b) - **parser**: avoid CTE values ALIAS gen, when ALIAS exists *(PR [#5323](https://github.com/tobymao/sqlglot/pull/5323) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5318](https://github.com/tobymao/sqlglot/issues/5318) opened by [@ankur334](https://github.com/ankur334)* - [`5a0f589`](https://github.com/tobymao/sqlglot/commit/5a0f589a0fdb6743c3be2f98b74a34780f51332b) - **spark**: distinguish STORED AS from USING *(PR [#5320](https://github.com/tobymao/sqlglot/pull/5320) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue 
[#5317](https://github.com/tobymao/sqlglot/issues/5317) opened by [@cosinequanon](https://github.com/cosinequanon)* - [`cbc79c2`](https://github.com/tobymao/sqlglot/commit/cbc79c2a47c46370de0378b8bae61f4f3c17ca82) - preserve ORDER BY comments fixes [#5326](https://github.com/tobymao/sqlglot/pull/5326) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`fa69583`](https://github.com/tobymao/sqlglot/commit/fa69583d8b4f5801d05c21a92b43dea272a3ef49) - **optimizer**: avoid qualifying CTE *(PR [#5327](https://github.com/tobymao/sqlglot/pull/5327) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5319](https://github.com/tobymao/sqlglot/issues/5319) opened by [@naamamaoz](https://github.com/naamamaoz)* - [`29cce43`](https://github.com/tobymao/sqlglot/commit/29cce43e72451feeb8788ac2660658075bf59093) - comment lost before GROUP, JOIN and HAVING *(PR [#5338](https://github.com/tobymao/sqlglot/pull/5338) by [@chiiips](https://github.com/chiiips))* - [`509b741`](https://github.com/tobymao/sqlglot/commit/509b74173f678842e7550c75c4d8d906c879fb12) - preserve multi-arg DECODE function instead of converting to CASE *(PR [#5352](https://github.com/tobymao/sqlglot/pull/5352) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5351](https://github.com/tobymao/sqlglot/issues/5351) opened by [@kentmaxwell](https://github.com/kentmaxwell)* - [`188d446`](https://github.com/tobymao/sqlglot/commit/188d446ca65125c63bbfff96d15d91078deb6b4a) - **optimizer**: downstream column for PIVOT *(PR [#5363](https://github.com/tobymao/sqlglot/pull/5363) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5354](https://github.com/tobymao/sqlglot/issues/5354) opened by [@suresh-summation](https://github.com/suresh-summation)* - [`d7ccb48`](https://github.com/tobymao/sqlglot/commit/d7ccb48e542c49258e31cc4df45f49beebc2e238) - **duckdb**: week/quarter support *(PR 
[#5374](https://github.com/tobymao/sqlglot/pull/5374) by [@eakmanrq](https://github.com/eakmanrq))* - [`252469d`](https://github.com/tobymao/sqlglot/commit/252469d2d0ed221dbb2fde86043506ad15dbe7e5) - **snowflake**: transpile bigquery CURRENT_DATE with timezone *(PR [#5387](https://github.com/tobymao/sqlglot/pull/5387) by [@geooo109](https://github.com/geooo109))* - [`7511853`](https://github.com/tobymao/sqlglot/commit/751185325caf838107ecb4e8f35ad77bf3cc9bf2) - **postgres**: add XML type *(PR [#5396](https://github.com/tobymao/sqlglot/pull/5396) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5393](https://github.com/tobymao/sqlglot/issues/5393) opened by [@aersam](https://github.com/aersam)* - [`9e8d3ab`](https://github.com/tobymao/sqlglot/commit/9e8d3abedcffb1c267ed0e6a8332af3b52105d41) - **optimizer**: Preserve struct-column parentheses for RisingWave dialect *(PR [#5376](https://github.com/tobymao/sqlglot/pull/5376) by [@MisterWheatley](https://github.com/MisterWheatley))* - [`3223e63`](https://github.com/tobymao/sqlglot/commit/3223e6394fdd3f8e48c68bbb940b661ff8e76fd8) - **fabric**: cast datetimeoffset to datetime2 *(PR [#5385](https://github.com/tobymao/sqlglot/pull/5385) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`12b49dd`](https://github.com/tobymao/sqlglot/commit/12b49dd800951a48ea8bc0f01d7c35340236f559) - remove equal sign from CREATE TABLE comment (doris, starrocks) *(PR [#5390](https://github.com/tobymao/sqlglot/pull/5390) by [@xinge-ji](https://github.com/xinge-ji))* - [`06cea31`](https://github.com/tobymao/sqlglot/commit/06cea310bd9fd3a9a9fa0ba008596e878a430df8) - **postgres**: support KEY related locks *(PR [#5397](https://github.com/tobymao/sqlglot/pull/5397) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5394](https://github.com/tobymao/sqlglot/issues/5394) opened by [@aurimasandriusaitis](https://github.com/aurimasandriusaitis)* - 
[`92d93a6`](https://github.com/tobymao/sqlglot/commit/92d93a624b41df8bb4628c1f2d0cbb8c7844c927) - **parser**: do not consume modifier prefixes in group parser, fixes [#5400](https://github.com/tobymao/sqlglot/pull/5400) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ba0c801`](https://github.com/tobymao/sqlglot/commit/ba0c801e3dab8e08d4b5f7f73247ec6cfdc667e5) - **tsql**: change READ_ONLY to READONLY *(PR [#5410](https://github.com/tobymao/sqlglot/pull/5410) by [@CrispinStichartFNSB](https://github.com/CrispinStichartFNSB))* - [`63da895`](https://github.com/tobymao/sqlglot/commit/63da89563fddc13ee7aec06ee36d8a0f74227ee1) - **risingwave**: Fix RisingWave dialect SQL for MAP datatype declaration *(PR [#5418](https://github.com/tobymao/sqlglot/pull/5418) by [@MisterWheatley](https://github.com/MisterWheatley))* - [`edacae1`](https://github.com/tobymao/sqlglot/commit/edacae183fe26ea25bffe1bccd335bf57ed34ecb) - **snowflake**: transpile bigquery GENERATE_DATE_ARRAY with column access *(PR [#5388](https://github.com/tobymao/sqlglot/pull/5388) by [@geooo109](https://github.com/geooo109))* - [`5835b8d`](https://github.com/tobymao/sqlglot/commit/5835b8d6c7fe77d9645691bb88021af137ed0bac) - **duckdb**: make bracket parsing aware of duckdb MAP func *(PR [#5423](https://github.com/tobymao/sqlglot/pull/5423) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5417](https://github.com/tobymao/sqlglot/issues/5417) opened by [@MisterWheatley](https://github.com/MisterWheatley)* - [`5c59816`](https://github.com/tobymao/sqlglot/commit/5c59816f5572f8adb1de9c97f0007d19091910ec) - **snowflake**: ALTER TABLE ADD with multiple columns *(PR [#5431](https://github.com/tobymao/sqlglot/pull/5431) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5426](https://github.com/tobymao/sqlglot/issues/5426) opened by [@ca0904](https://github.com/ca0904)* ### :recycle: Refactors - 
[`8d118ea`](https://github.com/tobymao/sqlglot/commit/8d118ead9c15e7b2b4b51b7cf93cab94e61c2625) - **athena**: route statements to hive/trino depending on their type *(PR [#5314](https://github.com/tobymao/sqlglot/pull/5314) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5267](https://github.com/tobymao/sqlglot/issues/5267) opened by [@cpcloud](https://github.com/cpcloud)* ### :wrench: Chores - [`cc389fa`](https://github.com/tobymao/sqlglot/commit/cc389facb33f94a0d1f696f2ef9e92f298711894) - **optimizer**: annotate type SHA1, SHA256, SHA512 for BigQuery *(PR [#5347](https://github.com/tobymao/sqlglot/pull/5347) by [@geooo109](https://github.com/geooo109))* - [`194850a`](https://github.com/tobymao/sqlglot/commit/194850a52497300a8f1d47f2306b67cdd11ffab6) - **exasol**: clean up TO_CHAR *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1abd461`](https://github.com/tobymao/sqlglot/commit/1abd461295830807c52f24d25ac6938095f54831) - bump min. 
supported version to python 3.9 *(PR [#5353](https://github.com/tobymao/sqlglot/pull/5353) by [@georgesittas](https://github.com/georgesittas))* - [`71b1349`](https://github.com/tobymao/sqlglot/commit/71b1349a26d2b9839899900ef8fdfb1ebc3d68fd) - **postgres, hive**: use ASCII node instead of UNICODE node *(PR [#5380](https://github.com/tobymao/sqlglot/pull/5380) by [@geooo109](https://github.com/geooo109))* - [`a5c2245`](https://github.com/tobymao/sqlglot/commit/a5c2245c3e30f5bc3f410edacf3a077ce99f4a80) - improve error msg for PIVOT with missing aggregation *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v27.0.0] - 2025-07-07 ### :boom: BREAKING CHANGES - due to [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))*: avoid creating new alias for qualifying unpivot (#5121) - due to [`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))*: make coalesce simplification optional, skip by default (#5123) - due to [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update py03 version *(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by [@benfdking](https://github.com/benfdking))*: update py03 version (#5136) - due to [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.5.0 - due to [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - recognize ARRAY_CONCAT_AGG as an aggregate function *(PR 
[#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))*: recognize ARRAY_CONCAT_AGG as an aggregate function (#5141) - due to [`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor ALTER TABLE ADD parsing (#5144) - due to [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))*: preserve TIMESTAMP on roundtrip (#5145) - due to [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - return token vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by [@georgesittas](https://github.com/georgesittas))*: return token vector in `tokenize` even on failure (#5155) - due to [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump sqlglotrs to 0.6.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.6.0 - due to [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes [#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))*: improve support for ENDSWITH closes #5170 - due to [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for ANY_VALUE for versions 16+ (#5179) - due to 
[`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))*: virtual column with AS(expr) as ComputedColumnConstraint (#5180) - due to [`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))*: Array_intersection transpilation support (#5186) - due to [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))*: preserve DIV binary operator, fixes #5198 (#5199) - due to [`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))*: CTEs instead of subqueries for pipe syntax (#5205) - due to [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))*: support multi-part (>3) dotted functions (#5211) - due to [`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve quoting for UDT (#5216) - due to [`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - 
JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))*: JOIN pipe syntax, Set Operators as CTEs (#5215) - due to [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))*: Normalize date parts in `exp.Extract` generation (#5229) - due to [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - resolve table "columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))*: resolve table "columns" in bigquery that produce structs (#5230) - due to [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by [@georgesittas](https://github.com/georgesittas))*: change comma to cross join when precedence is the same for all join types (#5240) - due to [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))*: transpile from/to dbms_random.value (#5242) - due to [`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))*: cast constructed timestamp literal to zone-aware type if needed (#5253) - due to [`db4e0ec`](https://github.com/tobymao/sqlglot/commit/db4e0ece950a6a1f543d8ecad48a7d4b1d6872be) - 
convert information schema keywords to uppercase for consistency *(PR [#5263](https://github.com/tobymao/sqlglot/pull/5263) by [@mattiasthalen](https://github.com/mattiasthalen))*: convert information schema keywords to uppercase for consistency (#5263) - due to [`eea1570`](https://github.com/tobymao/sqlglot/commit/eea1570ba530517a95699092ccd9ce6a856f5e84) - add support for SYSDATETIMEOFFSET closes [#5272](https://github.com/tobymao/sqlglot/pull/5272) *(PR [#5273](https://github.com/tobymao/sqlglot/pull/5273) by [@georgesittas](https://github.com/georgesittas))*: add support for SYSDATETIMEOFFSET closes #5272 (#5273) - due to [`3d3ccc5`](https://github.com/tobymao/sqlglot/commit/3d3ccc52a40536b9ac4e974f1592dffe5a7568f9) - Transpile exp.PosExplode pos column alias *(PR [#5274](https://github.com/tobymao/sqlglot/pull/5274) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile exp.PosExplode pos column alias (#5274) - due to [`9a95af1`](https://github.com/tobymao/sqlglot/commit/9a95af1c725cd70ffa8206f1d88452a7faab93b2) - only cast strings to timestamp for TO_CHAR (TimeToStr) *(PR [#5283](https://github.com/tobymao/sqlglot/pull/5283) by [@georgesittas](https://github.com/georgesittas))*: only cast strings to timestamp for TO_CHAR (TimeToStr) (#5283) - due to [`8af4790`](https://github.com/tobymao/sqlglot/commit/8af479017ccde16049c897ae5d322d4a69843b65) - Fix parsing of ADD CONSTRAINT *(PR [#5288](https://github.com/tobymao/sqlglot/pull/5288) by [@VaggelisD](https://github.com/VaggelisD))*: Fix parsing of ADD CONSTRAINT (#5288) - due to [`18aea08`](https://github.com/tobymao/sqlglot/commit/18aea08f7dcaa887bcf29886cd3b3bc2850a3679) - include bigquery unnest aliases in selected sources *(PR [#5285](https://github.com/tobymao/sqlglot/pull/5285) by [@georgesittas](https://github.com/georgesittas))*: include bigquery unnest aliases in selected sources (#5285) - due to [`0ff95c5`](https://github.com/tobymao/sqlglot/commit/0ff95c5903907c9ab30b7850bb3b962bc6da2bab) - add 
parsing/transpilation support for the REPLACE function *(PR [#5289](https://github.com/tobymao/sqlglot/pull/5289) by [@rahulj51](https://github.com/rahulj51))*: add parsing/transpilation support for the REPLACE function (#5289) - due to [`dc03649`](https://github.com/tobymao/sqlglot/commit/dc03649bca0b7a090254976182a03c21dd2269ba) - only coerce time var -like units into strings for DATE_TRUNC *(PR [#5291](https://github.com/tobymao/sqlglot/pull/5291) by [@georgesittas](https://github.com/georgesittas))*: only coerce time var -like units into strings for DATE_TRUNC (#5291) - due to [`5724538`](https://github.com/tobymao/sqlglot/commit/5724538f278b2178114b88850251afd7c3db0dda) - ARRAY_CONCAT type annotation *(PR [#5293](https://github.com/tobymao/sqlglot/pull/5293) by [@geooo109](https://github.com/geooo109))*: ARRAY_CONCAT type annotation (#5293) - due to [`c103b23`](https://github.com/tobymao/sqlglot/commit/c103b2304dca552ac8cf6733156db8b59d3614f3) - add support for `SUBSTRING_INDEX` *(PR [#5296](https://github.com/tobymao/sqlglot/pull/5296) by [@ankur334](https://github.com/ankur334))*: add support for `SUBSTRING_INDEX` (#5296) - due to [`a7bd823`](https://github.com/tobymao/sqlglot/commit/a7bd8234e0dd02abfe6fa56287e7bda14a549e5a) - annotate type of ARRAY_TO_STRING *(PR [#5301](https://github.com/tobymao/sqlglot/pull/5301) by [@geooo109](https://github.com/geooo109))*: annotate type of ARRAY_TO_STRING (#5301) - due to [`6b42353`](https://github.com/tobymao/sqlglot/commit/6b4235340a2e432015c27b2aeadbdcb930bfa6b0) - annotate type of ARRAY_FIRST, ARRAY_LAST *(PR [#5303](https://github.com/tobymao/sqlglot/pull/5303) by [@geooo109](https://github.com/geooo109))*: annotate type of ARRAY_FIRST, ARRAY_LAST (#5303) - due to [`db9b61e`](https://github.com/tobymao/sqlglot/commit/db9b61e4ecaa0600418eb90f637fb8b06b08c399) - parse, annotate type for ARRAY_REVERSE *(PR [#5306](https://github.com/tobymao/sqlglot/pull/5306) by [@geooo109](https://github.com/geooo109))*: parse, 
annotate type for ARRAY_REVERSE (#5306) - due to [`5612a6d`](https://github.com/tobymao/sqlglot/commit/5612a6da6dee3545f3600db1e5b87c9450952eba) - add support for SPACE *(PR [#5308](https://github.com/tobymao/sqlglot/pull/5308) by [@ankur334](https://github.com/ankur334))*: add support for SPACE (#5308) - due to [`8a2f65d`](https://github.com/tobymao/sqlglot/commit/8a2f65d6b2b68ad5ba45a5aed5e56c4dc0fea6fc) - parse and annotate type for ARRAY_SLICE *(PR [#5312](https://github.com/tobymao/sqlglot/pull/5312) by [@geooo109](https://github.com/geooo109))*: parse and annotate type for ARRAY_SLICE (#5312) - due to [`8d118ea`](https://github.com/tobymao/sqlglot/commit/8d118ead9c15e7b2b4b51b7cf93cab94e61c2625) - route statements to hive/trino depending on their type *(PR [#5314](https://github.com/tobymao/sqlglot/pull/5314) by [@georgesittas](https://github.com/georgesittas))*: route statements to hive/trino depending on their type (#5314) - due to [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))*: add TypeOf / toTypeName support (#5315) - due to [`5a0f589`](https://github.com/tobymao/sqlglot/commit/5a0f589a0fdb6743c3be2f98b74a34780f51332b) - distinguish STORED AS from USING *(PR [#5320](https://github.com/tobymao/sqlglot/pull/5320) by [@geooo109](https://github.com/geooo109))*: distinguish STORED AS from USING (#5320) - due to [`c4ca182`](https://github.com/tobymao/sqlglot/commit/c4ca182ad637b7a22b55d0ecf320c5a09ec5d56c) - annotate type for FROM_BASE64 *(PR [#5329](https://github.com/tobymao/sqlglot/pull/5329) by [@geooo109](https://github.com/geooo109))*: annotate type for FROM_BASE64 (#5329) - due to [`7b72bbe`](https://github.com/tobymao/sqlglot/commit/7b72bbed3a0930e11ce4a0fdd9082de715326ac9) - annotate type for ANY_VALUE *(PR [#5331](https://github.com/tobymao/sqlglot/pull/5331) by 
[@geooo109](https://github.com/geooo109))*: annotate type for ANY_VALUE (#5331) - due to [`c0d57e7`](https://github.com/tobymao/sqlglot/commit/c0d57e747bf5d2bed7ba2007ac2092d5797ee038) - annotate type for CHR *(PR [#5332](https://github.com/tobymao/sqlglot/pull/5332) by [@geooo109](https://github.com/geooo109))*: annotate type for CHR (#5332) - due to [`d65b5c2`](https://github.com/tobymao/sqlglot/commit/d65b5c22c29416007cca0154fd35f1d4b5efc929) - annotate type for COUNTIF *(PR [#5334](https://github.com/tobymao/sqlglot/pull/5334) by [@geooo109](https://github.com/geooo109))*: annotate type for COUNTIF (#5334) - due to [`521b705`](https://github.com/tobymao/sqlglot/commit/521b7053213df8577f609409af2552c2ff4fd8c9) - annotate type for GENERATE_ARRAY *(PR [#5335](https://github.com/tobymao/sqlglot/pull/5335) by [@geooo109](https://github.com/geooo109))*: annotate type for GENERATE_ARRAY (#5335) - due to [`5fb26c5`](https://github.com/tobymao/sqlglot/commit/5fb26c58026018360f36a732394b612a3baac38b) - annotate type for INT64 *(PR [#5339](https://github.com/tobymao/sqlglot/pull/5339) by [@geooo109](https://github.com/geooo109))*: annotate type for INT64 (#5339) - due to [`cff9b55`](https://github.com/tobymao/sqlglot/commit/cff9b55d70a3b85057e6385c93c0814eaa50f40b) - annotate type for LOGICAL_AND and LOGICAL_OR *(PR [#5340](https://github.com/tobymao/sqlglot/pull/5340) by [@geooo109](https://github.com/geooo109))*: annotate type for LOGICAL_AND and LOGICAL_OR (#5340) - due to [`b94a6f9`](https://github.com/tobymao/sqlglot/commit/b94a6f9228aa730296c3152179bfbf3503521063) - annotate type for MAKE_INTERVAL *(PR [#5341](https://github.com/tobymao/sqlglot/pull/5341) by [@geooo109](https://github.com/geooo109))*: annotate type for MAKE_INTERVAL (#5341) - due to [`2c9a7c6`](https://github.com/tobymao/sqlglot/commit/2c9a7c6f0b097a9e8514fc5e2af21c52f145920c) - annotate type for LAST_VALUE *(PR [#5336](https://github.com/tobymao/sqlglot/pull/5336) by 
[@geooo109](https://github.com/geooo109))*: annotate type for LAST_VALUE (#5336) - due to [`d862a28`](https://github.com/tobymao/sqlglot/commit/d862a28b0a30f0c5774351f38a61f195120ad904) - annotate type for TO_BASE64 *(PR [#5342](https://github.com/tobymao/sqlglot/pull/5342) by [@geooo109](https://github.com/geooo109))*: annotate type for TO_BASE64 (#5342) - due to [`85888c1`](https://github.com/tobymao/sqlglot/commit/85888c1b7cbbd0eee179d902a54fbd2a899cc16b) - annotate type for UNIX_DATE *(PR [#5343](https://github.com/tobymao/sqlglot/pull/5343) by [@geooo109](https://github.com/geooo109))*: annotate type for UNIX_DATE (#5343) - due to [`8a214e0`](https://github.com/tobymao/sqlglot/commit/8a214e0859dfb715fcef0dd6b2d6392012b1f3fb) - annotate type for UNIX_SECONDS *(PR [#5344](https://github.com/tobymao/sqlglot/pull/5344) by [@geooo109](https://github.com/geooo109))*: annotate type for UNIX_SECONDS (#5344) - due to [`625cb74`](https://github.com/tobymao/sqlglot/commit/625cb74b69e99ea1a707549366ea960d759848c9) - annotate type for STARTS_WITH *(PR [#5345](https://github.com/tobymao/sqlglot/pull/5345) by [@geooo109](https://github.com/geooo109))*: annotate type for STARTS_WITH (#5345) - due to [`0337c4d`](https://github.com/tobymao/sqlglot/commit/0337c4d46e9e85d951fc9565a47e338106543711) - annotate type for SHA and SHA2 *(PR [#5346](https://github.com/tobymao/sqlglot/pull/5346) by [@geooo109](https://github.com/geooo109))*: annotate type for SHA and SHA2 (#5346) - due to [`cc389fa`](https://github.com/tobymao/sqlglot/commit/cc389facb33f94a0d1f696f2ef9e92f298711894) - annotate type SHA1, SHA256, SHA512 for BigQuery *(PR [#5347](https://github.com/tobymao/sqlglot/pull/5347) by [@geooo109](https://github.com/geooo109))*: annotate type SHA1, SHA256, SHA512 for BigQuery (#5347) - due to [`509b741`](https://github.com/tobymao/sqlglot/commit/509b74173f678842e7550c75c4d8d906c879fb12) - preserve multi-arg DECODE function instead of converting to CASE *(PR 
[#5352](https://github.com/tobymao/sqlglot/pull/5352) by [@georgesittas](https://github.com/georgesittas))*: preserve multi-arg DECODE function instead of converting to CASE (#5352) - due to [`c1d3d61`](https://github.com/tobymao/sqlglot/commit/c1d3d61d00f00d2030107689d8704f7a488a80a7) - annotate type for CORR *(PR [#5364](https://github.com/tobymao/sqlglot/pull/5364) by [@geooo109](https://github.com/geooo109))*: annotate type for CORR (#5364) - due to [`c1e8677`](https://github.com/tobymao/sqlglot/commit/c1e867767a006e774a2c200c10eb85b3fbd8a372) - annotate type for COVAR_POP *(PR [#5365](https://github.com/tobymao/sqlglot/pull/5365) by [@geooo109](https://github.com/geooo109))*: annotate type for COVAR_POP (#5365) - due to [`e110ef4`](https://github.com/tobymao/sqlglot/commit/e110ef4f774e6ab8de6d4c86e5d306ab53fe895b) - annotate type for COVAR_SAMP *(PR [#5367](https://github.com/tobymao/sqlglot/pull/5367) by [@geooo109](https://github.com/geooo109))*: annotate type for COVAR_SAMP (#5367) - due to [`5b59c16`](https://github.com/tobymao/sqlglot/commit/5b59c16528fb1904c64bef0ca6307bb6a95e5a2c) - annotate type for DATETIME *(PR [#5369](https://github.com/tobymao/sqlglot/pull/5369) by [@geooo109](https://github.com/geooo109))*: annotate type for DATETIME (#5369) - due to [`47176ce`](https://github.com/tobymao/sqlglot/commit/47176ce6b9a4c1722f285034b08a6ae782129894) - annotate type for ENDS_WITH *(PR [#5370](https://github.com/tobymao/sqlglot/pull/5370) by [@geooo109](https://github.com/geooo109))*: annotate type for ENDS_WITH (#5370) - due to [`2cce53d`](https://github.com/tobymao/sqlglot/commit/2cce53d59968f0a4bb3e9599ade93b0e6a140c68) - annotate type for LAG *(PR [#5371](https://github.com/tobymao/sqlglot/pull/5371) by [@geooo109](https://github.com/geooo109))*: annotate type for LAG (#5371) - due to [`a3227de`](https://github.com/tobymao/sqlglot/commit/a3227de3fc57d559eb899dec08af01f85b470ce4) - improve transpilation of `ROUND(x, y)` to Postgres *(PR 
[#5368](https://github.com/tobymao/sqlglot/pull/5368) by [@blecourt-private](https://github.com/blecourt-private))*: improve transpilation of `ROUND(x, y)` to Postgres (#5368) ### :sparkles: New Features - [`82c50ce`](https://github.com/tobymao/sqlglot/commit/82c50ce68d9a1ad25095086ae3645f5c4996c18b) - **duckdb**: extend time travel parsing to take VERSION into account *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bb4f428`](https://github.com/tobymao/sqlglot/commit/bb4f4283b53bc060a8c7e0f12c1e7ef5b521c4e6) - bubble up comments nested under a Bracket, fixes [#5131](https://github.com/tobymao/sqlglot/pull/5131) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`9f318eb`](https://github.com/tobymao/sqlglot/commit/9f318ebe4502bb484a34873252cf4a40c7e440e4) - **snowflake**: Transpile BQ's `ARRAY(SELECT AS STRUCT ...)` *(PR [#5140](https://github.com/tobymao/sqlglot/pull/5140) by [@VaggelisD](https://github.com/VaggelisD))* - [`93b402a`](https://github.com/tobymao/sqlglot/commit/93b402abc74e642ed312db585b33315674a450cd) - **parser**: support SELECT, FROM, WHERE with pipe syntax *(PR [#5128](https://github.com/tobymao/sqlglot/pull/5128) by [@geooo109](https://github.com/geooo109))* - [`1a8e78b`](https://github.com/tobymao/sqlglot/commit/1a8e78bd84e006023d5d3ea561504587dfbb55a9) - **parser**: ORDER BY with pipe syntax *(PR [#5153](https://github.com/tobymao/sqlglot/pull/5153) by [@geooo109](https://github.com/geooo109))* - [`966ad95`](https://github.com/tobymao/sqlglot/commit/966ad95432d5f8e29ade36d8271a5c489c207324) - **tsql**: add convert style 126 *(PR [#5157](https://github.com/tobymao/sqlglot/pull/5157) by [@pa1ch](https://github.com/pa1ch))* - [`b7ac6ff`](https://github.com/tobymao/sqlglot/commit/b7ac6ff4680ff619be4b0ddb01f61f916ed09d58) - **parser**: LIMIT/OFFSET pipe syntax *(PR [#5159](https://github.com/tobymao/sqlglot/pull/5159) by [@geooo109](https://github.com/geooo109))* - 
[`cfc158d`](https://github.com/tobymao/sqlglot/commit/cfc158d753d4f43d12c3b502633d29e43dcc5569) - **snowflake**: transpile STRTOK_TO_ARRAY to duckdb *(PR [#5165](https://github.com/tobymao/sqlglot/pull/5165) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5160](https://github.com/tobymao/sqlglot/issues/5160) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`ff0f30b`](https://github.com/tobymao/sqlglot/commit/ff0f30bcf7d0d74b26a703eaa632e1be15b3c001) - support ARRAY_REMOVE *(PR [#5163](https://github.com/tobymao/sqlglot/pull/5163) by [@geooo109](https://github.com/geooo109))* - [`9cac01f`](https://github.com/tobymao/sqlglot/commit/9cac01f6b4a5c93b55f5b68f21cb104932880a0e) - **tsql**: support FOR XML syntax *(PR [#5167](https://github.com/tobymao/sqlglot/pull/5167) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5161](https://github.com/tobymao/sqlglot/issues/5161) opened by [@codykonior](https://github.com/codykonior)* - [`8b5129f`](https://github.com/tobymao/sqlglot/commit/8b5129f288880032f0bf9d649984d82314039af1) - **postgres**: improve pretty-formatting of ARRAY[...] 
*(commit by [@georgesittas](https://github.com/georgesittas))* - [`964b4a1`](https://github.com/tobymao/sqlglot/commit/964b4a1e367e00e243b80edf677cd48d453ed31e) - add line/col position for Star *(commit by [@georgesittas](https://github.com/georgesittas))* - [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes [#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`63f9cb4`](https://github.com/tobymao/sqlglot/commit/63f9cb4b158b88574136b32241ee60254352c9e6) - **sqlglotrs**: match the Python implementation of __repr__ for tokens *(PR [#5172](https://github.com/tobymao/sqlglot/pull/5172) by [@georgesittas](https://github.com/georgesittas))* - [`c007afa`](https://github.com/tobymao/sqlglot/commit/c007afa23831e9bd86f401d85260e15edf00328f) - support Star instance as first arg of exp.column helper *(PR [#5177](https://github.com/tobymao/sqlglot/pull/5177) by [@georgesittas](https://github.com/georgesittas))* - [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - **postgres**: Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4674](https://github.com/TobikoData/sqlmesh/issues/4674) opened by [@petrikoro](https://github.com/petrikoro)* - [`ba05ff6`](https://github.com/tobymao/sqlglot/commit/ba05ff67127e056d567fc2c1d3bcc8e3dcce7b7e) - **parser**: AGGREGATE with GROUP AND ORDER BY pipe syntax *(PR [#5171](https://github.com/tobymao/sqlglot/pull/5171) by [@geooo109](https://github.com/geooo109))* - [`26077a4`](https://github.com/tobymao/sqlglot/commit/26077a47d9db750f44ab1baf9a434596b5bb613b) - make to_table more lenient *(PR [#5183](https://github.com/tobymao/sqlglot/pull/5183) by [@georgesittas](https://github.com/georgesittas))* - 
[`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))* - [`d86a114`](https://github.com/tobymao/sqlglot/commit/d86a1147aeb866ed0ab2c342914ecf8cbfadac8a) - **sqlite**: implement RESPECT/IGNORE NULLS in first_value() *(PR [#5185](https://github.com/tobymao/sqlglot/pull/5185) by [@NickCrews](https://github.com/NickCrews))* - [`1d50fca`](https://github.com/tobymao/sqlglot/commit/1d50fca8ffc34e4acbc1b791c4cdf5f184a748db) - improve transpilation of st_point and st_distance *(PR [#5194](https://github.com/tobymao/sqlglot/pull/5194) by [@georgesittas](https://github.com/georgesittas))* - [`756ec3b`](https://github.com/tobymao/sqlglot/commit/756ec3b65db1eb2572d017a3ac12ece6bb44c726) - **parser**: SET OPERATORS with pipe syntax *(PR [#5184](https://github.com/tobymao/sqlglot/pull/5184) by [@geooo109](https://github.com/geooo109))* - [`c20f85e`](https://github.com/tobymao/sqlglot/commit/c20f85e3e171e502fc51f74894d3313f0ad61535) - **spark**: support ALTER ADD PARTITION *(PR [#5208](https://github.com/tobymao/sqlglot/pull/5208) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5204](https://github.com/tobymao/sqlglot/issues/5204) opened by [@cosinequanon](https://github.com/cosinequanon)* - [`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - **parser**: JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))* - [`21cd3eb`](https://github.com/tobymao/sqlglot/commit/21cd3ebf5d0b57f5b102c5aadc3b24a598ebe918) - **parser**: PIVOT/UNPIVOT pipe syntax *(PR [#5222](https://github.com/tobymao/sqlglot/pull/5222) by [@geooo109](https://github.com/geooo109))* - 
[`97f5822`](https://github.com/tobymao/sqlglot/commit/97f58226fc8815b23787b7b8699ea71f58268560) - **parser**: AS pipe syntax *(PR [#5224](https://github.com/tobymao/sqlglot/pull/5224) by [@geooo109](https://github.com/geooo109))* - [`a7e7fee`](https://github.com/tobymao/sqlglot/commit/a7e7feef02a77fe8606f3f482bad91230fa637f4) - **parser**: EXTEND pipe syntax *(PR [#5225](https://github.com/tobymao/sqlglot/pull/5225) by [@geooo109](https://github.com/geooo109))* - [`c1cb9f8`](https://github.com/tobymao/sqlglot/commit/c1cb9f8f682080f7a06c387219d79c6d068b6dbe) - **snowflake**: add autoincrement order clause support *(PR [#5223](https://github.com/tobymao/sqlglot/pull/5223) by [@dmaresma](https://github.com/dmaresma))* - [`91afe4c`](https://github.com/tobymao/sqlglot/commit/91afe4cfd7b3f427e4c0b298075e867b8a1bbe55) - **parser**: TABLESAMPLE pipe syntax *(PR [#5231](https://github.com/tobymao/sqlglot/pull/5231) by [@geooo109](https://github.com/geooo109))* - [`62da84a`](https://github.com/tobymao/sqlglot/commit/62da84acce7f44802dca26a9357a16115e21fabf) - **snowflake**: improve transpilation of unnested object lookup *(PR [#5234](https://github.com/tobymao/sqlglot/pull/5234) by [@georgesittas](https://github.com/georgesittas))* - [`2c60453`](https://github.com/tobymao/sqlglot/commit/2c604537ba83dee74e9ced7e216673ecc70fe487) - **parser**: DROP pipe syntax *(PR [#5226](https://github.com/tobymao/sqlglot/pull/5226) by [@geooo109](https://github.com/geooo109))* - [`9885729`](https://github.com/tobymao/sqlglot/commit/988572954135c68dc021b992c815024ce3debaff) - **parser**: SET pipe syntax *(PR [#5236](https://github.com/tobymao/sqlglot/pull/5236) by [@geooo109](https://github.com/geooo109))* - [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - **oracle**: transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: 
*addresses issue [#5241](https://github.com/tobymao/sqlglot/issues/5241) opened by [@Akshat-2512](https://github.com/Akshat-2512)* - [`0d19544`](https://github.com/tobymao/sqlglot/commit/0d19544317c1056b17fb089d4be9b5bddfe6feb3) - add Microsoft Fabric dialect, a case sensitive version of TSQL *(PR [#5247](https://github.com/tobymao/sqlglot/pull/5247) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`249dbc9`](https://github.com/tobymao/sqlglot/commit/249dbc906adc6b20932dc8efe83f6f4d23ef8c1e) - **parser**: start with SELECT and nested pipe syntax *(PR [#5248](https://github.com/tobymao/sqlglot/pull/5248) by [@geooo109](https://github.com/geooo109))* - [`f5b5b93`](https://github.com/tobymao/sqlglot/commit/f5b5b9338eb92b7aa2c9b4c92c6138c2c05e1c40) - **fabric**: implement type mappings for unsupported Fabric types *(PR [#5249](https://github.com/tobymao/sqlglot/pull/5249) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`78fcea1`](https://github.com/tobymao/sqlglot/commit/78fcea13b5eb1734a15a254875bc80ad8063b0b0) - **spark, databricks**: parse brackets as placeholder *(PR [#5256](https://github.com/tobymao/sqlglot/pull/5256) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5251](https://github.com/tobymao/sqlglot/issues/5251) opened by [@aersam](https://github.com/aersam)* - [`7d71387`](https://github.com/tobymao/sqlglot/commit/7d7138780db82e7a75949d29282b944e739ad99d) - **fabric**: Add precision cap to temporal data types *(PR [#5250](https://github.com/tobymao/sqlglot/pull/5250) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`e8cf793`](https://github.com/tobymao/sqlglot/commit/e8cf79305d398f25640ef3c07dd8b32997cb0167) - **duckdb**: Transpile Snowflake's TO_CHAR if format is in Snowflake.TIME_MAPPING *(PR [#5257](https://github.com/tobymao/sqlglot/pull/5257) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5255](https://github.com/tobymao/sqlglot/issues/5255) 
opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`0cdfe64`](https://github.com/tobymao/sqlglot/commit/0cdfe642e3cb996c5ac48cc055af2862340dcf56) - add Exasol dialect (pass 1: string type mapping) *(PR [#5264](https://github.com/tobymao/sqlglot/pull/5264) by [@nnamdi16](https://github.com/nnamdi16))* - [`eea1570`](https://github.com/tobymao/sqlglot/commit/eea1570ba530517a95699092ccd9ce6a856f5e84) - **tsql**: add support for SYSDATETIMEOFFSET closes [#5272](https://github.com/tobymao/sqlglot/pull/5272) *(PR [#5273](https://github.com/tobymao/sqlglot/pull/5273) by [@georgesittas](https://github.com/georgesittas))* - [`3d3ccc5`](https://github.com/tobymao/sqlglot/commit/3d3ccc52a40536b9ac4e974f1592dffe5a7568f9) - **hive**: Transpile exp.PosExplode pos column alias *(PR [#5274](https://github.com/tobymao/sqlglot/pull/5274) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5271](https://github.com/tobymao/sqlglot/issues/5271) opened by [@charlie-liner](https://github.com/charlie-liner)* - [`1c48c09`](https://github.com/tobymao/sqlglot/commit/1c48c09fd836db40bba6c46d0e9969937ce96587) - **exasol**: added datatype mappings and test for exasol dialect. 
*(PR [#5270](https://github.com/tobymao/sqlglot/pull/5270) by [@nnamdi16](https://github.com/nnamdi16))* - [`883fcb1`](https://github.com/tobymao/sqlglot/commit/883fcb137583f6d36f3a70a1343780bb40bf6f81) - **databricks**: GROUP_CONCAT to LISTAGG *(PR [#5284](https://github.com/tobymao/sqlglot/pull/5284) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5281](https://github.com/tobymao/sqlglot/issues/5281) opened by [@wKollendorf](https://github.com/wKollendorf)* - [`21ef897`](https://github.com/tobymao/sqlglot/commit/21ef8974426d9f3562ade0bd2c8448bb440bee27) - **fabric**: implement UnixToTime transformation to DATEADD syntax *(PR [#5269](https://github.com/tobymao/sqlglot/pull/5269) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`0ff95c5`](https://github.com/tobymao/sqlglot/commit/0ff95c5903907c9ab30b7850bb3b962bc6da2bab) - add parsing/transpilation support for the REPLACE function *(PR [#5289](https://github.com/tobymao/sqlglot/pull/5289) by [@rahulj51](https://github.com/rahulj51))* - [`1b0631c`](https://github.com/tobymao/sqlglot/commit/1b0631c2b4516a9ceb81af6173790dd09269b635) - **exasol**: implemented the Mod function *(PR [#5292](https://github.com/tobymao/sqlglot/pull/5292) by [@nnamdi16](https://github.com/nnamdi16))* - [`ba7bf39`](https://github.com/tobymao/sqlglot/commit/ba7bf39966b519e11cde02a3c1f720598469e616) - **exasol**: implemented BIT_AND function with test *(PR [#5294](https://github.com/tobymao/sqlglot/pull/5294) by [@nnamdi16](https://github.com/nnamdi16))* - [`fb4122e`](https://github.com/tobymao/sqlglot/commit/fb4122e80d1995bb87401e9ebe3749078c026a06) - **exasol**: add bitwiseOr function to exasol dialect *(PR [#5297](https://github.com/tobymao/sqlglot/pull/5297) by [@nnamdi16](https://github.com/nnamdi16))* - [`c103b23`](https://github.com/tobymao/sqlglot/commit/c103b2304dca552ac8cf6733156db8b59d3614f3) - add support for `SUBSTRING_INDEX` *(PR 
[#5296](https://github.com/tobymao/sqlglot/pull/5296) by [@ankur334](https://github.com/ankur334))* - [`4752f3a`](https://github.com/tobymao/sqlglot/commit/4752f3a6b715d8b6968c8f1f05f6ccdfb7351071) - **exasol**: added bit_xor built in exasol function to exasol dialect in sqlglot *(PR [#5298](https://github.com/tobymao/sqlglot/pull/5298) by [@nnamdi16](https://github.com/nnamdi16))* - [`09bd610`](https://github.com/tobymao/sqlglot/commit/09bd6101de21ed86c9fd6df0f63e8bca2666dd81) - **parser**: annotate type of ARRAY_CONCAT_AGG *(PR [#5299](https://github.com/tobymao/sqlglot/pull/5299) by [@geooo109](https://github.com/geooo109))* - [`ad0311a`](https://github.com/tobymao/sqlglot/commit/ad0311a7f8b0b3c5746c29d816b58578a892dd33) - **exasol**: added bit_not exasol built in function. *(PR [#5300](https://github.com/tobymao/sqlglot/pull/5300) by [@nnamdi16](https://github.com/nnamdi16))* - [`a7bd823`](https://github.com/tobymao/sqlglot/commit/a7bd8234e0dd02abfe6fa56287e7bda14a549e5a) - **parser**: annotate type of ARRAY_TO_STRING *(PR [#5301](https://github.com/tobymao/sqlglot/pull/5301) by [@geooo109](https://github.com/geooo109))* - [`2aa2182`](https://github.com/tobymao/sqlglot/commit/2aa21820f7d3a26cc4f47c1c757a9b7c97dd0382) - **exasol**: added BIT_LSHIFT built in function to exasol dialect *(PR [#5302](https://github.com/tobymao/sqlglot/pull/5302) by [@nnamdi16](https://github.com/nnamdi16))* - [`c3d9ef2`](https://github.com/tobymao/sqlglot/commit/c3d9ef2cb2d004b57c64af4f3f1bac41f1890737) - **exasol**: added the bit_rshift built in exasol function *(PR [#5304](https://github.com/tobymao/sqlglot/pull/5304) by [@nnamdi16](https://github.com/nnamdi16))* - [`6b42353`](https://github.com/tobymao/sqlglot/commit/6b4235340a2e432015c27b2aeadbdcb930bfa6b0) - **parser**: annotate type of ARRAY_FIRST, ARRAY_LAST *(PR [#5303](https://github.com/tobymao/sqlglot/pull/5303) by [@geooo109](https://github.com/geooo109))* - 
[`f5b7cc6`](https://github.com/tobymao/sqlglot/commit/f5b7cc6d2f8d73bff4e42e242d3ad3db41d899cc) - **exasol**: added `EVERY` built in function *(PR [#5305](https://github.com/tobymao/sqlglot/pull/5305) by [@nnamdi16](https://github.com/nnamdi16))* - [`d3f04d6`](https://github.com/tobymao/sqlglot/commit/d3f04d6766281ecb7ced9a5e812ab765d7b699be) - add Dremio dialect *(PR [#5277](https://github.com/tobymao/sqlglot/pull/5277) by [@mateuszpoleski](https://github.com/mateuszpoleski))* - [`3d8e478`](https://github.com/tobymao/sqlglot/commit/3d8e478eac3df6a94c87cd610f96c5f19697a9bf) - **exasol**: added edit_distance built in function to exasol dialect *(PR [#5310](https://github.com/tobymao/sqlglot/pull/5310) by [@nnamdi16](https://github.com/nnamdi16))* - [`db9b61e`](https://github.com/tobymao/sqlglot/commit/db9b61e4ecaa0600418eb90f637fb8b06b08c399) - **parser**: parse, annotate type for ARRAY_REVERSE *(PR [#5306](https://github.com/tobymao/sqlglot/pull/5306) by [@geooo109](https://github.com/geooo109))* - [`5612a6d`](https://github.com/tobymao/sqlglot/commit/5612a6da6dee3545f3600db1e5b87c9450952eba) - add support for SPACE *(PR [#5308](https://github.com/tobymao/sqlglot/pull/5308) by [@ankur334](https://github.com/ankur334))* - [`f148c9e`](https://github.com/tobymao/sqlglot/commit/f148c9e64ae0d4df96323271729fa6a6ca68a671) - **duckdb**: Transpile Spark's `exp.PosExplode` *(PR [#5311](https://github.com/tobymao/sqlglot/pull/5311) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5309](https://github.com/tobymao/sqlglot/issues/5309) opened by [@nimrodolev](https://github.com/nimrodolev)* - [`179a278`](https://github.com/tobymao/sqlglot/commit/179a278c7fdbc29105e37f132e6f03e18627f769) - **exasol**: added the regexp_replace function *(PR [#5313](https://github.com/tobymao/sqlglot/pull/5313) by [@nnamdi16](https://github.com/nnamdi16))* - [`8a2f65d`](https://github.com/tobymao/sqlglot/commit/8a2f65d6b2b68ad5ba45a5aed5e56c4dc0fea6fc) - 
**parser**: parse and annotate type for ARRAY_SLICE *(PR [#5312](https://github.com/tobymao/sqlglot/pull/5312) by [@geooo109](https://github.com/geooo109))* - [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))* - [`950c15d`](https://github.com/tobymao/sqlglot/commit/950c15db5ff64b6f11036f8003db3e5b1fb3afc3) - **exasol**: add var_pop built in function to exasol dialect *(PR [#5328](https://github.com/tobymao/sqlglot/pull/5328) by [@nnamdi16](https://github.com/nnamdi16))* - [`c4ca182`](https://github.com/tobymao/sqlglot/commit/c4ca182ad637b7a22b55d0ecf320c5a09ec5d56c) - **optimizer**: annotate type for FROM_BASE64 *(PR [#5329](https://github.com/tobymao/sqlglot/pull/5329) by [@geooo109](https://github.com/geooo109))* - [`0992e99`](https://github.com/tobymao/sqlglot/commit/0992e99f99aeb4ecc97e6918a23b8fd524311ed9) - **exasol**: Add support APPROXIMATE_COUNT_DISTINCT functions in exasol dialect *(PR [#5330](https://github.com/tobymao/sqlglot/pull/5330) by [@nnamdi16](https://github.com/nnamdi16))* - [`7b72bbe`](https://github.com/tobymao/sqlglot/commit/7b72bbed3a0930e11ce4a0fdd9082de715326ac9) - **optimizer**: annotate type for ANY_VALUE *(PR [#5331](https://github.com/tobymao/sqlglot/pull/5331) by [@geooo109](https://github.com/geooo109))* - [`c0d57e7`](https://github.com/tobymao/sqlglot/commit/c0d57e747bf5d2bed7ba2007ac2092d5797ee038) - **optimizer**: annotate type for CHR *(PR [#5332](https://github.com/tobymao/sqlglot/pull/5332) by [@geooo109](https://github.com/geooo109))* - [`d65b5c2`](https://github.com/tobymao/sqlglot/commit/d65b5c22c29416007cca0154fd35f1d4b5efc929) - **optimizer**: annotate type for COUNTIF *(PR [#5334](https://github.com/tobymao/sqlglot/pull/5334) by [@geooo109](https://github.com/geooo109))* - 
[`521b705`](https://github.com/tobymao/sqlglot/commit/521b7053213df8577f609409af2552c2ff4fd8c9) - **optimizer**: annotate type for GENERATE_ARRAY *(PR [#5335](https://github.com/tobymao/sqlglot/pull/5335) by [@geooo109](https://github.com/geooo109))* - [`5fb26c5`](https://github.com/tobymao/sqlglot/commit/5fb26c58026018360f36a732394b612a3baac38b) - **optimizer**: annotate type for INT64 *(PR [#5339](https://github.com/tobymao/sqlglot/pull/5339) by [@geooo109](https://github.com/geooo109))* - [`cff9b55`](https://github.com/tobymao/sqlglot/commit/cff9b55d70a3b85057e6385c93c0814eaa50f40b) - **optimizer**: annotate type for LOGICAL_AND and LOGICAL_OR *(PR [#5340](https://github.com/tobymao/sqlglot/pull/5340) by [@geooo109](https://github.com/geooo109))* - [`b94a6f9`](https://github.com/tobymao/sqlglot/commit/b94a6f9228aa730296c3152179bfbf3503521063) - **optimizer**: annotate type for MAKE_INTERVAL *(PR [#5341](https://github.com/tobymao/sqlglot/pull/5341) by [@geooo109](https://github.com/geooo109))* - [`2c9a7c6`](https://github.com/tobymao/sqlglot/commit/2c9a7c6f0b097a9e8514fc5e2af21c52f145920c) - **optimizer**: annotate type for LAST_VALUE *(PR [#5336](https://github.com/tobymao/sqlglot/pull/5336) by [@geooo109](https://github.com/geooo109))* - [`d862a28`](https://github.com/tobymao/sqlglot/commit/d862a28b0a30f0c5774351f38a61f195120ad904) - **optimizer**: annotate type for TO_BASE64 *(PR [#5342](https://github.com/tobymao/sqlglot/pull/5342) by [@geooo109](https://github.com/geooo109))* - [`85888c1`](https://github.com/tobymao/sqlglot/commit/85888c1b7cbbd0eee179d902a54fbd2a899cc16b) - **optimizer**: annotate type for UNIX_DATE *(PR [#5343](https://github.com/tobymao/sqlglot/pull/5343) by [@geooo109](https://github.com/geooo109))* - [`8a214e0`](https://github.com/tobymao/sqlglot/commit/8a214e0859dfb715fcef0dd6b2d6392012b1f3fb) - **optimizer**: annotate type for UNIX_SECONDS *(PR [#5344](https://github.com/tobymao/sqlglot/pull/5344) by 
[@geooo109](https://github.com/geooo109))* - [`625cb74`](https://github.com/tobymao/sqlglot/commit/625cb74b69e99ea1a707549366ea960d759848c9) - **optimizer**: annotate type for STARTS_WITH *(PR [#5345](https://github.com/tobymao/sqlglot/pull/5345) by [@geooo109](https://github.com/geooo109))* - [`0337c4d`](https://github.com/tobymao/sqlglot/commit/0337c4d46e9e85d951fc9565a47e338106543711) - **optimizer**: annotate type for SHA and SHA2 *(PR [#5346](https://github.com/tobymao/sqlglot/pull/5346) by [@geooo109](https://github.com/geooo109))* - [`835d9e6`](https://github.com/tobymao/sqlglot/commit/835d9e6c9ffc05de642113b566a1a4eb9cc38470) - add case-insensitive uppercase normalization strategy *(PR [#5349](https://github.com/tobymao/sqlglot/pull/5349) by [@georgesittas](https://github.com/georgesittas))* - [`f80493e`](https://github.com/tobymao/sqlglot/commit/f80493efb168f600dc92da439d84e820f303e5aa) - **exasol**: Add TO_CHAR function support in exasol dialect *(PR [#5350](https://github.com/tobymao/sqlglot/pull/5350) by [@nnamdi16](https://github.com/nnamdi16))* - [`cea6a24`](https://github.com/tobymao/sqlglot/commit/cea6a240292d6e31bc73179d433835483e65747a) - **teradata**: add FORMAT phrase parsing *(PR [#5348](https://github.com/tobymao/sqlglot/pull/5348) by [@readjfb](https://github.com/readjfb))* - [`eae64e1`](https://github.com/tobymao/sqlglot/commit/eae64e1629a276bf3885749991869b6c6dea8a8b) - **duckdb**: support new lambda syntax *(PR [#5359](https://github.com/tobymao/sqlglot/pull/5359) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5357](https://github.com/tobymao/sqlglot/issues/5357) opened by [@aersam](https://github.com/aersam)* - [`e77991d`](https://github.com/tobymao/sqlglot/commit/e77991d92fad56014ba2778c71e5e446d4dd090e) - **duckdb**: Add support for SET VARIABLE *(PR [#5360](https://github.com/tobymao/sqlglot/pull/5360) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue 
[#5356](https://github.com/tobymao/sqlglot/issues/5356) opened by [@aersam](https://github.com/aersam)* - [`c1d3d61`](https://github.com/tobymao/sqlglot/commit/c1d3d61d00f00d2030107689d8704f7a488a80a7) - **optimizer**: annotate type for CORR *(PR [#5364](https://github.com/tobymao/sqlglot/pull/5364) by [@geooo109](https://github.com/geooo109))* - [`c1e8677`](https://github.com/tobymao/sqlglot/commit/c1e867767a006e774a2c200c10eb85b3fbd8a372) - **optimizer**: annotate type for COVAR_POP *(PR [#5365](https://github.com/tobymao/sqlglot/pull/5365) by [@geooo109](https://github.com/geooo109))* - [`e110ef4`](https://github.com/tobymao/sqlglot/commit/e110ef4f774e6ab8de6d4c86e5d306ab53fe895b) - **optimizer**: annotate type for COVAR_SAMP *(PR [#5367](https://github.com/tobymao/sqlglot/pull/5367) by [@geooo109](https://github.com/geooo109))* - [`5b59c16`](https://github.com/tobymao/sqlglot/commit/5b59c16528fb1904c64bef0ca6307bb6a95e5a2c) - **optimizer**: annotate type for DATETIME *(PR [#5369](https://github.com/tobymao/sqlglot/pull/5369) by [@geooo109](https://github.com/geooo109))* - [`47176ce`](https://github.com/tobymao/sqlglot/commit/47176ce6b9a4c1722f285034b08a6ae782129894) - **optimizer**: annotate type for ENDS_WITH *(PR [#5370](https://github.com/tobymao/sqlglot/pull/5370) by [@geooo109](https://github.com/geooo109))* - [`1fd757e`](https://github.com/tobymao/sqlglot/commit/1fd757e6279315f00e719974613313a6e43dfe55) - **fabric**: Ensure TIMESTAMPTZ is used with AT TIME ZONE *(PR [#5362](https://github.com/tobymao/sqlglot/pull/5362) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`2cce53d`](https://github.com/tobymao/sqlglot/commit/2cce53d59968f0a4bb3e9599ade93b0e6a140c68) - **optimizer**: annotate type for LAG *(PR [#5371](https://github.com/tobymao/sqlglot/pull/5371) by [@geooo109](https://github.com/geooo109))* - [`a3227de`](https://github.com/tobymao/sqlglot/commit/a3227de3fc57d559eb899dec08af01f85b470ce4) - improve transpilation of `ROUND(x, y)` to 
Postgres *(PR [#5368](https://github.com/tobymao/sqlglot/pull/5368) by [@blecourt-private](https://github.com/blecourt-private))* - :arrow_lower_right: *addresses issue [#5366](https://github.com/tobymao/sqlglot/issues/5366) opened by [@blecourt-private](https://github.com/blecourt-private)* ### :bug: Bug Fixes - [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - **optimizer**: avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))* - [`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - **optimizer**: make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))* - [`f7401fd`](https://github.com/tobymao/sqlglot/commit/f7401fdc29a35738eb23f424ceba03463a4d8af9) - **bigquery**: avoid getting stuck in infinite loop when parsing tables *(PR [#5130](https://github.com/tobymao/sqlglot/pull/5130) by [@georgesittas](https://github.com/georgesittas))* - [`e9b3156`](https://github.com/tobymao/sqlglot/commit/e9b3156aa1ed95fdee4c6b419134d8ca746964b6) - **athena**: Handle transpilation of FileFormatProperty from dialects that treat it as a variable and not a string literal *(PR [#5133](https://github.com/tobymao/sqlglot/pull/5133) by [@erindru](https://github.com/erindru))* - [`a3fccd9`](https://github.com/tobymao/sqlglot/commit/a3fccd9be294499b53477da931f8b097cdbe09fc) - **snowflake**: generate SELECT for UNNEST without JOIN or FROM *(PR [#5138](https://github.com/tobymao/sqlglot/pull/5138) by [@geooo109](https://github.com/geooo109))* - [`993919d`](https://github.com/tobymao/sqlglot/commit/993919d05d5d3c814471607b56831bb65d349eb4) - **snowflake**: Properly transpile ARRAY_AGG, IGNORE/RESPECT NULLS *(PR [#5137](https://github.com/tobymao/sqlglot/pull/5137) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`6e57619`](https://github.com/tobymao/sqlglot/commit/6e57619f85375e789bb39a6478aa01cd7c7758f0) - **snowflake**: Transpile ISOWEEK to WEEKISO *(PR [#5139](https://github.com/tobymao/sqlglot/pull/5139) by [@VaggelisD](https://github.com/VaggelisD))* - [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - **bigquery**: recognize ARRAY_CONCAT_AGG as an aggregate function *(PR [#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))* - [`f3aeb37`](https://github.com/tobymao/sqlglot/commit/f3aeb374351a0b1b3c75945718d8ea42f8926b62) - **tsql**: properly parse and generate ALTER SET *(PR [#5143](https://github.com/tobymao/sqlglot/pull/5143) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5135](https://github.com/tobymao/sqlglot/issues/5135) opened by [@codykonior](https://github.com/codykonior)* - [`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5129](https://github.com/tobymao/sqlglot/issues/5129) opened by [@Mevrael](https://github.com/Mevrael)* - [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - **mysql**: preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5127](https://github.com/tobymao/sqlglot/issues/5127) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`4f8c73d`](https://github.com/tobymao/sqlglot/commit/4f8c73d60eecebc601c60ee8c7819458435e34b8) - **hive**: STRUCT column names and data type should be separated by ':' in hive *(PR 
[#5147](https://github.com/tobymao/sqlglot/pull/5147) by [@tsamaras](https://github.com/tsamaras))* - [`e2a488f`](https://github.com/tobymao/sqlglot/commit/e2a488f48f3e036566462463bbc58cc6a1c7492e) - Error on columns mismatch in pushdown_projections ignores dialect *(PR [#5151](https://github.com/tobymao/sqlglot/pull/5151) by [@snovik75](https://github.com/snovik75))* - [`1a35365`](https://github.com/tobymao/sqlglot/commit/1a35365a3bb1ef56e8da0023271cbe3108e0ccb1) - avoid generating nested comments when not supported *(PR [#5158](https://github.com/tobymao/sqlglot/pull/5158) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5132](https://github.com/tobymao/sqlglot/issues/5132) opened by [@patricksurry](https://github.com/patricksurry)* - [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - **rust-tokenizer**: return token vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5148](https://github.com/tobymao/sqlglot/issues/5148) opened by [@kamoser](https://github.com/kamoser)* - [`760a606`](https://github.com/tobymao/sqlglot/commit/760a6062d5f259488e471af9c1d33e200066e9dc) - **postgres**: support decimal values in INTERVAL expressions fixes [#5168](https://github.com/tobymao/sqlglot/pull/5168) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - **parser**: virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5173](https://github.com/tobymao/sqlglot/issues/5173) opened by [@suyah](https://github.com/suyah)* - 
[`c87ae02`](https://github.com/tobymao/sqlglot/commit/c87ae02aa263be8463ca7283ebd090385a4bfd59) - **sqlite**: Add REPLACE to command tokens *(PR [#5192](https://github.com/tobymao/sqlglot/pull/5192) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5187](https://github.com/tobymao/sqlglot/issues/5187) opened by [@stefanmalanik](https://github.com/stefanmalanik)* - [`4b89afd`](https://github.com/tobymao/sqlglot/commit/4b89afdcc0063e70cbc64165c7f1f5102afaa87c) - **starrocks**: array_agg_transpilation_fix *(PR [#5190](https://github.com/tobymao/sqlglot/pull/5190) by [@Swathiraj23](https://github.com/Swathiraj23))* - [`461b054`](https://github.com/tobymao/sqlglot/commit/461b0548832ab8d916c3a6638f27a49f681109fe) - **postgres**: support use_spheroid argument in ST_DISTANCE *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99bbae3`](https://github.com/tobymao/sqlglot/commit/99bbae370329c5f5cd132b711c714359cf96ba58) - **sqlite**: allow ALTER RENAME without COLUMN keyword fixes [#5195](https://github.com/tobymao/sqlglot/pull/5195) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - **hive**: preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))* - [`d0eeb26`](https://github.com/tobymao/sqlglot/commit/d0eeb2639e771e8f8b6feabd41c65f16ed5a9829) - eliminate_join_marks has multiple issues fixes [#5188](https://github.com/tobymao/sqlglot/pull/5188) *(PR [#5189](https://github.com/tobymao/sqlglot/pull/5189) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#5188](https://github.com/tobymao/sqlglot/issues/5188) opened by [@snovik75](https://github.com/snovik75)* - 
[`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - **parser**: CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))* - [`77e9d9a`](https://github.com/tobymao/sqlglot/commit/77e9d9a0269e2013379967cf2f46fbd79c036277) - **mysql**: properly parse STORED/VIRTUAL computed columns *(PR [#5210](https://github.com/tobymao/sqlglot/pull/5210) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5203](https://github.com/tobymao/sqlglot/issues/5203) opened by [@mdebski](https://github.com/mdebski)* - [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - **parser**: support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5200](https://github.com/tobymao/sqlglot/issues/5200) opened by [@mateuszpoleski](https://github.com/mateuszpoleski)* - [`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - **postgres**: Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5212](https://github.com/tobymao/sqlglot/issues/5212) opened by [@NickCrews](https://github.com/NickCrews)* - [`f37c0b1`](https://github.com/tobymao/sqlglot/commit/f37c0b1197321dd610648ce652a171ab063deeeb) - **snowflake**: ensure a standalone GET() expression can be parsed *(PR [#5219](https://github.com/tobymao/sqlglot/pull/5219) by [@georgesittas](https://github.com/georgesittas))* - [`28fed58`](https://github.com/tobymao/sqlglot/commit/28fed586a39df83aade4792217743a1a859fd039) - **optimizer**: UnboundLocalError in scope module *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`809e05a`](https://github.com/tobymao/sqlglot/commit/809e05a743d5a2904a1d6f6813f24ca7549ac7ef) - **snowflake**: preserve STRTOK_TO_ARRAY roundtrip *(commit by [@georgesittas](https://github.com/georgesittas))* - [`df73a79`](https://github.com/tobymao/sqlglot/commit/df73a79a2ca3ba859b8aba5e3d0f6ed269874a63) - **tsql**: Retain limit clause in subquery expression. *(PR [#5227](https://github.com/tobymao/sqlglot/pull/5227) by [@MarcusRisanger](https://github.com/MarcusRisanger))* - [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - **duckdb**: Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5228](https://github.com/tobymao/sqlglot/issues/5228) opened by [@greybeam-bot](https://github.com/greybeam-bot)* - [`1b4c083`](https://github.com/tobymao/sqlglot/commit/1b4c083fff8d7c44bf1dbba28c1225fa1e28c4d2) - **athena**: include Hive string escapes in the tokenizer *(PR [#5233](https://github.com/tobymao/sqlglot/pull/5233) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5232](https://github.com/tobymao/sqlglot/issues/5232) opened by [@ligfx](https://github.com/ligfx)* - [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - **optimizer**: resolve table "columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5207](https://github.com/tobymao/sqlglot/issues/5207) opened by [@Bladieblah](https://github.com/Bladieblah)* - [`781539d`](https://github.com/tobymao/sqlglot/commit/781539d5cbe58142ed6688f1522fc4ed31da0a56) - **duckdb**: Generate correct DETACH syntax if IF EXISTS is set *(PR [#5235](https://github.com/tobymao/sqlglot/pull/5235) by 
[@erindru](https://github.com/erindru))* - [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by [@georgesittas](https://github.com/georgesittas))* - [`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - **presto**: cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5252](https://github.com/tobymao/sqlglot/issues/5252) opened by [@agni-sairent](https://github.com/agni-sairent)* - [`847248d`](https://github.com/tobymao/sqlglot/commit/847248dd1b66e3a8f60c23a4488be85dfdef4113) - format ADD CONSTRAINT clause properly fixes [#5260](https://github.com/tobymao/sqlglot/pull/5260) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`db4e0ec`](https://github.com/tobymao/sqlglot/commit/db4e0ece950a6a1f543d8ecad48a7d4b1d6872be) - **tsql**: convert information schema keywords to uppercase for consistency *(PR [#5263](https://github.com/tobymao/sqlglot/pull/5263) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`8de87e3`](https://github.com/tobymao/sqlglot/commit/8de87e3f755a40b600aa94ee2c30cf697ef7c43c) - **redshift**: handle scale parameter in to_timestamp *(PR [#5266](https://github.com/tobymao/sqlglot/pull/5266) by [@MatiasCasaliSplit](https://github.com/MatiasCasaliSplit))* - [`e32f709`](https://github.com/tobymao/sqlglot/commit/e32f70992b5058efb93f5d2b6106fb00b810f576) - **hive**: Fix exp.PosExplode alias order *(PR [#5279](https://github.com/tobymao/sqlglot/pull/5279) by [@VaggelisD](https://github.com/VaggelisD))* - [`3dd9f8e`](https://github.com/tobymao/sqlglot/commit/3dd9f8e78ecbfde0dd7fc6fefcc09c8cb99bcd7b) - **fabric**: Type mismatches and precision error *(PR 
[#5280](https://github.com/tobymao/sqlglot/pull/5280) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`9a95af1`](https://github.com/tobymao/sqlglot/commit/9a95af1c725cd70ffa8206f1d88452a7faab93b2) - **snowflake**: only cast strings to timestamp for TO_CHAR (TimeToStr) *(PR [#5283](https://github.com/tobymao/sqlglot/pull/5283) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5282](https://github.com/tobymao/sqlglot/issues/5282) opened by [@wedotech-ashley](https://github.com/wedotech-ashley)* - [`8af4790`](https://github.com/tobymao/sqlglot/commit/8af479017ccde16049c897ae5d322d4a69843b65) - **tsql**: Fix parsing of ADD CONSTRAINT *(PR [#5288](https://github.com/tobymao/sqlglot/pull/5288) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4813](https://github.com/TobikoData/sqlmesh/issues/4813) opened by [@bnstewrt](https://github.com/bnstewrt)* - [`18aea08`](https://github.com/tobymao/sqlglot/commit/18aea08f7dcaa887bcf29886cd3b3bc2850a3679) - **scope**: include bigquery unnest aliases in selected sources *(PR [#5285](https://github.com/tobymao/sqlglot/pull/5285) by [@georgesittas](https://github.com/georgesittas))* - [`ba4a234`](https://github.com/tobymao/sqlglot/commit/ba4a234bfabdd8161b96a29436a50e0eb04c2dc2) - **fabric**: ignore Date cap *(PR [#5290](https://github.com/tobymao/sqlglot/pull/5290) by [@fresioAS](https://github.com/fresioAS))* - [`dc03649`](https://github.com/tobymao/sqlglot/commit/dc03649bca0b7a090254976182a03c21dd2269ba) - **bigquery**: only coerce time var -like units into strings for DATE_TRUNC *(PR [#5291](https://github.com/tobymao/sqlglot/pull/5291) by [@georgesittas](https://github.com/georgesittas))* - [`5724538`](https://github.com/tobymao/sqlglot/commit/5724538f278b2178114b88850251afd7c3db0dda) - **bigquery**: ARRAY_CONCAT type annotation *(PR [#5293](https://github.com/tobymao/sqlglot/pull/5293) by [@geooo109](https://github.com/geooo109))* - 
[`0a6afcd`](https://github.com/tobymao/sqlglot/commit/0a6afcd90c663aaef9b385fc12ccd19dbf6388cc) - use re-entrant lock in dialects/__init__ to avoid deadlocks *(PR [#5322](https://github.com/tobymao/sqlglot/pull/5322) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5321](https://github.com/tobymao/sqlglot/issues/5321) opened by [@jc-5s](https://github.com/jc-5s)* - [`599ca81`](https://github.com/tobymao/sqlglot/commit/599ca8101f48805098cbdf808ac2923a8246066b) - **parser**: avoid CTE values ALIAS gen, when ALIAS exists *(PR [#5323](https://github.com/tobymao/sqlglot/pull/5323) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5318](https://github.com/tobymao/sqlglot/issues/5318) opened by [@ankur334](https://github.com/ankur334)* - [`5a0f589`](https://github.com/tobymao/sqlglot/commit/5a0f589a0fdb6743c3be2f98b74a34780f51332b) - **spark**: distinguish STORED AS from USING *(PR [#5320](https://github.com/tobymao/sqlglot/pull/5320) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5317](https://github.com/tobymao/sqlglot/issues/5317) opened by [@cosinequanon](https://github.com/cosinequanon)* - [`cbc79c2`](https://github.com/tobymao/sqlglot/commit/cbc79c2a47c46370de0378b8bae61f4f3c17ca82) - preserve ORDER BY comments fixes [#5326](https://github.com/tobymao/sqlglot/pull/5326) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`fa69583`](https://github.com/tobymao/sqlglot/commit/fa69583d8b4f5801d05c21a92b43dea272a3ef49) - **optimizer**: avoid qualifying CTE *(PR [#5327](https://github.com/tobymao/sqlglot/pull/5327) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5319](https://github.com/tobymao/sqlglot/issues/5319) opened by [@naamamaoz](https://github.com/naamamaoz)* - [`29cce43`](https://github.com/tobymao/sqlglot/commit/29cce43e72451feeb8788ac2660658075bf59093) - comment lost before GROUP, JOIN and HAVING *(PR 
[#5338](https://github.com/tobymao/sqlglot/pull/5338) by [@chiiips](https://github.com/chiiips))* - [`509b741`](https://github.com/tobymao/sqlglot/commit/509b74173f678842e7550c75c4d8d906c879fb12) - preserve multi-arg DECODE function instead of converting to CASE *(PR [#5352](https://github.com/tobymao/sqlglot/pull/5352) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5351](https://github.com/tobymao/sqlglot/issues/5351) opened by [@kentmaxwell](https://github.com/kentmaxwell)* - [`188d446`](https://github.com/tobymao/sqlglot/commit/188d446ca65125c63bbfff96d15d91078deb6b4a) - **optimizer**: downstream column for PIVOT *(PR [#5363](https://github.com/tobymao/sqlglot/pull/5363) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5354](https://github.com/tobymao/sqlglot/issues/5354) opened by [@suresh-summation](https://github.com/suresh-summation)* ### :recycle: Refactors - [`86c6b90`](https://github.com/tobymao/sqlglot/commit/86c6b90d21b204b4376639affa142e8cee509065) - **tsql**: XML_OPTIONS *(commit by [@geooo109](https://github.com/geooo109))* - [`aac70aa`](https://github.com/tobymao/sqlglot/commit/aac70aaaa8d840c267129e2307ccb65058cef0c9) - **parser**: simpler _parse_pipe_syntax_select *(commit by [@geooo109](https://github.com/geooo109))* - [`8d118ea`](https://github.com/tobymao/sqlglot/commit/8d118ead9c15e7b2b4b51b7cf93cab94e61c2625) - **athena**: route statements to hive/trino depending on their type *(PR [#5314](https://github.com/tobymao/sqlglot/pull/5314) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5267](https://github.com/tobymao/sqlglot/issues/5267) opened by [@cpcloud](https://github.com/cpcloud)* ### :wrench: Chores - [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update PyO3 version *(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by 
[@benfdking](https://github.com/benfdking))* - :arrow_lower_right: *addresses issue [#5134](https://github.com/tobymao/sqlglot/issues/5134) opened by [@mgorny](https://github.com/mgorny)* - [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`5752a87`](https://github.com/tobymao/sqlglot/commit/5752a87406b736317e4dc5cce9ae05cbc5c19547) - update benchmarking framework *(PR [#5146](https://github.com/tobymao/sqlglot/pull/5146) by [@benfdking](https://github.com/benfdking))* - [`0ae297a`](https://github.com/tobymao/sqlglot/commit/0ae297a01262cf323e225fe578bdeab2230c6fd5) - compare performance on main vs pr branch *(PR [#5149](https://github.com/tobymao/sqlglot/pull/5149) by [@georgesittas](https://github.com/georgesittas))* - [`180963b`](https://github.com/tobymao/sqlglot/commit/180963b8cf25d9ff83d2347859b7f46398af5000) - handle pipe syntax unsupported operators more gracefully *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6c8d61a`](https://github.com/tobymao/sqlglot/commit/6c8d61ae1ef5b645835ccd683063845dd801e8d2) - include optimization benchmarks *(PR [#5152](https://github.com/tobymao/sqlglot/pull/5152) by [@georgesittas](https://github.com/georgesittas))* - [`bc5c66c`](https://github.com/tobymao/sqlglot/commit/bc5c66c9210a472147d98a94c34b4bb582ade8b1) - Run benchmark job if /benchmark comment *(PR [#5164](https://github.com/tobymao/sqlglot/pull/5164) by [@VaggelisD](https://github.com/VaggelisD))* - [`742b2b7`](https://github.com/tobymao/sqlglot/commit/742b2b770b88a2e901d2f84af00db821da441e4c) - Fix benchmark CI to include issue number *(PR [#5166](https://github.com/tobymao/sqlglot/pull/5166) by [@VaggelisD](https://github.com/VaggelisD))* - [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump sqlglotrs to 0.6.0 *(commit by
[@georgesittas](https://github.com/georgesittas))* - [`440590b`](https://github.com/tobymao/sqlglot/commit/440590bf92ab1281f50b96a1400cbca695d40f0c) - bump sqlglotrs to 0.6.1 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`83de4e1`](https://github.com/tobymao/sqlglot/commit/83de4e11bc1547aa22b275b20c0326dfbe43b2b8) - improve benchmark result displaying *(PR [#5176](https://github.com/tobymao/sqlglot/pull/5176) by [@georgesittas](https://github.com/georgesittas))* - [`5d5dc2f`](https://github.com/tobymao/sqlglot/commit/5d5dc2fa471bd53730e03ac8039804221949f843) - Clean up exp.ArrayIntersect PR *(PR [#5193](https://github.com/tobymao/sqlglot/pull/5193) by [@VaggelisD](https://github.com/VaggelisD))* - [`ad8a4e7`](https://github.com/tobymao/sqlglot/commit/ad8a4e73e1a9e4234f0b711163fb49630acf736c) - refactor join mark elimination to use is_correlated_subquery *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7dfb578`](https://github.com/tobymao/sqlglot/commit/7dfb5780fb242c82744dc1538077776ac624081e) - Refactor DETACH generation *(PR [#5237](https://github.com/tobymao/sqlglot/pull/5237) by [@VaggelisD](https://github.com/VaggelisD))* - [`cc389fa`](https://github.com/tobymao/sqlglot/commit/cc389facb33f94a0d1f696f2ef9e92f298711894) - **optimizer**: annotate type SHA1, SHA256, SHA512 for BigQuery *(PR [#5347](https://github.com/tobymao/sqlglot/pull/5347) by [@geooo109](https://github.com/geooo109))* - [`194850a`](https://github.com/tobymao/sqlglot/commit/194850a52497300a8f1d47f2306b67cdd11ffab6) - **exasol**: clean up TO_CHAR *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1abd461`](https://github.com/tobymao/sqlglot/commit/1abd461295830807c52f24d25ac6938095f54831) - bump min. 
supported version to python 3.9 *(PR [#5353](https://github.com/tobymao/sqlglot/pull/5353) by [@georgesittas](https://github.com/georgesittas))* ## [v26.33.0] - 2025-07-01 ### :boom: BREAKING CHANGES - due to [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))*: add TypeOf / toTypeName support (#5315) ### :sparkles: New Features - [`d2f7c41`](https://github.com/tobymao/sqlglot/commit/d2f7c41f9f30f4cf0c74782be9be0cc6e75565f3) - add TypeOf / toTypeName support *(PR [#5315](https://github.com/tobymao/sqlglot/pull/5315) by [@ankur334](https://github.com/ankur334))* ### :bug: Bug Fixes - [`0a6afcd`](https://github.com/tobymao/sqlglot/commit/0a6afcd90c663aaef9b385fc12ccd19dbf6388cc) - use re-entrant lock in dialects/__init__ to avoid deadlocks *(PR [#5322](https://github.com/tobymao/sqlglot/pull/5322) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5321](https://github.com/tobymao/sqlglot/issues/5321) opened by [@jc-5s](https://github.com/jc-5s)* - [`599ca81`](https://github.com/tobymao/sqlglot/commit/599ca8101f48805098cbdf808ac2923a8246066b) - **parser**: avoid CTE values ALIAS gen, when ALIAS exists *(PR [#5323](https://github.com/tobymao/sqlglot/pull/5323) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5318](https://github.com/tobymao/sqlglot/issues/5318) opened by [@ankur334](https://github.com/ankur334)* ## [v26.31.0] - 2025-06-26 ### :boom: BREAKING CHANGES - due to [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))*: avoid creating new alias for qualifying unpivot (#5121) - due to 
[`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))*: make coalesce simplification optional, skip by default (#5123) - due to [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update py03 version *(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by [@benfdking](https://github.com/benfdking))*: update py03 version (#5136) - due to [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.5.0 - due to [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - recognize ARRAY_CONCAT_AGG as an aggregate function *(PR [#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))*: recognize ARRAY_CONCAT_AGG as an aggregate function (#5141) - due to [`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor ALTER TABLE ADD parsing (#5144) - due to [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))*: preserve TIMESTAMP on roundtrip (#5145) - due to [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - return token vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by [@georgesittas](https://github.com/georgesittas))*: return 
token vector in `tokenize` even on failure (#5155) - due to [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump sqlglotrs to 0.6.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.6.0 - due to [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes [#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))*: improve support for ENDSWITH closes #5170 - due to [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for ANY_VALUE for versions 16+ (#5179) - due to [`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))*: virtual column with AS(expr) as ComputedColumnConstraint (#5180) - due to [`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))*: Array_intersection transpilation support (#5186) - due to [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))*: preserve DIV binary operator, fixes #5198 (#5199) - due to [`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - 
CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))*: CTEs instead of subqueries for pipe syntax (#5205) - due to [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))*: support multi-part (>3) dotted functions (#5211) - due to [`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve quoting for UDT (#5216) - due to [`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))*: JOIN pipe syntax, Set Operators as CTEs (#5215) - due to [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))*: Normalize date parts in `exp.Extract` generation (#5229) - due to [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - resolve table "columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))*: resolve table "columns" in bigquery that produce structs (#5230) - due to [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by 
[@georgesittas](https://github.com/georgesittas))*: change comma to cross join when precedence is the same for all join types (#5240) - due to [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))*: transpile from/to dbms_random.value (#5242) - due to [`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))*: cast constructed timestamp literal to zone-aware type if needed (#5253) - due to [`db4e0ec`](https://github.com/tobymao/sqlglot/commit/db4e0ece950a6a1f543d8ecad48a7d4b1d6872be) - convert information schema keywords to uppercase for consistency *(PR [#5263](https://github.com/tobymao/sqlglot/pull/5263) by [@mattiasthalen](https://github.com/mattiasthalen))*: convert information schema keywords to uppercase for consistency (#5263) - due to [`eea1570`](https://github.com/tobymao/sqlglot/commit/eea1570ba530517a95699092ccd9ce6a856f5e84) - add support for SYSDATETIMEOFFSET closes [#5272](https://github.com/tobymao/sqlglot/pull/5272) *(PR [#5273](https://github.com/tobymao/sqlglot/pull/5273) by [@georgesittas](https://github.com/georgesittas))*: add support for SYSDATETIMEOFFSET closes #5272 (#5273) - due to [`3d3ccc5`](https://github.com/tobymao/sqlglot/commit/3d3ccc52a40536b9ac4e974f1592dffe5a7568f9) - Transpile exp.PosExplode pos column alias *(PR [#5274](https://github.com/tobymao/sqlglot/pull/5274) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile exp.PosExplode pos column alias (#5274) - due to [`9a95af1`](https://github.com/tobymao/sqlglot/commit/9a95af1c725cd70ffa8206f1d88452a7faab93b2) - only cast strings to timestamp for TO_CHAR (TimeToStr) *(PR 
[#5283](https://github.com/tobymao/sqlglot/pull/5283) by [@georgesittas](https://github.com/georgesittas))*: only cast strings to timestamp for TO_CHAR (TimeToStr) (#5283) - due to [`8af4790`](https://github.com/tobymao/sqlglot/commit/8af479017ccde16049c897ae5d322d4a69843b65) - Fix parsing of ADD CONSTRAINT *(PR [#5288](https://github.com/tobymao/sqlglot/pull/5288) by [@VaggelisD](https://github.com/VaggelisD))*: Fix parsing of ADD CONSTRAINT (#5288) - due to [`18aea08`](https://github.com/tobymao/sqlglot/commit/18aea08f7dcaa887bcf29886cd3b3bc2850a3679) - include bigquery unnest aliases in selected sources *(PR [#5285](https://github.com/tobymao/sqlglot/pull/5285) by [@georgesittas](https://github.com/georgesittas))*: include bigquery unnest aliases in selected sources (#5285) - due to [`0ff95c5`](https://github.com/tobymao/sqlglot/commit/0ff95c5903907c9ab30b7850bb3b962bc6da2bab) - add parsing/transpilation support for the REPLACE function *(PR [#5289](https://github.com/tobymao/sqlglot/pull/5289) by [@rahulj51](https://github.com/rahulj51))*: add parsing/transpilation support for the REPLACE function (#5289) - due to [`dc03649`](https://github.com/tobymao/sqlglot/commit/dc03649bca0b7a090254976182a03c21dd2269ba) - only coerce time var -like units into strings for DATE_TRUNC *(PR [#5291](https://github.com/tobymao/sqlglot/pull/5291) by [@georgesittas](https://github.com/georgesittas))*: only coerce time var -like units into strings for DATE_TRUNC (#5291) ### :sparkles: New Features - [`82c50ce`](https://github.com/tobymao/sqlglot/commit/82c50ce68d9a1ad25095086ae3645f5c4996c18b) - **duckdb**: extend time travel parsing to take VERSION into account *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bb4f428`](https://github.com/tobymao/sqlglot/commit/bb4f4283b53bc060a8c7e0f12c1e7ef5b521c4e6) - bubble up comments nested under a Bracket, fixes [#5131](https://github.com/tobymao/sqlglot/pull/5131) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`9f318eb`](https://github.com/tobymao/sqlglot/commit/9f318ebe4502bb484a34873252cf4a40c7e440e4) - **snowflake**: Transpile BQ's `ARRAY(SELECT AS STRUCT ...)` *(PR [#5140](https://github.com/tobymao/sqlglot/pull/5140) by [@VaggelisD](https://github.com/VaggelisD))* - [`93b402a`](https://github.com/tobymao/sqlglot/commit/93b402abc74e642ed312db585b33315674a450cd) - **parser**: support SELECT, FROM, WHERE with pipe syntax *(PR [#5128](https://github.com/tobymao/sqlglot/pull/5128) by [@geooo109](https://github.com/geooo109))* - [`1a8e78b`](https://github.com/tobymao/sqlglot/commit/1a8e78bd84e006023d5d3ea561504587dfbb55a9) - **parser**: ORDER BY with pipe syntax *(PR [#5153](https://github.com/tobymao/sqlglot/pull/5153) by [@geooo109](https://github.com/geooo109))* - [`966ad95`](https://github.com/tobymao/sqlglot/commit/966ad95432d5f8e29ade36d8271a5c489c207324) - **tsql**: add convert style 126 *(PR [#5157](https://github.com/tobymao/sqlglot/pull/5157) by [@pa1ch](https://github.com/pa1ch))* - [`b7ac6ff`](https://github.com/tobymao/sqlglot/commit/b7ac6ff4680ff619be4b0ddb01f61f916ed09d58) - **parser**: LIMIT/OFFSET pipe syntax *(PR [#5159](https://github.com/tobymao/sqlglot/pull/5159) by [@geooo109](https://github.com/geooo109))* - [`cfc158d`](https://github.com/tobymao/sqlglot/commit/cfc158d753d4f43d12c3b502633d29e43dcc5569) - **snowflake**: transpile STRTOK_TO_ARRAY to duckdb *(PR [#5165](https://github.com/tobymao/sqlglot/pull/5165) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5160](https://github.com/tobymao/sqlglot/issues/5160) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`ff0f30b`](https://github.com/tobymao/sqlglot/commit/ff0f30bcf7d0d74b26a703eaa632e1be15b3c001) - support ARRAY_REMOVE *(PR [#5163](https://github.com/tobymao/sqlglot/pull/5163) by [@geooo109](https://github.com/geooo109))* - 
[`9cac01f`](https://github.com/tobymao/sqlglot/commit/9cac01f6b4a5c93b55f5b68f21cb104932880a0e) - **tsql**: support FOR XML syntax *(PR [#5167](https://github.com/tobymao/sqlglot/pull/5167) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5161](https://github.com/tobymao/sqlglot/issues/5161) opened by [@codykonior](https://github.com/codykonior)* - [`8b5129f`](https://github.com/tobymao/sqlglot/commit/8b5129f288880032f0bf9d649984d82314039af1) - **postgres**: improve pretty-formatting of ARRAY[...] *(commit by [@georgesittas](https://github.com/georgesittas))* - [`964b4a1`](https://github.com/tobymao/sqlglot/commit/964b4a1e367e00e243b80edf677cd48d453ed31e) - add line/col position for Star *(commit by [@georgesittas](https://github.com/georgesittas))* - [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes [#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`63f9cb4`](https://github.com/tobymao/sqlglot/commit/63f9cb4b158b88574136b32241ee60254352c9e6) - **sqlglotrs**: match the Python implementation of __repr__ for tokens *(PR [#5172](https://github.com/tobymao/sqlglot/pull/5172) by [@georgesittas](https://github.com/georgesittas))* - [`c007afa`](https://github.com/tobymao/sqlglot/commit/c007afa23831e9bd86f401d85260e15edf00328f) - support Star instance as first arg of exp.column helper *(PR [#5177](https://github.com/tobymao/sqlglot/pull/5177) by [@georgesittas](https://github.com/georgesittas))* - [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - **postgres**: Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4674](https://github.com/TobikoData/sqlmesh/issues/4674) opened by 
[@petrikoro](https://github.com/petrikoro)* - [`ba05ff6`](https://github.com/tobymao/sqlglot/commit/ba05ff67127e056d567fc2c1d3bcc8e3dcce7b7e) - **parser**: AGGREGATE with GROUP AND ORDER BY pipe syntax *(PR [#5171](https://github.com/tobymao/sqlglot/pull/5171) by [@geooo109](https://github.com/geooo109))* - [`26077a4`](https://github.com/tobymao/sqlglot/commit/26077a47d9db750f44ab1baf9a434596b5bb613b) - make to_table more lenient *(PR [#5183](https://github.com/tobymao/sqlglot/pull/5183) by [@georgesittas](https://github.com/georgesittas))* - [`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))* - [`d86a114`](https://github.com/tobymao/sqlglot/commit/d86a1147aeb866ed0ab2c342914ecf8cbfadac8a) - **sqlite**: implement RESPECT/IGNORE NULLS in first_value() *(PR [#5185](https://github.com/tobymao/sqlglot/pull/5185) by [@NickCrews](https://github.com/NickCrews))* - [`1d50fca`](https://github.com/tobymao/sqlglot/commit/1d50fca8ffc34e4acbc1b791c4cdf5f184a748db) - improve transpilation of st_point and st_distance *(PR [#5194](https://github.com/tobymao/sqlglot/pull/5194) by [@georgesittas](https://github.com/georgesittas))* - [`756ec3b`](https://github.com/tobymao/sqlglot/commit/756ec3b65db1eb2572d017a3ac12ece6bb44c726) - **parser**: SET OPERATORS with pipe syntax *(PR [#5184](https://github.com/tobymao/sqlglot/pull/5184) by [@geooo109](https://github.com/geooo109))* - [`c20f85e`](https://github.com/tobymao/sqlglot/commit/c20f85e3e171e502fc51f74894d3313f0ad61535) - **spark**: support ALTER ADD PARTITION *(PR [#5208](https://github.com/tobymao/sqlglot/pull/5208) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5204](https://github.com/tobymao/sqlglot/issues/5204) opened by [@cosinequanon](https://github.com/cosinequanon)* - 
[`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - **parser**: JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))* - [`21cd3eb`](https://github.com/tobymao/sqlglot/commit/21cd3ebf5d0b57f5b102c5aadc3b24a598ebe918) - **parser**: PIVOT/UNPIVOT pipe syntax *(PR [#5222](https://github.com/tobymao/sqlglot/pull/5222) by [@geooo109](https://github.com/geooo109))* - [`97f5822`](https://github.com/tobymao/sqlglot/commit/97f58226fc8815b23787b7b8699ea71f58268560) - **parser**: AS pipe syntax *(PR [#5224](https://github.com/tobymao/sqlglot/pull/5224) by [@geooo109](https://github.com/geooo109))* - [`a7e7fee`](https://github.com/tobymao/sqlglot/commit/a7e7feef02a77fe8606f3f482bad91230fa637f4) - **parser**: EXTEND pipe syntax *(PR [#5225](https://github.com/tobymao/sqlglot/pull/5225) by [@geooo109](https://github.com/geooo109))* - [`c1cb9f8`](https://github.com/tobymao/sqlglot/commit/c1cb9f8f682080f7a06c387219d79c6d068b6dbe) - **snowflake**: add autoincrement order clause support *(PR [#5223](https://github.com/tobymao/sqlglot/pull/5223) by [@dmaresma](https://github.com/dmaresma))* - [`91afe4c`](https://github.com/tobymao/sqlglot/commit/91afe4cfd7b3f427e4c0b298075e867b8a1bbe55) - **parser**: TABLESAMPLE pipe syntax *(PR [#5231](https://github.com/tobymao/sqlglot/pull/5231) by [@geooo109](https://github.com/geooo109))* - [`62da84a`](https://github.com/tobymao/sqlglot/commit/62da84acce7f44802dca26a9357a16115e21fabf) - **snowflake**: improve transpilation of unnested object lookup *(PR [#5234](https://github.com/tobymao/sqlglot/pull/5234) by [@georgesittas](https://github.com/georgesittas))* - [`2c60453`](https://github.com/tobymao/sqlglot/commit/2c604537ba83dee74e9ced7e216673ecc70fe487) - **parser**: DROP pipe syntax *(PR [#5226](https://github.com/tobymao/sqlglot/pull/5226) by [@geooo109](https://github.com/geooo109))* - 
[`9885729`](https://github.com/tobymao/sqlglot/commit/988572954135c68dc021b992c815024ce3debaff) - **parser**: SET pipe syntax *(PR [#5236](https://github.com/tobymao/sqlglot/pull/5236) by [@geooo109](https://github.com/geooo109))* - [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - **oracle**: transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5241](https://github.com/tobymao/sqlglot/issues/5241) opened by [@Akshat-2512](https://github.com/Akshat-2512)* - [`0d19544`](https://github.com/tobymao/sqlglot/commit/0d19544317c1056b17fb089d4be9b5bddfe6feb3) - add Microsoft Fabric dialect, a case sensitive version of TSQL *(PR [#5247](https://github.com/tobymao/sqlglot/pull/5247) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`249dbc9`](https://github.com/tobymao/sqlglot/commit/249dbc906adc6b20932dc8efe83f6f4d23ef8c1e) - **parser**: start with SELECT and nested pipe syntax *(PR [#5248](https://github.com/tobymao/sqlglot/pull/5248) by [@geooo109](https://github.com/geooo109))* - [`f5b5b93`](https://github.com/tobymao/sqlglot/commit/f5b5b9338eb92b7aa2c9b4c92c6138c2c05e1c40) - **fabric**: implement type mappings for unsupported Fabric types *(PR [#5249](https://github.com/tobymao/sqlglot/pull/5249) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`78fcea1`](https://github.com/tobymao/sqlglot/commit/78fcea13b5eb1734a15a254875bc80ad8063b0b0) - **spark, databricks**: parse brackets as placeholder *(PR [#5256](https://github.com/tobymao/sqlglot/pull/5256) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5251](https://github.com/tobymao/sqlglot/issues/5251) opened by [@aersam](https://github.com/aersam)* - [`7d71387`](https://github.com/tobymao/sqlglot/commit/7d7138780db82e7a75949d29282b944e739ad99d) - **fabric**: Add precision cap to 
temporal data types *(PR [#5250](https://github.com/tobymao/sqlglot/pull/5250) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`e8cf793`](https://github.com/tobymao/sqlglot/commit/e8cf79305d398f25640ef3c07dd8b32997cb0167) - **duckdb**: Transpile Snowflake's TO_CHAR if format is in Snowflake.TIME_MAPPING *(PR [#5257](https://github.com/tobymao/sqlglot/pull/5257) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5255](https://github.com/tobymao/sqlglot/issues/5255) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`0cdfe64`](https://github.com/tobymao/sqlglot/commit/0cdfe642e3cb996c5ac48cc055af2862340dcf56) - add Exasol dialect (pass 1: string type mapping) *(PR [#5264](https://github.com/tobymao/sqlglot/pull/5264) by [@nnamdi16](https://github.com/nnamdi16))* - [`eea1570`](https://github.com/tobymao/sqlglot/commit/eea1570ba530517a95699092ccd9ce6a856f5e84) - **tsql**: add support for SYSDATETIMEOFFSET closes [#5272](https://github.com/tobymao/sqlglot/pull/5272) *(PR [#5273](https://github.com/tobymao/sqlglot/pull/5273) by [@georgesittas](https://github.com/georgesittas))* - [`3d3ccc5`](https://github.com/tobymao/sqlglot/commit/3d3ccc52a40536b9ac4e974f1592dffe5a7568f9) - **hive**: Transpile exp.PosExplode pos column alias *(PR [#5274](https://github.com/tobymao/sqlglot/pull/5274) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5271](https://github.com/tobymao/sqlglot/issues/5271) opened by [@charlie-liner](https://github.com/charlie-liner)* - [`1c48c09`](https://github.com/tobymao/sqlglot/commit/1c48c09fd836db40bba6c46d0e9969937ce96587) - **exasol**: added datatype mappings and test for exasol dialect. 
*(PR [#5270](https://github.com/tobymao/sqlglot/pull/5270) by [@nnamdi16](https://github.com/nnamdi16))* - [`883fcb1`](https://github.com/tobymao/sqlglot/commit/883fcb137583f6d36f3a70a1343780bb40bf6f81) - **databricks**: GROUP_CONCAT to LISTAGG *(PR [#5284](https://github.com/tobymao/sqlglot/pull/5284) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5281](https://github.com/tobymao/sqlglot/issues/5281) opened by [@wKollendorf](https://github.com/wKollendorf)* - [`21ef897`](https://github.com/tobymao/sqlglot/commit/21ef8974426d9f3562ade0bd2c8448bb440bee27) - **fabric**: implement UnixToTime transformation to DATEADD syntax *(PR [#5269](https://github.com/tobymao/sqlglot/pull/5269) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`0ff95c5`](https://github.com/tobymao/sqlglot/commit/0ff95c5903907c9ab30b7850bb3b962bc6da2bab) - add parsing/transpilation support for the REPLACE function *(PR [#5289](https://github.com/tobymao/sqlglot/pull/5289) by [@rahulj51](https://github.com/rahulj51))* - [`1b0631c`](https://github.com/tobymao/sqlglot/commit/1b0631c2b4516a9ceb81af6173790dd09269b635) - **exasol**: implemented the Mod function *(PR [#5292](https://github.com/tobymao/sqlglot/pull/5292) by [@nnamdi16](https://github.com/nnamdi16))* ### :bug: Bug Fixes - [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - **optimizer**: avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))* - [`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - **optimizer**: make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))* - [`f7401fd`](https://github.com/tobymao/sqlglot/commit/f7401fdc29a35738eb23f424ceba03463a4d8af9) - **bigquery**: avoid getting stuck in 
infinite loop when parsing tables *(PR [#5130](https://github.com/tobymao/sqlglot/pull/5130) by [@georgesittas](https://github.com/georgesittas))* - [`e9b3156`](https://github.com/tobymao/sqlglot/commit/e9b3156aa1ed95fdee4c6b419134d8ca746964b6) - **athena**: Handle transpilation of FileFormatProperty from dialects that treat it as a variable and not a string literal *(PR [#5133](https://github.com/tobymao/sqlglot/pull/5133) by [@erindru](https://github.com/erindru))* - [`a3fccd9`](https://github.com/tobymao/sqlglot/commit/a3fccd9be294499b53477da931f8b097cdbe09fc) - **snowflake**: generate SELECT for UNNEST without JOIN or FROM *(PR [#5138](https://github.com/tobymao/sqlglot/pull/5138) by [@geooo109](https://github.com/geooo109))* - [`993919d`](https://github.com/tobymao/sqlglot/commit/993919d05d5d3c814471607b56831bb65d349eb4) - **snowflake**: Properly transpile ARRAY_AGG, IGNORE/RESPECT NULLS *(PR [#5137](https://github.com/tobymao/sqlglot/pull/5137) by [@VaggelisD](https://github.com/VaggelisD))* - [`6e57619`](https://github.com/tobymao/sqlglot/commit/6e57619f85375e789bb39a6478aa01cd7c7758f0) - **snowflake**: Transpile ISOWEEK to WEEKISO *(PR [#5139](https://github.com/tobymao/sqlglot/pull/5139) by [@VaggelisD](https://github.com/VaggelisD))* - [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - **bigquery**: recognize ARRAY_CONCAT_AGG as an aggregate function *(PR [#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))* - [`f3aeb37`](https://github.com/tobymao/sqlglot/commit/f3aeb374351a0b1b3c75945718d8ea42f8926b62) - **tsql**: properly parse and generate ALTER SET *(PR [#5143](https://github.com/tobymao/sqlglot/pull/5143) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5135](https://github.com/tobymao/sqlglot/issues/5135) opened by [@codykonior](https://github.com/codykonior)* - 
[`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5129](https://github.com/tobymao/sqlglot/issues/5129) opened by [@Mevrael](https://github.com/Mevrael)* - [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - **mysql**: preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5127](https://github.com/tobymao/sqlglot/issues/5127) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`4f8c73d`](https://github.com/tobymao/sqlglot/commit/4f8c73d60eecebc601c60ee8c7819458435e34b8) - **hive**: STRUCT column names and data type should be separated by ':' in hive *(PR [#5147](https://github.com/tobymao/sqlglot/pull/5147) by [@tsamaras](https://github.com/tsamaras))* - [`e2a488f`](https://github.com/tobymao/sqlglot/commit/e2a488f48f3e036566462463bbc58cc6a1c7492e) - Error on columns mismatch in pushdown_projections ignores dialect *(PR [#5151](https://github.com/tobymao/sqlglot/pull/5151) by [@snovik75](https://github.com/snovik75))* - [`1a35365`](https://github.com/tobymao/sqlglot/commit/1a35365a3bb1ef56e8da0023271cbe3108e0ccb1) - avoid generating nested comments when not supported *(PR [#5158](https://github.com/tobymao/sqlglot/pull/5158) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5132](https://github.com/tobymao/sqlglot/issues/5132) opened by [@patricksurry](https://github.com/patricksurry)* - [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - **rust-tokenizer**: return token vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5148](https://github.com/tobymao/sqlglot/issues/5148) opened by [@kamoser](https://github.com/kamoser)* - [`760a606`](https://github.com/tobymao/sqlglot/commit/760a6062d5f259488e471af9c1d33e200066e9dc) - **postgres**: support decimal values in INTERVAL expressions fixes [#5168](https://github.com/tobymao/sqlglot/pull/5168) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - **parser**: virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5173](https://github.com/tobymao/sqlglot/issues/5173) opened by [@suyah](https://github.com/suyah)* - [`c87ae02`](https://github.com/tobymao/sqlglot/commit/c87ae02aa263be8463ca7283ebd090385a4bfd59) - **sqlite**: Add REPLACE to command tokens *(PR [#5192](https://github.com/tobymao/sqlglot/pull/5192) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5187](https://github.com/tobymao/sqlglot/issues/5187) opened by [@stefanmalanik](https://github.com/stefanmalanik)* - [`4b89afd`](https://github.com/tobymao/sqlglot/commit/4b89afdcc0063e70cbc64165c7f1f5102afaa87c) - **starrocks**: array_agg_transpilation_fix *(PR [#5190](https://github.com/tobymao/sqlglot/pull/5190) by [@Swathiraj23](https://github.com/Swathiraj23))* - [`461b054`](https://github.com/tobymao/sqlglot/commit/461b0548832ab8d916c3a6638f27a49f681109fe) - **postgres**: support use_spheroid argument in ST_DISTANCE *(commit by [@georgesittas](https://github.com/georgesittas))* - [`99bbae3`](https://github.com/tobymao/sqlglot/commit/99bbae370329c5f5cd132b711c714359cf96ba58) - **sqlite**: allow ALTER RENAME without COLUMN keyword fixes [#5195](https://github.com/tobymao/sqlglot/pull/5195) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - **hive**: preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))* - [`d0eeb26`](https://github.com/tobymao/sqlglot/commit/d0eeb2639e771e8f8b6feabd41c65f16ed5a9829) - eliminate_join_marks has multiple issues fixes [#5188](https://github.com/tobymao/sqlglot/pull/5188) *(PR [#5189](https://github.com/tobymao/sqlglot/pull/5189) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#5188](https://github.com/tobymao/sqlglot/issues/5188) opened by [@snovik75](https://github.com/snovik75)* - [`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - **parser**: CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))* - [`77e9d9a`](https://github.com/tobymao/sqlglot/commit/77e9d9a0269e2013379967cf2f46fbd79c036277) - **mysql**: properly parse STORED/VIRTUAL computed columns *(PR [#5210](https://github.com/tobymao/sqlglot/pull/5210) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5203](https://github.com/tobymao/sqlglot/issues/5203) opened by [@mdebski](https://github.com/mdebski)* - [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - **parser**: support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5200](https://github.com/tobymao/sqlglot/issues/5200) opened by [@mateuszpoleski](https://github.com/mateuszpoleski)* - 
[`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - **postgres**: Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5212](https://github.com/tobymao/sqlglot/issues/5212) opened by [@NickCrews](https://github.com/NickCrews)* - [`f37c0b1`](https://github.com/tobymao/sqlglot/commit/f37c0b1197321dd610648ce652a171ab063deeeb) - **snowflake**: ensure a standalone GET() expression can be parsed *(PR [#5219](https://github.com/tobymao/sqlglot/pull/5219) by [@georgesittas](https://github.com/georgesittas))* - [`28fed58`](https://github.com/tobymao/sqlglot/commit/28fed586a39df83aade4792217743a1a859fd039) - **optimizer**: UnboundLocalError in scope module *(commit by [@georgesittas](https://github.com/georgesittas))* - [`809e05a`](https://github.com/tobymao/sqlglot/commit/809e05a743d5a2904a1d6f6813f24ca7549ac7ef) - **snowflake**: preserve STRTOK_TO_ARRAY roundtrip *(commit by [@georgesittas](https://github.com/georgesittas))* - [`df73a79`](https://github.com/tobymao/sqlglot/commit/df73a79a2ca3ba859b8aba5e3d0f6ed269874a63) - **tsql**: Retain limit clause in subquery expression. 
*(PR [#5227](https://github.com/tobymao/sqlglot/pull/5227) by [@MarcusRisanger](https://github.com/MarcusRisanger))* - [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - **duckdb**: Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5228](https://github.com/tobymao/sqlglot/issues/5228) opened by [@greybeam-bot](https://github.com/greybeam-bot)* - [`1b4c083`](https://github.com/tobymao/sqlglot/commit/1b4c083fff8d7c44bf1dbba28c1225fa1e28c4d2) - **athena**: include Hive string escapes in the tokenizer *(PR [#5233](https://github.com/tobymao/sqlglot/pull/5233) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5232](https://github.com/tobymao/sqlglot/issues/5232) opened by [@ligfx](https://github.com/ligfx)* - [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - **optimizer**: resolve table "columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5207](https://github.com/tobymao/sqlglot/issues/5207) opened by [@Bladieblah](https://github.com/Bladieblah)* - [`781539d`](https://github.com/tobymao/sqlglot/commit/781539d5cbe58142ed6688f1522fc4ed31da0a56) - **duckdb**: Generate correct DETACH syntax if IF EXISTS is set *(PR [#5235](https://github.com/tobymao/sqlglot/pull/5235) by [@erindru](https://github.com/erindru))* - [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by [@georgesittas](https://github.com/georgesittas))* - 
[`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - **presto**: cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5252](https://github.com/tobymao/sqlglot/issues/5252) opened by [@agni-sairent](https://github.com/agni-sairent)* - [`847248d`](https://github.com/tobymao/sqlglot/commit/847248dd1b66e3a8f60c23a4488be85dfdef4113) - format ADD CONSTRAINT clause properly fixes [#5260](https://github.com/tobymao/sqlglot/pull/5260) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`db4e0ec`](https://github.com/tobymao/sqlglot/commit/db4e0ece950a6a1f543d8ecad48a7d4b1d6872be) - **tsql**: convert information schema keywords to uppercase for consistency *(PR [#5263](https://github.com/tobymao/sqlglot/pull/5263) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`8de87e3`](https://github.com/tobymao/sqlglot/commit/8de87e3f755a40b600aa94ee2c30cf697ef7c43c) - **redshift**: handle scale parameter in to_timestamp *(PR [#5266](https://github.com/tobymao/sqlglot/pull/5266) by [@MatiasCasaliSplit](https://github.com/MatiasCasaliSplit))* - [`e32f709`](https://github.com/tobymao/sqlglot/commit/e32f70992b5058efb93f5d2b6106fb00b810f576) - **hive**: Fix exp.PosExplode alias order *(PR [#5279](https://github.com/tobymao/sqlglot/pull/5279) by [@VaggelisD](https://github.com/VaggelisD))* - [`3dd9f8e`](https://github.com/tobymao/sqlglot/commit/3dd9f8e78ecbfde0dd7fc6fefcc09c8cb99bcd7b) - **fabric**: Type mismatches and precision error *(PR [#5280](https://github.com/tobymao/sqlglot/pull/5280) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`9a95af1`](https://github.com/tobymao/sqlglot/commit/9a95af1c725cd70ffa8206f1d88452a7faab93b2) - **snowflake**: only cast strings to timestamp for TO_CHAR (TimeToStr) *(PR 
[#5283](https://github.com/tobymao/sqlglot/pull/5283) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5282](https://github.com/tobymao/sqlglot/issues/5282) opened by [@wedotech-ashley](https://github.com/wedotech-ashley)* - [`8af4790`](https://github.com/tobymao/sqlglot/commit/8af479017ccde16049c897ae5d322d4a69843b65) - **tsql**: Fix parsing of ADD CONSTRAINT *(PR [#5288](https://github.com/tobymao/sqlglot/pull/5288) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4813](https://github.com/TobikoData/sqlmesh/issues/4813) opened by [@bnstewrt](https://github.com/bnstewrt)* - [`18aea08`](https://github.com/tobymao/sqlglot/commit/18aea08f7dcaa887bcf29886cd3b3bc2850a3679) - **scope**: include bigquery unnest aliases in selected sources *(PR [#5285](https://github.com/tobymao/sqlglot/pull/5285) by [@georgesittas](https://github.com/georgesittas))* - [`ba4a234`](https://github.com/tobymao/sqlglot/commit/ba4a234bfabdd8161b96a29436a50e0eb04c2dc2) - **fabric**: ignore Date cap *(PR [#5290](https://github.com/tobymao/sqlglot/pull/5290) by [@fresioAS](https://github.com/fresioAS))* - [`dc03649`](https://github.com/tobymao/sqlglot/commit/dc03649bca0b7a090254976182a03c21dd2269ba) - **bigquery**: only coerce time var -like units into strings for DATE_TRUNC *(PR [#5291](https://github.com/tobymao/sqlglot/pull/5291) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`86c6b90`](https://github.com/tobymao/sqlglot/commit/86c6b90d21b204b4376639affa142e8cee509065) - **tsql**: XML_OPTIONS *(commit by [@geooo109](https://github.com/geooo109))* - [`aac70aa`](https://github.com/tobymao/sqlglot/commit/aac70aaaa8d840c267129e2307ccb65058cef0c9) - **parser**: simpler _parse_pipe_syntax_select *(commit by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update pyo3 version 
*(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by [@benfdking](https://github.com/benfdking))* - :arrow_lower_right: *addresses issue [#5134](https://github.com/tobymao/sqlglot/issues/5134) opened by [@mgorny](https://github.com/mgorny)* - [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`5752a87`](https://github.com/tobymao/sqlglot/commit/5752a87406b736317e4dc5cce9ae05cbc5c19547) - update benchmarking framework *(PR [#5146](https://github.com/tobymao/sqlglot/pull/5146) by [@benfdking](https://github.com/benfdking))* - [`0ae297a`](https://github.com/tobymao/sqlglot/commit/0ae297a01262cf323e225fe578bdeab2230c6fd5) - compare performance on main vs pr branch *(PR [#5149](https://github.com/tobymao/sqlglot/pull/5149) by [@georgesittas](https://github.com/georgesittas))* - [`180963b`](https://github.com/tobymao/sqlglot/commit/180963b8cf25d9ff83d2347859b7f46398af5000) - handle pipe syntax unsupported operators more gracefully *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6c8d61a`](https://github.com/tobymao/sqlglot/commit/6c8d61ae1ef5b645835ccd683063845dd801e8d2) - include optimization benchmarks *(PR [#5152](https://github.com/tobymao/sqlglot/pull/5152) by [@georgesittas](https://github.com/georgesittas))* - [`bc5c66c`](https://github.com/tobymao/sqlglot/commit/bc5c66c9210a472147d98a94c34b4bb582ade8b1) - Run benchmark job if /benchmark comment *(PR [#5164](https://github.com/tobymao/sqlglot/pull/5164) by [@VaggelisD](https://github.com/VaggelisD))* - [`742b2b7`](https://github.com/tobymao/sqlglot/commit/742b2b770b88a2e901d2f84af00db821da441e4c) - Fix benchmark CI to include issue number *(PR [#5166](https://github.com/tobymao/sqlglot/pull/5166) by [@VaggelisD](https://github.com/VaggelisD))* - [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump 
sqlglotrs to 0.6.0 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`440590b`](https://github.com/tobymao/sqlglot/commit/440590bf92ab1281f50b96a1400cbca695d40f0c) - bump sqlglotrs to 0.6.1 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`83de4e1`](https://github.com/tobymao/sqlglot/commit/83de4e11bc1547aa22b275b20c0326dfbe43b2b8) - improve benchmark result displaying *(PR [#5176](https://github.com/tobymao/sqlglot/pull/5176) by [@georgesittas](https://github.com/georgesittas))* - [`5d5dc2f`](https://github.com/tobymao/sqlglot/commit/5d5dc2fa471bd53730e03ac8039804221949f843) - Clean up exp.ArrayIntersect PR *(PR [#5193](https://github.com/tobymao/sqlglot/pull/5193) by [@VaggelisD](https://github.com/VaggelisD))* - [`ad8a4e7`](https://github.com/tobymao/sqlglot/commit/ad8a4e73e1a9e4234f0b711163fb49630acf736c) - refactor join mark elimination to use is_correlated_subquery *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7dfb578`](https://github.com/tobymao/sqlglot/commit/7dfb5780fb242c82744dc1538077776ac624081e) - Refactor DETACH generation *(PR [#5237](https://github.com/tobymao/sqlglot/pull/5237) by [@VaggelisD](https://github.com/VaggelisD))* ## [v26.30.0] - 2025-06-21 ### :boom: BREAKING CHANGES - due to [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by [@georgesittas](https://github.com/georgesittas))*: change comma to cross join when precedence is the same for all join types (#5240) - due to [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))*: transpile from/to dbms_random.value (#5242) - due to 
[`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))*: cast constructed timestamp literal to zone-aware type if needed (#5253) ### :sparkles: New Features - [`e7c217e`](https://github.com/tobymao/sqlglot/commit/e7c217ef08e5811e7dad2b3d26dbaa9f02114e38) - **oracle**: transpile from/to dbms_random.value *(PR [#5242](https://github.com/tobymao/sqlglot/pull/5242) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5241](https://github.com/tobymao/sqlglot/issues/5241) opened by [@Akshat-2512](https://github.com/Akshat-2512)* - [`0d19544`](https://github.com/tobymao/sqlglot/commit/0d19544317c1056b17fb089d4be9b5bddfe6feb3) - add Microsoft Fabric dialect, a case sensitive version of TSQL *(PR [#5247](https://github.com/tobymao/sqlglot/pull/5247) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`249dbc9`](https://github.com/tobymao/sqlglot/commit/249dbc906adc6b20932dc8efe83f6f4d23ef8c1e) - **parser**: start with SELECT and nested pipe syntax *(PR [#5248](https://github.com/tobymao/sqlglot/pull/5248) by [@geooo109](https://github.com/geooo109))* - [`f5b5b93`](https://github.com/tobymao/sqlglot/commit/f5b5b9338eb92b7aa2c9b4c92c6138c2c05e1c40) - **fabric**: implement type mappings for unsupported Fabric types *(PR [#5249](https://github.com/tobymao/sqlglot/pull/5249) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`78fcea1`](https://github.com/tobymao/sqlglot/commit/78fcea13b5eb1734a15a254875bc80ad8063b0b0) - **spark, databricks**: parse brackets as placeholder *(PR [#5256](https://github.com/tobymao/sqlglot/pull/5256) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5251](https://github.com/tobymao/sqlglot/issues/5251) opened by 
[@aersam](https://github.com/aersam)* - [`7d71387`](https://github.com/tobymao/sqlglot/commit/7d7138780db82e7a75949d29282b944e739ad99d) - **fabric**: Add precision cap to temporal data types *(PR [#5250](https://github.com/tobymao/sqlglot/pull/5250) by [@mattiasthalen](https://github.com/mattiasthalen))* - [`e8cf793`](https://github.com/tobymao/sqlglot/commit/e8cf79305d398f25640ef3c07dd8b32997cb0167) - **duckdb**: Transpile Snowflake's TO_CHAR if format is in Snowflake.TIME_MAPPING *(PR [#5257](https://github.com/tobymao/sqlglot/pull/5257) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#5255](https://github.com/tobymao/sqlglot/issues/5255) opened by [@kyle-cheung](https://github.com/kyle-cheung)* ### :bug: Bug Fixes - [`d3dc761`](https://github.com/tobymao/sqlglot/commit/d3dc761393146357a5d20c4d7992fd2a1ae5e6e2) - change comma to cross join when precedence is the same for all join types *(PR [#5240](https://github.com/tobymao/sqlglot/pull/5240) by [@georgesittas](https://github.com/georgesittas))* - [`31814cd`](https://github.com/tobymao/sqlglot/commit/31814cddb0cf65caf29fbc45a31a9c865b7991c7) - **presto**: cast constructed timestamp literal to zone-aware type if needed *(PR [#5253](https://github.com/tobymao/sqlglot/pull/5253) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5252](https://github.com/tobymao/sqlglot/issues/5252) opened by [@agni-sairent](https://github.com/agni-sairent)* ## [v26.29.0] - 2025-06-17 ### :boom: BREAKING CHANGES - due to [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))*: Normalize date parts in `exp.Extract` generation (#5229) - due to [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - resolve table 
"columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))*: resolve table "columns" in bigquery that produce structs (#5230) ### :sparkles: New Features - [`97f5822`](https://github.com/tobymao/sqlglot/commit/97f58226fc8815b23787b7b8699ea71f58268560) - **parser**: AS pipe syntax *(PR [#5224](https://github.com/tobymao/sqlglot/pull/5224) by [@geooo109](https://github.com/geooo109))* - [`a7e7fee`](https://github.com/tobymao/sqlglot/commit/a7e7feef02a77fe8606f3f482bad91230fa637f4) - **parser**: EXTEND pipe syntax *(PR [#5225](https://github.com/tobymao/sqlglot/pull/5225) by [@geooo109](https://github.com/geooo109))* - [`c1cb9f8`](https://github.com/tobymao/sqlglot/commit/c1cb9f8f682080f7a06c387219d79c6d068b6dbe) - **snowflake**: add autoincrement order clause support *(PR [#5223](https://github.com/tobymao/sqlglot/pull/5223) by [@dmaresma](https://github.com/dmaresma))* - [`91afe4c`](https://github.com/tobymao/sqlglot/commit/91afe4cfd7b3f427e4c0b298075e867b8a1bbe55) - **parser**: TABLESAMPLE pipe syntax *(PR [#5231](https://github.com/tobymao/sqlglot/pull/5231) by [@geooo109](https://github.com/geooo109))* - [`62da84a`](https://github.com/tobymao/sqlglot/commit/62da84acce7f44802dca26a9357a16115e21fabf) - **snowflake**: improve transpilation of unnested object lookup *(PR [#5234](https://github.com/tobymao/sqlglot/pull/5234) by [@georgesittas](https://github.com/georgesittas))* - [`2c60453`](https://github.com/tobymao/sqlglot/commit/2c604537ba83dee74e9ced7e216673ecc70fe487) - **parser**: DROP pipe syntax *(PR [#5226](https://github.com/tobymao/sqlglot/pull/5226) by [@geooo109](https://github.com/geooo109))* - [`9885729`](https://github.com/tobymao/sqlglot/commit/988572954135c68dc021b992c815024ce3debaff) - **parser**: SET pipe syntax *(PR [#5236](https://github.com/tobymao/sqlglot/pull/5236) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - 
[`df73a79`](https://github.com/tobymao/sqlglot/commit/df73a79a2ca3ba859b8aba5e3d0f6ed269874a63) - **tsql**: Retain limit clause in subquery expression. *(PR [#5227](https://github.com/tobymao/sqlglot/pull/5227) by [@MarcusRisanger](https://github.com/MarcusRisanger))* - [`4f42d95`](https://github.com/tobymao/sqlglot/commit/4f42d951363f8c43a4c414dc21d0505d9c8e48bf) - **duckdb**: Normalize date parts in `exp.Extract` generation *(PR [#5229](https://github.com/tobymao/sqlglot/pull/5229) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5228](https://github.com/tobymao/sqlglot/issues/5228) opened by [@greybeam-bot](https://github.com/greybeam-bot)* - [`1b4c083`](https://github.com/tobymao/sqlglot/commit/1b4c083fff8d7c44bf1dbba28c1225fa1e28c4d2) - **athena**: include Hive string escapes in the tokenizer *(PR [#5233](https://github.com/tobymao/sqlglot/pull/5233) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5232](https://github.com/tobymao/sqlglot/issues/5232) opened by [@ligfx](https://github.com/ligfx)* - [`e7e38fe`](https://github.com/tobymao/sqlglot/commit/e7e38fe0e09f9affbff4ffa7023d0161e3a1ee49) - **optimizer**: resolve table "columns" in bigquery that produce structs *(PR [#5230](https://github.com/tobymao/sqlglot/pull/5230) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5207](https://github.com/tobymao/sqlglot/issues/5207) opened by [@Bladieblah](https://github.com/Bladieblah)* - [`781539d`](https://github.com/tobymao/sqlglot/commit/781539d5cbe58142ed6688f1522fc4ed31da0a56) - **duckdb**: Generate correct DETACH syntax if IF EXISTS is set *(PR [#5235](https://github.com/tobymao/sqlglot/pull/5235) by [@erindru](https://github.com/erindru))* ### :wrench: Chores - [`7dfb578`](https://github.com/tobymao/sqlglot/commit/7dfb5780fb242c82744dc1538077776ac624081e) - Refactor DETACH generation *(PR [#5237](https://github.com/tobymao/sqlglot/pull/5237) 
by [@VaggelisD](https://github.com/VaggelisD))* ## [v26.28.1] - 2025-06-13 ### :boom: BREAKING CHANGES - due to [`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))*: JOIN pipe syntax, Set Operators as CTEs (#5215) ### :sparkles: New Features - [`44297f1`](https://github.com/tobymao/sqlglot/commit/44297f1c5c8c2cb16fe77c318312f417b4281708) - **parser**: JOIN pipe syntax, Set Operators as CTEs *(PR [#5215](https://github.com/tobymao/sqlglot/pull/5215) by [@geooo109](https://github.com/geooo109))* - [`21cd3eb`](https://github.com/tobymao/sqlglot/commit/21cd3ebf5d0b57f5b102c5aadc3b24a598ebe918) - **parser**: PIVOT/UNPIVOT pipe syntax *(PR [#5222](https://github.com/tobymao/sqlglot/pull/5222) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`28fed58`](https://github.com/tobymao/sqlglot/commit/28fed586a39df83aade4792217743a1a859fd039) - **optimizer**: UnboundLocalError in scope module *(commit by [@georgesittas](https://github.com/georgesittas))* - [`809e05a`](https://github.com/tobymao/sqlglot/commit/809e05a743d5a2904a1d6f6813f24ca7549ac7ef) - **snowflake**: preserve STRTOK_TO_ARRAY roundtrip *(commit by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`aac70aa`](https://github.com/tobymao/sqlglot/commit/aac70aaaa8d840c267129e2307ccb65058cef0c9) - **parser**: simpler _parse_pipe_syntax_select *(commit by [@geooo109](https://github.com/geooo109))* ## [v26.27.0] - 2025-06-12 ### :boom: BREAKING CHANGES - due to [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))*: preserve DIV binary operator, 
fixes #5198 (#5199) - due to [`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))*: CTEs instead of subqueries for pipe syntax (#5205) - due to [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))*: support multi-part (>3) dotted functions (#5211) - due to [`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve quoting for UDT (#5216) ### :sparkles: New Features - [`c20f85e`](https://github.com/tobymao/sqlglot/commit/c20f85e3e171e502fc51f74894d3313f0ad61535) - **spark**: support ALTER ADD PARTITION *(PR [#5208](https://github.com/tobymao/sqlglot/pull/5208) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5204](https://github.com/tobymao/sqlglot/issues/5204) opened by [@cosinequanon](https://github.com/cosinequanon)* ### :bug: Bug Fixes - [`99bbae3`](https://github.com/tobymao/sqlglot/commit/99bbae370329c5f5cd132b711c714359cf96ba58) - **sqlite**: allow ALTER RENAME without COLUMN keyword fixes [#5195](https://github.com/tobymao/sqlglot/pull/5195) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ac6555b`](https://github.com/tobymao/sqlglot/commit/ac6555b4d6c162ef7b14b63307d01fd560138ea0) - **hive**: preserve DIV binary operator, fixes [#5198](https://github.com/tobymao/sqlglot/pull/5198) *(PR [#5199](https://github.com/tobymao/sqlglot/pull/5199) by [@georgesittas](https://github.com/georgesittas))* - 
[`d0eeb26`](https://github.com/tobymao/sqlglot/commit/d0eeb2639e771e8f8b6feabd41c65f16ed5a9829) - eliminate_join_marks has multiple issues fixes [#5188](https://github.com/tobymao/sqlglot/pull/5188) *(PR [#5189](https://github.com/tobymao/sqlglot/pull/5189) by [@snovik75](https://github.com/snovik75))* - :arrow_lower_right: *fixes issue [#5188](https://github.com/tobymao/sqlglot/issues/5188) opened by [@snovik75](https://github.com/snovik75)* - [`dfdd84b`](https://github.com/tobymao/sqlglot/commit/dfdd84bbc50da70f40a17b39935f8171d961f7d2) - **parser**: CTEs instead of subqueries for pipe syntax *(PR [#5205](https://github.com/tobymao/sqlglot/pull/5205) by [@geooo109](https://github.com/geooo109))* - [`77e9d9a`](https://github.com/tobymao/sqlglot/commit/77e9d9a0269e2013379967cf2f46fbd79c036277) - **mysql**: properly parse STORED/VIRTUAL computed columns *(PR [#5210](https://github.com/tobymao/sqlglot/pull/5210) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5203](https://github.com/tobymao/sqlglot/issues/5203) opened by [@mdebski](https://github.com/mdebski)* - [`5f95299`](https://github.com/tobymao/sqlglot/commit/5f9529940d83e89704f7d25eda63cd73fdb503ae) - **parser**: support multi-part (>3) dotted functions *(PR [#5211](https://github.com/tobymao/sqlglot/pull/5211) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5200](https://github.com/tobymao/sqlglot/issues/5200) opened by [@mateuszpoleski](https://github.com/mateuszpoleski)* - [`02afa2a`](https://github.com/tobymao/sqlglot/commit/02afa2a1941fc67086d50dffac2857262f1c3c4f) - **postgres**: Preserve quoting for UDT *(PR [#5216](https://github.com/tobymao/sqlglot/pull/5216) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5212](https://github.com/tobymao/sqlglot/issues/5212) opened by [@NickCrews](https://github.com/NickCrews)* - 
[`f37c0b1`](https://github.com/tobymao/sqlglot/commit/f37c0b1197321dd610648ce652a171ab063deeeb) - **snowflake**: ensure a standalone GET() expression can be parsed *(PR [#5219](https://github.com/tobymao/sqlglot/pull/5219) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`ad8a4e7`](https://github.com/tobymao/sqlglot/commit/ad8a4e73e1a9e4234f0b711163fb49630acf736c) - refactor join mark elimination to use is_correlated_subquery *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.26.0] - 2025-06-09 ### :boom: BREAKING CHANGES - due to [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes [#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))*: improve support for ENDSWITH closes #5170 - due to [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for ANY_VALUE for versions 16+ (#5179) - due to [`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))*: virtual column with AS(expr) as ComputedColumnConstraint (#5180) - due to [`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))*: Array_intersection transpilation support (#5186) ### :sparkles: New Features - [`434c45b`](https://github.com/tobymao/sqlglot/commit/434c45b547c3a5ea155dc8d7da2baab326eb6d4f) - improve support for ENDSWITH closes 
[#5170](https://github.com/tobymao/sqlglot/pull/5170) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`63f9cb4`](https://github.com/tobymao/sqlglot/commit/63f9cb4b158b88574136b32241ee60254352c9e6) - **sqlglotrs**: match the Python implementation of __repr__ for tokens *(PR [#5172](https://github.com/tobymao/sqlglot/pull/5172) by [@georgesittas](https://github.com/georgesittas))* - [`c007afa`](https://github.com/tobymao/sqlglot/commit/c007afa23831e9bd86f401d85260e15edf00328f) - support Star instance as first arg of exp.column helper *(PR [#5177](https://github.com/tobymao/sqlglot/pull/5177) by [@georgesittas](https://github.com/georgesittas))* - [`bc001ce`](https://github.com/tobymao/sqlglot/commit/bc001cef4c907d8fa421d3190b4fa91865d9ff6c) - **postgres**: Add support for ANY_VALUE for versions 16+ *(PR [#5179](https://github.com/tobymao/sqlglot/pull/5179) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4674](https://github.com/TobikoData/sqlmesh/issues/4674) opened by [@petrikoro](https://github.com/petrikoro)* - [`ba05ff6`](https://github.com/tobymao/sqlglot/commit/ba05ff67127e056d567fc2c1d3bcc8e3dcce7b7e) - **parser**: AGGREGATE with GROUP AND ORDER BY pipe syntax *(PR [#5171](https://github.com/tobymao/sqlglot/pull/5171) by [@geooo109](https://github.com/geooo109))* - [`26077a4`](https://github.com/tobymao/sqlglot/commit/26077a47d9db750f44ab1baf9a434596b5bb613b) - make to_table more lenient *(PR [#5183](https://github.com/tobymao/sqlglot/pull/5183) by [@georgesittas](https://github.com/georgesittas))* - [`29e2f1d`](https://github.com/tobymao/sqlglot/commit/29e2f1d89c095c9fab0944a6962c99bd745c2c91) - Array_intersection transpilation support *(PR [#5186](https://github.com/tobymao/sqlglot/pull/5186) by [@HarishRavi96](https://github.com/HarishRavi96))* - [`d86a114`](https://github.com/tobymao/sqlglot/commit/d86a1147aeb866ed0ab2c342914ecf8cbfadac8a) - **sqlite**: implement RESPECT/IGNORE NULLS in 
first_value() *(PR [#5185](https://github.com/tobymao/sqlglot/pull/5185) by [@NickCrews](https://github.com/NickCrews))* - [`1d50fca`](https://github.com/tobymao/sqlglot/commit/1d50fca8ffc34e4acbc1b791c4cdf5f184a748db) - improve transpilation of st_point and st_distance *(PR [#5194](https://github.com/tobymao/sqlglot/pull/5194) by [@georgesittas](https://github.com/georgesittas))* - [`756ec3b`](https://github.com/tobymao/sqlglot/commit/756ec3b65db1eb2572d017a3ac12ece6bb44c726) - **parser**: SET OPERATORS with pipe syntax *(PR [#5184](https://github.com/tobymao/sqlglot/pull/5184) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`6a2cb39`](https://github.com/tobymao/sqlglot/commit/6a2cb39d0ceec091dc4fc228f26d4f457729a3cf) - **parser**: virtual column with AS(expr) as ComputedColumnConstraint *(PR [#5180](https://github.com/tobymao/sqlglot/pull/5180) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5173](https://github.com/tobymao/sqlglot/issues/5173) opened by [@suyah](https://github.com/suyah)* - [`c87ae02`](https://github.com/tobymao/sqlglot/commit/c87ae02aa263be8463ca7283ebd090385a4bfd59) - **sqlite**: Add REPLACE to command tokens *(PR [#5192](https://github.com/tobymao/sqlglot/pull/5192) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5187](https://github.com/tobymao/sqlglot/issues/5187) opened by [@stefanmalanik](https://github.com/stefanmalanik)* - [`4b89afd`](https://github.com/tobymao/sqlglot/commit/4b89afdcc0063e70cbc64165c7f1f5102afaa87c) - **starrocks**: array_agg_transpilation_fix *(PR [#5190](https://github.com/tobymao/sqlglot/pull/5190) by [@Swathiraj23](https://github.com/Swathiraj23))* - [`461b054`](https://github.com/tobymao/sqlglot/commit/461b0548832ab8d916c3a6638f27a49f681109fe) - **postgres**: support use_spheroid argument in ST_DISTANCE *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - 
[`83de4e1`](https://github.com/tobymao/sqlglot/commit/83de4e11bc1547aa22b275b20c0326dfbe43b2b8) - improve benchmark result displaying *(PR [#5176](https://github.com/tobymao/sqlglot/pull/5176) by [@georgesittas](https://github.com/georgesittas))* - [`5d5dc2f`](https://github.com/tobymao/sqlglot/commit/5d5dc2fa471bd53730e03ac8039804221949f843) - Clean up exp.ArrayIntersect PR *(PR [#5193](https://github.com/tobymao/sqlglot/pull/5193) by [@VaggelisD](https://github.com/VaggelisD))* ## [v26.25.3] - 2025-06-04 ### :sparkles: New Features - [`964b4a1`](https://github.com/tobymao/sqlglot/commit/964b4a1e367e00e243b80edf677cd48d453ed31e) - add line/col position for Star *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.25.2] - 2025-06-04 ### :sparkles: New Features - [`8b5129f`](https://github.com/tobymao/sqlglot/commit/8b5129f288880032f0bf9d649984d82314039af1) - **postgres**: improve pretty-formatting of ARRAY[...] *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.25.1] - 2025-06-04 ### :wrench: Chores - [`440590b`](https://github.com/tobymao/sqlglot/commit/440590bf92ab1281f50b96a1400cbca695d40f0c) - bump sqlglotrs to 0.6.1 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.25.0] - 2025-06-03 ### :boom: BREAKING CHANGES - due to [`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor ALTER TABLE ADD parsing (#5144) - due to [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))*: preserve TIMESTAMP on roundtrip (#5145) - due to [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - return token 
vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by [@georgesittas](https://github.com/georgesittas))*: return token vector in `tokenize` even on failure (#5155) - due to [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump sqlglotrs to 0.6.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.6.0 ### :sparkles: New Features - [`93b402a`](https://github.com/tobymao/sqlglot/commit/93b402abc74e642ed312db585b33315674a450cd) - **parser**: support SELECT, FROM, WHERE with pipe syntax *(PR [#5128](https://github.com/tobymao/sqlglot/pull/5128) by [@geooo109](https://github.com/geooo109))* - [`1a8e78b`](https://github.com/tobymao/sqlglot/commit/1a8e78bd84e006023d5d3ea561504587dfbb55a9) - **parser**: ORDER BY with pipe syntax *(PR [#5153](https://github.com/tobymao/sqlglot/pull/5153) by [@geooo109](https://github.com/geooo109))* - [`966ad95`](https://github.com/tobymao/sqlglot/commit/966ad95432d5f8e29ade36d8271a5c489c207324) - **tsql**: add convert style 126 *(PR [#5157](https://github.com/tobymao/sqlglot/pull/5157) by [@pa1ch](https://github.com/pa1ch))* - [`b7ac6ff`](https://github.com/tobymao/sqlglot/commit/b7ac6ff4680ff619be4b0ddb01f61f916ed09d58) - **parser**: LIMIT/OFFSET pipe syntax *(PR [#5159](https://github.com/tobymao/sqlglot/pull/5159) by [@geooo109](https://github.com/geooo109))* - [`cfc158d`](https://github.com/tobymao/sqlglot/commit/cfc158d753d4f43d12c3b502633d29e43dcc5569) - **snowflake**: transpile STRTOK_TO_ARRAY to duckdb *(PR [#5165](https://github.com/tobymao/sqlglot/pull/5165) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5160](https://github.com/tobymao/sqlglot/issues/5160) opened by [@kyle-cheung](https://github.com/kyle-cheung)* - [`ff0f30b`](https://github.com/tobymao/sqlglot/commit/ff0f30bcf7d0d74b26a703eaa632e1be15b3c001) - support ARRAY_REMOVE *(PR 
[#5163](https://github.com/tobymao/sqlglot/pull/5163) by [@geooo109](https://github.com/geooo109))* - [`9cac01f`](https://github.com/tobymao/sqlglot/commit/9cac01f6b4a5c93b55f5b68f21cb104932880a0e) - **tsql**: support FOR XML syntax *(PR [#5167](https://github.com/tobymao/sqlglot/pull/5167) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5161](https://github.com/tobymao/sqlglot/issues/5161) opened by [@codykonior](https://github.com/codykonior)* ### :bug: Bug Fixes - [`f3aeb37`](https://github.com/tobymao/sqlglot/commit/f3aeb374351a0b1b3c75945718d8ea42f8926b62) - **tsql**: properly parse and generate ALTER SET *(PR [#5143](https://github.com/tobymao/sqlglot/pull/5143) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5135](https://github.com/tobymao/sqlglot/issues/5135) opened by [@codykonior](https://github.com/codykonior)* - [`72ce404`](https://github.com/tobymao/sqlglot/commit/72ce40405625239a0d6763d502e5af8b12abfe9b) - Refactor ALTER TABLE ADD parsing *(PR [#5144](https://github.com/tobymao/sqlglot/pull/5144) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5129](https://github.com/tobymao/sqlglot/issues/5129) opened by [@Mevrael](https://github.com/Mevrael)* - [`e73ddb7`](https://github.com/tobymao/sqlglot/commit/e73ddb733b7f120ae74054e6d4dc7d458f59ac50) - **mysql**: preserve TIMESTAMP on roundtrip *(PR [#5145](https://github.com/tobymao/sqlglot/pull/5145) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5127](https://github.com/tobymao/sqlglot/issues/5127) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`4f8c73d`](https://github.com/tobymao/sqlglot/commit/4f8c73d60eecebc601c60ee8c7819458435e34b8) - **hive**: STRUCT column names and data type should be separated by ':' in hive *(PR [#5147](https://github.com/tobymao/sqlglot/pull/5147) by [@tsamaras](https://github.com/tsamaras))* - 
[`e2a488f`](https://github.com/tobymao/sqlglot/commit/e2a488f48f3e036566462463bbc58cc6a1c7492e) - Error on columns mismatch in pushdown_projections ignores dialect *(PR [#5151](https://github.com/tobymao/sqlglot/pull/5151) by [@snovik75](https://github.com/snovik75))* - [`1a35365`](https://github.com/tobymao/sqlglot/commit/1a35365a3bb1ef56e8da0023271cbe3108e0ccb1) - avoid generating nested comments when not supported *(PR [#5158](https://github.com/tobymao/sqlglot/pull/5158) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5132](https://github.com/tobymao/sqlglot/issues/5132) opened by [@patricksurry](https://github.com/patricksurry)* - [`f6124c6`](https://github.com/tobymao/sqlglot/commit/f6124c6343f67563fc19f617891ecfc145a642db) - **rust-tokenizer**: return token vector in `tokenize` even on failure *(PR [#5155](https://github.com/tobymao/sqlglot/pull/5155) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5148](https://github.com/tobymao/sqlglot/issues/5148) opened by [@kamoser](https://github.com/kamoser)* - [`760a606`](https://github.com/tobymao/sqlglot/commit/760a6062d5f259488e471af9c1d33e200066e9dc) - **postgres**: support decimal values in INTERVAL expressions fixes [#5168](https://github.com/tobymao/sqlglot/pull/5168) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`86c6b90`](https://github.com/tobymao/sqlglot/commit/86c6b90d21b204b4376639affa142e8cee509065) - **tsql**: XML_OPTIONS *(commit by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`5752a87`](https://github.com/tobymao/sqlglot/commit/5752a87406b736317e4dc5cce9ae05cbc5c19547) - update benchmarking framework *(PR [#5146](https://github.com/tobymao/sqlglot/pull/5146) by [@benfdking](https://github.com/benfdking))* - [`0ae297a`](https://github.com/tobymao/sqlglot/commit/0ae297a01262cf323e225fe578bdeab2230c6fd5) - compare performance on main vs pr branch *(PR 
[#5149](https://github.com/tobymao/sqlglot/pull/5149) by [@georgesittas](https://github.com/georgesittas))* - [`180963b`](https://github.com/tobymao/sqlglot/commit/180963b8cf25d9ff83d2347859b7f46398af5000) - handle pipe syntax unsupported operators more gracefully *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6c8d61a`](https://github.com/tobymao/sqlglot/commit/6c8d61ae1ef5b645835ccd683063845dd801e8d2) - include optimization benchmarks *(PR [#5152](https://github.com/tobymao/sqlglot/pull/5152) by [@georgesittas](https://github.com/georgesittas))* - [`bc5c66c`](https://github.com/tobymao/sqlglot/commit/bc5c66c9210a472147d98a94c34b4bb582ade8b1) - Run benchmark job if /benchmark comment *(PR [#5164](https://github.com/tobymao/sqlglot/pull/5164) by [@VaggelisD](https://github.com/VaggelisD))* - [`742b2b7`](https://github.com/tobymao/sqlglot/commit/742b2b770b88a2e901d2f84af00db821da441e4c) - Fix benchmark CI to include issue number *(PR [#5166](https://github.com/tobymao/sqlglot/pull/5166) by [@VaggelisD](https://github.com/VaggelisD))* - [`64c37f1`](https://github.com/tobymao/sqlglot/commit/64c37f147366fe87ae187996ecb3c9a5afa7c264) - bump sqlglotrs to 0.6.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.24.0] - 2025-05-30 ### :boom: BREAKING CHANGES - due to [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - recognize ARRAY_CONCAT_AGG as an aggregate function *(PR [#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))*: recognize ARRAY_CONCAT_AGG as an aggregate function (#5141) ### :sparkles: New Features - [`bb4f428`](https://github.com/tobymao/sqlglot/commit/bb4f4283b53bc060a8c7e0f12c1e7ef5b521c4e6) - bubble up comments nested under a Bracket, fixes [#5131](https://github.com/tobymao/sqlglot/pull/5131) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`9f318eb`](https://github.com/tobymao/sqlglot/commit/9f318ebe4502bb484a34873252cf4a40c7e440e4) - **snowflake**: Transpile BQ's `ARRAY(SELECT AS STRUCT ...)` *(PR [#5140](https://github.com/tobymao/sqlglot/pull/5140) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`a3fccd9`](https://github.com/tobymao/sqlglot/commit/a3fccd9be294499b53477da931f8b097cdbe09fc) - **snowflake**: generate SELECT for UNNEST without JOIN or FROM *(PR [#5138](https://github.com/tobymao/sqlglot/pull/5138) by [@geooo109](https://github.com/geooo109))* - [`993919d`](https://github.com/tobymao/sqlglot/commit/993919d05d5d3c814471607b56831bb65d349eb4) - **snowflake**: Properly transpile ARRAY_AGG, IGNORE/RESPECT NULLS *(PR [#5137](https://github.com/tobymao/sqlglot/pull/5137) by [@VaggelisD](https://github.com/VaggelisD))* - [`6e57619`](https://github.com/tobymao/sqlglot/commit/6e57619f85375e789bb39a6478aa01cd7c7758f0) - **snowflake**: Transpile ISOWEEK to WEEKISO *(PR [#5139](https://github.com/tobymao/sqlglot/pull/5139) by [@VaggelisD](https://github.com/VaggelisD))* - [`c484ca3`](https://github.com/tobymao/sqlglot/commit/c484ca39bad750a96b62e2edae85612cac66ba30) - **bigquery**: recognize ARRAY_CONCAT_AGG as an aggregate function *(PR [#5141](https://github.com/tobymao/sqlglot/pull/5141) by [@georgesittas](https://github.com/georgesittas))* ## [v26.23.0] - 2025-05-29 ### :boom: BREAKING CHANGES - due to [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update py03 version *(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by [@benfdking](https://github.com/benfdking))*: update py03 version (#5136) - due to [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.5.0 ### :bug: Bug Fixes - 
[`e9b3156`](https://github.com/tobymao/sqlglot/commit/e9b3156aa1ed95fdee4c6b419134d8ca746964b6) - **athena**: Handle transpilation of FileFormatProperty from dialects that treat it as a variable and not a string literal *(PR [#5133](https://github.com/tobymao/sqlglot/pull/5133) by [@erindru](https://github.com/erindru))* ### :wrench: Chores - [`6910744`](https://github.com/tobymao/sqlglot/commit/6910744e6260793b3f9190782cf60fbbd9adcd38) - update py03 version *(PR [#5136](https://github.com/tobymao/sqlglot/pull/5136) by [@benfdking](https://github.com/benfdking))* - :arrow_lower_right: *addresses issue [#5134](https://github.com/tobymao/sqlglot/issues/5134) opened by [@mgorny](https://github.com/mgorny)* - [`a56deab`](https://github.com/tobymao/sqlglot/commit/a56deabc2b9543209fb5e41f19c3bef89177a577) - bump sqlglotrs to 0.5.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.22.1] - 2025-05-28 ### :bug: Bug Fixes - [`f7401fd`](https://github.com/tobymao/sqlglot/commit/f7401fdc29a35738eb23f424ceba03463a4d8af9) - **bigquery**: avoid getting stuck in infinite loop when parsing tables *(PR [#5130](https://github.com/tobymao/sqlglot/pull/5130) by [@georgesittas](https://github.com/georgesittas))* ## [v26.22.0] - 2025-05-27 ### :boom: BREAKING CHANGES - due to [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))*: avoid creating new alias for qualifying unpivot (#5121) - due to [`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))*: make coalesce simplification optional, skip by default (#5123) ### :sparkles: New Features - 
[`82c50ce`](https://github.com/tobymao/sqlglot/commit/82c50ce68d9a1ad25095086ae3645f5c4996c18b) - **duckdb**: extend time travel parsing to take VERSION into account *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`f2bf000`](https://github.com/tobymao/sqlglot/commit/f2bf000a410fb18531bb90ef1d767baf0e8bce7a) - **optimizer**: avoid creating new alias for qualifying unpivot *(PR [#5121](https://github.com/tobymao/sqlglot/pull/5121) by [@geooo109](https://github.com/geooo109))* - [`a126ce8`](https://github.com/tobymao/sqlglot/commit/a126ce8a25287cf3531d815035fa3d567dc772fb) - **optimizer**: make coalesce simplification optional, skip by default *(PR [#5123](https://github.com/tobymao/sqlglot/pull/5123) by [@barakalon](https://github.com/barakalon))* ## [v26.21.0] - 2025-05-26 ### :boom: BREAKING CHANGES - due to [`de67d3c`](https://github.com/tobymao/sqlglot/commit/de67d3c953191d77ecf8cf57e375e7d203cd8857) - error on unsupported dialect settings *(PR [#5119](https://github.com/tobymao/sqlglot/pull/5119) by [@georgesittas](https://github.com/georgesittas))*: error on unsupported dialect settings (#5119) ### :sparkles: New Features - [`344f2f1`](https://github.com/tobymao/sqlglot/commit/344f2f12b6ed02d3cfd265c33fe4428741bcf6d6) - store line/col position for Anonymous functions *(PR [#5120](https://github.com/tobymao/sqlglot/pull/5120) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`de67d3c`](https://github.com/tobymao/sqlglot/commit/de67d3c953191d77ecf8cf57e375e7d203cd8857) - error on unsupported dialect settings *(PR [#5119](https://github.com/tobymao/sqlglot/pull/5119) by [@georgesittas](https://github.com/georgesittas))* ## [v26.20.0] - 2025-05-25 ### :sparkles: New Features - [`a51744f`](https://github.com/tobymao/sqlglot/commit/a51744f84945dbb99a2ab3b576eccf1543e21e17) - **optimizer**: annotate SORT_ARRAY *(PR [#5110](https://github.com/tobymao/sqlglot/pull/5110) by 
[@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5107](https://github.com/tobymao/sqlglot/issues/5107) opened by [@fernandomorato](https://github.com/fernandomorato)* ### :bug: Bug Fixes - [`ef93832`](https://github.com/tobymao/sqlglot/commit/ef938328ea18dede07ea4be7425a203770b4ca7d) - **tsql**: separate ISNULL from COALESCE *(PR [#5105](https://github.com/tobymao/sqlglot/pull/5105) by [@geooo109](https://github.com/geooo109))* - [`10fe4c0`](https://github.com/tobymao/sqlglot/commit/10fe4c039a15f12c97bdf74e2e4cf547691f8546) - Return parameterized `type2` in `_maybe_coerce` *(PR [#5106](https://github.com/tobymao/sqlglot/pull/5106) by [@aninhalbuquerque](https://github.com/aninhalbuquerque))* - [`2a95777`](https://github.com/tobymao/sqlglot/commit/2a957772fb2d95442604cf19451bf8cb58be0aeb) - **snowflake**: Put COPY GRANTS in the right place for materialized views *(PR [#5109](https://github.com/tobymao/sqlglot/pull/5109) by [@erindru](https://github.com/erindru))* - [`8ba0eca`](https://github.com/tobymao/sqlglot/commit/8ba0ecaa4c3b81594a0bc0a6a88f205dc64fb9aa) - **optimizer**: avoid creating extra ARRAY for annotate SORT_ARRAY *(commit by [@geooo109](https://github.com/geooo109))* - [`a2ba1aa`](https://github.com/tobymao/sqlglot/commit/a2ba1aa14891db9edb853296501fac6995f8d802) - **optimizer**: annotate DPipe with VARCHAR *(PR [#5111](https://github.com/tobymao/sqlglot/pull/5111) by [@geooo109](https://github.com/geooo109))* - [`57db62a`](https://github.com/tobymao/sqlglot/commit/57db62ac9cf115b699076af2fb951188b54639be) - ignore/respect nulls generation edge case *(PR [#5117](https://github.com/tobymao/sqlglot/pull/5117) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`a330093`](https://github.com/tobymao/sqlglot/commit/a33009349b49f244f857976dad72bee3943b80c0) - **executor**: add type hints in table module *(PR [#5113](https://github.com/tobymao/sqlglot/pull/5113) by 
[@esadek](https://github.com/esadek))* ## [v26.19.0] - 2025-05-22 ### :boom: BREAKING CHANGES - due to [`886f85b`](https://github.com/tobymao/sqlglot/commit/886f85bf61d23ef968b9bcfd98ab606c8a590526) - pass dialect to ensure_schema *(PR [#5100](https://github.com/tobymao/sqlglot/pull/5100) by [@georgesittas](https://github.com/georgesittas))*: pass dialect to ensure_schema (#5100) - due to [`7570f8a`](https://github.com/tobymao/sqlglot/commit/7570f8a8e77b045b5fd97dde8b4112b901df7e15) - hive, spark2, spark, databricks type coercion for IF and COALESCE functions *(PR [#5096](https://github.com/tobymao/sqlglot/pull/5096) by [@geooo109](https://github.com/geooo109))*: hive, spark2, spark, databricks type coercion for IF and COALESCE functions (#5096) ### :sparkles: New Features - [`f5f4ca1`](https://github.com/tobymao/sqlglot/commit/f5f4ca195b57007afa80fd3d9ef69953e36536ea) - **starrocks**: Support parsing "NONE" as security option *(PR [#5099](https://github.com/tobymao/sqlglot/pull/5099) by [@alpolishchuk](https://github.com/alpolishchuk))* - [`2b928e2`](https://github.com/tobymao/sqlglot/commit/2b928e238cba63e5e043207dae1bfe2f140a1c2b) - improve pretty-printing of MERGE statement *(PR [#5102](https://github.com/tobymao/sqlglot/pull/5102) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5101](https://github.com/tobymao/sqlglot/issues/5101) opened by [@maoxingda](https://github.com/maoxingda)* - [`69ce6b4`](https://github.com/tobymao/sqlglot/commit/69ce6b4e5d597288e4001f9696713aee083617be) - **duckdb**: add support for TRY *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`886f85b`](https://github.com/tobymao/sqlglot/commit/886f85bf61d23ef968b9bcfd98ab606c8a590526) - **optimizer**: pass dialect to ensure_schema *(PR [#5100](https://github.com/tobymao/sqlglot/pull/5100) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue 
[#5098](https://github.com/tobymao/sqlglot/issues/5098) opened by [@sh-rp](https://github.com/sh-rp)* - [`7570f8a`](https://github.com/tobymao/sqlglot/commit/7570f8a8e77b045b5fd97dde8b4112b901df7e15) - **optimizer**: hive, spark2, spark, databricks type coercion for IF and COALESCE functions *(PR [#5096](https://github.com/tobymao/sqlglot/pull/5096) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5067](https://github.com/tobymao/sqlglot/issues/5067) opened by [@fernandomorato](https://github.com/fernandomorato)* ### :wrench: Chores - [`cb96a0c`](https://github.com/tobymao/sqlglot/commit/cb96a0c57d94b172e6a46f8498d726cec65cfb3f) - **duckdb**: add test for UUIDV7 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.18.1] - 2025-05-20 ### :wrench: Chores - [`db2af6f`](https://github.com/tobymao/sqlglot/commit/db2af6fa1e2c2bf0f4cebb272287d0b2e8e69f76) - bump sqlglotrs to 0.4.2 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.18.0] - 2025-05-20 ### :boom: BREAKING CHANGES - due to [`1df7f61`](https://github.com/tobymao/sqlglot/commit/1df7f611bc96616cb07950a80f6669d0bc331b0e) - refactor length_sql so it handles any type, not just varchar/blob *(PR [#4935](https://github.com/tobymao/sqlglot/pull/4935) by [@tekumara](https://github.com/tekumara))*: refactor length_sql so it handles any type, not just varchar/blob (#4935) - due to [`52719f3`](https://github.com/tobymao/sqlglot/commit/52719f37f6541e8ec9f66642ac23ed9015048092) - parse CREATE STAGE *(PR [#4947](https://github.com/tobymao/sqlglot/pull/4947) by [@tekumara](https://github.com/tekumara))*: parse CREATE STAGE (#4947) - due to [`fd39b30`](https://github.com/tobymao/sqlglot/commit/fd39b30209d068b787619b8137a105aca9c3e607) - parse CREATE FILE FORMAT *(PR [#4948](https://github.com/tobymao/sqlglot/pull/4948) by [@tekumara](https://github.com/tekumara))*: parse CREATE FILE FORMAT (#4948) - due to 
[`f835756`](https://github.com/tobymao/sqlglot/commit/f835756257f735643584b89e93693e8577744731) - Fix CREATE EXTERNAL TABLE properties *(PR [#4951](https://github.com/tobymao/sqlglot/pull/4951) by [@VaggelisD](https://github.com/VaggelisD))*: Fix CREATE EXTERNAL TABLE properties (#4951) - due to [`44b955b`](https://github.com/tobymao/sqlglot/commit/44b955bd537bfb8f5b6e84ecbcd5f6e3da852260) - Fix generation of exp.Values *(PR [#4930](https://github.com/tobymao/sqlglot/pull/4930) by [@VaggelisD](https://github.com/VaggelisD))*: Fix generation of exp.Values (#4930) - due to [`1f506b1`](https://github.com/tobymao/sqlglot/commit/1f506b186f1b954829195eefda318e231d474208) - support SHOW (ALL) TABLES *(PR [#4961](https://github.com/tobymao/sqlglot/pull/4961) by [@mscolnick](https://github.com/mscolnick))*: support SHOW (ALL) TABLES (#4961) - due to [`72cf4a4`](https://github.com/tobymao/sqlglot/commit/72cf4a4501a8d122041a28b71be5a41ffb53602a) - Add support for PIVOT multiple IN clauses *(PR [#4964](https://github.com/tobymao/sqlglot/pull/4964) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for PIVOT multiple IN clauses (#4964) - due to [`400ea54`](https://github.com/tobymao/sqlglot/commit/400ea54d3a9cab256bfa5e496439bb9be6072d0b) - ensure JSON_FORMAT type is JSON when targeting Presto *(PR [#4968](https://github.com/tobymao/sqlglot/pull/4968) by [@georgesittas](https://github.com/georgesittas))*: ensure JSON_FORMAT type is JSON when targeting Presto (#4968) - due to [`cb20038`](https://github.com/tobymao/sqlglot/commit/cb2003875fc6e149bd4a631e99c312a04435a46b) - treat GO as command *(PR [#4978](https://github.com/tobymao/sqlglot/pull/4978) by [@georgesittas](https://github.com/georgesittas))*: treat GO as command (#4978) - due to [`60e26b8`](https://github.com/tobymao/sqlglot/commit/60e26b868242a05a7fdc2725bd21a127910a6fb7) - improve transpilability of GET_JSON_OBJECT by parsing json path *(PR [#4980](https://github.com/tobymao/sqlglot/pull/4980) by 
[@georgesittas](https://github.com/georgesittas))*: improve transpilability of GET_JSON_OBJECT by parsing json path (#4980) - due to [`2b7845a`](https://github.com/tobymao/sqlglot/commit/2b7845a3a821d366ae90ba9ef5e7d61194a34874) - Add support for Athena's Iceberg partitioning transforms *(PR [#4976](https://github.com/tobymao/sqlglot/pull/4976) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for Athena's Iceberg partitioning transforms (#4976) - due to [`ee794e9`](https://github.com/tobymao/sqlglot/commit/ee794e9c6a3b2fdb142114327d904b6c94a16cd0) - use the standard POWER function instead of ^ fixes [#4982](https://github.com/tobymao/sqlglot/pull/4982) *(commit by [@georgesittas](https://github.com/georgesittas))*: use the standard POWER function instead of ^ fixes #4982 - due to [`2369195`](https://github.com/tobymao/sqlglot/commit/2369195635e25dabd5ce26c13e402076508bba04) - consistently parse INTERVAL value as a string *(PR [#4986](https://github.com/tobymao/sqlglot/pull/4986) by [@georgesittas](https://github.com/georgesittas))*: consistently parse INTERVAL value as a string (#4986) - due to [`e866cff`](https://github.com/tobymao/sqlglot/commit/e866cffbaac3b62255d0d5c8be043ab2394af619) - support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints *(PR [#4987](https://github.com/tobymao/sqlglot/pull/4987) by [@geooo109](https://github.com/geooo109))*: support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints (#4987) - due to [`510984f`](https://github.com/tobymao/sqlglot/commit/510984f2ddc6ff13b8a8030f698aed9ad0e6f46b) - stop generating redundant TO_DATE calls *(PR [#4990](https://github.com/tobymao/sqlglot/pull/4990) by [@georgesittas](https://github.com/georgesittas))*: stop generating redundant TO_DATE calls (#4990) - due to [`da9ec61`](https://github.com/tobymao/sqlglot/commit/da9ec61e8edd5049e246390e1b638cf14d50fa2d) - Fix pretty generation of exp.Window *(PR [#4994](https://github.com/tobymao/sqlglot/pull/4994) by 
[@VaggelisD](https://github.com/VaggelisD))*: Fix pretty generation of exp.Window (#4994) - due to [`fb83fac`](https://github.com/tobymao/sqlglot/commit/fb83fac2d097d8d3e8e2556c072792857609bd94) - remove recursion from `simplify` *(PR [#4988](https://github.com/tobymao/sqlglot/pull/4988) by [@georgesittas](https://github.com/georgesittas))*: remove recursion from `simplify` (#4988) - due to [`890b24a`](https://github.com/tobymao/sqlglot/commit/890b24a5cec269f5595743d0a86024a23217a3f1) - remove `connector_depth` as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))*: remove `connector_depth` as it is now dead code - due to [`1dc501b`](https://github.com/tobymao/sqlglot/commit/1dc501b8ed68638375d869e11f3bf188948a4990) - remove `max_depth` argument in simplify as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))*: remove `max_depth` argument in simplify as it is now dead code - due to [`f5358d8`](https://github.com/tobymao/sqlglot/commit/f5358d8a3e2743b5ac0d540f10502d333ad4e082) - add support for GET statements *(PR [#5019](https://github.com/tobymao/sqlglot/pull/5019) by [@eruditmorina](https://github.com/eruditmorina))*: add support for GET statements (#5019) - due to [`bafa7f3`](https://github.com/tobymao/sqlglot/commit/bafa7f3a03c57e573b793ed2c83c3a549dfb789c) - parse DOW and DOY *(PR [#5037](https://github.com/tobymao/sqlglot/pull/5037) by [@geooo109](https://github.com/geooo109))*: parse DOW and DOY (#5037) - due to [`eb0a989`](https://github.com/tobymao/sqlglot/commit/eb0a989a7f3bbddb49c66ad5cd42043532568e25) - support udf environment property *(PR [#5045](https://github.com/tobymao/sqlglot/pull/5045) by [@geooo109](https://github.com/geooo109))*: support udf environment property (#5045) - due to [`807fbbc`](https://github.com/tobymao/sqlglot/commit/807fbbc5a89925fd3c98e823003a9dc929fcaff6) - transpile timestamp without time zone *(PR [#5047](https://github.com/tobymao/sqlglot/pull/5047) by 
[@geooo109](https://github.com/geooo109))*: transpile timestamp without time zone (#5047) - due to [`c48fc8f`](https://github.com/tobymao/sqlglot/commit/c48fc8fefc13becff92d0546cec1730f038af6b2) - support translate with error *(PR [#5052](https://github.com/tobymao/sqlglot/pull/5052) by [@geooo109](https://github.com/geooo109))*: support translate with error (#5052) - due to [`2e9704e`](https://github.com/tobymao/sqlglot/commit/2e9704ede255ef17b412c6905aad69afd70ccbf3) - Change `COLLATE` expression to `Var` for `ALTER TABLE` *(PR [#5055](https://github.com/tobymao/sqlglot/pull/5055) by [@MarcusRisanger](https://github.com/MarcusRisanger))*: Change `COLLATE` expression to `Var` for `ALTER TABLE` (#5055) - due to [`63f505e`](https://github.com/tobymao/sqlglot/commit/63f505e036928ed94df61a8b213bf84198e33d35) - unqualify UNNEST only the left most part of a column *(PR [#5069](https://github.com/tobymao/sqlglot/pull/5069) by [@geooo109](https://github.com/geooo109))*: unqualify UNNEST only the left most part of a column (#5069) - due to [`56da962`](https://github.com/tobymao/sqlglot/commit/56da9629899e72ab1e15cfc45ede838c4c38c16e) - to_timestamp without format *(PR [#5070](https://github.com/tobymao/sqlglot/pull/5070) by [@geooo109](https://github.com/geooo109))*: to_timestamp without format (#5070) - due to [`1ddfcbe`](https://github.com/tobymao/sqlglot/commit/1ddfcbe6c1d30d70533774da38d842bb3af6c205) - support CONVERT function *(PR [#5074](https://github.com/tobymao/sqlglot/pull/5074) by [@geooo109](https://github.com/geooo109))*: support CONVERT function (#5074) - due to [`ba52f01`](https://github.com/tobymao/sqlglot/commit/ba52f014f0d53ce8a179f1b140876274a01b38ac) - respect normalization strategy overrides *(PR [#5080](https://github.com/tobymao/sqlglot/pull/5080) by [@georgesittas](https://github.com/georgesittas))*: respect normalization strategy overrides (#5080) ### :sparkles: New Features - 
[`52719f3`](https://github.com/tobymao/sqlglot/commit/52719f37f6541e8ec9f66642ac23ed9015048092) - **snowflake**: parse CREATE STAGE *(PR [#4947](https://github.com/tobymao/sqlglot/pull/4947) by [@tekumara](https://github.com/tekumara))* - [`fd39b30`](https://github.com/tobymao/sqlglot/commit/fd39b30209d068b787619b8137a105aca9c3e607) - **snowflake**: parse CREATE FILE FORMAT *(PR [#4948](https://github.com/tobymao/sqlglot/pull/4948) by [@tekumara](https://github.com/tekumara))* - [`da9a6a1`](https://github.com/tobymao/sqlglot/commit/da9a6a1d56323319b87e9b193d12ad1c644b9239) - **snowflake**: parse SHOW STAGES *(PR [#4949](https://github.com/tobymao/sqlglot/pull/4949) by [@tekumara](https://github.com/tekumara))* - [`bfdcdf0`](https://github.com/tobymao/sqlglot/commit/bfdcdf0afc0f4af3dacdfc3e8dca243793552b74) - **snowflake**: parse SHOW FILE FORMATS *(PR [#4950](https://github.com/tobymao/sqlglot/pull/4950) by [@tekumara](https://github.com/tekumara))* - [`c591443`](https://github.com/tobymao/sqlglot/commit/c591443b6b2328780e08179144557e181db0cbb6) - **duckdb**: add support for GROUP clause in standard PIVOT syntax *(PR [#4953](https://github.com/tobymao/sqlglot/pull/4953) by [@georgesittas](https://github.com/georgesittas))* - [`b011ee2`](https://github.com/tobymao/sqlglot/commit/b011ee2df0beaac75b982261a25d3e787dead54a) - **bigquery**: Add support for side & kind on set operators *(PR [#4959](https://github.com/tobymao/sqlglot/pull/4959) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4942](https://github.com/tobymao/sqlglot/issues/4942) opened by [@z3z1ma](https://github.com/z3z1ma)* - [`1f506b1`](https://github.com/tobymao/sqlglot/commit/1f506b186f1b954829195eefda318e231d474208) - **duckdb**: support SHOW (ALL) TABLES *(PR [#4961](https://github.com/tobymao/sqlglot/pull/4961) by [@mscolnick](https://github.com/mscolnick))* - :arrow_lower_right: *addresses issue [#4956](https://github.com/tobymao/sqlglot/issues/4956) opened 
by [@mscolnick](https://github.com/mscolnick)* - [`ad5b595`](https://github.com/tobymao/sqlglot/commit/ad5b595049a16a27a7f249afea43dbcfcf43b5f4) - allow explicit aliasing in if(...) expressions *(PR [#4963](https://github.com/tobymao/sqlglot/pull/4963) by [@georgesittas](https://github.com/georgesittas))* - [`72cf4a4`](https://github.com/tobymao/sqlglot/commit/72cf4a4501a8d122041a28b71be5a41ffb53602a) - **duckdb**: Add support for PIVOT multiple IN clauses *(PR [#4964](https://github.com/tobymao/sqlglot/pull/4964) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4944](https://github.com/tobymao/sqlglot/issues/4944) opened by [@nph](https://github.com/nph)* - [`7bc5a21`](https://github.com/tobymao/sqlglot/commit/7bc5a217c3cc68d0cb1eaedc0c18f5188de80bf1) - **postgres**: support laterals with ordinality fixes [#4965](https://github.com/tobymao/sqlglot/pull/4965) *(PR [#4966](https://github.com/tobymao/sqlglot/pull/4966) by [@georgesittas](https://github.com/georgesittas))* - [`400ea54`](https://github.com/tobymao/sqlglot/commit/400ea54d3a9cab256bfa5e496439bb9be6072d0b) - ensure JSON_FORMAT type is JSON when targeting Presto *(PR [#4968](https://github.com/tobymao/sqlglot/pull/4968) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4967](https://github.com/tobymao/sqlglot/issues/4967) opened by [@jmsmdy](https://github.com/jmsmdy)* - [`a762993`](https://github.com/tobymao/sqlglot/commit/a762993c53d7ae91a831a8be448010e17e60f497) - **generator**: unsupported warning for T-SQL query option *(PR [#4972](https://github.com/tobymao/sqlglot/pull/4972) by [@geooo109](https://github.com/geooo109))* - [`e866cff`](https://github.com/tobymao/sqlglot/commit/e866cffbaac3b62255d0d5c8be043ab2394af619) - **parser**: support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints *(PR [#4987](https://github.com/tobymao/sqlglot/pull/4987) by [@geooo109](https://github.com/geooo109))* - 
:arrow_lower_right: *addresses issue [#4983](https://github.com/tobymao/sqlglot/issues/4983) opened by [@ggadon](https://github.com/ggadon)* - [`76535ce`](https://github.com/tobymao/sqlglot/commit/76535ce9487186d2eb7071fac2f224238de7a9ba) - **optimizer**: add support for Spark's TRANSFORM clause *(PR [#4993](https://github.com/tobymao/sqlglot/pull/4993) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4991](https://github.com/tobymao/sqlglot/issues/4991) opened by [@karta0807913](https://github.com/karta0807913)* - [`27a9fb2`](https://github.com/tobymao/sqlglot/commit/27a9fb26a1936512a09b8b09ed2656e22918f2c6) - **clickhouse**: Support parsing CTAS with alias *(PR [#5003](https://github.com/tobymao/sqlglot/pull/5003) by [@dorranh](https://github.com/dorranh))* - [`45cd165`](https://github.com/tobymao/sqlglot/commit/45cd165eaca96b33f1de753a147bdc352b9d56d0) - **clickhouse**: Support ClickHouse Nothing type *(PR [#5004](https://github.com/tobymao/sqlglot/pull/5004) by [@dorranh](https://github.com/dorranh))* - [`ca61a61`](https://github.com/tobymao/sqlglot/commit/ca61a617fa67082bc0fc94853dee4d70b8ca5c59) - Support exp.PartitionByProperty for parse_into() *(PR [#5006](https://github.com/tobymao/sqlglot/pull/5006) by [@erindru](https://github.com/erindru))* - [`a6d4c3c`](https://github.com/tobymao/sqlglot/commit/a6d4c3c901f828cdd96a16a0e55eac1b244f63be) - **snowflake**: Add numeric parameter support *(PR [#5008](https://github.com/tobymao/sqlglot/pull/5008) by [@hovaesco](https://github.com/hovaesco))* - [`5feae00`](https://github.com/tobymao/sqlglot/commit/5feae00ec7a4826285e7fd0be85d377cc0de09b5) - **databricks**: add support for the VOID type *(PR [#5012](https://github.com/tobymao/sqlglot/pull/5012) by [@georgesittas](https://github.com/georgesittas))* - [`6010302`](https://github.com/tobymao/sqlglot/commit/60103020879db5f23a6c4a1775848e31cce13415) - **postgres**: transpile QUARTER interval unit *(PR 
[#5015](https://github.com/tobymao/sqlglot/pull/5015) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5013](https://github.com/tobymao/sqlglot/issues/5013) opened by [@Wiill007](https://github.com/Wiill007)* - [`f5358d8`](https://github.com/tobymao/sqlglot/commit/f5358d8a3e2743b5ac0d540f10502d333ad4e082) - **snowflake**: add support for GET statements *(PR [#5019](https://github.com/tobymao/sqlglot/pull/5019) by [@eruditmorina](https://github.com/eruditmorina))* - [`df5ecdb`](https://github.com/tobymao/sqlglot/commit/df5ecdbebcdce491031538f6baa0f87ec7eefee8) - Include token references in the meta of identifier expressions *(PR [#5022](https://github.com/tobymao/sqlglot/pull/5022) by [@izeigerman](https://github.com/izeigerman))* - [`bafa7f3`](https://github.com/tobymao/sqlglot/commit/bafa7f3a03c57e573b793ed2c83c3a549dfb789c) - **presto**: parse DOW and DOY *(PR [#5037](https://github.com/tobymao/sqlglot/pull/5037) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5036](https://github.com/tobymao/sqlglot/issues/5036) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`eb0a989`](https://github.com/tobymao/sqlglot/commit/eb0a989a7f3bbddb49c66ad5cd42043532568e25) - support udf environment property *(PR [#5045](https://github.com/tobymao/sqlglot/pull/5045) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5043](https://github.com/tobymao/sqlglot/issues/5043) opened by [@aersam](https://github.com/aersam)* - [`c48fc8f`](https://github.com/tobymao/sqlglot/commit/c48fc8fefc13becff92d0546cec1730f038af6b2) - **teradata**: support translate with error *(PR [#5052](https://github.com/tobymao/sqlglot/pull/5052) by [@geooo109](https://github.com/geooo109))* - [`6791849`](https://github.com/tobymao/sqlglot/commit/679184943f7ffa79a2a466546f9bdfccd69034a3) - **executor**: support conversion from table to pylist *(PR 
[#5053](https://github.com/tobymao/sqlglot/pull/5053) by [@esadek](https://github.com/esadek))* - [`07bf71b`](https://github.com/tobymao/sqlglot/commit/07bf71bae5d2a5c381104a86bb52c06809c21174) - **parser**: FK REFERENCES without specifying column *(PR [#5064](https://github.com/tobymao/sqlglot/pull/5064) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5057](https://github.com/tobymao/sqlglot/issues/5057) opened by [@Steven-Wright](https://github.com/Steven-Wright)* - [`1ddfcbe`](https://github.com/tobymao/sqlglot/commit/1ddfcbe6c1d30d70533774da38d842bb3af6c205) - **oracle**: support CONVERT function *(PR [#5074](https://github.com/tobymao/sqlglot/pull/5074) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5071](https://github.com/tobymao/sqlglot/issues/5071) opened by [@tchamwam](https://github.com/tchamwam)* - [`2cca655`](https://github.com/tobymao/sqlglot/commit/2cca655430ccf4542dcb3fd0e95b776739ef91eb) - allow PIVOT to follow a JOIN *(PR [#5075](https://github.com/tobymao/sqlglot/pull/5075) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5073](https://github.com/tobymao/sqlglot/issues/5073) opened by [@tchamwam](https://github.com/tchamwam)* - [`c7a56d7`](https://github.com/tobymao/sqlglot/commit/c7a56d7616cfb99de942d527e80ccec36cfc5cc3) - **oracle**: PRIOR in SELECT *(PR [#5077](https://github.com/tobymao/sqlglot/pull/5077) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#5072](https://github.com/tobymao/sqlglot/issues/5072) opened by [@tchamwam](https://github.com/tchamwam)* - [`5c66679`](https://github.com/tobymao/sqlglot/commit/5c66679208b34b480b9a0a0c538a15ab98f872b6) - **clickhouse**: allow EXCHANGE to be parsed as Command *(commit by [@georgesittas](https://github.com/georgesittas))* - [`3bcf989`](https://github.com/tobymao/sqlglot/commit/3bcf9899bbdac54bf8923ab3aa13ec66c65f0c44) - 
**snowflake**: Transpile DataType.BIGDECIMAL to DOUBLE *(PR [#5092](https://github.com/tobymao/sqlglot/pull/5092) by [@VaggelisD](https://github.com/VaggelisD))* - [`b63b60e`](https://github.com/tobymao/sqlglot/commit/b63b60ebd10ca51f05e3f54532767bd98ccc34e3) - treat `CHAR[ACTER] VARYING` as `VARCHAR` for all dialects *(PR [#5093](https://github.com/tobymao/sqlglot/pull/5093) by [@ewhitley](https://github.com/ewhitley))* - [`aa26aad`](https://github.com/tobymao/sqlglot/commit/aa26aad2608cd55b8bbd1d9e268444307a7224dc) - transpile WINDOW clause *(PR [#5097](https://github.com/tobymao/sqlglot/pull/5097) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`7042603`](https://github.com/tobymao/sqlglot/commit/7042603ecb5693795b15219ec9cebf2f76032c03) - **optimizer**: Merge subqueries when inner query has name conflict with outer query *(PR [#4931](https://github.com/tobymao/sqlglot/pull/4931) by [@barakalon](https://github.com/barakalon))* - [`1df7f61`](https://github.com/tobymao/sqlglot/commit/1df7f611bc96616cb07950a80f6669d0bc331b0e) - **duckdb**: refactor length_sql so it handles any type, not just varchar/blob *(PR [#4935](https://github.com/tobymao/sqlglot/pull/4935) by [@tekumara](https://github.com/tekumara))* - :arrow_lower_right: *fixes issue [#4934](https://github.com/tobymao/sqlglot/issues/4934) opened by [@tekumara](https://github.com/tekumara)* - [`09882e3`](https://github.com/tobymao/sqlglot/commit/09882e32f057670a9cbd97c1e5cf1a00c774b5d2) - **tsql**: remove assert call from _build_formatted_time *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bf39a95`](https://github.com/tobymao/sqlglot/commit/bf39a95426ed6637e424da1be070cc9a8affc358) - **sqlite**: transpile double quoted PRIMARY KEY *(PR [#4941](https://github.com/tobymao/sqlglot/pull/4941) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4938](https://github.com/tobymao/sqlglot/issues/4938) opened by 
[@rgeronimi](https://github.com/rgeronimi)* - [`f835756`](https://github.com/tobymao/sqlglot/commit/f835756257f735643584b89e93693e8577744731) - **snowflake**: Fix CREATE EXTERNAL TABLE properties *(PR [#4951](https://github.com/tobymao/sqlglot/pull/4951) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4945](https://github.com/tobymao/sqlglot/issues/4945) opened by [@tekumara](https://github.com/tekumara)* - [`61ed971`](https://github.com/tobymao/sqlglot/commit/61ed971213c979c3777e57853bd6989bc169adb1) - **athena**: Correctly handle CTAS queries that contain Union's *(PR [#4955](https://github.com/tobymao/sqlglot/pull/4955) by [@erindru](https://github.com/erindru))* - [`44b955b`](https://github.com/tobymao/sqlglot/commit/44b955bd537bfb8f5b6e84ecbcd5f6e3da852260) - **clickhouse**: Fix generation of exp.Values *(PR [#4930](https://github.com/tobymao/sqlglot/pull/4930) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4056](https://github.com/TobikoData/sqlmesh/issues/4056) opened by [@dnbnero](https://github.com/dnbnero)* - [`61bc01c`](https://github.com/tobymao/sqlglot/commit/61bc01ceec2f801490f3f1a571aee655c5109962) - **clickhouse**: allow string literal for clickhouse ON CLUSTER clause *(PR [#4971](https://github.com/tobymao/sqlglot/pull/4971) by [@lepfhty](https://github.com/lepfhty))* - [`1353b79`](https://github.com/tobymao/sqlglot/commit/1353b79bd9810788a02163928b044fe038267078) - **Snowflake**: Enhance parity for FILE_FORMAT & CREDENTIALS in CREATE STAGE *(PR [#4969](https://github.com/tobymao/sqlglot/pull/4969) by [@whummer](https://github.com/whummer))* - [`9693dbd`](https://github.com/tobymao/sqlglot/commit/9693dbd18b98b2699cade738a254f71f2ee8ce74) - **clickhouse**: avoid superfluous parentheses in DISTINCT ON (...) 
*(commit by [@georgesittas](https://github.com/georgesittas))* - [`cb20038`](https://github.com/tobymao/sqlglot/commit/cb2003875fc6e149bd4a631e99c312a04435a46b) - **tsql**: treat GO as command *(PR [#4978](https://github.com/tobymao/sqlglot/pull/4978) by [@georgesittas](https://github.com/georgesittas))* - [`60e26b8`](https://github.com/tobymao/sqlglot/commit/60e26b868242a05a7fdc2725bd21a127910a6fb7) - **hive**: improve transpilability of GET_JSON_OBJECT by parsing json path *(PR [#4980](https://github.com/tobymao/sqlglot/pull/4980) by [@georgesittas](https://github.com/georgesittas))* - [`2b7845a`](https://github.com/tobymao/sqlglot/commit/2b7845a3a821d366ae90ba9ef5e7d61194a34874) - Add support for Athena's Iceberg partitioning transforms *(PR [#4976](https://github.com/tobymao/sqlglot/pull/4976) by [@VaggelisD](https://github.com/VaggelisD))* - [`fa6af23`](https://github.com/tobymao/sqlglot/commit/fa6af2302f8482c5d89ead481afe4195aaa41a9c) - **optimizer**: compare the whole type to determine if a cast can be removed *(PR [#4981](https://github.com/tobymao/sqlglot/pull/4981) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4977](https://github.com/tobymao/sqlglot/issues/4977) opened by [@MeinAccount](https://github.com/MeinAccount)* - [`830c9b8`](https://github.com/tobymao/sqlglot/commit/830c9b8bbf906cf5d4fa8028b67dadda73fc58a9) - **unnest_subqueries**: avoid adding GROUP BY on aggregate projections in lateral subqueries *(PR [#4970](https://github.com/tobymao/sqlglot/pull/4970) by [@skadel](https://github.com/skadel))* - [`ee794e9`](https://github.com/tobymao/sqlglot/commit/ee794e9c6a3b2fdb142114327d904b6c94a16cd0) - **postgres**: use the standard POWER function instead of ^ fixes [#4982](https://github.com/tobymao/sqlglot/pull/4982) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`85e62b8`](https://github.com/tobymao/sqlglot/commit/85e62b88df2822797f527dce4eaa230c778cbe9e) - **bigquery**: Do not consume 
JOIN keywords after WITH OFFSET *(PR [#4984](https://github.com/tobymao/sqlglot/pull/4984) by [@VaggelisD](https://github.com/VaggelisD))* - [`2369195`](https://github.com/tobymao/sqlglot/commit/2369195635e25dabd5ce26c13e402076508bba04) - consistently parse INTERVAL value as a string *(PR [#4986](https://github.com/tobymao/sqlglot/pull/4986) by [@georgesittas](https://github.com/georgesittas))* - [`510984f`](https://github.com/tobymao/sqlglot/commit/510984f2ddc6ff13b8a8030f698aed9ad0e6f46b) - **hive**: stop generating redundant TO_DATE calls *(PR [#4990](https://github.com/tobymao/sqlglot/pull/4990) by [@georgesittas](https://github.com/georgesittas))* - [`da9ec61`](https://github.com/tobymao/sqlglot/commit/da9ec61e8edd5049e246390e1b638cf14d50fa2d) - **generator**: Fix pretty generation of exp.Window *(PR [#4994](https://github.com/tobymao/sqlglot/pull/4994) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4098](https://github.com/TobikoData/sqlmesh/issues/4098) opened by [@tanghyd](https://github.com/tanghyd)* - [`aae9aa8`](https://github.com/tobymao/sqlglot/commit/aae9aa8f96ccaa7686cda3cdabec208ae4c3d60a) - **optimizer**: ensure there are no shared refs after qualify_tables *(PR [#4995](https://github.com/tobymao/sqlglot/pull/4995) by [@georgesittas](https://github.com/georgesittas))* - [`adaef42`](https://github.com/tobymao/sqlglot/commit/adaef42234d8f1c9c331f53bee2c42686f29bdec) - **trino**: Dont quote identifiers in string literals for the partitioned_by property *(PR [#4998](https://github.com/tobymao/sqlglot/pull/4998) by [@erindru](https://github.com/erindru))* - [`a547f8d`](https://github.com/tobymao/sqlglot/commit/a547f8d4292f3b3a4c85f9d6466ead2ad976dfd2) - **postgres**: Capture optional minus sign in interval regex *(PR [#5000](https://github.com/tobymao/sqlglot/pull/5000) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4999](https://github.com/tobymao/sqlglot/issues/4999) opened by 
[@cpimhoff](https://github.com/cpimhoff)* - [`8e9dbd4`](https://github.com/tobymao/sqlglot/commit/8e9dbd491b9516c614554e05f05cc1cb976838e3) - **duckdb**: warn on unsupported IGNORE/RESPECT NULLS *(PR [#5002](https://github.com/tobymao/sqlglot/pull/5002) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5001](https://github.com/tobymao/sqlglot/issues/5001) opened by [@MarcoGorelli](https://github.com/MarcoGorelli)* - [`10b02bc`](https://github.com/tobymao/sqlglot/commit/10b02bce304042fea09e9cb2369db3c873452245) - **clickhouse**: Support optional timezone argument in date_diff() *(PR [#5005](https://github.com/tobymao/sqlglot/pull/5005) by [@dorranh](https://github.com/dorranh))* - [`c594b63`](https://github.com/tobymao/sqlglot/commit/c594b630c1c940e9a47abfce1633b435a2607f13) - Add MAX_BY & MIN_BY to FUNCTION_PARSER *(PR [#5021](https://github.com/tobymao/sqlglot/pull/5021) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5020](https://github.com/tobymao/sqlglot/issues/5020) opened by [@omerhadari](https://github.com/omerhadari)* - [`c1c892c`](https://github.com/tobymao/sqlglot/commit/c1c892cebb89ddf29369ff3c7647f96d217acb71) - **parser**: parse column ops after no-paren type casting *(PR [#5025](https://github.com/tobymao/sqlglot/pull/5025) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5024](https://github.com/tobymao/sqlglot/issues/5024) opened by [@MagdaSousa](https://github.com/MagdaSousa)* - [`52e068f`](https://github.com/tobymao/sqlglot/commit/52e068f74bd6844d0273ddcc7637d249e6ed51c1) - **databricks**: Preserve colon operators in TRY_CAST *(PR [#5028](https://github.com/tobymao/sqlglot/pull/5028) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5027](https://github.com/tobymao/sqlglot/issues/5027) opened by [@aersam](https://github.com/aersam)* - 
[`91e5036`](https://github.com/tobymao/sqlglot/commit/91e5036831b87fd4670424e6a49e81efead432f2) - **parser**: Do not parse set ops if input expr is None *(PR [#5030](https://github.com/tobymao/sqlglot/pull/5030) by [@VaggelisD](https://github.com/VaggelisD))* - [`8f77b30`](https://github.com/tobymao/sqlglot/commit/8f77b301a267eadb4c4792201e112159db554d1c) - **snowflake**: get function *(commit by [@tobymao](https://github.com/tobymao))* - [`281ab21`](https://github.com/tobymao/sqlglot/commit/281ab21969d3937cef55adc3032f74b00173e948) - **snowflake**: generate expression DayOfWeekIso using DAYOFWEEKISO *(PR [#5034](https://github.com/tobymao/sqlglot/pull/5034) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5032](https://github.com/tobymao/sqlglot/issues/5032) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`2fa9684`](https://github.com/tobymao/sqlglot/commit/2fa96843a29323b97229842f7cf993b72bc86677) - preserve non-participating joins in eliminate_join_marks rule fixes [#5039](https://github.com/tobymao/sqlglot/pull/5039) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d10fdf5`](https://github.com/tobymao/sqlglot/commit/d10fdf5f9388dc3848617cfbf4e6f7b1aa73be1a) - **optimizer**: prevent incorrect predicate pushdown into RHS of CROSS JOIN UNNEST *(PR [#5033](https://github.com/tobymao/sqlglot/pull/5033) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5023](https://github.com/tobymao/sqlglot/issues/5023) opened by [@schelip](https://github.com/schelip)* - [`7c55c48`](https://github.com/tobymao/sqlglot/commit/7c55c48ec2088e776fd4ec5b6c0f4989450a39c6) - prevent redundant backslash escapes in rawstring generator *(PR [#5040](https://github.com/tobymao/sqlglot/pull/5040) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5038](https://github.com/tobymao/sqlglot/issues/5038) opened by 
[@ihvol-freenome](https://github.com/ihvol-freenome)* - [`167c547`](https://github.com/tobymao/sqlglot/commit/167c547171fa3f2de1c2fdd64ca51bb9ccb3ee52) - **tsql**: ALTER COLUMN syntax *(PR [#5051](https://github.com/tobymao/sqlglot/pull/5051) by [@MarcusRisanger](https://github.com/MarcusRisanger))* - :arrow_lower_right: *fixes issue [#5050](https://github.com/tobymao/sqlglot/issues/5050) opened by [@MarcusRisanger](https://github.com/MarcusRisanger)* - [`807fbbc`](https://github.com/tobymao/sqlglot/commit/807fbbc5a89925fd3c98e823003a9dc929fcaff6) - **duckdb**: transpile timestamp without time zone *(PR [#5047](https://github.com/tobymao/sqlglot/pull/5047) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4859](https://github.com/tobymao/sqlglot/issues/4859) opened by [@eakmanrq](https://github.com/eakmanrq)* - [`2e9704e`](https://github.com/tobymao/sqlglot/commit/2e9704ede255ef17b412c6905aad69afd70ccbf3) - **tsql**: Change `COLLATE` expression to `Var` for `ALTER TABLE` *(PR [#5055](https://github.com/tobymao/sqlglot/pull/5055) by [@MarcusRisanger](https://github.com/MarcusRisanger))* - [`60f9420`](https://github.com/tobymao/sqlglot/commit/60f9420660d8d48bd98560a9bf8aec1f497fdeff) - **druid**: preserve MOD function fixes [#5060](https://github.com/tobymao/sqlglot/pull/5060) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7866b48`](https://github.com/tobymao/sqlglot/commit/7866b48275c830aeb51592e1888c751bcb58a361) - **druid**: support current_timestamp *(PR [#5061](https://github.com/tobymao/sqlglot/pull/5061) by [@ALongJohnson](https://github.com/ALongJohnson))* - :arrow_lower_right: *fixes issue [#5059](https://github.com/tobymao/sqlglot/issues/5059) opened by [@ALongJohnson](https://github.com/ALongJohnson)* - [`626f3a3`](https://github.com/tobymao/sqlglot/commit/626f3a3987c2a96a8fd6e329d237c0c7bc8bf264) - Support EXCLUDE in window definition *(PR [#5058](https://github.com/tobymao/sqlglot/pull/5058) by 
[@rafasofizada](https://github.com/rafasofizada))* - [`63f505e`](https://github.com/tobymao/sqlglot/commit/63f505e036928ed94df61a8b213bf84198e33d35) - unqualify UNNEST only the left most part of a column *(PR [#5069](https://github.com/tobymao/sqlglot/pull/5069) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5062](https://github.com/tobymao/sqlglot/issues/5062) opened by [@goldmedal](https://github.com/goldmedal)* - [`56da962`](https://github.com/tobymao/sqlglot/commit/56da9629899e72ab1e15cfc45ede838c4c38c16e) - **oracle**: to_timestamp without format *(PR [#5070](https://github.com/tobymao/sqlglot/pull/5070) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5068](https://github.com/tobymao/sqlglot/issues/5068) opened by [@kosta-foundational](https://github.com/kosta-foundational)* - [`ba52f01`](https://github.com/tobymao/sqlglot/commit/ba52f014f0d53ce8a179f1b140876274a01b38ac) - **bigquery**: respect normalization strategy overrides *(PR [#5080](https://github.com/tobymao/sqlglot/pull/5080) by [@georgesittas](https://github.com/georgesittas))* - [`03ace87`](https://github.com/tobymao/sqlglot/commit/03ace877e3f9e5d56fcbcbe260849f5d1247e5d9) - **optimizer**: keep ORDER BY when merging subqueries *(PR [#5084](https://github.com/tobymao/sqlglot/pull/5084) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#5065](https://github.com/tobymao/sqlglot/issues/5065) opened by [@udaykrishna-eng](https://github.com/udaykrishna-eng)* - [`ba7b5a8`](https://github.com/tobymao/sqlglot/commit/ba7b5a8566dc15f438dcd0c03397b2e93e9c75cb) - **bigquery**: respect normalization strategy patching *(commit by [@georgesittas](https://github.com/georgesittas))* - [`4558bb7`](https://github.com/tobymao/sqlglot/commit/4558bb7a3a00629194f969d05d4b151f9ccd6172) - **bigquery**: always infer concat type as either bytes or string *(PR [#5085](https://github.com/tobymao/sqlglot/pull/5085) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5083](https://github.com/tobymao/sqlglot/issues/5083) opened by [@tobymao](https://github.com/tobymao)* - [`612a2da`](https://github.com/tobymao/sqlglot/commit/612a2daeb0e93c5cc77b3c78c0b53905f4bee19c) - **tokenizer**: fix token col attribute when there is leading whitespace after a newline *(PR [#5094](https://github.com/tobymao/sqlglot/pull/5094) by [@chgiff](https://github.com/chgiff))* - [`9d3a929`](https://github.com/tobymao/sqlglot/commit/9d3a929ba9006ebac67ff315c55da74a724ec975) - preserve `ARRAY_JOIN` for StarRocks, Doris (fixes [#5095](https://github.com/tobymao/sqlglot/pull/5095)) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`fb83fac`](https://github.com/tobymao/sqlglot/commit/fb83fac2d097d8d3e8e2556c072792857609bd94) - **optimizer**: remove recursion from `simplify` *(PR [#4988](https://github.com/tobymao/sqlglot/pull/4988) by [@georgesittas](https://github.com/georgesittas))* - [`1b3ea34`](https://github.com/tobymao/sqlglot/commit/1b3ea344af1d71d3eee239a5c4996a0aecd091de) - **clickhouse**: override _parse_property_assignment to handle null engine *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`890b24a`](https://github.com/tobymao/sqlglot/commit/890b24a5cec269f5595743d0a86024a23217a3f1) - remove `connector_depth` as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1dc501b`](https://github.com/tobymao/sqlglot/commit/1dc501b8ed68638375d869e11f3bf188948a4990) - remove `max_depth` argument in simplify as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6572517`](https://github.com/tobymao/sqlglot/commit/6572517c1ec76f14cbd661aacc15c84bef065284) - improve tooling around benchmarks *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`1d4d906`](https://github.com/tobymao/sqlglot/commit/1d4d906abc60d29b6606bc8eee50c92cef21d3fd) - use _try_parse for parsing ClickHouse's CREATE TABLE .. AS *(commit by [@georgesittas](https://github.com/georgesittas))* - [`fc58c27`](https://github.com/tobymao/sqlglot/commit/fc58c273690734263b971b138ec8f0186f524672) - Refactor placeholder parsing for TokenType.COLON *(PR [#5009](https://github.com/tobymao/sqlglot/pull/5009) by [@VaggelisD](https://github.com/VaggelisD))* - [`da90228`](https://github.com/tobymao/sqlglot/commit/da90228f1550715646106dd6f9a170d0973f138f) - put a lock around the lazy dialect module loading call *(PR [#5011](https://github.com/tobymao/sqlglot/pull/5011) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5010](https://github.com/tobymao/sqlglot/issues/5010) opened by [@NickCrews](https://github.com/NickCrews)* - [`abbcf26`](https://github.com/tobymao/sqlglot/commit/abbcf26b2101b2d806466353dcd29b79d1af5219) - bump sqlglotrs to 0.4.1 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.16.4] - 2025-05-02 ### :bug: Bug Fixes - [`52e068f`](https://github.com/tobymao/sqlglot/commit/52e068f74bd6844d0273ddcc7637d249e6ed51c1) - **databricks**: Preserve colon operators in TRY_CAST *(PR [#5028](https://github.com/tobymao/sqlglot/pull/5028) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5027](https://github.com/tobymao/sqlglot/issues/5027) opened by [@aersam](https://github.com/aersam)* - [`91e5036`](https://github.com/tobymao/sqlglot/commit/91e5036831b87fd4670424e6a49e81efead432f2) - **parser**: Do not parse set ops if input expr is None *(PR [#5030](https://github.com/tobymao/sqlglot/pull/5030) by [@VaggelisD](https://github.com/VaggelisD))* - [`8f77b30`](https://github.com/tobymao/sqlglot/commit/8f77b301a267eadb4c4792201e112159db554d1c) - **snowflake**: get function *(commit by [@tobymao](https://github.com/tobymao))* ## [v26.16.3] - 2025-05-01 ### 
:boom: BREAKING CHANGES - due to [`f5358d8`](https://github.com/tobymao/sqlglot/commit/f5358d8a3e2743b5ac0d540f10502d333ad4e082) - add support for GET statements *(PR [#5019](https://github.com/tobymao/sqlglot/pull/5019) by [@eruditmorina](https://github.com/eruditmorina))*: add support for GET statements (#5019) ### :sparkles: New Features - [`6010302`](https://github.com/tobymao/sqlglot/commit/60103020879db5f23a6c4a1775848e31cce13415) - **postgres**: transpile QUARTER interval unit *(PR [#5015](https://github.com/tobymao/sqlglot/pull/5015) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5013](https://github.com/tobymao/sqlglot/issues/5013) opened by [@Wiill007](https://github.com/Wiill007)* - [`f5358d8`](https://github.com/tobymao/sqlglot/commit/f5358d8a3e2743b5ac0d540f10502d333ad4e082) - **snowflake**: add support for GET statements *(PR [#5019](https://github.com/tobymao/sqlglot/pull/5019) by [@eruditmorina](https://github.com/eruditmorina))* - [`df5ecdb`](https://github.com/tobymao/sqlglot/commit/df5ecdbebcdce491031538f6baa0f87ec7eefee8) - Include token references in the meta of identifier expressions *(PR [#5022](https://github.com/tobymao/sqlglot/pull/5022) by [@izeigerman](https://github.com/izeigerman))* ### :bug: Bug Fixes - [`c594b63`](https://github.com/tobymao/sqlglot/commit/c594b630c1c940e9a47abfce1633b435a2607f13) - Add MAX_BY & MIN_BY to FUNCTION_PARSER *(PR [#5021](https://github.com/tobymao/sqlglot/pull/5021) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#5020](https://github.com/tobymao/sqlglot/issues/5020) opened by [@omerhadari](https://github.com/omerhadari)* - [`c1c892c`](https://github.com/tobymao/sqlglot/commit/c1c892cebb89ddf29369ff3c7647f96d217acb71) - **parser**: parse column ops after no-paren type casting *(PR [#5025](https://github.com/tobymao/sqlglot/pull/5025) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue 
[#5024](https://github.com/tobymao/sqlglot/issues/5024) opened by [@MagdaSousa](https://github.com/MagdaSousa)* ## [v26.16.2] - 2025-04-24 ### :sparkles: New Features - [`5feae00`](https://github.com/tobymao/sqlglot/commit/5feae00ec7a4826285e7fd0be85d377cc0de09b5) - **databricks**: add support for the VOID type *(PR [#5012](https://github.com/tobymao/sqlglot/pull/5012) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`da90228`](https://github.com/tobymao/sqlglot/commit/da90228f1550715646106dd6f9a170d0973f138f) - put a lock around the lazy dialect module loading call *(PR [#5011](https://github.com/tobymao/sqlglot/pull/5011) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#5010](https://github.com/tobymao/sqlglot/issues/5010) opened by [@NickCrews](https://github.com/NickCrews)* ## [v26.16.1] - 2025-04-24 ### :sparkles: New Features - [`27a9fb2`](https://github.com/tobymao/sqlglot/commit/27a9fb26a1936512a09b8b09ed2656e22918f2c6) - **clickhouse**: Support parsing CTAS with alias *(PR [#5003](https://github.com/tobymao/sqlglot/pull/5003) by [@dorranh](https://github.com/dorranh))* - [`45cd165`](https://github.com/tobymao/sqlglot/commit/45cd165eaca96b33f1de753a147bdc352b9d56d0) - **clickhouse**: Support ClickHouse Nothing type *(PR [#5004](https://github.com/tobymao/sqlglot/pull/5004) by [@dorranh](https://github.com/dorranh))* - [`ca61a61`](https://github.com/tobymao/sqlglot/commit/ca61a617fa67082bc0fc94853dee4d70b8ca5c59) - Support exp.PartitionByProperty for parse_into() *(PR [#5006](https://github.com/tobymao/sqlglot/pull/5006) by [@erindru](https://github.com/erindru))* - [`a6d4c3c`](https://github.com/tobymao/sqlglot/commit/a6d4c3c901f828cdd96a16a0e55eac1b244f63be) - **snowflake**: Add numeric parameter support *(PR [#5008](https://github.com/tobymao/sqlglot/pull/5008) by [@hovaesco](https://github.com/hovaesco))* ### :bug: Bug Fixes - 
[`8e9dbd4`](https://github.com/tobymao/sqlglot/commit/8e9dbd491b9516c614554e05f05cc1cb976838e3) - **duckdb**: warn on unsupported IGNORE/RESPECT NULLS *(PR [#5002](https://github.com/tobymao/sqlglot/pull/5002) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#5001](https://github.com/tobymao/sqlglot/issues/5001) opened by [@MarcoGorelli](https://github.com/MarcoGorelli)* - [`10b02bc`](https://github.com/tobymao/sqlglot/commit/10b02bce304042fea09e9cb2369db3c873452245) - **clickhouse**: Support optional timezone argument in date_diff() *(PR [#5005](https://github.com/tobymao/sqlglot/pull/5005) by [@dorranh](https://github.com/dorranh))* ### :wrench: Chores - [`1d4d906`](https://github.com/tobymao/sqlglot/commit/1d4d906abc60d29b6606bc8eee50c92cef21d3fd) - use _try_parse for parsing ClickHouse's CREATE TABLE .. AS
*(commit by [@georgesittas](https://github.com/georgesittas))* - [`fc58c27`](https://github.com/tobymao/sqlglot/commit/fc58c273690734263b971b138ec8f0186f524672) - Refactor placeholder parsing for TokenType.COLON *(PR [#5009](https://github.com/tobymao/sqlglot/pull/5009) by [@VaggelisD](https://github.com/VaggelisD))* ## [v26.16.0] - 2025-04-22 ### :boom: BREAKING CHANGES - due to [`510984f`](https://github.com/tobymao/sqlglot/commit/510984f2ddc6ff13b8a8030f698aed9ad0e6f46b) - stop generating redundant TO_DATE calls *(PR [#4990](https://github.com/tobymao/sqlglot/pull/4990) by [@georgesittas](https://github.com/georgesittas))*: stop generating redundant TO_DATE calls (#4990) - due to [`da9ec61`](https://github.com/tobymao/sqlglot/commit/da9ec61e8edd5049e246390e1b638cf14d50fa2d) - Fix pretty generation of exp.Window *(PR [#4994](https://github.com/tobymao/sqlglot/pull/4994) by [@VaggelisD](https://github.com/VaggelisD))*: Fix pretty generation of exp.Window (#4994) - due to [`fb83fac`](https://github.com/tobymao/sqlglot/commit/fb83fac2d097d8d3e8e2556c072792857609bd94) - remove recursion from `simplify` *(PR [#4988](https://github.com/tobymao/sqlglot/pull/4988) by [@georgesittas](https://github.com/georgesittas))*: remove recursion from `simplify` (#4988) - due to [`890b24a`](https://github.com/tobymao/sqlglot/commit/890b24a5cec269f5595743d0a86024a23217a3f1) - remove `connector_depth` as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))*: remove `connector_depth` as it is now dead code - due to [`1dc501b`](https://github.com/tobymao/sqlglot/commit/1dc501b8ed68638375d869e11f3bf188948a4990) - remove `max_depth` argument in simplify as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))*: remove `max_depth` argument in simplify as it is now dead code ### :sparkles: New Features - [`76535ce`](https://github.com/tobymao/sqlglot/commit/76535ce9487186d2eb7071fac2f224238de7a9ba) - **optimizer**: add support for 
Spark's TRANSFORM clause *(PR [#4993](https://github.com/tobymao/sqlglot/pull/4993) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4991](https://github.com/tobymao/sqlglot/issues/4991) opened by [@karta0807913](https://github.com/karta0807913)* ### :bug: Bug Fixes - [`510984f`](https://github.com/tobymao/sqlglot/commit/510984f2ddc6ff13b8a8030f698aed9ad0e6f46b) - **hive**: stop generating redundant TO_DATE calls *(PR [#4990](https://github.com/tobymao/sqlglot/pull/4990) by [@georgesittas](https://github.com/georgesittas))* - [`da9ec61`](https://github.com/tobymao/sqlglot/commit/da9ec61e8edd5049e246390e1b638cf14d50fa2d) - **generator**: Fix pretty generation of exp.Window *(PR [#4994](https://github.com/tobymao/sqlglot/pull/4994) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4098](https://github.com/TobikoData/sqlmesh/issues/4098) opened by [@tanghyd](https://github.com/tanghyd)* - [`aae9aa8`](https://github.com/tobymao/sqlglot/commit/aae9aa8f96ccaa7686cda3cdabec208ae4c3d60a) - **optimizer**: ensure there are no shared refs after qualify_tables *(PR [#4995](https://github.com/tobymao/sqlglot/pull/4995) by [@georgesittas](https://github.com/georgesittas))* - [`adaef42`](https://github.com/tobymao/sqlglot/commit/adaef42234d8f1c9c331f53bee2c42686f29bdec) - **trino**: Don't quote identifiers in string literals for the partitioned_by property *(PR [#4998](https://github.com/tobymao/sqlglot/pull/4998) by [@erindru](https://github.com/erindru))* - [`a547f8d`](https://github.com/tobymao/sqlglot/commit/a547f8d4292f3b3a4c85f9d6466ead2ad976dfd2) - **postgres**: Capture optional minus sign in interval regex *(PR [#5000](https://github.com/tobymao/sqlglot/pull/5000) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4999](https://github.com/tobymao/sqlglot/issues/4999) opened by [@cpimhoff](https://github.com/cpimhoff)* ### :recycle: Refactors -
[`fb83fac`](https://github.com/tobymao/sqlglot/commit/fb83fac2d097d8d3e8e2556c072792857609bd94) - **optimizer**: remove recursion from `simplify` *(PR [#4988](https://github.com/tobymao/sqlglot/pull/4988) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`890b24a`](https://github.com/tobymao/sqlglot/commit/890b24a5cec269f5595743d0a86024a23217a3f1) - remove `connector_depth` as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1dc501b`](https://github.com/tobymao/sqlglot/commit/1dc501b8ed68638375d869e11f3bf188948a4990) - remove `max_depth` argument in simplify as it is now dead code *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6572517`](https://github.com/tobymao/sqlglot/commit/6572517c1ec76f14cbd661aacc15c84bef065284) - improve tooling around benchmarks *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.15.0] - 2025-04-17 ### :boom: BREAKING CHANGES - due to [`2b7845a`](https://github.com/tobymao/sqlglot/commit/2b7845a3a821d366ae90ba9ef5e7d61194a34874) - Add support for Athena's Iceberg partitioning transforms *(PR [#4976](https://github.com/tobymao/sqlglot/pull/4976) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for Athena's Iceberg partitioning transforms (#4976) - due to [`ee794e9`](https://github.com/tobymao/sqlglot/commit/ee794e9c6a3b2fdb142114327d904b6c94a16cd0) - use the standard POWER function instead of ^ fixes [#4982](https://github.com/tobymao/sqlglot/pull/4982) *(commit by [@georgesittas](https://github.com/georgesittas))*: use the standard POWER function instead of ^ fixes #4982 - due to [`2369195`](https://github.com/tobymao/sqlglot/commit/2369195635e25dabd5ce26c13e402076508bba04) - consistently parse INTERVAL value as a string *(PR [#4986](https://github.com/tobymao/sqlglot/pull/4986) by [@georgesittas](https://github.com/georgesittas))*: consistently parse INTERVAL value as a string (#4986) - due to 
[`e866cff`](https://github.com/tobymao/sqlglot/commit/e866cffbaac3b62255d0d5c8be043ab2394af619) - support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints *(PR [#4987](https://github.com/tobymao/sqlglot/pull/4987) by [@geooo109](https://github.com/geooo109))*: support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints (#4987) ### :sparkles: New Features - [`e866cff`](https://github.com/tobymao/sqlglot/commit/e866cffbaac3b62255d0d5c8be043ab2394af619) - **parser**: support RELY option for PRIMARY KEY, FOREIGN KEY, and UNIQUE constraints *(PR [#4987](https://github.com/tobymao/sqlglot/pull/4987) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#4983](https://github.com/tobymao/sqlglot/issues/4983) opened by [@ggadon](https://github.com/ggadon)* ### :bug: Bug Fixes - [`2b7845a`](https://github.com/tobymao/sqlglot/commit/2b7845a3a821d366ae90ba9ef5e7d61194a34874) - Add support for Athena's Iceberg partitioning transforms *(PR [#4976](https://github.com/tobymao/sqlglot/pull/4976) by [@VaggelisD](https://github.com/VaggelisD))* - [`fa6af23`](https://github.com/tobymao/sqlglot/commit/fa6af2302f8482c5d89ead481afe4195aaa41a9c) - **optimizer**: compare the whole type to determine if a cast can be removed *(PR [#4981](https://github.com/tobymao/sqlglot/pull/4981) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4977](https://github.com/tobymao/sqlglot/issues/4977) opened by [@MeinAccount](https://github.com/MeinAccount)* - [`830c9b8`](https://github.com/tobymao/sqlglot/commit/830c9b8bbf906cf5d4fa8028b67dadda73fc58a9) - **unnest_subqueries**: avoid adding GROUP BY on aggregate projections in lateral subqueries *(PR [#4970](https://github.com/tobymao/sqlglot/pull/4970) by [@skadel](https://github.com/skadel))* - [`ee794e9`](https://github.com/tobymao/sqlglot/commit/ee794e9c6a3b2fdb142114327d904b6c94a16cd0) - **postgres**: use the standard POWER function instead of ^ fixes 
[#4982](https://github.com/tobymao/sqlglot/pull/4982) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`85e62b8`](https://github.com/tobymao/sqlglot/commit/85e62b88df2822797f527dce4eaa230c778cbe9e) - **bigquery**: Do not consume JOIN keywords after WITH OFFSET *(PR [#4984](https://github.com/tobymao/sqlglot/pull/4984) by [@VaggelisD](https://github.com/VaggelisD))* - [`2369195`](https://github.com/tobymao/sqlglot/commit/2369195635e25dabd5ce26c13e402076508bba04) - consistently parse INTERVAL value as a string *(PR [#4986](https://github.com/tobymao/sqlglot/pull/4986) by [@georgesittas](https://github.com/georgesittas))* ## [v26.14.0] - 2025-04-15 ### :boom: BREAKING CHANGES - due to [`cb20038`](https://github.com/tobymao/sqlglot/commit/cb2003875fc6e149bd4a631e99c312a04435a46b) - treat GO as command *(PR [#4978](https://github.com/tobymao/sqlglot/pull/4978) by [@georgesittas](https://github.com/georgesittas))*: treat GO as command (#4978) - due to [`60e26b8`](https://github.com/tobymao/sqlglot/commit/60e26b868242a05a7fdc2725bd21a127910a6fb7) - improve transpilability of GET_JSON_OBJECT by parsing json path *(PR [#4980](https://github.com/tobymao/sqlglot/pull/4980) by [@georgesittas](https://github.com/georgesittas))*: improve transpilability of GET_JSON_OBJECT by parsing json path (#4980) ### :bug: Bug Fixes - [`cb20038`](https://github.com/tobymao/sqlglot/commit/cb2003875fc6e149bd4a631e99c312a04435a46b) - **tsql**: treat GO as command *(PR [#4978](https://github.com/tobymao/sqlglot/pull/4978) by [@georgesittas](https://github.com/georgesittas))* - [`60e26b8`](https://github.com/tobymao/sqlglot/commit/60e26b868242a05a7fdc2725bd21a127910a6fb7) - **hive**: improve transpilability of GET_JSON_OBJECT by parsing json path *(PR [#4980](https://github.com/tobymao/sqlglot/pull/4980) by [@georgesittas](https://github.com/georgesittas))* ## [v26.13.2] - 2025-04-14 ### :bug: Bug Fixes - 
[`9693dbd`](https://github.com/tobymao/sqlglot/commit/9693dbd18b98b2699cade738a254f71f2ee8ce74) - **clickhouse**: avoid superfluous parentheses in DISTINCT ON (...) *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.13.1] - 2025-04-14 ### :sparkles: New Features - [`a762993`](https://github.com/tobymao/sqlglot/commit/a762993c53d7ae91a831a8be448010e17e60f497) - **generator**: unsupported warning for T-SQL query option *(PR [#4972](https://github.com/tobymao/sqlglot/pull/4972) by [@geooo109](https://github.com/geooo109))* ### :bug: Bug Fixes - [`61bc01c`](https://github.com/tobymao/sqlglot/commit/61bc01ceec2f801490f3f1a571aee655c5109962) - **clickhouse**: allow string literal for clickhouse ON CLUSTER clause *(PR [#4971](https://github.com/tobymao/sqlglot/pull/4971) by [@lepfhty](https://github.com/lepfhty))* - [`1353b79`](https://github.com/tobymao/sqlglot/commit/1353b79bd9810788a02163928b044fe038267078) - **Snowflake**: Enhance parity for FILE_FORMAT & CREDENTIALS in CREATE STAGE *(PR [#4969](https://github.com/tobymao/sqlglot/pull/4969) by [@whummer](https://github.com/whummer))* ## [v26.13.0] - 2025-04-11 ### :boom: BREAKING CHANGES - due to [`1df7f61`](https://github.com/tobymao/sqlglot/commit/1df7f611bc96616cb07950a80f6669d0bc331b0e) - refactor length_sql so it handles any type, not just varchar/blob *(PR [#4935](https://github.com/tobymao/sqlglot/pull/4935) by [@tekumara](https://github.com/tekumara))*: refactor length_sql so it handles any type, not just varchar/blob (#4935) - due to [`52719f3`](https://github.com/tobymao/sqlglot/commit/52719f37f6541e8ec9f66642ac23ed9015048092) - parse CREATE STAGE *(PR [#4947](https://github.com/tobymao/sqlglot/pull/4947) by [@tekumara](https://github.com/tekumara))*: parse CREATE STAGE (#4947) - due to [`fd39b30`](https://github.com/tobymao/sqlglot/commit/fd39b30209d068b787619b8137a105aca9c3e607) - parse CREATE FILE FORMAT *(PR [#4948](https://github.com/tobymao/sqlglot/pull/4948) by 
[@tekumara](https://github.com/tekumara))*: parse CREATE FILE FORMAT (#4948) - due to [`f835756`](https://github.com/tobymao/sqlglot/commit/f835756257f735643584b89e93693e8577744731) - Fix CREATE EXTERNAL TABLE properties *(PR [#4951](https://github.com/tobymao/sqlglot/pull/4951) by [@VaggelisD](https://github.com/VaggelisD))*: Fix CREATE EXTERNAL TABLE properties (#4951) - due to [`44b955b`](https://github.com/tobymao/sqlglot/commit/44b955bd537bfb8f5b6e84ecbcd5f6e3da852260) - Fix generation of exp.Values *(PR [#4930](https://github.com/tobymao/sqlglot/pull/4930) by [@VaggelisD](https://github.com/VaggelisD))*: Fix generation of exp.Values (#4930) - due to [`1f506b1`](https://github.com/tobymao/sqlglot/commit/1f506b186f1b954829195eefda318e231d474208) - support SHOW (ALL) TABLES *(PR [#4961](https://github.com/tobymao/sqlglot/pull/4961) by [@mscolnick](https://github.com/mscolnick))*: support SHOW (ALL) TABLES (#4961) - due to [`72cf4a4`](https://github.com/tobymao/sqlglot/commit/72cf4a4501a8d122041a28b71be5a41ffb53602a) - Add support for PIVOT multiple IN clauses *(PR [#4964](https://github.com/tobymao/sqlglot/pull/4964) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for PIVOT multiple IN clauses (#4964) - due to [`400ea54`](https://github.com/tobymao/sqlglot/commit/400ea54d3a9cab256bfa5e496439bb9be6072d0b) - ensure JSON_FORMAT type is JSON when targeting Presto *(PR [#4968](https://github.com/tobymao/sqlglot/pull/4968) by [@georgesittas](https://github.com/georgesittas))*: ensure JSON_FORMAT type is JSON when targeting Presto (#4968) ### :sparkles: New Features - [`52719f3`](https://github.com/tobymao/sqlglot/commit/52719f37f6541e8ec9f66642ac23ed9015048092) - **snowflake**: parse CREATE STAGE *(PR [#4947](https://github.com/tobymao/sqlglot/pull/4947) by [@tekumara](https://github.com/tekumara))* - [`fd39b30`](https://github.com/tobymao/sqlglot/commit/fd39b30209d068b787619b8137a105aca9c3e607) - **snowflake**: parse CREATE FILE FORMAT *(PR 
[#4948](https://github.com/tobymao/sqlglot/pull/4948) by [@tekumara](https://github.com/tekumara))* - [`da9a6a1`](https://github.com/tobymao/sqlglot/commit/da9a6a1d56323319b87e9b193d12ad1c644b9239) - **snowflake**: parse SHOW STAGES *(PR [#4949](https://github.com/tobymao/sqlglot/pull/4949) by [@tekumara](https://github.com/tekumara))* - [`bfdcdf0`](https://github.com/tobymao/sqlglot/commit/bfdcdf0afc0f4af3dacdfc3e8dca243793552b74) - **snowflake**: parse SHOW FILE FORMATS *(PR [#4950](https://github.com/tobymao/sqlglot/pull/4950) by [@tekumara](https://github.com/tekumara))* - [`c591443`](https://github.com/tobymao/sqlglot/commit/c591443b6b2328780e08179144557e181db0cbb6) - **duckdb**: add support for GROUP clause in standard PIVOT syntax *(PR [#4953](https://github.com/tobymao/sqlglot/pull/4953) by [@georgesittas](https://github.com/georgesittas))* - [`b011ee2`](https://github.com/tobymao/sqlglot/commit/b011ee2df0beaac75b982261a25d3e787dead54a) - **bigquery**: Add support for side & kind on set operators *(PR [#4959](https://github.com/tobymao/sqlglot/pull/4959) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4942](https://github.com/tobymao/sqlglot/issues/4942) opened by [@z3z1ma](https://github.com/z3z1ma)* - [`1f506b1`](https://github.com/tobymao/sqlglot/commit/1f506b186f1b954829195eefda318e231d474208) - **duckdb**: support SHOW (ALL) TABLES *(PR [#4961](https://github.com/tobymao/sqlglot/pull/4961) by [@mscolnick](https://github.com/mscolnick))* - :arrow_lower_right: *addresses issue [#4956](https://github.com/tobymao/sqlglot/issues/4956) opened by [@mscolnick](https://github.com/mscolnick)* - [`ad5b595`](https://github.com/tobymao/sqlglot/commit/ad5b595049a16a27a7f249afea43dbcfcf43b5f4) - allow explicit aliasing in if(...) 
expressions *(PR [#4963](https://github.com/tobymao/sqlglot/pull/4963) by [@georgesittas](https://github.com/georgesittas))* - [`72cf4a4`](https://github.com/tobymao/sqlglot/commit/72cf4a4501a8d122041a28b71be5a41ffb53602a) - **duckdb**: Add support for PIVOT multiple IN clauses *(PR [#4964](https://github.com/tobymao/sqlglot/pull/4964) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4944](https://github.com/tobymao/sqlglot/issues/4944) opened by [@nph](https://github.com/nph)* - [`7bc5a21`](https://github.com/tobymao/sqlglot/commit/7bc5a217c3cc68d0cb1eaedc0c18f5188de80bf1) - **postgres**: support laterals with ordinality fixes [#4965](https://github.com/tobymao/sqlglot/pull/4965) *(PR [#4966](https://github.com/tobymao/sqlglot/pull/4966) by [@georgesittas](https://github.com/georgesittas))* - [`400ea54`](https://github.com/tobymao/sqlglot/commit/400ea54d3a9cab256bfa5e496439bb9be6072d0b) - ensure JSON_FORMAT type is JSON when targeting Presto *(PR [#4968](https://github.com/tobymao/sqlglot/pull/4968) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4967](https://github.com/tobymao/sqlglot/issues/4967) opened by [@jmsmdy](https://github.com/jmsmdy)* ### :bug: Bug Fixes - [`7042603`](https://github.com/tobymao/sqlglot/commit/7042603ecb5693795b15219ec9cebf2f76032c03) - **optimizer**: Merge subqueries when inner query has name conflict with outer query *(PR [#4931](https://github.com/tobymao/sqlglot/pull/4931) by [@barakalon](https://github.com/barakalon))* - [`1df7f61`](https://github.com/tobymao/sqlglot/commit/1df7f611bc96616cb07950a80f6669d0bc331b0e) - **duckdb**: refactor length_sql so it handles any type, not just varchar/blob *(PR [#4935](https://github.com/tobymao/sqlglot/pull/4935) by [@tekumara](https://github.com/tekumara))* - :arrow_lower_right: *fixes issue [#4934](https://github.com/tobymao/sqlglot/issues/4934) opened by [@tekumara](https://github.com/tekumara)* - 
[`09882e3`](https://github.com/tobymao/sqlglot/commit/09882e32f057670a9cbd97c1e5cf1a00c774b5d2) - **tsql**: remove assert call from _build_formatted_time *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bf39a95`](https://github.com/tobymao/sqlglot/commit/bf39a95426ed6637e424da1be070cc9a8affc358) - **sqlite**: transpile double quoted PRIMARY KEY *(PR [#4941](https://github.com/tobymao/sqlglot/pull/4941) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4938](https://github.com/tobymao/sqlglot/issues/4938) opened by [@rgeronimi](https://github.com/rgeronimi)* - [`f835756`](https://github.com/tobymao/sqlglot/commit/f835756257f735643584b89e93693e8577744731) - **snowflake**: Fix CREATE EXTERNAL TABLE properties *(PR [#4951](https://github.com/tobymao/sqlglot/pull/4951) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4945](https://github.com/tobymao/sqlglot/issues/4945) opened by [@tekumara](https://github.com/tekumara)* - [`61ed971`](https://github.com/tobymao/sqlglot/commit/61ed971213c979c3777e57853bd6989bc169adb1) - **athena**: Correctly handle CTAS queries that contain Union's *(PR [#4955](https://github.com/tobymao/sqlglot/pull/4955) by [@erindru](https://github.com/erindru))* - [`44b955b`](https://github.com/tobymao/sqlglot/commit/44b955bd537bfb8f5b6e84ecbcd5f6e3da852260) - **clickhouse**: Fix generation of exp.Values *(PR [#4930](https://github.com/tobymao/sqlglot/pull/4930) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4056](https://github.com/TobikoData/sqlmesh/issues/4056) opened by [@dnbnero](https://github.com/dnbnero)* ## [v26.12.0] - 2025-03-27 ### :boom: BREAKING CHANGES - due to [`8a692b9`](https://github.com/tobymao/sqlglot/commit/8a692b9b5b7982ed54444bddfe974e5f629183ff) - support select...into #temp_table syntax *(PR [#4893](https://github.com/tobymao/sqlglot/pull/4893) by [@hhubbell](https://github.com/hhubbell))*: support 
select...into #temp_table syntax (#4893) - due to [`bcf311a`](https://github.com/tobymao/sqlglot/commit/bcf311a4af4b1a95e038befc0bc84627c4851e5f) - Preserve PARSE_JSON() *(PR [#4901](https://github.com/tobymao/sqlglot/pull/4901) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve PARSE_JSON() (#4901) - due to [`937b7bd`](https://github.com/tobymao/sqlglot/commit/937b7bdd5b06daffee379390796c76ffb07c2588) - handle string interval values in DATE ADD/SUB *(PR [#4902](https://github.com/tobymao/sqlglot/pull/4902) by [@georgesittas](https://github.com/georgesittas))*: handle string interval values in DATE ADD/SUB (#4902) - due to [`96749c1`](https://github.com/tobymao/sqlglot/commit/96749c144832b491f01de387cd2f7a9b769af626) - improve LATERAL VIEW EXPLODE transpilation *(PR [#4905](https://github.com/tobymao/sqlglot/pull/4905) by [@georgesittas](https://github.com/georgesittas))*: improve LATERAL VIEW EXPLODE transpilation (#4905) - due to [`71c529a`](https://github.com/tobymao/sqlglot/commit/71c529a13db1690412829ac03b82ff72d44ce6c2) - disable lateral alias expansion for Oracle fixes [#4910](https://github.com/tobymao/sqlglot/pull/4910) *(commit by [@georgesittas](https://github.com/georgesittas))*: disable lateral alias expansion for Oracle fixes #4910 - due to [`f17004e`](https://github.com/tobymao/sqlglot/commit/f17004e1691c9d834e295452a960a6e3a2830e88) - only use ARRAY[...] syntax for Schema if parent is partitioned by prop *(PR [#4913](https://github.com/tobymao/sqlglot/pull/4913) by [@georgesittas](https://github.com/georgesittas))*: only use ARRAY[...] 
syntax for Schema if parent is partitioned by prop (#4913) - due to [`2fbbf6a`](https://github.com/tobymao/sqlglot/commit/2fbbf6a8525385f53bcb3e588d665208ac6811c1) - infer timestamp function types as TIMESTAMPTZ for bigquery *(PR [#4914](https://github.com/tobymao/sqlglot/pull/4914) by [@georgesittas](https://github.com/georgesittas))*: infer timestamp function types as TIMESTAMPTZ for bigquery (#4914) - due to [`c0b3448`](https://github.com/tobymao/sqlglot/commit/c0b3448e7a4ec46485dd65b7498855ab57e029ef) - parse at sign as ABS function *(PR [#4915](https://github.com/tobymao/sqlglot/pull/4915) by [@geooo109](https://github.com/geooo109))*: parse at sign as ABS function (#4915) - due to [`aa9734d`](https://github.com/tobymao/sqlglot/commit/aa9734df473d1aed8e5a53a7ef8e4d3208c8296d) - improve pretty-formatting of IN (...) *(PR [#4920](https://github.com/tobymao/sqlglot/pull/4920) by [@georgesittas](https://github.com/georgesittas))*: improve pretty-formatting of IN (...) (#4920) ### :sparkles: New Features - [`1d6218e`](https://github.com/tobymao/sqlglot/commit/1d6218e386b3b1a5081272e179b8e48ec57153b4) - **snowflake**: add support for CREATE USING TEMPLATE closes [#4883](https://github.com/tobymao/sqlglot/pull/4883) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`fa9f9bd`](https://github.com/tobymao/sqlglot/commit/fa9f9bde626ddb3b8b5ad3dedc6aa3399b8c1716) - **tsql**: allow MERGE to be used in place of a subquery *(PR [#4890](https://github.com/tobymao/sqlglot/pull/4890) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4884](https://github.com/tobymao/sqlglot/issues/4884) opened by [@AndysonEjvind](https://github.com/AndysonEjvind)* - [`0de4503`](https://github.com/tobymao/sqlglot/commit/0de4503655ae9169ae02fdc8c48fb1edcd868cc8) - add a check and error message for set operations in pushdown_projections *(PR [#4897](https://github.com/tobymao/sqlglot/pull/4897) by 
[@snovik75](https://github.com/snovik75))* - [`96749c1`](https://github.com/tobymao/sqlglot/commit/96749c144832b491f01de387cd2f7a9b769af626) - **presto**: improve LATERAL VIEW EXPLODE transpilation *(PR [#4905](https://github.com/tobymao/sqlglot/pull/4905) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4879](https://github.com/tobymao/sqlglot/issues/4879) opened by [@Juanpeterjuanpa](https://github.com/Juanpeterjuanpa)* - [`c0b3448`](https://github.com/tobymao/sqlglot/commit/c0b3448e7a4ec46485dd65b7498855ab57e029ef) - **duckdb**: parse at sign as ABS function *(PR [#4915](https://github.com/tobymao/sqlglot/pull/4915) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#4912](https://github.com/tobymao/sqlglot/issues/4912) opened by [@suresh-summation](https://github.com/suresh-summation)* - [`aa9734d`](https://github.com/tobymao/sqlglot/commit/aa9734df473d1aed8e5a53a7ef8e4d3208c8296d) - **generator**: improve pretty-formatting of IN (...) 
*(PR [#4920](https://github.com/tobymao/sqlglot/pull/4920) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4015](https://github.com/TobikoData/sqlmesh/issues/4015) opened by [@petrikoro](https://github.com/petrikoro)* ### :bug: Bug Fixes - [`1617509`](https://github.com/tobymao/sqlglot/commit/1617509d44124ffaba7eaf139023df07c3ad1636) - **bigquery**: preserve time zone info in FORMAT_TIMESTAMP roundtrip *(PR [#4895](https://github.com/tobymao/sqlglot/pull/4895) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4894](https://github.com/tobymao/sqlglot/issues/4894) opened by [@lenare1K5](https://github.com/lenare1K5)* - [`8a692b9`](https://github.com/tobymao/sqlglot/commit/8a692b9b5b7982ed54444bddfe974e5f629183ff) - **tsql**: support select...into #temp_table syntax *(PR [#4893](https://github.com/tobymao/sqlglot/pull/4893) by [@hhubbell](https://github.com/hhubbell))* - [`bcf311a`](https://github.com/tobymao/sqlglot/commit/bcf311a4af4b1a95e038befc0bc84627c4851e5f) - **databricks**: Preserve PARSE_JSON() *(PR [#4901](https://github.com/tobymao/sqlglot/pull/4901) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4898](https://github.com/tobymao/sqlglot/issues/4898) opened by [@h2o1](https://github.com/h2o1)* - [`3040a5e`](https://github.com/tobymao/sqlglot/commit/3040a5e4ebc778795251a74cf3de2169337aca55) - preserve whitespace in quoted identifiers and strings *(PR [#4903](https://github.com/tobymao/sqlglot/pull/4903) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4900](https://github.com/tobymao/sqlglot/issues/4900) opened by [@npochhi](https://github.com/npochhi)* - [`937b7bd`](https://github.com/tobymao/sqlglot/commit/937b7bdd5b06daffee379390796c76ffb07c2588) - **hive**: handle string interval values in DATE ADD/SUB *(PR [#4902](https://github.com/tobymao/sqlglot/pull/4902) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4899](https://github.com/tobymao/sqlglot/issues/4899) opened by [@ricardo-rolo](https://github.com/ricardo-rolo)* - [`71c529a`](https://github.com/tobymao/sqlglot/commit/71c529a13db1690412829ac03b82ff72d44ce6c2) - **optimizer**: disable lateral alias expansion for Oracle fixes [#4910](https://github.com/tobymao/sqlglot/pull/4910) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`3b7c699`](https://github.com/tobymao/sqlglot/commit/3b7c699267bf4f041a033017a894f0c1e2ae4068) - **snowflake**: quote identifiers in stage references *(PR [#4906](https://github.com/tobymao/sqlglot/pull/4906) by [@whummer](https://github.com/whummer))* - [`f17004e`](https://github.com/tobymao/sqlglot/commit/f17004e1691c9d834e295452a960a6e3a2830e88) - **presto**: only use ARRAY[...] syntax for Schema if parent is partitioned by prop *(PR [#4913](https://github.com/tobymao/sqlglot/pull/4913) by [@georgesittas](https://github.com/georgesittas))* - [`2fbbf6a`](https://github.com/tobymao/sqlglot/commit/2fbbf6a8525385f53bcb3e588d665208ac6811c1) - **optimizer**: infer timestamp function types as TIMESTAMPTZ for bigquery *(PR [#4914](https://github.com/tobymao/sqlglot/pull/4914) by [@georgesittas](https://github.com/georgesittas))* - [`ff6be71`](https://github.com/tobymao/sqlglot/commit/ff6be715b7d44700b595bbd5c83f65c28b52e191) - **optimizer**: avoid merging window function nested under a projection *(PR [#4919](https://github.com/tobymao/sqlglot/pull/4919) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4907](https://github.com/tobymao/sqlglot/issues/4907) opened by [@Rejudge-F](https://github.com/Rejudge-F)* ### :recycle: Refactors - [`d386f37`](https://github.com/tobymao/sqlglot/commit/d386f374a6108ecce4e48324fe487c0955ab63b3) - **sqlite**: move generator methods within SQLite class *(commit by [@georgesittas](https://github.com/georgesittas))* ## 
[v26.11.1] - 2025-03-18 ### :bug: Bug Fixes - [`d7b3b3e`](https://github.com/tobymao/sqlglot/commit/d7b3b3e89720d1783d092a2c60a9c2209d9984a2) - **optimizer**: handle TableFromRows properly in annotate_types *(PR [#4889](https://github.com/tobymao/sqlglot/pull/4889) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4004](https://github.com/TobikoData/sqlmesh/issues/4004) opened by [@hmeng-taproot](https://github.com/hmeng-taproot)* ## [v26.11.0] - 2025-03-17 ### :boom: BREAKING CHANGES - due to [`ac3d311`](https://github.com/tobymao/sqlglot/commit/ac3d311c4184ca2ced603a100588e3e7435ce352) - do not expand having expressions if they conflict with a projection *(PR [#4881](https://github.com/tobymao/sqlglot/pull/4881) by [@tobymao](https://github.com/tobymao))*: do not expand having expressions if they conflict with a projection (#4881) - due to [`081994e`](https://github.com/tobymao/sqlglot/commit/081994ea85c7aa1cbbbc40a24857dba4fd6c1c61) - Fix parsing multi-part format name *(PR [#4885](https://github.com/tobymao/sqlglot/pull/4885) by [@VaggelisD](https://github.com/VaggelisD))*: Fix parsing multi-part format name (#4885) - due to [`491c407`](https://github.com/tobymao/sqlglot/commit/491c407d48a24b6d4093e9c9bfdc3d8c27c29e4c) - parse parameter key as Var instead of Identifier *(PR [#4888](https://github.com/tobymao/sqlglot/pull/4888) by [@georgesittas](https://github.com/georgesittas))*: parse parameter key as Var instead of Identifier (#4888) ### :bug: Bug Fixes - [`ac3d311`](https://github.com/tobymao/sqlglot/commit/ac3d311c4184ca2ced603a100588e3e7435ce352) - do not expand having expressions if they conflict with a projection *(PR [#4881](https://github.com/tobymao/sqlglot/pull/4881) by [@tobymao](https://github.com/tobymao))* - [`44b7b09`](https://github.com/tobymao/sqlglot/commit/44b7b09deca881e274ad03068eee5d4d594c8ca8) - **parser**: Fix separator generation for STRING_AGG *(PR 
[#4887](https://github.com/tobymao/sqlglot/pull/4887) by [@VaggelisD](https://github.com/VaggelisD))* - [`081994e`](https://github.com/tobymao/sqlglot/commit/081994ea85c7aa1cbbbc40a24857dba4fd6c1c61) - **snowflake**: Fix parsing multi-part format name *(PR [#4885](https://github.com/tobymao/sqlglot/pull/4885) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4882](https://github.com/tobymao/sqlglot/issues/4882) opened by [@kharigardner](https://github.com/kharigardner)* - [`38111a5`](https://github.com/tobymao/sqlglot/commit/38111a5eaa6bde640e25aa408ff7ea9ea6864c0b) - apply unpivot alias string conversion only for UNPIVOT *(PR [#4886](https://github.com/tobymao/sqlglot/pull/4886) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4003](https://github.com/TobikoData/sqlmesh/issues/4003) opened by [@lucargir](https://github.com/lucargir)* - [`491c407`](https://github.com/tobymao/sqlglot/commit/491c407d48a24b6d4093e9c9bfdc3d8c27c29e4c) - **clickhouse**: parse parameter key as Var instead of Identifier *(PR [#4888](https://github.com/tobymao/sqlglot/pull/4888) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4002](https://github.com/TobikoData/sqlmesh/issues/4002) opened by [@petrjanda](https://github.com/petrjanda)* ## [v26.10.1] - 2025-03-13 ### :bug: Bug Fixes - [`2b3824f`](https://github.com/tobymao/sqlglot/commit/2b3824f0bac5dae48ea7eecbe2168afe79038d06) - **duckdb**: revert timestamp/datetime -> timestampntz parsing temporarily *(PR [#4878](https://github.com/tobymao/sqlglot/pull/4878) by [@georgesittas](https://github.com/georgesittas))* ## [v26.10.0] - 2025-03-13 ### :boom: BREAKING CHANGES - due to [`c0bfcc6`](https://github.com/tobymao/sqlglot/commit/c0bfcc66b97ce667a1ead608c4fbbee69db633fa) - postgres case insensitive formats closes [#4860](https://github.com/tobymao/sqlglot/pull/4860) *(commit by [@tobymao](https://github.com/tobymao))*: 
postgres case insensitive formats closes #4860 - due to [`6914684`](https://github.com/tobymao/sqlglot/commit/69146842d005ae0edecbd7f6f842f648ae0622e7) - duckdb defaults timestampntz closes [#4859](https://github.com/tobymao/sqlglot/pull/4859) *(commit by [@tobymao](https://github.com/tobymao))*: duckdb defaults timestampntz closes #4859 - due to [`ceb1f02`](https://github.com/tobymao/sqlglot/commit/ceb1f026dd04926a6a210de9d16da4dffef4717c) - support TO_CHAR to duckdb STRFTIME *(PR [#4866](https://github.com/tobymao/sqlglot/pull/4866) by [@geooo109](https://github.com/geooo109))*: support TO_CHAR to duckdb STRFTIME (#4866) - due to [`d748e53`](https://github.com/tobymao/sqlglot/commit/d748e53f6a77196bef6550b6d9fddf41076c01fa) - Introduce pyproject.toml and switch to packaging via build *(PR [#4865](https://github.com/tobymao/sqlglot/pull/4865) by [@erindru](https://github.com/erindru))*: Introduce pyproject.toml and switch to packaging via build (#4865) - due to [`038da09`](https://github.com/tobymao/sqlglot/commit/038da09f620cf057e4576b719c4e2f6712cbb804) - treat TABLE(...) as a UDTF *(PR [#4875](https://github.com/tobymao/sqlglot/pull/4875) by [@georgesittas](https://github.com/georgesittas))*: treat TABLE(...) 
as a UDTF (#4875) - due to [`92e479e`](https://github.com/tobymao/sqlglot/commit/92e479ea7d70efc4bdccd17cb12b719aec603830) - support STRUCT(*) and MAP(*) *(PR [#4876](https://github.com/tobymao/sqlglot/pull/4876) by [@geooo109](https://github.com/geooo109))*: support STRUCT(*) and MAP(*) (#4876) - due to [`87c94fe`](https://github.com/tobymao/sqlglot/commit/87c94fe91aa2a4bc2c255191d92aed450f3c7998) - turn off multi-arg coalesce simplification *(PR [#4877](https://github.com/tobymao/sqlglot/pull/4877) by [@georgesittas](https://github.com/georgesittas))*: turn off multi-arg coalesce simplification (#4877) ### :sparkles: New Features - [`54be278`](https://github.com/tobymao/sqlglot/commit/54be278361496367fb2f7d380634d3390879e58d) - **snowflake**: add support for HEX_DECODE_BINARY *(PR [#4855](https://github.com/tobymao/sqlglot/pull/4855) by [@sk-](https://github.com/sk-))* - :arrow_lower_right: *addresses issue [#4852](https://github.com/tobymao/sqlglot/issues/4852) opened by [@sk-](https://github.com/sk-)* - [`47959a9`](https://github.com/tobymao/sqlglot/commit/47959a94a4693cb904cfb2e50ce8cc8ca5c2e22f) - **duckdb**: add support for prefix aliases *(PR [#4869](https://github.com/tobymao/sqlglot/pull/4869) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`e942391`](https://github.com/tobymao/sqlglot/commit/e942391edcefb40f927887450765b4365b0e980d) - spark zone offset closes [#4858](https://github.com/tobymao/sqlglot/pull/4858) *(commit by [@tobymao](https://github.com/tobymao))* - [`c0bfcc6`](https://github.com/tobymao/sqlglot/commit/c0bfcc66b97ce667a1ead608c4fbbee69db633fa) - postgres case insensitive formats closes [#4860](https://github.com/tobymao/sqlglot/pull/4860) *(commit by [@tobymao](https://github.com/tobymao))* - [`6914684`](https://github.com/tobymao/sqlglot/commit/69146842d005ae0edecbd7f6f842f648ae0622e7) - duckdb defaults timestampntz closes [#4859](https://github.com/tobymao/sqlglot/pull/4859) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`ceb1f02`](https://github.com/tobymao/sqlglot/commit/ceb1f026dd04926a6a210de9d16da4dffef4717c) - **snowflake**: support TO_CHAR to duckdb STRFTIME *(PR [#4866](https://github.com/tobymao/sqlglot/pull/4866) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4857](https://github.com/tobymao/sqlglot/issues/4857) opened by [@asarama](https://github.com/asarama)* - [`80466f1`](https://github.com/tobymao/sqlglot/commit/80466f16aa081860bc9e65f425924a0620840cdf) - expand util - align normalization behaviour with lazy and non-lazy source providers. *(PR [#4874](https://github.com/tobymao/sqlglot/pull/4874) by [@omerhadari](https://github.com/omerhadari))* - [`038da09`](https://github.com/tobymao/sqlglot/commit/038da09f620cf057e4576b719c4e2f6712cbb804) - **snowflake**: treat TABLE(...) as a UDTF *(PR [#4875](https://github.com/tobymao/sqlglot/pull/4875) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4861](https://github.com/tobymao/sqlglot/issues/4861) opened by [@mattijsdp](https://github.com/mattijsdp)* - [`92e479e`](https://github.com/tobymao/sqlglot/commit/92e479ea7d70efc4bdccd17cb12b719aec603830) - **hive**: support STRUCT(*) and MAP(*) *(PR [#4876](https://github.com/tobymao/sqlglot/pull/4876) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4871](https://github.com/tobymao/sqlglot/issues/4871) opened by [@btyuhas](https://github.com/btyuhas)* - [`87c94fe`](https://github.com/tobymao/sqlglot/commit/87c94fe91aa2a4bc2c255191d92aed450f3c7998) - **redshift**: turn off multi-arg coalesce simplification *(PR [#4877](https://github.com/tobymao/sqlglot/pull/4877) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`d748e53`](https://github.com/tobymao/sqlglot/commit/d748e53f6a77196bef6550b6d9fddf41076c01fa) - Introduce pyproject.toml and switch to packaging via build *(PR 
[#4865](https://github.com/tobymao/sqlglot/pull/4865) by [@erindru](https://github.com/erindru))* ## [v26.9.0] - 2025-03-07 ### :boom: BREAKING CHANGES - due to [`6a3973b`](https://github.com/tobymao/sqlglot/commit/6a3973b7da639a19634bc352ea76f75735114c38) - Refactor exp.GroupConcat generation *(PR [#4823](https://github.com/tobymao/sqlglot/pull/4823) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor exp.GroupConcat generation (#4823) - due to [`813d2ad`](https://github.com/tobymao/sqlglot/commit/813d2ada7afd653b2aaff75cbddd7f011750f861) - use _parse_table_parts for udf parsing *(PR [#4829](https://github.com/tobymao/sqlglot/pull/4829) by [@geooo109](https://github.com/geooo109))*: use _parse_table_parts for udf parsing (#4829) - due to [`7cdbad6`](https://github.com/tobymao/sqlglot/commit/7cdbad688cad7e7ce40df99802e93deb6a4d7abf) - add initial support for PUT statements *(PR [#4818](https://github.com/tobymao/sqlglot/pull/4818) by [@whummer](https://github.com/whummer))*: add initial support for PUT statements (#4818) - due to [`8c0a6be`](https://github.com/tobymao/sqlglot/commit/8c0a6bec6e38f3f6ce9a90b6a9b6457de70c7228) - BLOB transpilation *(PR [#4844](https://github.com/tobymao/sqlglot/pull/4844) by [@geooo109](https://github.com/geooo109))*: BLOB transpilation (#4844) ### :sparkles: New Features - [`7e8975e`](https://github.com/tobymao/sqlglot/commit/7e8975efce0af350142f8fb437cf46dd46f2b8d9) - **oracle**: add FORCE property *(PR [#4828](https://github.com/tobymao/sqlglot/pull/4828) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#4826](https://github.com/tobymao/sqlglot/issues/4826) opened by [@Duchyna1](https://github.com/Duchyna1)* - [`7cdbad6`](https://github.com/tobymao/sqlglot/commit/7cdbad688cad7e7ce40df99802e93deb6a4d7abf) - **snowflake**: add initial support for PUT statements *(PR [#4818](https://github.com/tobymao/sqlglot/pull/4818) by [@whummer](https://github.com/whummer))* - :arrow_lower_right: 
*addresses issue [#4813](https://github.com/tobymao/sqlglot/issues/4813) opened by [@whummer](https://github.com/whummer)* - [`f4d1a1f`](https://github.com/tobymao/sqlglot/commit/f4d1a1f4d8104b2efd56f568ca99c7e768466d19) - **hive**: add support for STORED BY syntax for storage handlers *(PR [#4832](https://github.com/tobymao/sqlglot/pull/4832) by [@tsamaras](https://github.com/tsamaras))* - [`b7a0df1`](https://github.com/tobymao/sqlglot/commit/b7a0df1b9a9cff2cd57db77ac0095c189b9d67ab) - **parser**: Support trailing commas after from *(PR [#4854](https://github.com/tobymao/sqlglot/pull/4854) by [@omerhadari](https://github.com/omerhadari))* ### :bug: Bug Fixes - [`6a3973b`](https://github.com/tobymao/sqlglot/commit/6a3973b7da639a19634bc352ea76f75735114c38) - **duckdb, snowflake**: Refactor exp.GroupConcat generation *(PR [#4823](https://github.com/tobymao/sqlglot/pull/4823) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4821](https://github.com/tobymao/sqlglot/issues/4821) opened by [@asarama](https://github.com/asarama)* - [`08eb7f2`](https://github.com/tobymao/sqlglot/commit/08eb7f2032957c2fe3119963f344538b90d8f631) - **snowflake**: clean up PUT implementation *(PR [#4830](https://github.com/tobymao/sqlglot/pull/4830) by [@georgesittas](https://github.com/georgesittas))* - [`adf2fef`](https://github.com/tobymao/sqlglot/commit/adf2fef27dc341508c3b9c710da0f835277094a1) - **mysql**: Support for USING BTREE/HASH in PK *(PR [#4837](https://github.com/tobymao/sqlglot/pull/4837) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4833](https://github.com/tobymao/sqlglot/issues/4833) opened by [@Gohoy](https://github.com/Gohoy)* - [`8c0a6be`](https://github.com/tobymao/sqlglot/commit/8c0a6bec6e38f3f6ce9a90b6a9b6457de70c7228) - **mysql**: BLOB transpilation *(PR [#4844](https://github.com/tobymao/sqlglot/pull/4844) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue 
[#4839](https://github.com/tobymao/sqlglot/issues/4839) opened by [@Gohoy](https://github.com/Gohoy)* - [`0cb7a71`](https://github.com/tobymao/sqlglot/commit/0cb7a719de33ab1f6cfedf0833df7c79324b21f9) - **postgres**: Fix arrow extraction for string keys representing numbers *(PR [#4842](https://github.com/tobymao/sqlglot/pull/4842) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4840](https://github.com/tobymao/sqlglot/issues/4840) opened by [@superkashyap](https://github.com/superkashyap)* - [`2e223cb`](https://github.com/tobymao/sqlglot/commit/2e223cb3e0bc946b8aa97e115e4c0dc02e58d1c9) - **parser**: properly parse qualified columns when parsing "columns ops" *(PR [#4847](https://github.com/tobymao/sqlglot/pull/4847) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4845](https://github.com/tobymao/sqlglot/issues/4845) opened by [@maudlel](https://github.com/maudlel)* ### :recycle: Refactors - [`813d2ad`](https://github.com/tobymao/sqlglot/commit/813d2ada7afd653b2aaff75cbddd7f011750f861) - use _parse_table_parts for udf parsing *(PR [#4829](https://github.com/tobymao/sqlglot/pull/4829) by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`e4fd354`](https://github.com/tobymao/sqlglot/commit/e4fd354c8fb55752cb883eb3912950c17020a1df) - Simplify Hive's STORED BY property *(PR [#4838](https://github.com/tobymao/sqlglot/pull/4838) by [@VaggelisD](https://github.com/VaggelisD))* - [`8115b58`](https://github.com/tobymao/sqlglot/commit/8115b5853e621423eb2697b7253b17ef709dbdf0) - (duckdb): treat auto-increment DDL property as unsupported *(PR [#4849](https://github.com/tobymao/sqlglot/pull/4849) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4841](https://github.com/tobymao/sqlglot/issues/4841) opened by [@sk-](https://github.com/sk-)* - [`b05dddb`](https://github.com/tobymao/sqlglot/commit/b05dddbe5a7d45dfebefc3e04cb95d8c4d9802e9) - 
fix pdoc deployment *(PR [#4856](https://github.com/tobymao/sqlglot/pull/4856) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4853](https://github.com/tobymao/sqlglot/issues/4853) opened by [@tsamaras](https://github.com/tsamaras)* ## [v26.8.0] - 2025-03-03 ### :boom: BREAKING CHANGES - due to [`596b66f`](https://github.com/tobymao/sqlglot/commit/596b66fc289140109db8f689c6e84264d643a47a) - add support for and/2 and or/2 functions *(PR [#4806](https://github.com/tobymao/sqlglot/pull/4806) by [@georgesittas](https://github.com/georgesittas))*: add support for and/2 and or/2 functions (#4806) - due to [`eae860c`](https://github.com/tobymao/sqlglot/commit/eae860ce5b59b9e0b791fe79686899efb83df1dd) - expand DISTINCT ON expressions like we do for GROUP/ORDER by *(PR [#4807](https://github.com/tobymao/sqlglot/pull/4807) by [@georgesittas](https://github.com/georgesittas))*: expand DISTINCT ON expressions like we do for GROUP/ORDER by (#4807) - due to [`83e6a87`](https://github.com/tobymao/sqlglot/commit/83e6a87f8d233eac6d3bcd3a49451a14dc10e06e) - Parse SHA256 *(PR [#4816](https://github.com/tobymao/sqlglot/pull/4816) by [@VaggelisD](https://github.com/VaggelisD))*: Parse SHA256 (#4816) ### :sparkles: New Features - [`50539ce`](https://github.com/tobymao/sqlglot/commit/50539ced46de3949f6a70acdab86129fb50c9385) - **trino**: add support for ON ... 
ERROR/NULL syntax for JSON_QUERY *(PR [#4805](https://github.com/tobymao/sqlglot/pull/4805) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3905](https://github.com/TobikoData/sqlmesh/issues/3905) opened by [@darkcofy](https://github.com/darkcofy)* - [`596b66f`](https://github.com/tobymao/sqlglot/commit/596b66fc289140109db8f689c6e84264d643a47a) - **clickhouse**: add support for and/2 and or/2 functions *(PR [#4806](https://github.com/tobymao/sqlglot/pull/4806) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4803](https://github.com/tobymao/sqlglot/issues/4803) opened by [@xtess16](https://github.com/xtess16)* - [`c5bf122`](https://github.com/tobymao/sqlglot/commit/c5bf122a6aa7ca315ad726e6ea3d4a98eebd68d0) - **mysql**: support setting visibility on `ALTER COLUMN`. *(PR [#4809](https://github.com/tobymao/sqlglot/pull/4809) by [@burnison](https://github.com/burnison))* ### :bug: Bug Fixes - [`6441d00`](https://github.com/tobymao/sqlglot/commit/6441d0041ccec7f1c28763f5775b6195d2049dc6) - orphan node(s) in eliminate_join_marks *(PR [#4808](https://github.com/tobymao/sqlglot/pull/4808) by [@snovik75](https://github.com/snovik75))* - [`eae860c`](https://github.com/tobymao/sqlglot/commit/eae860ce5b59b9e0b791fe79686899efb83df1dd) - **optimizer**: expand DISTINCT ON expressions like we do for GROUP/ORDER by *(PR [#4807](https://github.com/tobymao/sqlglot/pull/4807) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4791](https://github.com/tobymao/sqlglot/issues/4791) opened by [@Fosly](https://github.com/Fosly)* - [`5ef35f2`](https://github.com/tobymao/sqlglot/commit/5ef35f2dc622d96b013a2651c71e1a32933f51cb) - **clickhouse**: unparseable `AggregateFunction(count)` *(PR [#4812](https://github.com/tobymao/sqlglot/pull/4812) by [@pkit](https://github.com/pkit))* - 
[`83e6a87`](https://github.com/tobymao/sqlglot/commit/83e6a87f8d233eac6d3bcd3a49451a14dc10e06e) - **duckdb**: Parse SHA256 *(PR [#4816](https://github.com/tobymao/sqlglot/pull/4816) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4815](https://github.com/tobymao/sqlglot/issues/4815) opened by [@muuuuwa](https://github.com/muuuuwa)* - [`faf6d41`](https://github.com/tobymao/sqlglot/commit/faf6d416afe30bf0bc24649fcceccf79fbfb8ca1) - allow duplicate nodes in matchings *(PR [#4817](https://github.com/tobymao/sqlglot/pull/4817) by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`e34f3bc`](https://github.com/tobymao/sqlglot/commit/e34f3bc99f832ce2affb3a0297329f3d1cd7244e) - **optimizer**: refactor DISTINCT ON qualification to better match ORDER BY *(PR [#4811](https://github.com/tobymao/sqlglot/pull/4811) by [@georgesittas](https://github.com/georgesittas))* ## [v26.7.0] - 2025-02-26 ### :boom: BREAKING CHANGES - due to [`466c839`](https://github.com/tobymao/sqlglot/commit/466c839c2cfc94b398dd619b738df165f2876cdb) - Remove extra MAP bracket and ARRAY wrap *(PR [#4712](https://github.com/tobymao/sqlglot/pull/4712) by [@VaggelisD](https://github.com/VaggelisD))*: Remove extra MAP bracket and ARRAY wrap (#4712) - due to [`79ab311`](https://github.com/tobymao/sqlglot/commit/79ab3116758c240786ab4353a26f1646e242a61b) - add generate_series table column alias *(PR [#4741](https://github.com/tobymao/sqlglot/pull/4741) by [@georgesittas](https://github.com/georgesittas))*: add generate_series table column alias (#4741) - due to [`66b3ea9`](https://github.com/tobymao/sqlglot/commit/66b3ea905af34cfedc961c68ba738ba90d16221d) - respect type nullability when casting in strtodate_sql *(PR [#4744](https://github.com/tobymao/sqlglot/pull/4744) by [@sleshJdev](https://github.com/sleshJdev))*: respect type nullability when casting in strtodate_sql (#4744) - due to 
[`91f47fe`](https://github.com/tobymao/sqlglot/commit/91f47fec2a8c727f7e4c93fa54b6f06a36a6b42f) - Support for exp.HexString in DuckDB/Presto/Trino *(PR [#4743](https://github.com/tobymao/sqlglot/pull/4743) by [@VaggelisD](https://github.com/VaggelisD))*: Support for exp.HexString in DuckDB/Presto/Trino (#4743) - due to [`0596176`](https://github.com/tobymao/sqlglot/commit/0596176dd59737f945624d6453259072917e2fee) - generate CASE expression instead of COUNT_IF for ON CONVERSION FAILURE) *(PR [#4683](https://github.com/tobymao/sqlglot/pull/4683) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4682](https://github.com/tobymao/sqlglot/issues/4682) opened by [@jaredschwartz-ofs](https://github.com/jaredschwartz-ofs)* - [`1ad656e`](https://github.com/tobymao/sqlglot/commit/1ad656e4572bf7b3d38805e92f7202f4dcc4f9f8) - enable parsing of (u)int128,256 types for all dialects *(PR [#4685](https://github.com/tobymao/sqlglot/pull/4685) by [@georgesittas](https://github.com/georgesittas))* - [`6f5fb04`](https://github.com/tobymao/sqlglot/commit/6f5fb0423e970920fa5abda3f7e4356e2fb441e1) - implement Dune dialect *(PR [#4686](https://github.com/tobymao/sqlglot/pull/4686) by [@georgesittas](https://github.com/georgesittas))* - [`9ea15c7`](https://github.com/tobymao/sqlglot/commit/9ea15c732d76e0d6a393e553a42e6b9ed30ef286) - **bigquery**: add EXPORT DATA statement support *(PR [#4688](https://github.com/tobymao/sqlglot/pull/4688) by [@ArnoldHueteG](https://github.com/ArnoldHueteG))* ### :bug: Bug Fixes - [`cd53f7e`](https://github.com/tobymao/sqlglot/commit/cd53f7ec03e99129b430c435d23907ef7d0e0c34) - **clickhouse**: Generate bracket notation for exp.VarMap *(PR [#4664](https://github.com/tobymao/sqlglot/pull/4664) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4662](https://github.com/tobymao/sqlglot/issues/4662) opened by [@martijnthe](https://github.com/martijnthe)* - 
[`0920f77`](https://github.com/tobymao/sqlglot/commit/0920f778b2d94d94f3c8cccf280a87a6a14b12f7) - use utf-8 encoding in open calls, fixes [#4676](https://github.com/tobymao/sqlglot/pull/4676) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e71c4c0`](https://github.com/tobymao/sqlglot/commit/e71c4c0b60811f26828d7719fe941dfbc3693be1) - **trino**: Add more JSON_QUERY options *(PR [#4673](https://github.com/tobymao/sqlglot/pull/4673) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4672](https://github.com/tobymao/sqlglot/issues/4672) opened by [@JustGui](https://github.com/JustGui)* - [`1a91913`](https://github.com/tobymao/sqlglot/commit/1a91913eea97e2008a0fe4282d60d7c693a79fc3) - **postgres**: Parse empty bracketed ARRAY with cast *(PR [#4679](https://github.com/tobymao/sqlglot/pull/4679) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4674](https://github.com/tobymao/sqlglot/issues/4674) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`c45f174`](https://github.com/tobymao/sqlglot/commit/c45f17455477790f53ef7e347a7e85cfdb82c4ab) - **bigquery**: Inline type-annotated ARRAY literals *(PR [#4671](https://github.com/tobymao/sqlglot/pull/4671) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4670](https://github.com/tobymao/sqlglot/issues/4670) opened by [@sean-rose](https://github.com/sean-rose)* - [`df75edd`](https://github.com/tobymao/sqlglot/commit/df75eddf698af9fe36e7121a63cc2b9fdd468363) - **duckdb**: support postgres JSON/JSONB_OBJECT_AGG to duckdb JSON_GROUP_OBJECT *(PR [#4677](https://github.com/tobymao/sqlglot/pull/4677) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4667](https://github.com/tobymao/sqlglot/issues/4667) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`69680c1`](https://github.com/tobymao/sqlglot/commit/69680c146f67175ab6e4c4d9898b0991033a4188) - 
**tsql**: Transpile exp.Fetch limits *(PR [#4680](https://github.com/tobymao/sqlglot/pull/4680) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4665](https://github.com/tobymao/sqlglot/issues/4665) opened by [@WillAyd](https://github.com/WillAyd)* - [`b3b0962`](https://github.com/tobymao/sqlglot/commit/b3b09624cdefb1baa46ddbb888b24648f330a963) - **hive**: Simplify DATE_FORMAT roundtrip *(PR [#4689](https://github.com/tobymao/sqlglot/pull/4689) by [@VaggelisD](https://github.com/VaggelisD))* - [`ade8b82`](https://github.com/tobymao/sqlglot/commit/ade8b826541ecfb00e218d16d995d34adab0335a) - load dialects lazily *(PR [#4687](https://github.com/tobymao/sqlglot/pull/4687) by [@georgesittas](https://github.com/georgesittas))* - [`47c0236`](https://github.com/tobymao/sqlglot/commit/47c023650dad8b0091248c608a211018b841042a) - **bigquery**: Refactor EXPORT DATA statement *(PR [#4693](https://github.com/tobymao/sqlglot/pull/4693) by [@VaggelisD](https://github.com/VaggelisD))* - [`1904b76`](https://github.com/tobymao/sqlglot/commit/1904b7605a7308608ac64e5cfb3c8424d3e55c17) - **tsql**: remove BEGIN from identifiers *(PR [#4695](https://github.com/tobymao/sqlglot/pull/4695) by [@geooo109](https://github.com/geooo109))* - [`a688b6c`](https://github.com/tobymao/sqlglot/commit/a688b6cff01b9cd828c0467b0aa09fba728d751a) - **snowflake**: support correct AUTO INCREMENT transpilation *(PR [#4696](https://github.com/tobymao/sqlglot/pull/4696) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4694](https://github.com/tobymao/sqlglot/issues/4694) opened by [@sfc-gh-tdwojak](https://github.com/sfc-gh-tdwojak)* ## [v26.3.9] - 2025-01-27 ### :bug: Bug Fixes - [`b091f2f`](https://github.com/tobymao/sqlglot/commit/b091f2f4e4779fb9a4187d1665ca40e1648d9ccb) - **trino**: Correctly render exp.LocationProperty in CREATE TABLE / CREATE SCHEMA *(PR [#4659](https://github.com/tobymao/sqlglot/pull/4659) by 
[@erindru](https://github.com/erindru))* - [`c4de945`](https://github.com/tobymao/sqlglot/commit/c4de94538cd69540f772b9b13e968ee16ffbbe67) - **Trino**: Prevent first_value and last_value from being converted *(PR [#4661](https://github.com/tobymao/sqlglot/pull/4661) by [@MikeWallis42](https://github.com/MikeWallis42))* - :arrow_lower_right: *fixes issue [#4660](https://github.com/tobymao/sqlglot/issues/4660) opened by [@MikeWallis42](https://github.com/MikeWallis42)* ### :wrench: Chores - [`bae0489`](https://github.com/tobymao/sqlglot/commit/bae0489044a1368556f03f637c171a1873b6f05c) - reduce sdist size *(commit by [@tobymao](https://github.com/tobymao))* ## [v26.3.8] - 2025-01-24 ### :wrench: Chores - [`5f54f16`](https://github.com/tobymao/sqlglot/commit/5f54f168ee75c5a344747a035e63e1df70fe652c) - bump sqlglotrs to 0.3.14 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.7] - 2025-01-24 ### :wrench: Chores - [`14ad1a0`](https://github.com/tobymao/sqlglot/commit/14ad1a04e86fea5ea88f99948e4cc283692e72a2) - bump sqlglotrs to 0.3.13 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.6] - 2025-01-24 ### :wrench: Chores - [`085fef6`](https://github.com/tobymao/sqlglot/commit/085fef6971a4ebd43b5c7013c6bbcb0d00dfdc30) - bump sqlglotrs to 0.3.12 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.5] - 2025-01-24 ### :wrench: Chores - [`acb7217`](https://github.com/tobymao/sqlglot/commit/acb7217d89e12de549663b67af4687a08512993f) - bump sqlglotrs to 0.3.11 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.4] - 2025-01-24 ### :wrench: Chores - [`bb7548d`](https://github.com/tobymao/sqlglot/commit/bb7548d1e9f371d3ce931fcbd86c65c895f159d1) - bump sqlglotrs to 0.3.10 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.3] - 2025-01-23 ### :wrench: Chores - [`3a188ef`](https://github.com/tobymao/sqlglot/commit/3a188ef0d42a6313625b25003c27195156e7e753) - fix sqlglotrs 
deployment job *(PR [#4657](https://github.com/tobymao/sqlglot/pull/4657) by [@georgesittas](https://github.com/georgesittas))* - [`7e55533`](https://github.com/tobymao/sqlglot/commit/7e55533d9bb06783803f275415640217c89085d0) - bump sqlglotrs to 0.3.9 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.2] - 2025-01-23 ### :wrench: Chores - [`28f56cb`](https://github.com/tobymao/sqlglot/commit/28f56cb7d9805ce898e7bf6bb884cccb1bd32c52) - fix sqlglotrs deployment job *(PR [#4656](https://github.com/tobymao/sqlglot/pull/4656) by [@georgesittas](https://github.com/georgesittas))* - [`846b141`](https://github.com/tobymao/sqlglot/commit/846b1414183e3d193b4aacc82f3861378adb9ec9) - bump sqlglotrs to 0.3.8 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.1] - 2025-01-23 ### :wrench: Chores - [`ff9ea0c`](https://github.com/tobymao/sqlglot/commit/ff9ea0c4554ef0fa46b3460d01374d4a3f9c36ff) - change upload-artifact to v4 *(commit by [@georgesittas](https://github.com/georgesittas))* - [`61c4784`](https://github.com/tobymao/sqlglot/commit/61c4784033940e34e91732e2464e4baba77e6b7c) - bump sqlglotrs to 0.3.7 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.3.0] - 2025-01-23 ### :boom: BREAKING CHANGES - due to [`8b465d4`](https://github.com/tobymao/sqlglot/commit/8b465d498e0aa9feee53306f631e258443ee3060) - expand single VALUES clause in CTE into a SELECT * *(PR [#4617](https://github.com/tobymao/sqlglot/pull/4617) by [@georgesittas](https://github.com/georgesittas))*: expand single VALUES clause in CTE into a SELECT * (#4617) - due to [`59d886d`](https://github.com/tobymao/sqlglot/commit/59d886d6abfc00726b785a4d468f6b2e0f9d3b1a) - treat LEVEL column in CONNECT BY queries as an identifier *(PR [#4627](https://github.com/tobymao/sqlglot/pull/4627) by [@georgesittas](https://github.com/georgesittas))*: treat LEVEL column in CONNECT BY queries as an identifier (#4627) - due to 
[`9db09ff`](https://github.com/tobymao/sqlglot/commit/9db09ff91931802c675a219951f28afee1d4019d) - support more compact SAFE_DIVIDE transpilation [#4634](https://github.com/tobymao/sqlglot/pull/4634) *(PR [#4641](https://github.com/tobymao/sqlglot/pull/4641) by [@geooo109](https://github.com/geooo109))*: support more compact SAFE_DIVIDE transpilation #4634 (#4641) - due to [`94af80b`](https://github.com/tobymao/sqlglot/commit/94af80b8bc3c44aa9770d6503f4e07ad4e37e314) - Do not remove parens on bracketed expressions *(PR [#4645](https://github.com/tobymao/sqlglot/pull/4645) by [@VaggelisD](https://github.com/VaggelisD))*: Do not remove parens on bracketed expressions (#4645) - due to [`35923e9`](https://github.com/tobymao/sqlglot/commit/35923e959ff934093a7b82c58f13c5a89a768f5e) - POSITION and all their variants for all dialects *(PR [#4606](https://github.com/tobymao/sqlglot/pull/4606) by [@pruzko](https://github.com/pruzko))*: POSITION and all their variants for all dialects (#4606) ### :sparkles: New Features - [`e47a7c9`](https://github.com/tobymao/sqlglot/commit/e47a7c943b0beef37e30cd7c71ea98c27b82c11b) - Fix Oracle Integer Type Mapping *(PR [#4616](https://github.com/tobymao/sqlglot/pull/4616) by [@pruzko](https://github.com/pruzko))* - [`d8ade83`](https://github.com/tobymao/sqlglot/commit/d8ade830bbca4d2893a7e406868a0bd3a654057e) - **clickhouse**: Dynamic data type *(PR [#4624](https://github.com/tobymao/sqlglot/pull/4624) by [@pkit](https://github.com/pkit))* - [`f7628ad`](https://github.com/tobymao/sqlglot/commit/f7628adf12e03a09ec89fe883d5b710a0f7e0151) - **optimizer**: Fix qualify for SEMI/ANTI joins *(PR [#4622](https://github.com/tobymao/sqlglot/pull/4622) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3557](https://github.com/TobikoData/sqlmesh/issues/3557) opened by [@Bilbottom](https://github.com/Bilbottom)* - [`a20b663`](https://github.com/tobymao/sqlglot/commit/a20b663964a9845d3eb3c43def5880a531dab4a4) - 
improve rs tokenizer performance *(PR [#4638](https://github.com/tobymao/sqlglot/pull/4638) by [@benfdking](https://github.com/benfdking))* - [`ffa0df7`](https://github.com/tobymao/sqlglot/commit/ffa0df72e36c6a08f1fc707d9c83e98eccc214c1) - **parser**: Support Oracle/Postgres XMLNAMESPACES in XMLTABLE *(PR [#4643](https://github.com/tobymao/sqlglot/pull/4643) by [@rbreejen](https://github.com/rbreejen))* - :arrow_lower_right: *addresses issue [#4642](https://github.com/tobymao/sqlglot/issues/4642) opened by [@rbreejen](https://github.com/rbreejen)* - [`35923e9`](https://github.com/tobymao/sqlglot/commit/35923e959ff934093a7b82c58f13c5a89a768f5e) - POSITION and all their variants for all dialects *(PR [#4606](https://github.com/tobymao/sqlglot/pull/4606) by [@pruzko](https://github.com/pruzko))* ### :bug: Bug Fixes - [`14474ee`](https://github.com/tobymao/sqlglot/commit/14474ee689025cc67b1f9a07e51d2f414ec5ab49) - **tsql**: support TSQL PRIMARY KEY constraint with DESC, ASC *(PR [#4618](https://github.com/tobymao/sqlglot/pull/4618) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4610](https://github.com/tobymao/sqlglot/issues/4610) opened by [@cchambers-rdi](https://github.com/cchambers-rdi)* - [`8b465d4`](https://github.com/tobymao/sqlglot/commit/8b465d498e0aa9feee53306f631e258443ee3060) - **parser**: expand single VALUES clause in CTE into a SELECT * *(PR [#4617](https://github.com/tobymao/sqlglot/pull/4617) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3556](https://github.com/TobikoData/sqlmesh/issues/3556) opened by [@Bilbottom](https://github.com/Bilbottom)* - [`647d986`](https://github.com/tobymao/sqlglot/commit/647d98650a3d6ba6aa7d57560555832548dd89aa) - **snowflake**: get rid of incorrect time mappings *(PR [#4629](https://github.com/tobymao/sqlglot/pull/4629) by [@georgesittas](https://github.com/georgesittas))* - 
[`9cbd5ef`](https://github.com/tobymao/sqlglot/commit/9cbd5ef798d1f34d4eebe501cead8295564fc15c) - **trino**: generate ArrayUniqueAgg as ARRAY_AGG(DISTINCT ...) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`59d886d`](https://github.com/tobymao/sqlglot/commit/59d886d6abfc00726b785a4d468f6b2e0f9d3b1a) - **optimizer**: treat LEVEL column in CONNECT BY queries as an identifier *(PR [#4627](https://github.com/tobymao/sqlglot/pull/4627) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4620](https://github.com/tobymao/sqlglot/issues/4620) opened by [@snovik75](https://github.com/snovik75)* - [`6107661`](https://github.com/tobymao/sqlglot/commit/6107661424622651447da09fb9d7e456ff453bff) - **snowflake**: Allow parsing of TO_TIME *(PR [#4631](https://github.com/tobymao/sqlglot/pull/4631) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4625](https://github.com/tobymao/sqlglot/issues/4625) opened by [@aletheavilla](https://github.com/aletheavilla)* - [`9fdfd4d`](https://github.com/tobymao/sqlglot/commit/9fdfd4d6824702f019223536ba4013a966170ff6) - **trino**: support QUOTES option for JSON_QUERY [#4623](https://github.com/tobymao/sqlglot/pull/4623) *(PR [#4628](https://github.com/tobymao/sqlglot/pull/4628) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4623](https://github.com/tobymao/sqlglot/issues/4623) opened by [@betodealmeida](https://github.com/betodealmeida)* - [`43eb0d9`](https://github.com/tobymao/sqlglot/commit/43eb0d9360f3154039e9eb71ee8818b6590d220a) - **tsql**: create schema ast access fixup fixes [#4632](https://github.com/tobymao/sqlglot/pull/4632) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`59f6525`](https://github.com/tobymao/sqlglot/commit/59f652572037940f136508ee60b8e0a137ce18f0) - **duckdb**: Transpile exp.RegexpILike *(PR [#4640](https://github.com/tobymao/sqlglot/pull/4640) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4639](https://github.com/tobymao/sqlglot/issues/4639) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`9db09ff`](https://github.com/tobymao/sqlglot/commit/9db09ff91931802c675a219951f28afee1d4019d) - **bigquery**: support more compact SAFE_DIVIDE transpilation [#4634](https://github.com/tobymao/sqlglot/pull/4634) *(PR [#4641](https://github.com/tobymao/sqlglot/pull/4641) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4634](https://github.com/tobymao/sqlglot/issues/4634) opened by [@bbernst](https://github.com/bbernst)* - [`94af80b`](https://github.com/tobymao/sqlglot/commit/94af80b8bc3c44aa9770d6503f4e07ad4e37e314) - **optimizer**: Do not remove parens on bracketed expressions *(PR [#4645](https://github.com/tobymao/sqlglot/pull/4645) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3672](https://github.com/TobikoData/sqlmesh/issues/3672) opened by [@simon-pactum](https://github.com/simon-pactum)* - [`761e835`](https://github.com/tobymao/sqlglot/commit/761e835e39fa819ef478b8086bfd814dbecc7927) - qualify using *(PR [#4646](https://github.com/tobymao/sqlglot/pull/4646) by [@tobymao](https://github.com/tobymao))* - [`8b0b8ac`](https://github.com/tobymao/sqlglot/commit/8b0b8ac4ccbaf54d5fa948d9900ca53ccca9115b) - **sqlite**: allow 2-arg version of UNHEX closes [#4648](https://github.com/tobymao/sqlglot/pull/4648) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`2f12bd9`](https://github.com/tobymao/sqlglot/commit/2f12bd94d8583ddf9af808dda4df1690179ee592) - **athena**: Generate PartitionedByProperty correctly on CTAS for an Iceberg table *(PR [#4654](https://github.com/tobymao/sqlglot/pull/4654) by [@erindru](https://github.com/erindru))* - [`1ea0dc2`](https://github.com/tobymao/sqlglot/commit/1ea0dc296ca2e47d466ddce162ad64945c532586) - **postgres**: Support WITHIN GROUP ( 
order_by_clause ) FILTER for Postgres *(PR [#4652](https://github.com/tobymao/sqlglot/pull/4652) by [@gl3nnleblanc](https://github.com/gl3nnleblanc))* - :arrow_lower_right: *fixes issue [#4651](https://github.com/tobymao/sqlglot/issues/4651) opened by [@gl3nnleblanc](https://github.com/gl3nnleblanc)* ### :recycle: Refactors - [`284a936`](https://github.com/tobymao/sqlglot/commit/284a9360c5d43301da34d8d5199f101423ade289) - simplify WITHIN GROUP ... FILTER support *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`73512f9`](https://github.com/tobymao/sqlglot/commit/73512f9dde03b632b5f9eff0331713f9b44996d7) - set default properly for use_rs_tokenizer *(PR [#4619](https://github.com/tobymao/sqlglot/pull/4619) by [@georgesittas](https://github.com/georgesittas))* - [`9ba1db3`](https://github.com/tobymao/sqlglot/commit/9ba1db3436d2afba5821b853cb3c573aada370e7) - add bench command *(PR [#4621](https://github.com/tobymao/sqlglot/pull/4621) by [@benfdking](https://github.com/benfdking))* - [`0aa1516`](https://github.com/tobymao/sqlglot/commit/0aa1516cd8bf5f7d77e6d743f30f1526ccf15633) - move to string new *(PR [#4637](https://github.com/tobymao/sqlglot/pull/4637) by [@benfdking](https://github.com/benfdking))* - [`2355a91`](https://github.com/tobymao/sqlglot/commit/2355a914752f3add75457849ae8f8ec00754f888) - clean up unnecessary mut *(PR [#4636](https://github.com/tobymao/sqlglot/pull/4636) by [@benfdking](https://github.com/benfdking))* - [`0b68af5`](https://github.com/tobymao/sqlglot/commit/0b68af545bc82317ee16903d525e7b47f273d92d) - bump sqlglotrs to 0.3.6 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.2.1] - 2025-01-15 ### :wrench: Chores - [`b447322`](https://github.com/tobymao/sqlglot/commit/b4473220f0f50a9ce2463b3a98a77bf2fdd897af) - parser accepts ctes without as keyword again, except for clickhouse *(PR [#4612](https://github.com/tobymao/sqlglot/pull/4612) by 
[@georgesittas](https://github.com/georgesittas))* ## [v26.2.0] - 2025-01-14 ### :boom: BREAKING CHANGES - due to [`f3fcc10`](https://github.com/tobymao/sqlglot/commit/f3fcc1013dfcfdaa388ba3426ed82c4fe0eefab1) - allow limit, offset to be used as both modifiers and aliases *(PR [#4589](https://github.com/tobymao/sqlglot/pull/4589) by [@georgesittas](https://github.com/georgesittas))*: allow limit, offset to be used as both modifiers and aliases (#4589) - due to [`b7ab3f1`](https://github.com/tobymao/sqlglot/commit/b7ab3f1697bda3d67a1183e6cd78dbd13777112b) - exp.Merge condition for Trino/Postgres *(PR [#4596](https://github.com/tobymao/sqlglot/pull/4596) by [@MikeWallis42](https://github.com/MikeWallis42))*: exp.Merge condition for Trino/Postgres (#4596) - due to [`e617d40`](https://github.com/tobymao/sqlglot/commit/e617d407ece96d3c3311c95936ccdca6ecd35a70) - extend ANALYZE common syntax to cover multiple dialects *(PR [#4591](https://github.com/tobymao/sqlglot/pull/4591) by [@zashroof](https://github.com/zashroof))*: extend ANALYZE common syntax to cover multiple dialects (#4591) ### :sparkles: New Features - [`c75016a`](https://github.com/tobymao/sqlglot/commit/c75016a83cda5eb328f854a8628884b90dec10e4) - parse analyze compute statistics *(PR [#4547](https://github.com/tobymao/sqlglot/pull/4547) by [@zashroof](https://github.com/zashroof))* - [`986a1da`](https://github.com/tobymao/sqlglot/commit/986a1da98fa5648bc3e364ae436dc4168a1b33ed) - Druid dialect *(PR [#4579](https://github.com/tobymao/sqlglot/pull/4579) by [@betodealmeida](https://github.com/betodealmeida))* - [`bc9975f`](https://github.com/tobymao/sqlglot/commit/bc9975fe80d66b0c25b8755f1757f049edb4d0be) - move to rustc fx hashmap *(PR [#4588](https://github.com/tobymao/sqlglot/pull/4588) by [@benfdking](https://github.com/benfdking))* - [`853cbe6`](https://github.com/tobymao/sqlglot/commit/853cbe655f2aa3fa4debb8091b335eb6f9530390) - cleaner IS_ASCII for TSQL *(PR 
[#4592](https://github.com/tobymao/sqlglot/pull/4592) by [@pruzko](https://github.com/pruzko))* - [`3ebd879`](https://github.com/tobymao/sqlglot/commit/3ebd87919a4a9947c077c657c03ba2d2b3799620) - LOGICAL_AND and LOGICAL_OR for Oracle *(PR [#4593](https://github.com/tobymao/sqlglot/pull/4593) by [@pruzko](https://github.com/pruzko))* - [`e617d40`](https://github.com/tobymao/sqlglot/commit/e617d407ece96d3c3311c95936ccdca6ecd35a70) - extend ANALYZE common syntax to cover multiple dialects *(PR [#4591](https://github.com/tobymao/sqlglot/pull/4591) by [@zashroof](https://github.com/zashroof))* ### :bug: Bug Fixes - [`766d698`](https://github.com/tobymao/sqlglot/commit/766d69886ac088de7dd9a22d71124ffa1b36d003) - **postgres**: Revert exp.StrPos generation *(PR [#4586](https://github.com/tobymao/sqlglot/pull/4586) by [@VaggelisD](https://github.com/VaggelisD))* - [`f3fcc10`](https://github.com/tobymao/sqlglot/commit/f3fcc1013dfcfdaa388ba3426ed82c4fe0eefab1) - **parser**: allow limit, offset to be used as both modifiers and aliases *(PR [#4589](https://github.com/tobymao/sqlglot/pull/4589) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4575](https://github.com/tobymao/sqlglot/issues/4575) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`2bea466`](https://github.com/tobymao/sqlglot/commit/2bea466cbef3adfc09185176ee38ddf820b3f7ab) - **optimizer**: unions on nested subqueries *(PR [#4603](https://github.com/tobymao/sqlglot/pull/4603) by [@barakalon](https://github.com/barakalon))* - [`199508a`](https://github.com/tobymao/sqlglot/commit/199508a77c62f75b5e12fee47828d34e4903c706) - **snowflake**: treat $ as part of the json path key identifier *(PR [#4604](https://github.com/tobymao/sqlglot/pull/4604) by [@georgesittas](https://github.com/georgesittas))* - [`b7ab3f1`](https://github.com/tobymao/sqlglot/commit/b7ab3f1697bda3d67a1183e6cd78dbd13777112b) - exp.Merge condition for Trino/Postgres *(PR 
[#4596](https://github.com/tobymao/sqlglot/pull/4596) by [@MikeWallis42](https://github.com/MikeWallis42))* - :arrow_lower_right: *fixes issue [#4595](https://github.com/tobymao/sqlglot/issues/4595) opened by [@MikeWallis42](https://github.com/MikeWallis42)* ### :recycle: Refactors - [`c0f7309`](https://github.com/tobymao/sqlglot/commit/c0f7309327e21204a0a0f273712d3097f02f6796) - simplify `trie_filter` closure in `Tokenizer` initialization *(PR [#4599](https://github.com/tobymao/sqlglot/pull/4599) by [@gvozdvmozgu](https://github.com/gvozdvmozgu))* - [`fb93219`](https://github.com/tobymao/sqlglot/commit/fb932198087e5e3aa1a42e65ac30f28e24c6d84f) - replace `std::mem::replace` with `std::mem::take` and `Vec::drain` *(PR [#4600](https://github.com/tobymao/sqlglot/pull/4600) by [@gvozdvmozgu](https://github.com/gvozdvmozgu))* ### :wrench: Chores - [`672d656`](https://github.com/tobymao/sqlglot/commit/672d656eb5a014ba42492ba2c2a9a33ebd145bd8) - clean up ANALYZE implementation *(PR [#4607](https://github.com/tobymao/sqlglot/pull/4607) by [@georgesittas](https://github.com/georgesittas))* - [`e58a8cb`](https://github.com/tobymao/sqlglot/commit/e58a8cb4d388d22eff8fd2cca08f38e4c42075d6) - apply clippy fixes *(PR [#4608](https://github.com/tobymao/sqlglot/pull/4608) by [@benfdking](https://github.com/benfdking))* - [`5502c94`](https://github.com/tobymao/sqlglot/commit/5502c94d665a2ed354e44beb145e767bab00adfa) - bump sqlglotrs to 0.3.5 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.1.3] - 2025-01-09 ### :bug: Bug Fixes - [`d250846`](https://github.com/tobymao/sqlglot/commit/d250846d05711ac62a45efd4930f0ca712841b11) - **snowflake**: generate LIMIT when OFFSET exists [#4575](https://github.com/tobymao/sqlglot/pull/4575) *(PR [#4581](https://github.com/tobymao/sqlglot/pull/4581) by [@geooo109](https://github.com/geooo109))* ### :wrench: Chores - [`ffbb935`](https://github.com/tobymao/sqlglot/commit/ffbb9350f8d0decab4555471ec2e468fa2741f5f) - install python 
3.7 when building windows wheel for sqlglotrs *(PR [#4585](https://github.com/tobymao/sqlglot/pull/4585) by [@georgesittas](https://github.com/georgesittas))* - [`1ea05c0`](https://github.com/tobymao/sqlglot/commit/1ea05c0b4e3cf53482058b22ecac7ec7c1de525d) - bump sqlglotrs to 0.3.4 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.1.2] - 2025-01-08 ### :wrench: Chores - [`e33af0b`](https://github.com/tobymao/sqlglot/commit/e33af0bcd859571dab68aef3a1fc9ecbf5c49e71) - try setup-python@v5 in the publish job *(PR [#4582](https://github.com/tobymao/sqlglot/pull/4582) by [@georgesittas](https://github.com/georgesittas))* - [`3259f84`](https://github.com/tobymao/sqlglot/commit/3259f84f1faa6f1135ecca7d0f5fcd4b187b4da7) - bump sqlglotrs to 0.3.3 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.1.1] - 2025-01-08 ### :wrench: Chores - [`e51d1cf`](https://github.com/tobymao/sqlglot/commit/e51d1cfb0aa1028bb116851b03b759282305217b) - release sqlglotrs for Python 3.13 on windows *(PR [#4580](https://github.com/tobymao/sqlglot/pull/4580) by [@VaggelisD](https://github.com/VaggelisD))* - [`975ffa0`](https://github.com/tobymao/sqlglot/commit/975ffa0e10f08243375e5e83384fd0e134730d14) - bump sqlglotrs to 0.3.2 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.1.0] - 2025-01-08 ### :boom: BREAKING CHANGES - due to [`07d05da`](https://github.com/tobymao/sqlglot/commit/07d05da95c7d3882a7032dade3cbeefbd96628b7) - normalize before qualifying tables *(PR [#4539](https://github.com/tobymao/sqlglot/pull/4539) by [@tobymao](https://github.com/tobymao))*: normalize before qualifying tables (#4539) - due to [`cead7c3`](https://github.com/tobymao/sqlglot/commit/cead7c32bef44c0efaf48c2038976c7c7f2b709c) - require AS token in CTEs for all dialects except spark, databricks *(PR [#4546](https://github.com/tobymao/sqlglot/pull/4546) by [@georgesittas](https://github.com/georgesittas))*: require AS token in CTEs for all dialects except 
spark, databricks (#4546) - due to [`231d032`](https://github.com/tobymao/sqlglot/commit/231d03202e4338ee097662d59770dae1a9958617) - support Unicode in sqlite, mysql, tsql, postgres, oracle *(PR [#4554](https://github.com/tobymao/sqlglot/pull/4554) by [@pruzko](https://github.com/pruzko))*: support Unicode in sqlite, mysql, tsql, postgres, oracle (#4554) - due to [`83595b6`](https://github.com/tobymao/sqlglot/commit/83595b67f0aa4cafdfcf4bace7b92c17f9e9f5f3) - parse ASCII into Unicode to facilitate transpilation *(commit by [@georgesittas](https://github.com/georgesittas))*: parse ASCII into Unicode to facilitate transpilation - due to [`e141960`](https://github.com/tobymao/sqlglot/commit/e1419607981cd8fe597781faeae429069b13d5fb) - improve transpilation of CHAR[ACTER]_LENGTH *(PR [#4555](https://github.com/tobymao/sqlglot/pull/4555) by [@pruzko](https://github.com/pruzko))*: improve transpilation of CHAR[ACTER]_LENGTH (#4555) ### :sparkles: New Features - [`7a517d7`](https://github.com/tobymao/sqlglot/commit/7a517d71dbcab4b46538263cac604ac38e714e6b) - Introduce meta comment to parse known functions as exp.Anonymous *(PR [#4532](https://github.com/tobymao/sqlglot/pull/4532) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4522](https://github.com/tobymao/sqlglot/issues/4522) opened by [@github-christophe-oudar](https://github.com/github-christophe-oudar)* - [`6992c18`](https://github.com/tobymao/sqlglot/commit/6992c1855f343a5d0120a3b4c993d8c406dd29ba) - **tokenizer**: Allow underscore separated number literals *(PR [#4536](https://github.com/tobymao/sqlglot/pull/4536) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4530](https://github.com/tobymao/sqlglot/issues/4530) opened by [@martijnthe](https://github.com/martijnthe)* - [`7fe9f7f`](https://github.com/tobymao/sqlglot/commit/7fe9f7f6dbc701580ca17318400203245331704e) - **tsql**: add support for DATETRUNC 
[#4531](https://github.com/tobymao/sqlglot/pull/4531) *(PR [#4537](https://github.com/tobymao/sqlglot/pull/4537) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *addresses issue [#4531](https://github.com/tobymao/sqlglot/issues/4531) opened by [@rajat-wisdom](https://github.com/rajat-wisdom)* - [`931eef6`](https://github.com/tobymao/sqlglot/commit/931eef6958be87ef88f4ff5311441e7a7004b8c5) - **duckdb**: Support simplified UNPIVOT statement *(PR [#4545](https://github.com/tobymao/sqlglot/pull/4545) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4542](https://github.com/tobymao/sqlglot/issues/4542) opened by [@Bilbottom](https://github.com/Bilbottom)* - [`923a1f7`](https://github.com/tobymao/sqlglot/commit/923a1f7fda66f3dc61ee12755fe8960f8aeb3cd8) - treat NAMESPACE as a db creatable *(PR [#4556](https://github.com/tobymao/sqlglot/pull/4556) by [@TanviPardeshi](https://github.com/TanviPardeshi))* - [`b0cc7d0`](https://github.com/tobymao/sqlglot/commit/b0cc7d029a78c7929daff9b30dc072608d9c80b0) - add support for IS_ASCII *(PR [#4557](https://github.com/tobymao/sqlglot/pull/4557) by [@pruzko](https://github.com/pruzko))* - [`231d032`](https://github.com/tobymao/sqlglot/commit/231d03202e4338ee097662d59770dae1a9958617) - support Unicode in sqlite, mysql, tsql, postgres, oracle *(PR [#4554](https://github.com/tobymao/sqlglot/pull/4554) by [@pruzko](https://github.com/pruzko))* - [`83595b6`](https://github.com/tobymao/sqlglot/commit/83595b67f0aa4cafdfcf4bace7b92c17f9e9f5f3) - **hive**: parse ASCII into Unicode to facilitate transpilation *(commit by [@georgesittas](https://github.com/georgesittas))* - [`095fb1e`](https://github.com/tobymao/sqlglot/commit/095fb1e834153eeeea33885dc20e1ba05f8bf814) - generate POSITION instead of STRPOS for Postgres *(PR [#4577](https://github.com/tobymao/sqlglot/pull/4577) by [@pruzko](https://github.com/pruzko))* - 
[`9def0b7`](https://github.com/tobymao/sqlglot/commit/9def0b79ee623a07d8c367e0ec575ed8e63c83c6) - add support for Chr in tsql and sqlite *(PR [#4566](https://github.com/tobymao/sqlglot/pull/4566) by [@pruzko](https://github.com/pruzko))* - [`d32d26a`](https://github.com/tobymao/sqlglot/commit/d32d26affaa7b0639abc107505db234aeb7386d4) - **postgres**: add support for XMLTABLE *(commit by [@georgesittas](https://github.com/georgesittas))* - [`e141960`](https://github.com/tobymao/sqlglot/commit/e1419607981cd8fe597781faeae429069b13d5fb) - improve transpilation of CHAR[ACTER]_LENGTH *(PR [#4555](https://github.com/tobymao/sqlglot/pull/4555) by [@pruzko](https://github.com/pruzko))* ### :bug: Bug Fixes - [`07d05da`](https://github.com/tobymao/sqlglot/commit/07d05da95c7d3882a7032dade3cbeefbd96628b7) - normalize before qualifying tables *(PR [#4539](https://github.com/tobymao/sqlglot/pull/4539) by [@tobymao](https://github.com/tobymao))* - :arrow_lower_right: *fixes issue [#4538](https://github.com/tobymao/sqlglot/issues/4538) opened by [@karakanb](https://github.com/karakanb)* - [`1ed3235`](https://github.com/tobymao/sqlglot/commit/1ed32358adc6b578e8b8af265ac8afe37aae9ad8) - allow When in exp.merge fixes [#4543](https://github.com/tobymao/sqlglot/pull/4543) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`cead7c3`](https://github.com/tobymao/sqlglot/commit/cead7c32bef44c0efaf48c2038976c7c7f2b709c) - **parser**: require AS token in CTEs for all dialects except spark, databricks *(PR [#4546](https://github.com/tobymao/sqlglot/pull/4546) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4544](https://github.com/tobymao/sqlglot/issues/4544) opened by [@xtess16](https://github.com/xtess16)* - [`006b384`](https://github.com/tobymao/sqlglot/commit/006b3842f90186f8932f0dbf02453f138129608b) - **postgres**: add support for WHERE clause in INSERT DML *(PR [#4550](https://github.com/tobymao/sqlglot/pull/4550) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4549](https://github.com/tobymao/sqlglot/issues/4549) opened by [@VigneshChennai](https://github.com/VigneshChennai)* - [`795e7e0`](https://github.com/tobymao/sqlglot/commit/795e7e0e857486417ce98246389849fc09ccb60a) - Pin ubuntu to 22.04 for Python 3.7 *(PR [#4571](https://github.com/tobymao/sqlglot/pull/4571) by [@VaggelisD](https://github.com/VaggelisD))* - [`2495508`](https://github.com/tobymao/sqlglot/commit/2495508fa7b3931d466c36b5ed225b2e1510b01c) - **tsql**: generate correct DateFromParts naming *(PR [#4563](https://github.com/tobymao/sqlglot/pull/4563) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4558](https://github.com/tobymao/sqlglot/issues/4558) opened by [@rajat-wisdom](https://github.com/rajat-wisdom)* - [`2aff4ae`](https://github.com/tobymao/sqlglot/commit/2aff4ae861dc5225a616f5e3980cf04805e5b339) - **duckdb**: support parentheses with FROM-First syntax *(PR [#4569](https://github.com/tobymao/sqlglot/pull/4569) by [@geooo109](https://github.com/geooo109))* - :arrow_lower_right: *fixes issue [#4561](https://github.com/tobymao/sqlglot/issues/4561) opened by [@LennartH](https://github.com/LennartH)* - [`51ac9a7`](https://github.com/tobymao/sqlglot/commit/51ac9a7b8a91d1bb5b3b6b396e1083c03573a708) - **rust-tokenizer**: increase integer width when converting hex literals *(PR [#4573](https://github.com/tobymao/sqlglot/pull/4573) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4560](https://github.com/tobymao/sqlglot/issues/4560) opened by [@whummer](https://github.com/whummer)* - [`94ffdb7`](https://github.com/tobymao/sqlglot/commit/94ffdb7b790c6c2235a0586c6df23c3155c184b1) - addressing mismatch in STR_POSITION argument order in executor. 
*(PR [#4574](https://github.com/tobymao/sqlglot/pull/4574) by [@cecilycarver](https://github.com/cecilycarver))* - [`139b699`](https://github.com/tobymao/sqlglot/commit/139b699f61326bdf9700f0ba9bea9a44e594cf6d) - **tsql**: transpile snowflake TIMESTAMP_NTZ to DATETIME2 *(PR [#4576](https://github.com/tobymao/sqlglot/pull/4576) by [@geooo109](https://github.com/geooo109))* - [`ceb42fa`](https://github.com/tobymao/sqlglot/commit/ceb42fabad60312699e4b15936aeebac00e22e4d) - parse & generate Length properly in clickhouse *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`9921528`](https://github.com/tobymao/sqlglot/commit/992152840ffad5fb85315e0bead2c498d4310cc5) - introduce benchmarking for rust *(PR [#4552](https://github.com/tobymao/sqlglot/pull/4552) by [@benfdking](https://github.com/benfdking))* - [`0ffe8f9`](https://github.com/tobymao/sqlglot/commit/0ffe8f91eb8295ab8171e029aa4ccbf071028a4a) - temporarily disable sqlglotrs benchmarking *(PR [#4578](https://github.com/tobymao/sqlglot/pull/4578) by [@georgesittas](https://github.com/georgesittas))* - [`7a0dbcf`](https://github.com/tobymao/sqlglot/commit/7a0dbcfda26ff7cf20371c84b31f454e63260959) - bump sqlglotrs to 0.3.1 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v26.0.1] - 2024-12-18 ### :sparkles: New Features - [`5d3ee4c`](https://github.com/tobymao/sqlglot/commit/5d3ee4cac1c5c9e45cbf6263c32c87fda78f9854) - **snowflake**: transpile date subtraction *(PR [#4506](https://github.com/tobymao/sqlglot/pull/4506) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4485](https://github.com/tobymao/sqlglot/issues/4485) opened by [@cisenbe](https://github.com/cisenbe)* - [`efeb4bd`](https://github.com/tobymao/sqlglot/commit/efeb4bd870dd5c017b31d6b95c9bd6311c75b9ae) - **postgres**: add support for XMLELEMENT *(PR [#4513](https://github.com/tobymao/sqlglot/pull/4513) by [@georgesittas](https://github.com/georgesittas))* - 
:arrow_lower_right: *addresses issue [#4512](https://github.com/tobymao/sqlglot/issues/4512) opened by [@fresioAS](https://github.com/fresioAS)* - [`e495777`](https://github.com/tobymao/sqlglot/commit/e495777b8612866041050c96d3df700cd829dc9c) - **clickhouse**: add support for bracket map syntax *(PR [#4528](https://github.com/tobymao/sqlglot/pull/4528) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4527](https://github.com/tobymao/sqlglot/issues/4527) opened by [@mrcljx](https://github.com/mrcljx)* - [`cc44ed7`](https://github.com/tobymao/sqlglot/commit/cc44ed73fa4489e0bcb457b7eae8a9772415db65) - **mysql**: Support SERIAL data type *(PR [#4533](https://github.com/tobymao/sqlglot/pull/4533) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4529](https://github.com/tobymao/sqlglot/issues/4529) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`ee7dc96`](https://github.com/tobymao/sqlglot/commit/ee7dc966d533228756c3294c66422c27eceae503) - **starrocks**: add partition by range and unique key *(PR [#4509](https://github.com/tobymao/sqlglot/pull/4509) by [@pickfire](https://github.com/pickfire))* - [`84ec478`](https://github.com/tobymao/sqlglot/commit/84ec47810e0a5c9e71a2b48e686656f9c2eafb39) - **lineage**: Extend lineage function to work with pivot operation *(PR [#4471](https://github.com/tobymao/sqlglot/pull/4471) by [@step4](https://github.com/step4))* - [`52c8374`](https://github.com/tobymao/sqlglot/commit/52c8374876bc4037dcb81a50301fdd62cb14bb2a) - include comments in gen *(PR [#4535](https://github.com/tobymao/sqlglot/pull/4535) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`8f8e84a`](https://github.com/tobymao/sqlglot/commit/8f8e84ae81d60bea224e35b9ca88b0bb4a59512b) - **snowflake**: bitxor third parameter(padside) issue *(PR [#4501](https://github.com/tobymao/sqlglot/pull/4501) by [@ankur334](https://github.com/ankur334))* - 
[`4760246`](https://github.com/tobymao/sqlglot/commit/476024653e5b942faaaaa2b3bce30a3ea1873190) - **snowflake**: generate only one INPUT => clause in unnest_sql *(PR [#4505](https://github.com/tobymao/sqlglot/pull/4505) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4503](https://github.com/tobymao/sqlglot/issues/4503) opened by [@harounp](https://github.com/harounp)* - [`7649d50`](https://github.com/tobymao/sqlglot/commit/7649d5053e3305dadb83769bb5cec52ed8235a19) - **optimizer**: only expand stars for select scopes *(PR [#4515](https://github.com/tobymao/sqlglot/pull/4515) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4514](https://github.com/tobymao/sqlglot/issues/4514) opened by [@florian-ernst-alan](https://github.com/florian-ernst-alan)* - [`2b68b9b`](https://github.com/tobymao/sqlglot/commit/2b68b9b7967b68465042a1b8c2ee21bb30007712) - **snowflake**: Allow alias expansion inside JOIN statements *(PR [#4504](https://github.com/tobymao/sqlglot/pull/4504) by [@florian-ernst-alan](https://github.com/florian-ernst-alan))* - :arrow_lower_right: *fixes issue [#4502](https://github.com/tobymao/sqlglot/issues/4502) opened by [@florian-ernst-alan](https://github.com/florian-ernst-alan)* - [`e15cd0b`](https://github.com/tobymao/sqlglot/commit/e15cd0be1c66e0e72d9815575fa9b210e66cf7c9) - **postgres**: generate float if the type has precision *(PR [#4516](https://github.com/tobymao/sqlglot/pull/4516) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4508](https://github.com/tobymao/sqlglot/issues/4508) opened by [@RedTailedHawk](https://github.com/RedTailedHawk)* - [`98906d4`](https://github.com/tobymao/sqlglot/commit/98906d4520a0c582a0534384ee3d0c1449846ee6) - another interval parsing edge case *(PR [#4519](https://github.com/tobymao/sqlglot/pull/4519) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue 
[#4490](https://github.com/tobymao/sqlglot/issues/4490) opened by [@fuglaeff](https://github.com/fuglaeff)* - [`992f6e9`](https://github.com/tobymao/sqlglot/commit/992f6e9fc867aa5ad60a255be593b8982a0fbcba) - **tsql**: Convert exp.Neg literal to number through to_py() *(PR [#4523](https://github.com/tobymao/sqlglot/pull/4523) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4520](https://github.com/tobymao/sqlglot/issues/4520) opened by [@DzianisKryvasheya](https://github.com/DzianisKryvasheya)* - [`946cd42`](https://github.com/tobymao/sqlglot/commit/946cd4234a2ca403785b7c6a026a39ef604e8754) - **optimizer**: qualify snowflake queries with `level` pseudocolumn *(PR [#4524](https://github.com/tobymao/sqlglot/pull/4524) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4518](https://github.com/tobymao/sqlglot/issues/4518) opened by [@florian-ernst-alan](https://github.com/florian-ernst-alan)* - [`bc68289`](https://github.com/tobymao/sqlglot/commit/bc68289d4d368b29241e56b8f0aefc36db65ad47) - **planner**: ensure aggregate variable is bound *(PR [#4526](https://github.com/tobymao/sqlglot/pull/4526) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4525](https://github.com/tobymao/sqlglot/issues/4525) opened by [@EyalDlph](https://github.com/EyalDlph)* ### :recycle: Refactors - [`cd6e00f`](https://github.com/tobymao/sqlglot/commit/cd6e00f55195e26c3d02e255e66b45ab781addad) - clean up pivot lineage *(PR [#4534](https://github.com/tobymao/sqlglot/pull/4534) by [@georgesittas](https://github.com/georgesittas))* ## [v26.0.0] - 2024-12-10 ### :boom: BREAKING CHANGES - due to [`1d3c9aa`](https://github.com/tobymao/sqlglot/commit/1d3c9aa604c7bf60166a0e5587f1a8d88b89bea6) - Transpile support for bitor/bit_or snowflake function *(PR [#4486](https://github.com/tobymao/sqlglot/pull/4486) by [@ankur334](https://github.com/ankur334))*: Transpile support for bitor/bit_or 
snowflake function (#4486) - due to [`ab10851`](https://github.com/tobymao/sqlglot/commit/ab108518c53173ddf71ac1dfd9e45df6ac621b81) - Preserve roundtrips of DATETIME/DATETIME2 *(PR [#4491](https://github.com/tobymao/sqlglot/pull/4491) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve roundtrips of DATETIME/DATETIME2 (#4491) ### :sparkles: New Features - [`1d3c9aa`](https://github.com/tobymao/sqlglot/commit/1d3c9aa604c7bf60166a0e5587f1a8d88b89bea6) - **snowflake**: Transpile support for bitor/bit_or snowflake function *(PR [#4486](https://github.com/tobymao/sqlglot/pull/4486) by [@ankur334](https://github.com/ankur334))* - [`822aea0`](https://github.com/tobymao/sqlglot/commit/822aea0826f09fa773193004acb2af99e495fddd) - **snowflake**: Support for inline FOREIGN KEY *(PR [#4493](https://github.com/tobymao/sqlglot/pull/4493) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4489](https://github.com/tobymao/sqlglot/issues/4489) opened by [@kylekarpack](https://github.com/kylekarpack)* ### :bug: Bug Fixes - [`ab10851`](https://github.com/tobymao/sqlglot/commit/ab108518c53173ddf71ac1dfd9e45df6ac621b81) - **tsql**: Preserve roundtrips of DATETIME/DATETIME2 *(PR [#4491](https://github.com/tobymao/sqlglot/pull/4491) by [@VaggelisD](https://github.com/VaggelisD))* - [`43975e4`](https://github.com/tobymao/sqlglot/commit/43975e4b7abcd640cd5a0f91aea1fbda8dd893cb) - **duckdb**: Allow escape strings similar to Postgres *(PR [#4497](https://github.com/tobymao/sqlglot/pull/4497) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4496](https://github.com/tobymao/sqlglot/issues/4496) opened by [@LennartH](https://github.com/LennartH)* ## [v25.34.1] - 2024-12-10 ### :boom: BREAKING CHANGES - due to [`f70f124`](https://github.com/tobymao/sqlglot/commit/f70f12408fbaf021dd105f2eac957b9e6fac045d) - transpile MySQL FORMAT to DuckDB *(PR [#4488](https://github.com/tobymao/sqlglot/pull/4488) by 
[@georgesittas](https://github.com/georgesittas))*: transpile MySQL FORMAT to DuckDB (#4488) ### :sparkles: New Features - [`f70f124`](https://github.com/tobymao/sqlglot/commit/f70f12408fbaf021dd105f2eac957b9e6fac045d) - transpile MySQL FORMAT to DuckDB *(PR [#4488](https://github.com/tobymao/sqlglot/pull/4488) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4445](https://github.com/tobymao/sqlglot/issues/4445) opened by [@fanyang01](https://github.com/fanyang01)* - [`5a276f3`](https://github.com/tobymao/sqlglot/commit/5a276f33df48dab96e77c560c4b193f9634974f7) - add parse into tuple *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`ddf7483`](https://github.com/tobymao/sqlglot/commit/ddf74833c3e033067e731eab387db658a9a803be) - enable python 3.13 in CI *(PR [#4483](https://github.com/tobymao/sqlglot/pull/4483) by [@simon-pactum](https://github.com/simon-pactum))* ## [v25.34.0] - 2024-12-06 ### :boom: BREAKING CHANGES - due to [`41c6d24`](https://github.com/tobymao/sqlglot/commit/41c6d24c99e130b3c8e35e348a25a59e9e3d5553) - Alias expanded USING STRUCT fields *(PR [#4474](https://github.com/tobymao/sqlglot/pull/4474) by [@VaggelisD](https://github.com/VaggelisD))*: Alias expanded USING STRUCT fields (#4474) ### :sparkles: New Features - [`41c6d24`](https://github.com/tobymao/sqlglot/commit/41c6d24c99e130b3c8e35e348a25a59e9e3d5553) - **optimizer**: Alias expanded USING STRUCT fields *(PR [#4474](https://github.com/tobymao/sqlglot/pull/4474) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3465](https://github.com/TobikoData/sqlmesh/issues/3465) opened by [@esciara](https://github.com/esciara)* ### :bug: Bug Fixes - [`a34bcde`](https://github.com/tobymao/sqlglot/commit/a34bcde1f7b4b2974a0132555477fa5a788126b4) - **bigquery**: properly consume dashed table parts *(PR [#4477](https://github.com/tobymao/sqlglot/pull/4477) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4476](https://github.com/tobymao/sqlglot/issues/4476) opened by [@matthewcyy](https://github.com/matthewcyy)* - [`438ae4c`](https://github.com/tobymao/sqlglot/commit/438ae4c0691fb3ad43ef95e613118a116cb7924c) - **bigquery**: Do not generate NULL ordering on Windows *(PR [#4480](https://github.com/tobymao/sqlglot/pull/4480) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4478](https://github.com/tobymao/sqlglot/issues/4478) opened by [@goldmedal](https://github.com/goldmedal)* ## [v25.33.0] - 2024-12-04 ### :boom: BREAKING CHANGES - due to [`07fa69d`](https://github.com/tobymao/sqlglot/commit/07fa69dcb8970167ba0c55fff39175ab856ea9f3) - Make TIMESTAMP map to Type.TIMESTAMPTZ *(PR [#4451](https://github.com/tobymao/sqlglot/pull/4451) by [@VaggelisD](https://github.com/VaggelisD))*: Make TIMESTAMP map to Type.TIMESTAMPTZ (#4451) - due to [`63d8f41`](https://github.com/tobymao/sqlglot/commit/63d8f41794b2e9d22f87d0a8fbfbd83125889ca2) - treat NEXT as a func keyword, parse NEXT VALUE FOR in tsql, oracle *(PR [#4467](https://github.com/tobymao/sqlglot/pull/4467) by [@georgesittas](https://github.com/georgesittas))*: treat NEXT as a func keyword, parse NEXT VALUE FOR in tsql, oracle (#4467) ### :sparkles: New Features - [`3945acc`](https://github.com/tobymao/sqlglot/commit/3945acc4a0dfd58147de929c9a2c71734d8f1ade) - allow tables to be preserved in replace_table *(PR [#4468](https://github.com/tobymao/sqlglot/pull/4468) by [@georgesittas](https://github.com/georgesittas))* - [`a9dca8d`](https://github.com/tobymao/sqlglot/commit/a9dca8dd1b523efd703003694d4389f9af9d1a12) - **postgres**: Support generated columns *(PR [#4472](https://github.com/tobymao/sqlglot/pull/4472) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4463](https://github.com/tobymao/sqlglot/issues/4463) opened by [@AKST](https://github.com/AKST)* 
### :bug: Bug Fixes - [`380dad2`](https://github.com/tobymao/sqlglot/commit/380dad2f5826caa820a69442c42805c7b3c23ada) - **bigquery**: Rename CONTAINS_SUBSTRING to CONTAINS_SUBSTR *(PR [#4457](https://github.com/tobymao/sqlglot/pull/4457) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4456](https://github.com/tobymao/sqlglot/issues/4456) opened by [@romanhaa](https://github.com/romanhaa)* - [`ca5023d`](https://github.com/tobymao/sqlglot/commit/ca5023db5ea2a2ece804f6e389640e0bd4987598) - **presto**: Remove parentheses from CURRENT_USER *(PR [#4459](https://github.com/tobymao/sqlglot/pull/4459) by [@MikeWallis42](https://github.com/MikeWallis42))* - :arrow_lower_right: *fixes issue [#4458](https://github.com/tobymao/sqlglot/issues/4458) opened by [@MikeWallis42](https://github.com/MikeWallis42)* - [`07fa69d`](https://github.com/tobymao/sqlglot/commit/07fa69dcb8970167ba0c55fff39175ab856ea9f3) - **spark**: Make TIMESTAMP map to Type.TIMESTAMPTZ *(PR [#4451](https://github.com/tobymao/sqlglot/pull/4451) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4442](https://github.com/tobymao/sqlglot/issues/4442) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`63d8f41`](https://github.com/tobymao/sqlglot/commit/63d8f41794b2e9d22f87d0a8fbfbd83125889ca2) - **parser**: treat NEXT as a func keyword, parse NEXT VALUE FOR in tsql, oracle *(PR [#4467](https://github.com/tobymao/sqlglot/pull/4467) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4466](https://github.com/tobymao/sqlglot/issues/4466) opened by [@Harmuth94](https://github.com/Harmuth94)* ## [v25.32.1] - 2024-11-27 ### :bug: Bug Fixes - [`954d8fd`](https://github.com/tobymao/sqlglot/commit/954d8fd12740071e0951d1df3a405a4b9634868d) - parse DEFAULT in VALUES clause into a Var *(PR [#4448](https://github.com/tobymao/sqlglot/pull/4448) by [@georgesittas](https://github.com/georgesittas))* - 
:arrow_lower_right: *fixes issue [#4446](https://github.com/tobymao/sqlglot/issues/4446) opened by [@ddh-5230](https://github.com/ddh-5230)* - [`73afd0f`](https://github.com/tobymao/sqlglot/commit/73afd0f435b7e7ccde831ee311c9a76c14797fdc) - **bigquery**: Make JSONPathTokenizer more lenient for new standards *(PR [#4447](https://github.com/tobymao/sqlglot/pull/4447) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4441](https://github.com/tobymao/sqlglot/issues/4441) opened by [@patricksurry](https://github.com/patricksurry)* ## [v25.32.0] - 2024-11-22 ### :boom: BREAKING CHANGES - due to [`0eed45c`](https://github.com/tobymao/sqlglot/commit/0eed45cce82681bfbafc8bfb78eb2a1bce86ae53) - Add support for ATTACH/DETACH statements *(PR [#4419](https://github.com/tobymao/sqlglot/pull/4419) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for ATTACH/DETACH statements (#4419) - due to [`da48b68`](https://github.com/tobymao/sqlglot/commit/da48b68a4f1fa6a754fa2a0a789564675d59546f) - Tokenize hints as comments *(PR [#4426](https://github.com/tobymao/sqlglot/pull/4426) by [@VaggelisD](https://github.com/VaggelisD))*: Tokenize hints as comments (#4426) - due to [`fe35394`](https://github.com/tobymao/sqlglot/commit/fe3539464a153b1c0bf46975d6221dee48a48f02) - fix datetime coercion in the canonicalize rule *(PR [#4431](https://github.com/tobymao/sqlglot/pull/4431) by [@georgesittas](https://github.com/georgesittas))*: fix datetime coercion in the canonicalize rule (#4431) - due to [`fddcd3d`](https://github.com/tobymao/sqlglot/commit/fddcd3dfc264a645909686c201d2288c0adf9047) - bump sqlglotrs to 0.3.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.3.0 ### :sparkles: New Features - [`0eed45c`](https://github.com/tobymao/sqlglot/commit/0eed45cce82681bfbafc8bfb78eb2a1bce86ae53) - **duckdb**: Add support for ATTACH/DETACH statements *(PR [#4419](https://github.com/tobymao/sqlglot/pull/4419) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`2db757d`](https://github.com/tobymao/sqlglot/commit/2db757dfec9ded26572b8e9a71dcc8ea8a2382fe) - **bigquery**: Support FEATURES_AT_TIME *(PR [#4430](https://github.com/tobymao/sqlglot/pull/4430) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4428](https://github.com/tobymao/sqlglot/issues/4428) opened by [@YuvrajSoni-Ksolves](https://github.com/YuvrajSoni-Ksolves)* - [`fc591ae`](https://github.com/tobymao/sqlglot/commit/fc591ae2fa80be5821cb53d78906afe8e5505654) - **risingwave**: add support for SINK, SOURCE & other DDL properties *(PR [#4387](https://github.com/tobymao/sqlglot/pull/4387) by [@lin0303-siyuan](https://github.com/lin0303-siyuan))* - [`a2bde2e`](https://github.com/tobymao/sqlglot/commit/a2bde2e03e9ef8650756bf304db35b4876746d1f) - **mysql**: improve transpilability of CHAR[ACTER]_LENGTH *(commit by [@georgesittas](https://github.com/georgesittas))* - [`0acc248`](https://github.com/tobymao/sqlglot/commit/0acc248361f49f68f17d799cbaf6b3de06c57f7e) - **snowflake**: Support CREATE ... 
WITH TAG *(PR [#4434](https://github.com/tobymao/sqlglot/pull/4434) by [@asikowitz](https://github.com/asikowitz))* - :arrow_lower_right: *addresses issue [#4427](https://github.com/tobymao/sqlglot/issues/4427) opened by [@asikowitz](https://github.com/asikowitz)* - [`37863ff`](https://github.com/tobymao/sqlglot/commit/37863ffd747cad9c2b9bed60119cc1551faeffda) - **snowflake**: Transpile non-UNNEST exp.GenerateDateArray refs *(PR [#4433](https://github.com/tobymao/sqlglot/pull/4433) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`83ee97b`](https://github.com/tobymao/sqlglot/commit/83ee97b34cd0fe269b4820f15147d1ed7523612e) - **parser**: Do not parse window function arg as exp.Column *(PR [#4415](https://github.com/tobymao/sqlglot/pull/4415) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4410](https://github.com/tobymao/sqlglot/issues/4410) opened by [@merlindso](https://github.com/merlindso)* - [`b22e0c8`](https://github.com/tobymao/sqlglot/commit/b22e0c8680b0ee5a382e57904b698bf21a94f782) - **parser**: Extend DESCRIBE parser for MySQL FORMAT & statements *(PR [#4417](https://github.com/tobymao/sqlglot/pull/4417) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4414](https://github.com/tobymao/sqlglot/issues/4414) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`d1d2ae7`](https://github.com/tobymao/sqlglot/commit/d1d2ae7d1514abc9477d275352e5e126509157c6) - **duckdb**: Allow count arg on exp.ArgMax & exp.ArgMin *(PR [#4413](https://github.com/tobymao/sqlglot/pull/4413) by [@aersam](https://github.com/aersam))* - :arrow_lower_right: *fixes issue [#4412](https://github.com/tobymao/sqlglot/issues/4412) opened by [@aersam](https://github.com/aersam)* - [`e3c45d5`](https://github.com/tobymao/sqlglot/commit/e3c45d5ec0ae6827e4b0bcfb047aeac131379732) - presto reset session closes [#4421](https://github.com/tobymao/sqlglot/pull/4421) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`fd81f1b`](https://github.com/tobymao/sqlglot/commit/fd81f1bfee9a566b8df8bb501828c20bd72ac481) - more presto commands *(commit by [@tobymao](https://github.com/tobymao))* - [`da48b68`](https://github.com/tobymao/sqlglot/commit/da48b68a4f1fa6a754fa2a0a789564675d59546f) - Tokenize hints as comments *(PR [#4426](https://github.com/tobymao/sqlglot/pull/4426) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4425](https://github.com/tobymao/sqlglot/issues/4425) opened by [@mkmoisen](https://github.com/mkmoisen)* - [`69d4a8c`](https://github.com/tobymao/sqlglot/commit/69d4a8ccdf5954f293acbdf61c420b72dde5b8af) - **tsql**: Map weekday to %w *(PR [#4438](https://github.com/tobymao/sqlglot/pull/4438) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4435](https://github.com/tobymao/sqlglot/issues/4435) opened by [@travispaice](https://github.com/travispaice)* - [`41d6a13`](https://github.com/tobymao/sqlglot/commit/41d6a13ccfb28fbcf772fd43ea17da3b36567e67) - add return type *(PR [#4440](https://github.com/tobymao/sqlglot/pull/4440) by [@etonlels](https://github.com/etonlels))* - [`fe35394`](https://github.com/tobymao/sqlglot/commit/fe3539464a153b1c0bf46975d6221dee48a48f02) - **optimizer**: fix datetime coercion in the canonicalize rule *(PR [#4431](https://github.com/tobymao/sqlglot/pull/4431) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4429](https://github.com/tobymao/sqlglot/issues/4429) opened by [@Ca1ypso](https://github.com/Ca1ypso)* - [`6aea9f3`](https://github.com/tobymao/sqlglot/commit/6aea9f346ef8f91467e1d5da5a3f94cf862b44fe) - Refactor NORMALIZE_FUNCTIONS flag usage *(PR [#4437](https://github.com/tobymao/sqlglot/pull/4437) by [@VaggelisD](https://github.com/VaggelisD))* ### :recycle: Refactors - [`f32a435`](https://github.com/tobymao/sqlglot/commit/f32a435205ec288f310ad57748ac66805e27f7f5) - **risingwave**: 
clean up SINK/SOURCE logic *(PR [#4432](https://github.com/tobymao/sqlglot/pull/4432) by [@georgesittas](https://github.com/georgesittas))* - [`b24aced`](https://github.com/tobymao/sqlglot/commit/b24aced2dbb7e471d2dd0eb830ea4f2e24f9d267) - **snowflake**: clean up [WITH] TAG property / constraint *(PR [#4439](https://github.com/tobymao/sqlglot/pull/4439) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`fddcd3d`](https://github.com/tobymao/sqlglot/commit/fddcd3dfc264a645909686c201d2288c0adf9047) - bump sqlglotrs to 0.3.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.31.4] - 2024-11-17 ### :bug: Bug Fixes - [`59b8b6d`](https://github.com/tobymao/sqlglot/commit/59b8b6d1409b4112d425cc31db45519d5936b6fa) - preserve column quoting in DISTINCT ON elimination *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.31.3] - 2024-11-17 ### :sparkles: New Features - [`835e717`](https://github.com/tobymao/sqlglot/commit/835e71795f994599dbc19f1a5969b464154926e1) - **clickhouse**: transform function support *(PR [#4408](https://github.com/tobymao/sqlglot/pull/4408) by [@GaliFFun](https://github.com/GaliFFun))* ### :bug: Bug Fixes - [`0479743`](https://github.com/tobymao/sqlglot/commit/047974393cebbddbbfb878071d159a3e538b0e4d) - **snowflake**: cast to TimeToStr arg to TIMESTAMP more conservatively *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.31.2] - 2024-11-17 ### :bug: Bug Fixes - [`d851269`](https://github.com/tobymao/sqlglot/commit/d851269780c7f0a0c756289c3dea9b1aa58d2a69) - use existing aliases in DISTINCT ON elimination, if any *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.31.1] - 2024-11-17 ### :sparkles: New Features - [`b00d857`](https://github.com/tobymao/sqlglot/commit/b00d857cd8a6d2452c2170077cbfa82352f708dd) - add support for specifying column in row_number function *(PR [#4406](https://github.com/tobymao/sqlglot/pull/4406) by 
[@GaliFFun](https://github.com/GaliFFun))* ### :bug: Bug Fixes - [`0e46cc7`](https://github.com/tobymao/sqlglot/commit/0e46cc7fa2d80ba4e92182b3fa5f1075a63f4754) - refactor DISTINCT ON elimination transformation *(PR [#4407](https://github.com/tobymao/sqlglot/pull/4407) by [@georgesittas](https://github.com/georgesittas))* ## [v25.31.0] - 2024-11-16 ### :boom: BREAKING CHANGES - due to [`f4abfd5`](https://github.com/tobymao/sqlglot/commit/f4abfd59b8255cf8c39bf51028ee5f6ed704927f) - Support FORMAT_TIMESTAMP *(PR [#4383](https://github.com/tobymao/sqlglot/pull/4383) by [@VaggelisD](https://github.com/VaggelisD))*: Support FORMAT_TIMESTAMP (#4383) - due to [`45eef60`](https://github.com/tobymao/sqlglot/commit/45eef600064ad024b34e32e7acc3aca409fbd9c4) - use select star when eliminating distinct on *(PR [#4401](https://github.com/tobymao/sqlglot/pull/4401) by [@agrigoroi-palantir](https://github.com/agrigoroi-palantir))*: use select star when eliminating distinct on (#4401) ### :sparkles: New Features - [`72ffdcb`](https://github.com/tobymao/sqlglot/commit/72ffdcb631bf7afdeda2ce96911442a94b7f11eb) - **bigquery**: Add parsing support for STRPOS(...) *(PR [#4378](https://github.com/tobymao/sqlglot/pull/4378) by [@VaggelisD](https://github.com/VaggelisD))* - [`e7b67e0`](https://github.com/tobymao/sqlglot/commit/e7b67e0c280179188ce1bca650735978b758dca1) - **bigquery**: Support MAKE_INTERVAL *(PR [#4384](https://github.com/tobymao/sqlglot/pull/4384) by [@VaggelisD](https://github.com/VaggelisD))* - [`37c4809`](https://github.com/tobymao/sqlglot/commit/37c4809dfda48224fd982ea8a48d3dbc5c17f9ae) - **bigquery**: Support INT64(...) *(PR [#4391](https://github.com/tobymao/sqlglot/pull/4391) by [@VaggelisD](https://github.com/VaggelisD))* - [`9694999`](https://github.com/tobymao/sqlglot/commit/96949999d394e27df8b0287a14e9ac82d52bc0f9) - Add support for CONTAINS(...) 
*(PR [#4399](https://github.com/tobymao/sqlglot/pull/4399) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`f4abfd5`](https://github.com/tobymao/sqlglot/commit/f4abfd59b8255cf8c39bf51028ee5f6ed704927f) - **bigquery**: Support FORMAT_TIMESTAMP *(PR [#4383](https://github.com/tobymao/sqlglot/pull/4383) by [@VaggelisD](https://github.com/VaggelisD))* - [`bb46ee3`](https://github.com/tobymao/sqlglot/commit/bb46ee33d481a888882cbbb26a9240dd2dbb10ee) - **parser**: Parse exp.Column for DROP COLUMN *(PR [#4390](https://github.com/tobymao/sqlglot/pull/4390) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4388](https://github.com/tobymao/sqlglot/issues/4388) opened by [@AhlamHani](https://github.com/AhlamHani)* - [`79f6783`](https://github.com/tobymao/sqlglot/commit/79f67830d7d3ba92bff91eeb95b4dc8bdfa6c44e) - **snowflake**: Wrap DIV0 operands if they're binary expressions *(PR [#4393](https://github.com/tobymao/sqlglot/pull/4393) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4392](https://github.com/tobymao/sqlglot/issues/4392) opened by [@diogo-fernan](https://github.com/diogo-fernan)* - [`647b98d`](https://github.com/tobymao/sqlglot/commit/647b98d84643b88a41218fb67f6a2bd83ca4c702) - **starrocks**: Add RESERVED_KEYWORDS specific to starrocks *(PR [#4402](https://github.com/tobymao/sqlglot/pull/4402) by [@notexistence](https://github.com/notexistence))* - [`45eef60`](https://github.com/tobymao/sqlglot/commit/45eef600064ad024b34e32e7acc3aca409fbd9c4) - use select star when eliminating distinct on *(PR [#4401](https://github.com/tobymao/sqlglot/pull/4401) by [@agrigoroi-palantir](https://github.com/agrigoroi-palantir))* ### :recycle: Refactors - [`a3af2af`](https://github.com/tobymao/sqlglot/commit/a3af2af3a893dfd6c6946b732aa086d1f1d91570) - attach statement comments consistently *(PR [#4377](https://github.com/tobymao/sqlglot/pull/4377) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4376](https://github.com/tobymao/sqlglot/issues/4376) opened by [@YieldRay](https://github.com/YieldRay)* ### :wrench: Chores - [`858c5b1`](https://github.com/tobymao/sqlglot/commit/858c5b1a43f74e11b8c357986c78b5068792b3af) - improve contribution guide *(PR [#4379](https://github.com/tobymao/sqlglot/pull/4379) by [@georgesittas](https://github.com/georgesittas))* - [`160e688`](https://github.com/tobymao/sqlglot/commit/160e6883225cd6ad41a218213f73aa9f91b5fc5e) - fix relative benchmark import, comment out sqltree *(PR [#4403](https://github.com/tobymao/sqlglot/pull/4403) by [@georgesittas](https://github.com/georgesittas))* - [`8d78add`](https://github.com/tobymao/sqlglot/commit/8d78addccaaffa4ea2dcfe1de002f8a653f137b7) - bump PYO3 to v"0.22.6" *(PR [#4400](https://github.com/tobymao/sqlglot/pull/4400) by [@MartinSahlen](https://github.com/MartinSahlen))* - [`f78e755`](https://github.com/tobymao/sqlglot/commit/f78e755adaf52823642d2b0e1cae54da835ec653) - bump sqlglotrs to v0.2.14 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.30.0] - 2024-11-11 ### :boom: BREAKING CHANGES - due to [`60625ea`](https://github.com/tobymao/sqlglot/commit/60625eae34deb6a6fc36c0f3996f1281eae0ef6f) - Fix STRUCT cast generation *(PR [#4366](https://github.com/tobymao/sqlglot/pull/4366) by [@VaggelisD](https://github.com/VaggelisD))*: Fix STRUCT cast generation (#4366) ### :sparkles: New Features - [`87ab8fe`](https://github.com/tobymao/sqlglot/commit/87ab8fe9cc4d6d060d8fe8a9c3faf8c47c2c9ed6) - **spark, bigquery**: Add support for UNIX_SECONDS(...) *(PR [#4350](https://github.com/tobymao/sqlglot/pull/4350) by [@VaggelisD](https://github.com/VaggelisD))* - [`42da638`](https://github.com/tobymao/sqlglot/commit/42da63812ed489d1d8bbef0fc14c7dfa5ce57b7a) - **bigquery**: Support JSON_VALUE_ARRAY(...) 
*(PR [#4356](https://github.com/tobymao/sqlglot/pull/4356) by [@VaggelisD](https://github.com/VaggelisD))* - [`e337a42`](https://github.com/tobymao/sqlglot/commit/e337a42dd56f5358e617750e7a70a0d4b7eab3f9) - **bigquery**: Parse REGEXP_SUBSTR as exp.RegexpExtract *(PR [#4358](https://github.com/tobymao/sqlglot/pull/4358) by [@VaggelisD](https://github.com/VaggelisD))* - [`602dbf8`](https://github.com/tobymao/sqlglot/commit/602dbf84ce23f41fba6a87db70ecec6113044bac) - Support REGEXP_EXTRACT_ALL *(PR [#4359](https://github.com/tobymao/sqlglot/pull/4359) by [@VaggelisD](https://github.com/VaggelisD))* - [`27a44a2`](https://github.com/tobymao/sqlglot/commit/27a44a22ff78cc35e8ab7c91b94311ef93d86c5a) - improve Levenshtein expression transpilation *(PR [#4360](https://github.com/tobymao/sqlglot/pull/4360) by [@krzysztof-kwitt](https://github.com/krzysztof-kwitt))* - [`79c675a`](https://github.com/tobymao/sqlglot/commit/79c675a49fb44a6a7a97ea0de79822d8571724be) - **bigquery**: Support JSON_QUERY_ARRAY & JSON_EXTRACT_ARRAY *(PR [#4361](https://github.com/tobymao/sqlglot/pull/4361) by [@VaggelisD](https://github.com/VaggelisD))* - [`57722db`](https://github.com/tobymao/sqlglot/commit/57722db90394d9a102c0e76a3e4d32a9f72f9ff9) - optionally wrap connectors when using builders *(PR [#4369](https://github.com/tobymao/sqlglot/pull/4369) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4362](https://github.com/tobymao/sqlglot/issues/4362) opened by [@gabrielteotonio](https://github.com/gabrielteotonio)* - :arrow_lower_right: *addresses issue [#4367](https://github.com/tobymao/sqlglot/issues/4367) opened by [@gabrielteotonio](https://github.com/gabrielteotonio)* ### :bug: Bug Fixes - [`eb8e2fe`](https://github.com/tobymao/sqlglot/commit/eb8e2fe3ab3fb4b88f72843a5bd21f4a3c1d895c) - bubble up comments in qualified column refs fixes [#4353](https://github.com/tobymao/sqlglot/pull/4353) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`efcbfdb`](https://github.com/tobymao/sqlglot/commit/efcbfdb67b12853581fbfc0d4c4a450c0281849b) - **clickhouse**: Generate exp.Median as lowercase *(PR [#4355](https://github.com/tobymao/sqlglot/pull/4355) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4354](https://github.com/tobymao/sqlglot/issues/4354) opened by [@cpcloud](https://github.com/cpcloud)* - [`60625ea`](https://github.com/tobymao/sqlglot/commit/60625eae34deb6a6fc36c0f3996f1281eae0ef6f) - **duckdb**: Fix STRUCT cast generation *(PR [#4366](https://github.com/tobymao/sqlglot/pull/4366) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4365](https://github.com/tobymao/sqlglot/issues/4365) opened by [@NickCrews](https://github.com/NickCrews)* - [`a665030`](https://github.com/tobymao/sqlglot/commit/a665030323b200f3bed241bb928993b9807c4100) - safe removal while iterating expression list for multiple UNNEST expressions *(PR [#4364](https://github.com/tobymao/sqlglot/pull/4364) by [@gauravsagar483](https://github.com/gauravsagar483))* - [`a71cee4`](https://github.com/tobymao/sqlglot/commit/a71cee4b4eafad9988b945c69dc75583ae105ec7) - Transpilation of exp.ArraySize from Postgres (read) *(PR [#4370](https://github.com/tobymao/sqlglot/pull/4370) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4368](https://github.com/tobymao/sqlglot/issues/4368) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`702fe31`](https://github.com/tobymao/sqlglot/commit/702fe318dadbe6cb83676e2a23ee830774697bb0) - Remove flaky timing test *(PR [#4371](https://github.com/tobymao/sqlglot/pull/4371) by [@VaggelisD](https://github.com/VaggelisD))* - [`4d3904e`](https://github.com/tobymao/sqlglot/commit/4d3904e8906f0573f3352ad82282ea09c571daa8) - **spark**: Support DB's TIMESTAMP_DIFF *(PR [#4373](https://github.com/tobymao/sqlglot/pull/4373) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4372](https://github.com/tobymao/sqlglot/issues/4372) opened by [@nikmalviya](https://github.com/nikmalviya)* - [`060ecfc`](https://github.com/tobymao/sqlglot/commit/060ecfc75fd8a07ffbc19f34959155a0fce317b6) - don't generate comments in table_name *(PR [#4375](https://github.com/tobymao/sqlglot/pull/4375) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`e19fb62`](https://github.com/tobymao/sqlglot/commit/e19fb620dbe6e405518aee381183e4640b638aa4) - improve error handling for unnest_to_explode *(PR [#4339](https://github.com/tobymao/sqlglot/pull/4339) by [@gauravsagar483](https://github.com/gauravsagar483))* ## [v25.29.0] - 2024-11-05 ### :boom: BREAKING CHANGES - due to [`e92904e`](https://github.com/tobymao/sqlglot/commit/e92904e61ab3b14fe18d472df19311f9b014f6cc) - Transpile ANY to EXISTS *(PR [#4305](https://github.com/tobymao/sqlglot/pull/4305) by [@VaggelisD](https://github.com/VaggelisD))*: Transpile ANY to EXISTS (#4305) - due to [`23e620f`](https://github.com/tobymao/sqlglot/commit/23e620f7cd2860fbce45a5377a75ae0c8f031ce0) - Support MEDIAN() function *(PR [#4317](https://github.com/tobymao/sqlglot/pull/4317) by [@VaggelisD](https://github.com/VaggelisD))*: Support MEDIAN() function (#4317) - due to [`a093ae7`](https://github.com/tobymao/sqlglot/commit/a093ae750af8a351e54f1431deba1f2ce6843666) - always wrap value in NOT value IS ... *(PR [#4331](https://github.com/tobymao/sqlglot/pull/4331) by [@georgesittas](https://github.com/georgesittas))*: always wrap value in NOT value IS ... 
(#4331) - due to [`84f78aa`](https://github.com/tobymao/sqlglot/commit/84f78aafd5d7e74da407167cd394d2bff0718cfb) - parse information schema views into a single identifier *(PR [#4336](https://github.com/tobymao/sqlglot/pull/4336) by [@georgesittas](https://github.com/georgesittas))*: parse information schema views into a single identifier (#4336) ### :sparkles: New Features - [`efd9b4e`](https://github.com/tobymao/sqlglot/commit/efd9b4ed5a761a2ebfc47a1582e9d1b2eb7cb277) - **postgres**: Support JSONB_EXISTS *(PR [#4302](https://github.com/tobymao/sqlglot/pull/4302) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4299](https://github.com/tobymao/sqlglot/issues/4299) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`e92904e`](https://github.com/tobymao/sqlglot/commit/e92904e61ab3b14fe18d472df19311f9b014f6cc) - **spark**: Transpile ANY to EXISTS *(PR [#4305](https://github.com/tobymao/sqlglot/pull/4305) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4298](https://github.com/tobymao/sqlglot/issues/4298) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`2af4936`](https://github.com/tobymao/sqlglot/commit/2af4936bd9b318c695aae249324ff67bcd1292f6) - **snowflake**: Transpile BQ's TIMESTAMP() function *(PR [#4309](https://github.com/tobymao/sqlglot/pull/4309) by [@VaggelisD](https://github.com/VaggelisD))* - [`50a1c91`](https://github.com/tobymao/sqlglot/commit/50a1c919d0d46384e3bd9ba1d45c24dd07efe6d2) - **snowflake**: Transpile exp.TimestampAdd *(PR [#4320](https://github.com/tobymao/sqlglot/pull/4320) by [@VaggelisD](https://github.com/VaggelisD))* - [`01671ce`](https://github.com/tobymao/sqlglot/commit/01671ce137c9cf8d0f12dadc66e0db141f797d16) - **teradata**: add support for hexadecimal literals *(PR [#4323](https://github.com/tobymao/sqlglot/pull/4323) by [@thomascjohnson](https://github.com/thomascjohnson))* - 
[`23e620f`](https://github.com/tobymao/sqlglot/commit/23e620f7cd2860fbce45a5377a75ae0c8f031ce0) - Support MEDIAN() function *(PR [#4317](https://github.com/tobymao/sqlglot/pull/4317) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4315](https://github.com/tobymao/sqlglot/issues/4315) opened by [@cpcloud](https://github.com/cpcloud)* - [`9faef8d`](https://github.com/tobymao/sqlglot/commit/9faef8d1ceff91dd88db46b2c187d64f15490bf4) - **snowflake**: Transpile exp.TimestampSub *(PR [#4329](https://github.com/tobymao/sqlglot/pull/4329) by [@VaggelisD](https://github.com/VaggelisD))* - [`2d98cac`](https://github.com/tobymao/sqlglot/commit/2d98cacc723bc0c0df8ce11895983fb7cb9f5237) - **BigQuery**: Support JSON_VALUE() *(PR [#4332](https://github.com/tobymao/sqlglot/pull/4332) by [@VaggelisD](https://github.com/VaggelisD))* - [`f8fec0a`](https://github.com/tobymao/sqlglot/commit/f8fec0ab098df37c3b54d91c24e5d8ec84f7cdbe) - **snowflake**: Transpile exp.DatetimeDiff *(PR [#4334](https://github.com/tobymao/sqlglot/pull/4334) by [@VaggelisD](https://github.com/VaggelisD))* - [`16fd1ea`](https://github.com/tobymao/sqlglot/commit/16fd1ea2653a602bdc0d8b81e971fb1acadee585) - **BigQuery**: Support JSON_QUERY *(PR [#4333](https://github.com/tobymao/sqlglot/pull/4333) by [@VaggelisD](https://github.com/VaggelisD))* - [`c09b6a2`](https://github.com/tobymao/sqlglot/commit/c09b6a2a37807795ead251f4fb81a9ba144cce27) - **duckdb**: support flags for RegexpExtract *(PR [#4326](https://github.com/tobymao/sqlglot/pull/4326) by [@NickCrews](https://github.com/NickCrews))* - [`536973c`](https://github.com/tobymao/sqlglot/commit/536973cfc9d00110e388e8af1ed91d73607e07c2) - **trino**: add support for the ON OVERFLOW clause in LISTAGG *(PR [#4340](https://github.com/tobymao/sqlglot/pull/4340) by [@georgesittas](https://github.com/georgesittas))* - [`4584935`](https://github.com/tobymao/sqlglot/commit/4584935cab328eced61c62a998cc013cab5cc3e3) - **snowflake**: 
Transpile exp.StrToDate *(PR [#4348](https://github.com/tobymao/sqlglot/pull/4348) by [@VaggelisD](https://github.com/VaggelisD))* - [`71f4a47`](https://github.com/tobymao/sqlglot/commit/71f4a47910d5db97fa1a286891d72b5c4694d294) - **snowflake**: Transpile exp.DatetimeAdd *(PR [#4349](https://github.com/tobymao/sqlglot/pull/4349) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`551afff`](https://github.com/tobymao/sqlglot/commit/551afff58ea7bc1047775bfcd5d80b812fb3f682) - handle a Move edge case in the semantic differ *(PR [#4295](https://github.com/tobymao/sqlglot/pull/4295) by [@georgesittas](https://github.com/georgesittas))* - [`a66e721`](https://github.com/tobymao/sqlglot/commit/a66e721dcd63488f7f3b427569a2115ae044c71b) - **generator**: Add NULL FILTER on ARRAY_AGG only for columns *(PR [#4301](https://github.com/tobymao/sqlglot/pull/4301) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4300](https://github.com/tobymao/sqlglot/issues/4300) opened by [@elad-sachs](https://github.com/elad-sachs)* - [`b4ea602`](https://github.com/tobymao/sqlglot/commit/b4ea602ab17b0e8e85ddb090156c7bd2c6354de4) - **clickhouse**: improve parsing of WITH FILL ... INTERPOLATE *(PR [#4311](https://github.com/tobymao/sqlglot/pull/4311) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4310](https://github.com/tobymao/sqlglot/issues/4310) opened by [@brunorpinho](https://github.com/brunorpinho)* - [`749886b`](https://github.com/tobymao/sqlglot/commit/749886b574a5dfa03aeb78b76d9cc097aa0f3e65) - **tsql**: Generate LOG(...) 
for exp.Ln *(PR [#4318](https://github.com/tobymao/sqlglot/pull/4318) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4316](https://github.com/tobymao/sqlglot/issues/4316) opened by [@cpcloud](https://github.com/cpcloud)* - [`5c1b1f4`](https://github.com/tobymao/sqlglot/commit/5c1b1f43014967f6853752ba8d0899757a3efcd5) - **parser**: optionally parse a Stream expression *(PR [#4325](https://github.com/tobymao/sqlglot/pull/4325) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4324](https://github.com/tobymao/sqlglot/issues/4324) opened by [@lancewl](https://github.com/lancewl)* - [`bb49a00`](https://github.com/tobymao/sqlglot/commit/bb49a00b16487356369bbb77aff9c2ff3f9cda52) - **oracle**: Do not normalize time units for exp.DateTrunc *(PR [#4328](https://github.com/tobymao/sqlglot/pull/4328) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4321](https://github.com/tobymao/sqlglot/issues/4321) opened by [@cpcloud](https://github.com/cpcloud)* - [`a093ae7`](https://github.com/tobymao/sqlglot/commit/a093ae750af8a351e54f1431deba1f2ce6843666) - **clickhouse**: always wrap value in NOT value IS ... 
*(PR [#4331](https://github.com/tobymao/sqlglot/pull/4331) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4330](https://github.com/tobymao/sqlglot/issues/4330) opened by [@elchyn-cheliabiyeu](https://github.com/elchyn-cheliabiyeu)* - [`def4f1e`](https://github.com/tobymao/sqlglot/commit/def4f1e3a9eac7545dfad223a5d49cee4fb7eeb8) - Refactor exp.RegexpExtract (follow up 4326) *(PR [#4341](https://github.com/tobymao/sqlglot/pull/4341) by [@VaggelisD](https://github.com/VaggelisD))* - [`c1456d0`](https://github.com/tobymao/sqlglot/commit/c1456d07097c42a2ba2078ad30a8afe4cc89597d) - presto/trino current_time closes [#4344](https://github.com/tobymao/sqlglot/pull/4344) *(commit by [@tobymao](https://github.com/tobymao))* - [`8e16abe`](https://github.com/tobymao/sqlglot/commit/8e16abe2fed324b7ed6c718753cc623a8eb37814) - **duckdb**: we ALWAYS need to render group if params is present for RegexpExtract *(PR [#4343](https://github.com/tobymao/sqlglot/pull/4343) by [@NickCrews](https://github.com/NickCrews))* - [`1689dc7`](https://github.com/tobymao/sqlglot/commit/1689dc7adbb913fe603b5e37eba29cc10d344cd2) - **bigquery**: Parse timezone for DATE_TRUNC *(PR [#4347](https://github.com/tobymao/sqlglot/pull/4347) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4346](https://github.com/tobymao/sqlglot/issues/4346) opened by [@CYFish](https://github.com/CYFish)* - [`84f78aa`](https://github.com/tobymao/sqlglot/commit/84f78aafd5d7e74da407167cd394d2bff0718cfb) - **bigquery**: parse information schema views into a single identifier *(PR [#4336](https://github.com/tobymao/sqlglot/pull/4336) by [@georgesittas](https://github.com/georgesittas))* ## [v25.28.0] - 2024-10-25 ### :boom: BREAKING CHANGES - due to [`1691388`](https://github.com/tobymao/sqlglot/commit/16913887f5573f01eb8cd2b9336d4b37b84a449a) - Fix chained exp.SetOperation type annotation *(PR [#4274](https://github.com/tobymao/sqlglot/pull/4274) by 
[@VaggelisD](https://github.com/VaggelisD))*: Fix chained exp.SetOperation type annotation (#4274) - due to [`c3c1997`](https://github.com/tobymao/sqlglot/commit/c3c199714df04edfe3698594680bac06575ca285) - Add support for STRING function *(PR [#4284](https://github.com/tobymao/sqlglot/pull/4284) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for STRING function (#4284) ### :sparkles: New Features - [`379f487`](https://github.com/tobymao/sqlglot/commit/379f487080d95ef6e87cbbae8003541cde381ac0) - **bigquery**: transpile EDIT_DISTANCE, closes [#4283](https://github.com/tobymao/sqlglot/pull/4283) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c3c1997`](https://github.com/tobymao/sqlglot/commit/c3c199714df04edfe3698594680bac06575ca285) - **bigquery**: Add support for STRING function *(PR [#4284](https://github.com/tobymao/sqlglot/pull/4284) by [@VaggelisD](https://github.com/VaggelisD))* - [`1a26bff`](https://github.com/tobymao/sqlglot/commit/1a26bff619315a6e9dc3eab4dec07746b4820796) - **snowflake**: Transpile exp.SafeDivide *(PR [#4294](https://github.com/tobymao/sqlglot/pull/4294) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`ac66d2f`](https://github.com/tobymao/sqlglot/commit/ac66d2f4b94e6a984adbf3df01139b6378248158) - **clickhouse**: properly parse CREATE FUNCTION DDLs *(PR [#4282](https://github.com/tobymao/sqlglot/pull/4282) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3276](https://github.com/TobikoData/sqlmesh/issues/3276) opened by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog)* - [`1691388`](https://github.com/tobymao/sqlglot/commit/16913887f5573f01eb8cd2b9336d4b37b84a449a) - **optimizer**: Fix chained exp.SetOperation type annotation *(PR [#4274](https://github.com/tobymao/sqlglot/pull/4274) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4261](https://github.com/tobymao/sqlglot/issues/4261) opened by 
[@gabrielteotonio](https://github.com/gabrielteotonio)* - [`559e7bc`](https://github.com/tobymao/sqlglot/commit/559e7bc5bbc77e94dea6de0470659b3c3fa6851f) - **clickhouse**: Wrap subquery if it's LHS of IS NOT NULL *(PR [#4287](https://github.com/tobymao/sqlglot/pull/4287) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4285](https://github.com/tobymao/sqlglot/issues/4285) opened by [@EugeneTorap](https://github.com/EugeneTorap)* - [`47bc09a`](https://github.com/tobymao/sqlglot/commit/47bc09a85a3781682f5e58bfde5f453fb1a7c50b) - **sqlite**: Fix UNIQUE parsing *(PR [#4293](https://github.com/tobymao/sqlglot/pull/4293) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4291](https://github.com/tobymao/sqlglot/issues/4291) opened by [@tshu-w](https://github.com/tshu-w)* - [`ee266ef`](https://github.com/tobymao/sqlglot/commit/ee266ef8f92fe72252eea36b56e8825715644a4f) - improve support for identifier delimiter escaping *(PR [#4288](https://github.com/tobymao/sqlglot/pull/4288) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`c6ff7f1`](https://github.com/tobymao/sqlglot/commit/c6ff7f1a0b6e443d80bc0f0ad1086d5c7b13b9f4) - bump sqlglotrs to v0.2.13 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.27.0] - 2024-10-22 ### :boom: BREAKING CHANGES - due to [`4d86499`](https://github.com/tobymao/sqlglot/commit/4d8649940d02ac319f2fec372a52674488f01de5) - include the target node for Move edits *(PR [#4277](https://github.com/tobymao/sqlglot/pull/4277) by [@georgesittas](https://github.com/georgesittas))*: include the target node for Move edits (#4277) - due to [`9771965`](https://github.com/tobymao/sqlglot/commit/97719657d1b2074dabfbe54af0e1ea3acd6d4744) - Add support for TIMESTAMP_NTZ_FROM_PARTS *(PR [#4280](https://github.com/tobymao/sqlglot/pull/4280) by [@VaggelisD](https://github.com/VaggelisD))*: Add support for TIMESTAMP_NTZ_FROM_PARTS (#4280) - due to 
[`768adb3`](https://github.com/tobymao/sqlglot/commit/768adb3d85ed88931d761e5ecc8fb4a3a40d0dc5) - time string literals containing fractional seconds *(PR [#4269](https://github.com/tobymao/sqlglot/pull/4269) by [@treysp](https://github.com/treysp))*: time string literals containing fractional seconds (#4269) ### :sparkles: New Features - [`9771965`](https://github.com/tobymao/sqlglot/commit/97719657d1b2074dabfbe54af0e1ea3acd6d4744) - **snowflake**: Add support for TIMESTAMP_NTZ_FROM_PARTS *(PR [#4280](https://github.com/tobymao/sqlglot/pull/4280) by [@VaggelisD](https://github.com/VaggelisD))* - [`9e11654`](https://github.com/tobymao/sqlglot/commit/9e11654c6ebf7451f14d46c006070effe452519a) - **clickhouse**: add geometry types *(PR [#4278](https://github.com/tobymao/sqlglot/pull/4278) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* ### :bug: Bug Fixes - [`c25a9ab`](https://github.com/tobymao/sqlglot/commit/c25a9ab577d7f0a1056e8afab680ca7801c47fff) - **tsql**: Keep CTE's attached to the query when emulating IF NOT EXISTS *(PR [#4279](https://github.com/tobymao/sqlglot/pull/4279) by [@erindru](https://github.com/erindru))* - [`768adb3`](https://github.com/tobymao/sqlglot/commit/768adb3d85ed88931d761e5ecc8fb4a3a40d0dc5) - **clickhouse**: time string literals containing fractional seconds *(PR [#4269](https://github.com/tobymao/sqlglot/pull/4269) by [@treysp](https://github.com/treysp))* ### :recycle: Refactors - [`4d86499`](https://github.com/tobymao/sqlglot/commit/4d8649940d02ac319f2fec372a52674488f01de5) - **diff**: include the target node for Move edits *(PR [#4277](https://github.com/tobymao/sqlglot/pull/4277) by [@georgesittas](https://github.com/georgesittas))* ## [v25.26.0] - 2024-10-21 ### :boom: BREAKING CHANGES - due to [`142c3e7`](https://github.com/tobymao/sqlglot/commit/142c3e75b25374ba9259f21b51cd728bbeb280ef) - Support TO_DOUBLE function *(PR [#4255](https://github.com/tobymao/sqlglot/pull/4255) by 
[@VaggelisD](https://github.com/VaggelisD))*: Support TO_DOUBLE function (#4255) - due to [`13d0696`](https://github.com/tobymao/sqlglot/commit/13d06966a2ca9264f35d5a58e1eaff1baa7dc66e) - Support TRY_TO_TIMESTAMP function *(PR [#4257](https://github.com/tobymao/sqlglot/pull/4257) by [@VaggelisD](https://github.com/VaggelisD))*: Support TRY_TO_TIMESTAMP function (#4257) - due to [`7fc0055`](https://github.com/tobymao/sqlglot/commit/7fc0055fb04713ba047baa5eda1ce0baf1cc79e2) - dont parse right-hand side operands of ARRAY JOIN as Tables *(PR [#4258](https://github.com/tobymao/sqlglot/pull/4258) by [@georgesittas](https://github.com/georgesittas))*: dont parse right-hand side operands of ARRAY JOIN as Tables (#4258) - due to [`222152e`](https://github.com/tobymao/sqlglot/commit/222152e32521dbc6de3384b18ab4c677239c6088) - Add type hints for optimizer rules eliminate & merge subqueries *(PR [#4267](https://github.com/tobymao/sqlglot/pull/4267) by [@VaggelisD](https://github.com/VaggelisD))*: Add type hints for optimizer rules eliminate & merge subqueries (#4267) ### :sparkles: New Features - [`6f32e53`](https://github.com/tobymao/sqlglot/commit/6f32e5348d9aeba9c5d51a892023b2e14e072119) - support non-strict qualify_columns *(PR [#4243](https://github.com/tobymao/sqlglot/pull/4243) by [@hsheth2](https://github.com/hsheth2))* - [`ed97954`](https://github.com/tobymao/sqlglot/commit/ed97954ecd7c2d7d4fe1bbf2ec0ecc000dd02b32) - **duckdb**: Transpile Spark's LATERAL VIEW EXPLODE *(PR [#4252](https://github.com/tobymao/sqlglot/pull/4252) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4247](https://github.com/tobymao/sqlglot/issues/4247) opened by [@idanyadgar-clutch](https://github.com/idanyadgar-clutch)* - [`8f5efc7`](https://github.com/tobymao/sqlglot/commit/8f5efc7bc01ba5923584cd6ef38a4d81e763ccae) - **oracle**: parse hints *(PR [#4249](https://github.com/tobymao/sqlglot/pull/4249) by [@mkmoisen](https://github.com/mkmoisen))* - 
[`8b7ff5e`](https://github.com/tobymao/sqlglot/commit/8b7ff5ee8713a3ba50c48addd3700927a0240cf5) - **starrocks**: support for ALTER TABLE SWAP WITH *(PR [#4256](https://github.com/tobymao/sqlglot/pull/4256) by [@mrhamburg](https://github.com/mrhamburg))* - [`1c43348`](https://github.com/tobymao/sqlglot/commit/1c433487a45379298ef27b3688723df2bd740fd1) - **trino**: Support for LISTAGG function *(PR [#4253](https://github.com/tobymao/sqlglot/pull/4253) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4250](https://github.com/tobymao/sqlglot/issues/4250) opened by [@npochhi](https://github.com/npochhi)* - [`142c3e7`](https://github.com/tobymao/sqlglot/commit/142c3e75b25374ba9259f21b51cd728bbeb280ef) - **snowflake**: Support TO_DOUBLE function *(PR [#4255](https://github.com/tobymao/sqlglot/pull/4255) by [@VaggelisD](https://github.com/VaggelisD))* - [`13d0696`](https://github.com/tobymao/sqlglot/commit/13d06966a2ca9264f35d5a58e1eaff1baa7dc66e) - **snowflake**: Support TRY_TO_TIMESTAMP function *(PR [#4257](https://github.com/tobymao/sqlglot/pull/4257) by [@VaggelisD](https://github.com/VaggelisD))* - [`04dccf3`](https://github.com/tobymao/sqlglot/commit/04dccf3cdaf1c3a0466dda113aba5439f1639ae0) - **tsql**: Support for stored procedure options *(PR [#4260](https://github.com/tobymao/sqlglot/pull/4260) by [@rsanchez-xtillion](https://github.com/rsanchez-xtillion))* - [`36f6841`](https://github.com/tobymao/sqlglot/commit/36f68416b3dd0d9ac703dd926d1f74bc43566e0d) - **bigquery**: support EDIT_DISTANCE (Levenshtein) function *(PR [#4276](https://github.com/tobymao/sqlglot/pull/4276) by [@esciara](https://github.com/esciara))* - :arrow_lower_right: *addresses issue [#4275](https://github.com/tobymao/sqlglot/issues/4275) opened by [@esciara](https://github.com/esciara)* ### :bug: Bug Fixes - [`fcc05c9`](https://github.com/tobymao/sqlglot/commit/fcc05c9daa31c7a51474ec9c72ceafd682359f90) - **bigquery**: Early expand only aliased names in 
GROUP BY *(PR [#4246](https://github.com/tobymao/sqlglot/pull/4246) by [@VaggelisD](https://github.com/VaggelisD))* - [`5655cfb`](https://github.com/tobymao/sqlglot/commit/5655cfba7afdf8f95dea53d5ededfde209b77c30) - add support for negative intervals in to_interval *(commit by [@georgesittas](https://github.com/georgesittas))* - [`51f4d26`](https://github.com/tobymao/sqlglot/commit/51f4d26ed8694365c61fdefd810a420fcfefdeca) - generate single argument ArrayConcat without trailing comma fixes [#4259](https://github.com/tobymao/sqlglot/pull/4259) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7fc0055`](https://github.com/tobymao/sqlglot/commit/7fc0055fb04713ba047baa5eda1ce0baf1cc79e2) - **clickhouse**: dont parse right-hand side operands of ARRAY JOIN as Tables *(PR [#4258](https://github.com/tobymao/sqlglot/pull/4258) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4254](https://github.com/tobymao/sqlglot/issues/4254) opened by [@xtess16](https://github.com/xtess16)* - [`8f49ad8`](https://github.com/tobymao/sqlglot/commit/8f49ad87fa795349183d13129110ad59387bfe11) - **clickhouse**: traverse_scope with FINAL modifier *(PR [#4263](https://github.com/tobymao/sqlglot/pull/4263) by [@pkit](https://github.com/pkit))* - :arrow_lower_right: *fixes issue [#4262](https://github.com/tobymao/sqlglot/issues/4262) opened by [@obazna](https://github.com/obazna)* - [`83167ea`](https://github.com/tobymao/sqlglot/commit/83167eaa3039195f756c7b1ad95fc9162f19b1b1) - hive dialect hierarchy has no CURRENT_TIME func *(PR [#4264](https://github.com/tobymao/sqlglot/pull/4264) by [@georgesittas](https://github.com/georgesittas))* - [`7a5c7e0`](https://github.com/tobymao/sqlglot/commit/7a5c7e036fa84eb30bcae75829f3cb94503fa99e) - **presto**: transpile BIT to BOOLEAN *(commit by [@georgesittas](https://github.com/georgesittas))* - [`48be3d8`](https://github.com/tobymao/sqlglot/commit/48be3d89b1df96c7b8d81536862f53a98e414f11) - make the 
semantic diffing aware of changes to non-expression leaves *(PR [#4268](https://github.com/tobymao/sqlglot/pull/4268) by [@georgesittas](https://github.com/georgesittas))* - [`4543fb3`](https://github.com/tobymao/sqlglot/commit/4543fb3cd052dfb20428f5a6254b38def9e756ee) - **optimizer**: Fix merge_subqueries.py::rename_inner_sources() *(PR [#4266](https://github.com/tobymao/sqlglot/pull/4266) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4245](https://github.com/tobymao/sqlglot/issues/4245) opened by [@daniel769](https://github.com/daniel769)* - [`222152e`](https://github.com/tobymao/sqlglot/commit/222152e32521dbc6de3384b18ab4c677239c6088) - **optimizer**: Add type hints for optimizer rules eliminate & merge subqueries *(PR [#4267](https://github.com/tobymao/sqlglot/pull/4267) by [@VaggelisD](https://github.com/VaggelisD))* ### :recycle: Refactors - [`94013a2`](https://github.com/tobymao/sqlglot/commit/94013a21ca69b90da78dc47b16cd86503736597a) - simplify _expression_only_args helper in diff module *(PR [#4251](https://github.com/tobymao/sqlglot/pull/4251) by [@georgesittas](https://github.com/georgesittas))* - [`41e2eba`](https://github.com/tobymao/sqlglot/commit/41e2eba1a01c1a5b784ad9dc6c5191f3d3bc0d74) - **Oracle**: simplify hint arg formatting *(commit by [@georgesittas](https://github.com/georgesittas))* - [`cfd692f`](https://github.com/tobymao/sqlglot/commit/cfd692ff28a59f413671aafbc8dcd61eab3558c3) - move SwapTable logic to the base Parser/Generator classes *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.25.1] - 2024-10-15 ### :bug: Bug Fixes - [`e6567ae`](https://github.com/tobymao/sqlglot/commit/e6567ae11650834874808a844a19836fbb9ee753) - small overload fix for ensure list taking None *(PR [#4248](https://github.com/tobymao/sqlglot/pull/4248) by [@benfdking](https://github.com/benfdking))* ## [v25.25.0] - 2024-10-14 ### :boom: BREAKING CHANGES - due to 
[`275b64b`](https://github.com/tobymao/sqlglot/commit/275b64b6a28722232a24870e443b249994220d54) - refactor set operation builders so they can work with N expressions *(PR [#4226](https://github.com/tobymao/sqlglot/pull/4226) by [@georgesittas](https://github.com/georgesittas))*: refactor set operation builders so they can work with N expressions (#4226) - due to [`aee76da`](https://github.com/tobymao/sqlglot/commit/aee76da1cadec242f7428d23999f1752cb0708ca) - Native annotations for string functions *(PR [#4231](https://github.com/tobymao/sqlglot/pull/4231) by [@VaggelisD](https://github.com/VaggelisD))*: Native annotations for string functions (#4231) - due to [`202aaa0`](https://github.com/tobymao/sqlglot/commit/202aaa0e7390142ee3ade41c28e2e77cde31f295) - Native annotations for string functions *(PR [#4234](https://github.com/tobymao/sqlglot/pull/4234) by [@VaggelisD](https://github.com/VaggelisD))*: Native annotations for string functions (#4234) - due to [`5741180`](https://github.com/tobymao/sqlglot/commit/5741180e895eaaa75a07af388d36a0d2df97b28c) - produce exp.Column for the RHS of IN *(PR [#4239](https://github.com/tobymao/sqlglot/pull/4239) by [@georgesittas](https://github.com/georgesittas))*: produce exp.Column for the RHS of IN (#4239) - due to [`4da2502`](https://github.com/tobymao/sqlglot/commit/4da25029b1c6f1425b4602f42da4fa1bcd3fccdb) - make Explode a UDTF subclass *(PR [#4242](https://github.com/tobymao/sqlglot/pull/4242) by [@georgesittas](https://github.com/georgesittas))*: make Explode a UDTF subclass (#4242) ### :sparkles: New Features - [`163e943`](https://github.com/tobymao/sqlglot/commit/163e943cdaf449599640c198f69e73d2398eb323) - **tsql**: SPLIT_PART function and conversion to PARSENAME in tsql *(PR [#4211](https://github.com/tobymao/sqlglot/pull/4211) by [@daihuynh](https://github.com/daihuynh))* - [`275b64b`](https://github.com/tobymao/sqlglot/commit/275b64b6a28722232a24870e443b249994220d54) - refactor set operation builders so they can work 
with N expressions *(PR [#4226](https://github.com/tobymao/sqlglot/pull/4226) by [@georgesittas](https://github.com/georgesittas))* - [`3f6ba3e`](https://github.com/tobymao/sqlglot/commit/3f6ba3e69c9ba92429d2b3b00cac33f45518aa56) - **clickhouse**: Support varlen arrays for ARRAY JOIN *(PR [#4229](https://github.com/tobymao/sqlglot/pull/4229) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4227](https://github.com/tobymao/sqlglot/issues/4227) opened by [@brunorpinho](https://github.com/brunorpinho)* - [`aee76da`](https://github.com/tobymao/sqlglot/commit/aee76da1cadec242f7428d23999f1752cb0708ca) - **bigquery**: Native annotations for string functions *(PR [#4231](https://github.com/tobymao/sqlglot/pull/4231) by [@VaggelisD](https://github.com/VaggelisD))* - [`202aaa0`](https://github.com/tobymao/sqlglot/commit/202aaa0e7390142ee3ade41c28e2e77cde31f295) - **bigquery**: Native annotations for string functions *(PR [#4234](https://github.com/tobymao/sqlglot/pull/4234) by [@VaggelisD](https://github.com/VaggelisD))* - [`eeae25e`](https://github.com/tobymao/sqlglot/commit/eeae25e03a883671f9d5e514f9bd3021fb6c0d32) - support EXPLAIN in mysql *(PR [#4235](https://github.com/tobymao/sqlglot/pull/4235) by [@xiaoyu-meng-mxy](https://github.com/xiaoyu-meng-mxy))* - [`06748d9`](https://github.com/tobymao/sqlglot/commit/06748d93ccd232528003c37fdda25ae8163f3c18) - **mysql**: add support for operation modifiers like HIGH_PRIORITY *(PR [#4238](https://github.com/tobymao/sqlglot/pull/4238) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4236](https://github.com/tobymao/sqlglot/issues/4236) opened by [@asdfsx](https://github.com/asdfsx)* ### :bug: Bug Fixes - [`dcdec95`](https://github.com/tobymao/sqlglot/commit/dcdec95f986426ae90469baca993b47ac390081b) - Make exp.Update a DML node *(PR [#4223](https://github.com/tobymao/sqlglot/pull/4223) by [@VaggelisD](https://github.com/VaggelisD))* - 
:arrow_lower_right: *fixes issue [#4221](https://github.com/tobymao/sqlglot/issues/4221) opened by [@rahul-ve](https://github.com/rahul-ve)* - [`79caf51`](https://github.com/tobymao/sqlglot/commit/79caf519987718390a086bee19fdc89f6094496c) - **clickhouse**: rename BOOLEAN type to Bool fixes [#4230](https://github.com/tobymao/sqlglot/pull/4230) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b26a3f6`](https://github.com/tobymao/sqlglot/commit/b26a3f67b7113802ba1b4b3b211431e98258dc15) - satisfy mypy *(commit by [@georgesittas](https://github.com/georgesittas))* - [`5741180`](https://github.com/tobymao/sqlglot/commit/5741180e895eaaa75a07af388d36a0d2df97b28c) - **parser**: produce exp.Column for the RHS of IN *(PR [#4239](https://github.com/tobymao/sqlglot/pull/4239) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4237](https://github.com/tobymao/sqlglot/issues/4237) opened by [@rustyconover](https://github.com/rustyconover)* - [`daa6e78`](https://github.com/tobymao/sqlglot/commit/daa6e78e4b810eff826f995aa52f9e38197f1b7e) - **optimizer**: handle subquery predicate substitution correctly in de morgan's rule *(PR [#4240](https://github.com/tobymao/sqlglot/pull/4240) by [@georgesittas](https://github.com/georgesittas))* - [`c0a8355`](https://github.com/tobymao/sqlglot/commit/c0a83556acffcd77521f69bf51503a07310f749d) - **parser**: parse a column reference for the RHS of the IN clause *(PR [#4241](https://github.com/tobymao/sqlglot/pull/4241) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`0882f03`](https://github.com/tobymao/sqlglot/commit/0882f03d526f593b2d415e85b7d7a7c113721806) - Rename exp.RenameTable to exp.AlterRename *(PR [#4224](https://github.com/tobymao/sqlglot/pull/4224) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4222](https://github.com/tobymao/sqlglot/issues/4222) opened by [@s1101010110](https://github.com/s1101010110)* 
- [`fd42b5c`](https://github.com/tobymao/sqlglot/commit/fd42b5cdaf9421abb11e71d82726536af09e3ae3) - Simplify PARSENAME <-> SPLIT_PART transpilation *(PR [#4225](https://github.com/tobymao/sqlglot/pull/4225) by [@VaggelisD](https://github.com/VaggelisD))* - [`4da2502`](https://github.com/tobymao/sqlglot/commit/4da25029b1c6f1425b4602f42da4fa1bcd3fccdb) - make Explode a UDTF subclass *(PR [#4242](https://github.com/tobymao/sqlglot/pull/4242) by [@georgesittas](https://github.com/georgesittas))* ## [v25.24.5] - 2024-10-08 ### :sparkles: New Features - [`22a1684`](https://github.com/tobymao/sqlglot/commit/22a16848d80a2fa6d310f99d21f7d81f90eb9440) - **bigquery**: Native annotations for more math functions *(PR [#4212](https://github.com/tobymao/sqlglot/pull/4212) by [@VaggelisD](https://github.com/VaggelisD))* - [`354cfff`](https://github.com/tobymao/sqlglot/commit/354cfff13ab30d01c6123fca74eed0669d238aa0) - add builder methods to exp.Update and add with_ arg to exp.update *(PR [#4217](https://github.com/tobymao/sqlglot/pull/4217) by [@brdbry](https://github.com/brdbry))* ### :bug: Bug Fixes - [`2c513b7`](https://github.com/tobymao/sqlglot/commit/2c513b71c7d4b1ff5c7c4e12d6c38694210b1a12) - Attach CTE comments before commas *(PR [#4218](https://github.com/tobymao/sqlglot/pull/4218) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4216](https://github.com/tobymao/sqlglot/issues/4216) opened by [@ajfriend](https://github.com/ajfriend)* ## [v25.24.4] - 2024-10-04 ### :bug: Bug Fixes - [`484df7d`](https://github.com/tobymao/sqlglot/commit/484df7d50df5cb314943e1810db18a7d7d5bb3eb) - tsql union with limit *(commit by [@tobymao](https://github.com/tobymao))* ## [v25.24.3] - 2024-10-03 ### :sparkles: New Features - [`25b18d2`](https://github.com/tobymao/sqlglot/commit/25b18d28e5ad7b3687e2848ff92a0a1fc17b06fa) - **trino**: Support JSON_QUERY *(PR [#4206](https://github.com/tobymao/sqlglot/pull/4206) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4200](https://github.com/tobymao/sqlglot/issues/4200) opened by [@Harmuth94](https://github.com/Harmuth94)* - [`5781b45`](https://github.com/tobymao/sqlglot/commit/5781b455fa3ec495b65f3f3f4a959192389bd816) - **duckdb**: Add more Postgres operators *(PR [#4199](https://github.com/tobymao/sqlglot/pull/4199) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4189](https://github.com/tobymao/sqlglot/issues/4189) opened by [@rustyconover](https://github.com/rustyconover)* - [`89c0703`](https://github.com/tobymao/sqlglot/commit/89c07039da402fb2ad77e00edb4f09079ecbb41d) - **bigquery**: Native math function annotations *(PR [#4201](https://github.com/tobymao/sqlglot/pull/4201) by [@VaggelisD](https://github.com/VaggelisD))* - [`977d9e5`](https://github.com/tobymao/sqlglot/commit/977d9e5a854b58b4469be1af6aa14a5bf5a4b8c6) - allow supplying dialect in diff, conditionally copy ASTs *(PR [#4208](https://github.com/tobymao/sqlglot/pull/4208) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4203](https://github.com/tobymao/sqlglot/issues/4203) opened by [@mkmoisen](https://github.com/mkmoisen)* ### :bug: Bug Fixes - [`332c74b`](https://github.com/tobymao/sqlglot/commit/332c74b881487cd9ce711ca3bd065a8992872098) - attach comments to subquery predicates properly, fix comment case *(PR [#4207](https://github.com/tobymao/sqlglot/pull/4207) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4205](https://github.com/tobymao/sqlglot/issues/4205) opened by [@mkmoisen](https://github.com/mkmoisen)* - [`55da21d`](https://github.com/tobymao/sqlglot/commit/55da21dd043dfcbefa3653fe168eb9cae5dc5bf5) - Unexpected row deduplication using eliminate_full_outer_join *(PR [#4178](https://github.com/tobymao/sqlglot/pull/4178) by [@liaco](https://github.com/liaco))* ## [v25.24.2] - 2024-10-02 ### 
:sparkles: New Features - [`c8b7c1e`](https://github.com/tobymao/sqlglot/commit/c8b7c1ef7c6070a51638af18833c649a77e735cb) - **optimizer**: Fixture file for function annotations *(PR [#4182](https://github.com/tobymao/sqlglot/pull/4182) by [@VaggelisD](https://github.com/VaggelisD))* - [`0adbbf7`](https://github.com/tobymao/sqlglot/commit/0adbbf7ad8f16700adc48c6757c07768199860d9) - **duckdb**: Parse ** and ^ operators as POW *(PR [#4193](https://github.com/tobymao/sqlglot/pull/4193) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4186](https://github.com/tobymao/sqlglot/issues/4186) opened by [@rustyconover](https://github.com/rustyconover)* - [`4949906`](https://github.com/tobymao/sqlglot/commit/4949906e9dd0c3039a161e06ddb970f37067b88f) - **duckdb**: Parse ~~~ as GLOB *(PR [#4194](https://github.com/tobymao/sqlglot/pull/4194) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4187](https://github.com/tobymao/sqlglot/issues/4187) opened by [@rustyconover](https://github.com/rustyconover)* - [`6ba2bb0`](https://github.com/tobymao/sqlglot/commit/6ba2bb03f973c30788508768c3ba716aa94b0299) - **oracle**: Add support for BULK COLLECT INTO *(PR [#4181](https://github.com/tobymao/sqlglot/pull/4181) by [@mkmoisen](https://github.com/mkmoisen))* - [`0de59ce`](https://github.com/tobymao/sqlglot/commit/0de59cebe550b33ac34a92c1ded1d3f9b8f679c4) - mark `expressions` as unsupported in Into generator *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`475f7a3`](https://github.com/tobymao/sqlglot/commit/475f7a3c639c7b8c5f3af1b2e5fcce9174be39ec) - **redshift**: Add unsupported warnings for UNNEST *(PR [#4173](https://github.com/tobymao/sqlglot/pull/4173) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4169](https://github.com/tobymao/sqlglot/issues/4169) opened by [@bjabes](https://github.com/bjabes)* - 
[`d38e023`](https://github.com/tobymao/sqlglot/commit/d38e023966c32b208fe5ae9843bbd716e2181521) - **spark**: Offset TRY_ELEMENT_AT by one *(PR [#4183](https://github.com/tobymao/sqlglot/pull/4183) by [@VaggelisD](https://github.com/VaggelisD))* - [`3d1c643`](https://github.com/tobymao/sqlglot/commit/3d1c6430791dcce05f1a71f17311e294d9fc9d3d) - rename SHA function to SHA1 for DuckDB *(PR [#4191](https://github.com/tobymao/sqlglot/pull/4191) by [@rustyconover](https://github.com/rustyconover))* - [`0388a51`](https://github.com/tobymao/sqlglot/commit/0388a519dba63636a9aac3e3272cdea0f0b8312d) - add support for UHUGEINT for duckdb *(PR [#4190](https://github.com/tobymao/sqlglot/pull/4190) by [@rustyconover](https://github.com/rustyconover))* - :arrow_lower_right: *fixes issue [#4184](https://github.com/tobymao/sqlglot/issues/4184) opened by [@rustyconover](https://github.com/rustyconover)* - [`9eba00d`](https://github.com/tobymao/sqlglot/commit/9eba00dca517efe7df171b09ed916af3ea5e350d) - **duckdb**: Parse ~~ as LIKE *(PR [#4195](https://github.com/tobymao/sqlglot/pull/4195) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4188](https://github.com/tobymao/sqlglot/issues/4188) opened by [@rustyconover](https://github.com/rustyconover)* - [`6a65973`](https://github.com/tobymao/sqlglot/commit/6a659736f3a176e335c68fdd07d8265c3d0421dc) - expand UPDATABLE_EXPRESSION_TYPES to account for Identifier changes *(PR [#4197](https://github.com/tobymao/sqlglot/pull/4197) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4192](https://github.com/tobymao/sqlglot/issues/4192) opened by [@mkmoisen](https://github.com/mkmoisen)* - [`a6c28c6`](https://github.com/tobymao/sqlglot/commit/a6c28c63f4e44bb62ba8df30f1407c728eb215f2) - **sqlite**: generate StrPosition as INSTR *(PR [#4198](https://github.com/tobymao/sqlglot/pull/4198) by [@pruzko](https://github.com/pruzko))* - :arrow_lower_right: *fixes issue 
[#4196](https://github.com/tobymao/sqlglot/issues/4196) opened by [@pruzko](https://github.com/pruzko)* - [`5a123a5`](https://github.com/tobymao/sqlglot/commit/5a123a54ecd033c0a104e33476b17d816a09caac) - **oracle**: retreat properly when parsing BULK COLLECT INTO *(commit by [@georgesittas](https://github.com/georgesittas))* - [`f935e42`](https://github.com/tobymao/sqlglot/commit/f935e42130724e032b294074f3b552f21e20bc57) - properly escape closing identifier delimiters *(PR [#4202](https://github.com/tobymao/sqlglot/pull/4202) by [@georgesittas](https://github.com/georgesittas))* ## [v25.24.1] - 2024-10-01 ### :sparkles: New Features - [`7af33a2`](https://github.com/tobymao/sqlglot/commit/7af33a2f74dd1300bcd45f1974b7fd28abe66b8e) - **spark**: Custom annotation for more string functions *(PR [#4156](https://github.com/tobymao/sqlglot/pull/4156) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`81df4e1`](https://github.com/tobymao/sqlglot/commit/81df4e104ff3d60e3c23d3ac321e719b1f0962c0) - **athena**: Case sensitivity in CTAS property names *(PR [#4171](https://github.com/tobymao/sqlglot/pull/4171) by [@erindru](https://github.com/erindru))* - [`0703152`](https://github.com/tobymao/sqlglot/commit/0703152a25afced183dc5efd5f62311a48545420) - **bigquery**: Do not generate null ordering on agg funcs *(PR [#4172](https://github.com/tobymao/sqlglot/pull/4172) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4170](https://github.com/tobymao/sqlglot/issues/4170) opened by [@yjabri](https://github.com/yjabri)* ## [v25.24.0] - 2024-09-26 ### :boom: BREAKING CHANGES - due to [`3ab6dfb`](https://github.com/tobymao/sqlglot/commit/3ab6dfb486f18d036bfac6a90d5f81b0ce7a91ea) - Generalize COLUMNS(...) APPLY *(PR [#4161](https://github.com/tobymao/sqlglot/pull/4161) by [@VaggelisD](https://github.com/VaggelisD))*: Generalize COLUMNS(...) 
APPLY (#4161) ### :sparkles: New Features - [`93cef30`](https://github.com/tobymao/sqlglot/commit/93cef30bc534a155bce06f35d441d20e5dd78cf6) - **postgres**: Support OVERLAY function *(PR [#4165](https://github.com/tobymao/sqlglot/pull/4165) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4159](https://github.com/tobymao/sqlglot/issues/4159) opened by [@s1101010110](https://github.com/s1101010110)* - [`0a5444d`](https://github.com/tobymao/sqlglot/commit/0a5444dc822b7c53c008bc946eb3b54ca2147f3c) - expose a flag to automatically exclude Keep diff nodes *(PR [#4168](https://github.com/tobymao/sqlglot/pull/4168) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`9c17264`](https://github.com/tobymao/sqlglot/commit/9c172643aa3f3f0ffcc2e62242b62ba9c6141925) - **hive**: Enclose exp.Split with \E *(PR [#4163](https://github.com/tobymao/sqlglot/pull/4163) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4158](https://github.com/tobymao/sqlglot/issues/4158) opened by [@cpcloud](https://github.com/cpcloud)* - [`3ab6dfb`](https://github.com/tobymao/sqlglot/commit/3ab6dfb486f18d036bfac6a90d5f81b0ce7a91ea) - **clickhouse**: Generalize COLUMNS(...) 
APPLY *(PR [#4161](https://github.com/tobymao/sqlglot/pull/4161) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4157](https://github.com/tobymao/sqlglot/issues/4157) opened by [@elchyn-cheliabiyeu](https://github.com/elchyn-cheliabiyeu)* ### :recycle: Refactors - [`2540e50`](https://github.com/tobymao/sqlglot/commit/2540e50d2b0df12f940c68acc574e540d19546cf) - simplify check_deploy job *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`f6d3bdd`](https://github.com/tobymao/sqlglot/commit/f6d3bdd740d0fe128d4d5dd99833a6f71c890ed3) - update supported dialect count (21 -> 23) *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.23.2] - 2024-09-25 ### :wrench: Chores - [`eca05d3`](https://github.com/tobymao/sqlglot/commit/eca05d3b08645d7a984ee65b438282b35cb41960) - tweak should_deploy_rs script to avoid marking CI as failed *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.23.1] - 2024-09-25 ### :wrench: Chores - [`349b8f8`](https://github.com/tobymao/sqlglot/commit/349b8f81ed69a3708e1afd15816b3b58e2bf8b3f) - fetch all history to allow workflow script to skip sqlglotrs deployments *(PR [#4162](https://github.com/tobymao/sqlglot/pull/4162) by [@georgesittas](https://github.com/georgesittas))* ## [v25.23.0] - 2024-09-25 ### :boom: BREAKING CHANGES - due to [`da51ea5`](https://github.com/tobymao/sqlglot/commit/da51ea5c8f405859d877a25176e8e48ef8b4b112) - refactor exp.Chr *(PR [#4081](https://github.com/tobymao/sqlglot/pull/4081) by [@georgesittas](https://github.com/georgesittas))*: refactor exp.Chr (#4081) - due to [`9c527b5`](https://github.com/tobymao/sqlglot/commit/9c527b549cc56db9d8f44579397d9f9fe1440573) - treat Nullable as an arg instead of a DataType.TYPE *(PR [#4094](https://github.com/tobymao/sqlglot/pull/4094) by [@georgesittas](https://github.com/georgesittas))*: treat Nullable as an arg instead of a DataType.TYPE (#4094) - due to 
[`ba015dc`](https://github.com/tobymao/sqlglot/commit/ba015dc1102a4fe0c35cbfe6e3d23dc24263c20f) - add `returning` to merge expression builder *(PR [#4125](https://github.com/tobymao/sqlglot/pull/4125) by [@max-muoto](https://github.com/max-muoto))*: add `returning` to merge expression builder (#4125) - due to [`77a514d`](https://github.com/tobymao/sqlglot/commit/77a514dd7cfa9feb847c429411809092e5578bad) - Parse VALUES & query modifiers in wrapped FROM clause *(PR [#4135](https://github.com/tobymao/sqlglot/pull/4135) by [@VaggelisD](https://github.com/VaggelisD))*: Parse VALUES & query modifiers in wrapped FROM clause (#4135) ### :sparkles: New Features - [`5771d8d`](https://github.com/tobymao/sqlglot/commit/5771d8da94aff104206f93482e7b248d725f1843) - add merge expression builder *(PR [#4084](https://github.com/tobymao/sqlglot/pull/4084) by [@max-muoto](https://github.com/max-muoto))* - [`1d52709`](https://github.com/tobymao/sqlglot/commit/1d5270915d14f3f92341d5057b88b58fff6c0d97) - **postgres**: Parse DO NOTHING and RETURNING in MERGE statement *(PR [#4087](https://github.com/tobymao/sqlglot/pull/4087) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4083](https://github.com/tobymao/sqlglot/issues/4083) opened by [@max-muoto](https://github.com/max-muoto)* - [`1615bad`](https://github.com/tobymao/sqlglot/commit/1615bad98eeddc8e67e8002c3b1fe93bd7c3b690) - Add support for UUID function *(PR [#4089](https://github.com/tobymao/sqlglot/pull/4089) by [@VaggelisD](https://github.com/VaggelisD))* - [`5733600`](https://github.com/tobymao/sqlglot/commit/57336006795d32e9253a9df4813d3029d1d32ef1) - **bigquery**: transpile UUID type to STRING *(PR [#4093](https://github.com/tobymao/sqlglot/pull/4093) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4091](https://github.com/tobymao/sqlglot/issues/4091) opened by [@gigatexal](https://github.com/gigatexal)* - 
[`75230f5`](https://github.com/tobymao/sqlglot/commit/75230f5970c240add1cff7349fc65fb67541fa34) - **bigquery**: add support for the MERGE ... THEN INSERT ROW syntax *(PR [#4096](https://github.com/tobymao/sqlglot/pull/4096) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4095](https://github.com/tobymao/sqlglot/issues/4095) opened by [@ericist](https://github.com/ericist)* - [`f8d4dc4`](https://github.com/tobymao/sqlglot/commit/f8d4dc4bab90cd369eef090c23b81160a7ae78fc) - **parser**: add support for ALTER INDEX closes [#4105](https://github.com/tobymao/sqlglot/pull/4105) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`28c6f27`](https://github.com/tobymao/sqlglot/commit/28c6f27291d57d85917c62b387b86a598ee3c1d6) - **duckdb**: Support *COLUMNS() function *(PR [#4106](https://github.com/tobymao/sqlglot/pull/4106) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4101](https://github.com/tobymao/sqlglot/issues/4101) opened by [@aersam](https://github.com/aersam)* - [`3cb0041`](https://github.com/tobymao/sqlglot/commit/3cb00417f45624f012e5ce8ababfe3250e813b80) - **snowflake**: Fix exp.Pivot FOR IN clause *(PR [#4109](https://github.com/tobymao/sqlglot/pull/4109) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4108](https://github.com/tobymao/sqlglot/issues/4108) opened by [@kharigardner](https://github.com/kharigardner)* - [`7ac21a0`](https://github.com/tobymao/sqlglot/commit/7ac21a07c73cdf156ae4dc6a848b9f781b265d16) - **athena**: Improve DDL query support *(PR [#4099](https://github.com/tobymao/sqlglot/pull/4099) by [@erindru](https://github.com/erindru))* - [`a34f8b6`](https://github.com/tobymao/sqlglot/commit/a34f8b6ff9b0aa8595214da75fe7cbfbc8285476) - **oracle**: support TRUNC without fmt argument fixes [#4116](https://github.com/tobymao/sqlglot/pull/4116) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`bd8e050`](https://github.com/tobymao/sqlglot/commit/bd8e050e7df905082fd32e26c4ee0c6e2d36c897) - **clickhouse**: support ON CLUSTER clause in DELETE *(PR [#4119](https://github.com/tobymao/sqlglot/pull/4119) by [@treysp](https://github.com/treysp))* - [`1fac6a9`](https://github.com/tobymao/sqlglot/commit/1fac6a9f46e147a5583042d6f82deffd04cd58c9) - expose sqlglot.expressions.delete as a sqlglot module function *(PR [#4126](https://github.com/tobymao/sqlglot/pull/4126) by [@max-muoto](https://github.com/max-muoto))* - [`4506b3b`](https://github.com/tobymao/sqlglot/commit/4506b3b58fde8e8fe711df8fd0c9c245a98ca86b) - **duckdb**: add support for the UNION type *(PR [#4128](https://github.com/tobymao/sqlglot/pull/4128) by [@georgesittas](https://github.com/georgesittas))* - [`ba015dc`](https://github.com/tobymao/sqlglot/commit/ba015dc1102a4fe0c35cbfe6e3d23dc24263c20f) - add `returning` to merge expression builder *(PR [#4125](https://github.com/tobymao/sqlglot/pull/4125) by [@max-muoto](https://github.com/max-muoto))* - [`3ec96ab`](https://github.com/tobymao/sqlglot/commit/3ec96ab7318dc5fc07802d31c825a95db7f5b303) - **clickhouse**: Add support for APPLY query modifier *(PR [#4141](https://github.com/tobymao/sqlglot/pull/4141) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4139](https://github.com/tobymao/sqlglot/issues/4139) opened by [@elchyn-cheliabiyeu](https://github.com/elchyn-cheliabiyeu)* - [`04ddc54`](https://github.com/tobymao/sqlglot/commit/04ddc543159bc55e2cf8098cd96b2a5c881ebbc6) - **bigquery**: Support RANGE type *(PR [#4148](https://github.com/tobymao/sqlglot/pull/4148) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4146](https://github.com/tobymao/sqlglot/issues/4146) opened by [@plaflamme](https://github.com/plaflamme)* - [`17533ee`](https://github.com/tobymao/sqlglot/commit/17533ee6361c30731a9a14666ac952b44982b69d) - Add support for GRANT DDL *(PR 
[#4138](https://github.com/tobymao/sqlglot/pull/4138) by [@VaggelisD](https://github.com/VaggelisD))* - [`1a240ec`](https://github.com/tobymao/sqlglot/commit/1a240ec1cbdaf15abab8df642e189b89de239e84) - Add SUBSTR Support *(PR [#4153](https://github.com/tobymao/sqlglot/pull/4153) by [@mwc360](https://github.com/mwc360))* ### :bug: Bug Fixes - [`da51ea5`](https://github.com/tobymao/sqlglot/commit/da51ea5c8f405859d877a25176e8e48ef8b4b112) - **parser**: refactor exp.Chr *(PR [#4081](https://github.com/tobymao/sqlglot/pull/4081) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4080](https://github.com/tobymao/sqlglot/issues/4080) opened by [@EugeneTorap](https://github.com/EugeneTorap)* - [`6294f9e`](https://github.com/tobymao/sqlglot/commit/6294f9e6d08111c6088f5ed9846e7a64f7724801) - **starrocks**: generate ARRAY_FILTER for exp.ArrayFilter *(PR [#4088](https://github.com/tobymao/sqlglot/pull/4088) by [@gauravsagar483](https://github.com/gauravsagar483))* - [`1e02c02`](https://github.com/tobymao/sqlglot/commit/1e02c0221ea8445ccb5537b0a77e120c0b2c108c) - **mysql**: convert VARCHAR without size to TEXT for DDLs *(PR [#4092](https://github.com/tobymao/sqlglot/pull/4092) by [@georgesittas](https://github.com/georgesittas))* - [`cb5bcff`](https://github.com/tobymao/sqlglot/commit/cb5bcfff1f96972e75681bb2411bca8b60a4bff1) - **clickhouse**: generate formatDateTime instead of DATE_FORMAT fixes [#4098](https://github.com/tobymao/sqlglot/pull/4098) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b10255e`](https://github.com/tobymao/sqlglot/commit/b10255eb8b6b73bf5084fdf6bffd5a7fa351b1ec) - **snowflake**: Manually escape single quotes in colon operator *(PR [#4104](https://github.com/tobymao/sqlglot/pull/4104) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4090](https://github.com/tobymao/sqlglot/issues/4090) opened by [@jussihe-rec](https://github.com/jussihe-rec)* - 
[`67a9ad8`](https://github.com/tobymao/sqlglot/commit/67a9ad89abfce84f78dd1a34caa9dc8143233609) - Move JSON path escape to generation *(PR [#4110](https://github.com/tobymao/sqlglot/pull/4110) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4090](https://github.com/tobymao/sqlglot/issues/4090) opened by [@jussihe-rec](https://github.com/jussihe-rec)* - [`06c76f7`](https://github.com/tobymao/sqlglot/commit/06c76f7471cdb679a7a7a35064204d94841fd929) - calling interval without unit *(commit by [@tobymao](https://github.com/tobymao))* - [`66c3295`](https://github.com/tobymao/sqlglot/commit/66c32958a9e46642077813adf90079098e41c87e) - **optimizer**: Enable USING expansion with multiple joins *(PR [#4113](https://github.com/tobymao/sqlglot/pull/4113) by [@dg-hellotwin](https://github.com/dg-hellotwin))* - :arrow_lower_right: *fixes issue [#4112](https://github.com/tobymao/sqlglot/issues/4112) opened by [@dg-hellotwin](https://github.com/dg-hellotwin)* - [`21f5bcd`](https://github.com/tobymao/sqlglot/commit/21f5bcd13eb9c567c711cec5879c4d08a052b91c) - parse struct(...)[] type properly *(PR [#4123](https://github.com/tobymao/sqlglot/pull/4123) by [@georgesittas](https://github.com/georgesittas))* - [`22c456d`](https://github.com/tobymao/sqlglot/commit/22c456d032b457244b397598d4480ae22ad316bd) - Do not generate DISTINCT keyword in FILTER *(PR [#4130](https://github.com/tobymao/sqlglot/pull/4130) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4129](https://github.com/tobymao/sqlglot/issues/4129) opened by [@tekumara](https://github.com/tekumara)* - [`089b77e`](https://github.com/tobymao/sqlglot/commit/089b77ec7efcd6decbf9a7be500e73dc88ba4dec) - **athena**: DDL fixes *(PR [#4132](https://github.com/tobymao/sqlglot/pull/4132) by [@erindru](https://github.com/erindru))* - [`e6c9902`](https://github.com/tobymao/sqlglot/commit/e6c990225e2685c617dfd1594c83778036405f6b) - invalid regex *(commit by 
[@tobymao](https://github.com/tobymao))* - [`77a514d`](https://github.com/tobymao/sqlglot/commit/77a514dd7cfa9feb847c429411809092e5578bad) - **parser**: Parse VALUES & query modifiers in wrapped FROM clause *(PR [#4135](https://github.com/tobymao/sqlglot/pull/4135) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4133](https://github.com/tobymao/sqlglot/issues/4133) opened by [@danxmoran](https://github.com/danxmoran)* - [`8822d6c`](https://github.com/tobymao/sqlglot/commit/8822d6c1b3b62cfd76fd481db473bf8ea1c12b1a) - **parser**: handle brackets in column op json extract arrow parser *(PR [#4140](https://github.com/tobymao/sqlglot/pull/4140) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3151](https://github.com/TobikoData/sqlmesh/issues/3151) opened by [@markgraphene](https://github.com/markgraphene)* - [`be0a4a8`](https://github.com/tobymao/sqlglot/commit/be0a4a85d41bfb617360bd9a59aa9e631e4d6d6c) - **bigquery**: Consume dashed identifiers only if they're connected *(PR [#4144](https://github.com/tobymao/sqlglot/pull/4144) by [@VaggelisD](https://github.com/VaggelisD))* - [`0444819`](https://github.com/tobymao/sqlglot/commit/044481926d4b008027a2c7fb20501514ef507811) - **optimizer**: don't reorder subquery predicates in simplify *(PR [#4147](https://github.com/tobymao/sqlglot/pull/4147) by [@georgesittas](https://github.com/georgesittas))* - [`89519bb`](https://github.com/tobymao/sqlglot/commit/89519bba99fc11f17e8e00bf8e3f6dde213e99be) - **clickhouse**: make ToTableProperty appear right after the DDL name *(PR [#4151](https://github.com/tobymao/sqlglot/pull/4151) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4150](https://github.com/tobymao/sqlglot/issues/4150) opened by [@tzinyama](https://github.com/tzinyama)* ### :recycle: Refactors - [`9c527b5`](https://github.com/tobymao/sqlglot/commit/9c527b549cc56db9d8f44579397d9f9fe1440573) - treat 
Nullable as an arg instead of a DataType.TYPE *(PR [#4094](https://github.com/tobymao/sqlglot/pull/4094) by [@georgesittas](https://github.com/georgesittas))* - [`2961049`](https://github.com/tobymao/sqlglot/commit/296104950f2e679aea37810a48eb490e170518d3) - implement decorator to easily mark args as unsupported *(PR [#4111](https://github.com/tobymao/sqlglot/pull/4111) by [@georgesittas](https://github.com/georgesittas))* - [`7cf1d70`](https://github.com/tobymao/sqlglot/commit/7cf1d70e909ae319ff659e1455e6fcad1e8cf905) - **optimizer**: Optimize USING expansion *(PR [#4115](https://github.com/tobymao/sqlglot/pull/4115) by [@VaggelisD](https://github.com/VaggelisD))* ### :wrench: Chores - [`75e6406`](https://github.com/tobymao/sqlglot/commit/75e640672eef0ddf752ee36dbc6f904f8e06510f) - **prql**: rewrite tests to use `validate_all()` *(PR [#4097](https://github.com/tobymao/sqlglot/pull/4097) by [@JJHCool](https://github.com/JJHCool))* - [`e1f6ae3`](https://github.com/tobymao/sqlglot/commit/e1f6ae393fa2857dbbb9a14b03cbe39910207233) - **prql**: use validate_all instead of validate_identity *(commit by [@georgesittas](https://github.com/georgesittas))* - [`2dc0b86`](https://github.com/tobymao/sqlglot/commit/2dc0b8620e16b3cdf85f6bb6bea10c2527497933) - **optimizer**: rename helper function in expand_using *(PR [#4117](https://github.com/tobymao/sqlglot/pull/4117) by [@georgesittas](https://github.com/georgesittas))* - [`fd8b8ba`](https://github.com/tobymao/sqlglot/commit/fd8b8ba7dedaee5d237b080db5c4f7e83ba079e9) - create ARRAY_TYPES set under DataType *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.22.0] - 2024-09-19 ### :boom: BREAKING CHANGES - due to [`ba015dc`](https://github.com/tobymao/sqlglot/commit/ba015dc1102a4fe0c35cbfe6e3d23dc24263c20f) - add `returning` to merge expression builder *(PR [#4125](https://github.com/tobymao/sqlglot/pull/4125) by [@max-muoto](https://github.com/max-muoto))*: add `returning` to merge expression builder (#4125) 
### :sparkles: New Features - [`1fac6a9`](https://github.com/tobymao/sqlglot/commit/1fac6a9f46e147a5583042d6f82deffd04cd58c9) - expose sqlglot.expressions.delete as a sqlglot module function *(PR [#4126](https://github.com/tobymao/sqlglot/pull/4126) by [@max-muoto](https://github.com/max-muoto))* - [`4506b3b`](https://github.com/tobymao/sqlglot/commit/4506b3b58fde8e8fe711df8fd0c9c245a98ca86b) - **duckdb**: add support for the UNION type *(PR [#4128](https://github.com/tobymao/sqlglot/pull/4128) by [@georgesittas](https://github.com/georgesittas))* - [`ba015dc`](https://github.com/tobymao/sqlglot/commit/ba015dc1102a4fe0c35cbfe6e3d23dc24263c20f) - add `returning` to merge expression builder *(PR [#4125](https://github.com/tobymao/sqlglot/pull/4125) by [@max-muoto](https://github.com/max-muoto))* ### :bug: Bug Fixes - [`22c456d`](https://github.com/tobymao/sqlglot/commit/22c456d032b457244b397598d4480ae22ad316bd) - Do not generate DISTINCT keyword in FILTER *(PR [#4130](https://github.com/tobymao/sqlglot/pull/4130) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4129](https://github.com/tobymao/sqlglot/issues/4129) opened by [@tekumara](https://github.com/tekumara)* - [`089b77e`](https://github.com/tobymao/sqlglot/commit/089b77ec7efcd6decbf9a7be500e73dc88ba4dec) - **athena**: DDL fixes *(PR [#4132](https://github.com/tobymao/sqlglot/pull/4132) by [@erindru](https://github.com/erindru))* - [`e6c9902`](https://github.com/tobymao/sqlglot/commit/e6c990225e2685c617dfd1594c83778036405f6b) - invalid regex *(commit by [@tobymao](https://github.com/tobymao))* ## [v25.21.3] - 2024-09-14 ### :sparkles: New Features - [`bd8e050`](https://github.com/tobymao/sqlglot/commit/bd8e050e7df905082fd32e26c4ee0c6e2d36c897) - **clickhouse**: support ON CLUSTER clause in DELETE *(PR [#4119](https://github.com/tobymao/sqlglot/pull/4119) by [@treysp](https://github.com/treysp))* ### :bug: Bug Fixes - 
[`21f5bcd`](https://github.com/tobymao/sqlglot/commit/21f5bcd13eb9c567c711cec5879c4d08a052b91c) - parse struct(...)[] type properly *(PR [#4123](https://github.com/tobymao/sqlglot/pull/4123) by [@georgesittas](https://github.com/georgesittas))* ## [v25.21.2] - 2024-09-13 ### :sparkles: New Features - [`a34f8b6`](https://github.com/tobymao/sqlglot/commit/a34f8b6ff9b0aa8595214da75fe7cbfbc8285476) - **oracle**: support TRUNC without fmt argument fixes [#4116](https://github.com/tobymao/sqlglot/pull/4116) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`66c3295`](https://github.com/tobymao/sqlglot/commit/66c32958a9e46642077813adf90079098e41c87e) - **optimizer**: Enable USING expansion with multiple joins *(PR [#4113](https://github.com/tobymao/sqlglot/pull/4113) by [@dg-hellotwin](https://github.com/dg-hellotwin))* - :arrow_lower_right: *fixes issue [#4112](https://github.com/tobymao/sqlglot/issues/4112) opened by [@dg-hellotwin](https://github.com/dg-hellotwin)* ### :recycle: Refactors - [`7cf1d70`](https://github.com/tobymao/sqlglot/commit/7cf1d70e909ae319ff659e1455e6fcad1e8cf905) - **optimizer**: Optimize USING expansion *(PR [#4115](https://github.com/tobymao/sqlglot/pull/4115) by [@VaggelisD](https://github.com/VaggelisD))* ### :wrench: Chores - [`2dc0b86`](https://github.com/tobymao/sqlglot/commit/2dc0b8620e16b3cdf85f6bb6bea10c2527497933) - **optimizer**: rename helper function in expand_using *(PR [#4117](https://github.com/tobymao/sqlglot/pull/4117) by [@georgesittas](https://github.com/georgesittas))* - [`fd8b8ba`](https://github.com/tobymao/sqlglot/commit/fd8b8ba7dedaee5d237b080db5c4f7e83ba079e9) - create ARRAY_TYPES set under DataType *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.21.1] - 2024-09-13 ### :bug: Bug Fixes - [`06c76f7`](https://github.com/tobymao/sqlglot/commit/06c76f7471cdb679a7a7a35064204d94841fd929) - calling interval without unit *(commit by 
[@tobymao](https://github.com/tobymao))* ## [v25.21.0] - 2024-09-12 ### :boom: BREAKING CHANGES - due to [`da51ea5`](https://github.com/tobymao/sqlglot/commit/da51ea5c8f405859d877a25176e8e48ef8b4b112) - refactor exp.Chr *(PR [#4081](https://github.com/tobymao/sqlglot/pull/4081) by [@georgesittas](https://github.com/georgesittas))*: refactor exp.Chr (#4081) - due to [`9c527b5`](https://github.com/tobymao/sqlglot/commit/9c527b549cc56db9d8f44579397d9f9fe1440573) - treat Nullable as an arg instead of a DataType.TYPE *(PR [#4094](https://github.com/tobymao/sqlglot/pull/4094) by [@georgesittas](https://github.com/georgesittas))*: treat Nullable as an arg instead of a DataType.TYPE (#4094) ### :sparkles: New Features - [`5771d8d`](https://github.com/tobymao/sqlglot/commit/5771d8da94aff104206f93482e7b248d725f1843) - add merge expression builder *(PR [#4084](https://github.com/tobymao/sqlglot/pull/4084) by [@max-muoto](https://github.com/max-muoto))* - [`1d52709`](https://github.com/tobymao/sqlglot/commit/1d5270915d14f3f92341d5057b88b58fff6c0d97) - **postgres**: Parse DO NOTHING and RETURNING in MERGE statement *(PR [#4087](https://github.com/tobymao/sqlglot/pull/4087) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4083](https://github.com/tobymao/sqlglot/issues/4083) opened by [@max-muoto](https://github.com/max-muoto)* - [`1615bad`](https://github.com/tobymao/sqlglot/commit/1615bad98eeddc8e67e8002c3b1fe93bd7c3b690) - Add support for UUID function *(PR [#4089](https://github.com/tobymao/sqlglot/pull/4089) by [@VaggelisD](https://github.com/VaggelisD))* - [`5733600`](https://github.com/tobymao/sqlglot/commit/57336006795d32e9253a9df4813d3029d1d32ef1) - **bigquery**: transpile UUID type to STRING *(PR [#4093](https://github.com/tobymao/sqlglot/pull/4093) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4091](https://github.com/tobymao/sqlglot/issues/4091) opened by 
[@gigatexal](https://github.com/gigatexal)* - [`75230f5`](https://github.com/tobymao/sqlglot/commit/75230f5970c240add1cff7349fc65fb67541fa34) - **bigquery**: add support for the MERGE ... THEN INSERT ROW syntax *(PR [#4096](https://github.com/tobymao/sqlglot/pull/4096) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4095](https://github.com/tobymao/sqlglot/issues/4095) opened by [@ericist](https://github.com/ericist)* - [`f8d4dc4`](https://github.com/tobymao/sqlglot/commit/f8d4dc4bab90cd369eef090c23b81160a7ae78fc) - **parser**: add support for ALTER INDEX closes [#4105](https://github.com/tobymao/sqlglot/pull/4105) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`28c6f27`](https://github.com/tobymao/sqlglot/commit/28c6f27291d57d85917c62b387b86a598ee3c1d6) - **duckdb**: Support *COLUMNS() function *(PR [#4106](https://github.com/tobymao/sqlglot/pull/4106) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4101](https://github.com/tobymao/sqlglot/issues/4101) opened by [@aersam](https://github.com/aersam)* - [`3cb0041`](https://github.com/tobymao/sqlglot/commit/3cb00417f45624f012e5ce8ababfe3250e813b80) - **snowflake**: Fix exp.Pivot FOR IN clause *(PR [#4109](https://github.com/tobymao/sqlglot/pull/4109) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4108](https://github.com/tobymao/sqlglot/issues/4108) opened by [@kharigardner](https://github.com/kharigardner)* - [`7ac21a0`](https://github.com/tobymao/sqlglot/commit/7ac21a07c73cdf156ae4dc6a848b9f781b265d16) - **athena**: Improve DDL query support *(PR [#4099](https://github.com/tobymao/sqlglot/pull/4099) by [@erindru](https://github.com/erindru))* ### :bug: Bug Fixes - [`da51ea5`](https://github.com/tobymao/sqlglot/commit/da51ea5c8f405859d877a25176e8e48ef8b4b112) - **parser**: refactor exp.Chr *(PR [#4081](https://github.com/tobymao/sqlglot/pull/4081) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4080](https://github.com/tobymao/sqlglot/issues/4080) opened by [@EugeneTorap](https://github.com/EugeneTorap)* - [`6294f9e`](https://github.com/tobymao/sqlglot/commit/6294f9e6d08111c6088f5ed9846e7a64f7724801) - **starrocks**: generate ARRAY_FILTER for exp.ArrayFilter *(PR [#4088](https://github.com/tobymao/sqlglot/pull/4088) by [@gauravsagar483](https://github.com/gauravsagar483))* - [`1e02c02`](https://github.com/tobymao/sqlglot/commit/1e02c0221ea8445ccb5537b0a77e120c0b2c108c) - **mysql**: convert VARCHAR without size to TEXT for DDLs *(PR [#4092](https://github.com/tobymao/sqlglot/pull/4092) by [@georgesittas](https://github.com/georgesittas))* - [`cb5bcff`](https://github.com/tobymao/sqlglot/commit/cb5bcfff1f96972e75681bb2411bca8b60a4bff1) - **clickhouse**: generate formatDateTime instead of DATE_FORMAT fixes [#4098](https://github.com/tobymao/sqlglot/pull/4098) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b10255e`](https://github.com/tobymao/sqlglot/commit/b10255eb8b6b73bf5084fdf6bffd5a7fa351b1ec) - **snowflake**: Manually escape single quotes in colon operator *(PR [#4104](https://github.com/tobymao/sqlglot/pull/4104) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4090](https://github.com/tobymao/sqlglot/issues/4090) opened by [@jussihe-rec](https://github.com/jussihe-rec)* - [`67a9ad8`](https://github.com/tobymao/sqlglot/commit/67a9ad89abfce84f78dd1a34caa9dc8143233609) - Move JSON path escape to generation *(PR [#4110](https://github.com/tobymao/sqlglot/pull/4110) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4090](https://github.com/tobymao/sqlglot/issues/4090) opened by [@jussihe-rec](https://github.com/jussihe-rec)* ### :recycle: Refactors - [`9c527b5`](https://github.com/tobymao/sqlglot/commit/9c527b549cc56db9d8f44579397d9f9fe1440573) - treat Nullable as an arg instead 
of a DataType.TYPE *(PR [#4094](https://github.com/tobymao/sqlglot/pull/4094) by [@georgesittas](https://github.com/georgesittas))* - [`2961049`](https://github.com/tobymao/sqlglot/commit/296104950f2e679aea37810a48eb490e170518d3) - implement decorator to easily mark args as unsupported *(PR [#4111](https://github.com/tobymao/sqlglot/pull/4111) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`75e6406`](https://github.com/tobymao/sqlglot/commit/75e640672eef0ddf752ee36dbc6f904f8e06510f) - **prql**: rewrite tests to use `validate_all()` *(PR [#4097](https://github.com/tobymao/sqlglot/pull/4097) by [@JJHCool](https://github.com/JJHCool))* - [`e1f6ae3`](https://github.com/tobymao/sqlglot/commit/e1f6ae393fa2857dbbb9a14b03cbe39910207233) - **prql**: use validate_all instead of validate_identity *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.20.1] - 2024-09-06 ### :wrench: Chores - [`8a357cc`](https://github.com/tobymao/sqlglot/commit/8a357ccbcf2301f6a8d60c237a6397bf6547de14) - bump sqlglotrs to 0.2.12 -- remove relative readme path *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.20.0] - 2024-09-06 ### :boom: BREAKING CHANGES - due to [`3e1af21`](https://github.com/tobymao/sqlglot/commit/3e1af21787ee81df5cbb5eb8b4b7f808b404c870) - Canonicalize exp.RegexpExtract group default value *(PR [#4051](https://github.com/tobymao/sqlglot/pull/4051) by [@VaggelisD](https://github.com/VaggelisD))*: Canonicalize exp.RegexpExtract group default value (#4051) - due to [`c8e2eae`](https://github.com/tobymao/sqlglot/commit/c8e2eaecad0b3b0fff725512ef571de41c5be0a1) - do not canonicalize INTERVAL values to number literals *(commit by [@VaggelisD](https://github.com/VaggelisD))*: do not canonicalize INTERVAL values to number literals ### :sparkles: New Features - [`d3ee5ea`](https://github.com/tobymao/sqlglot/commit/d3ee5ea6abd0dfb6e5216bf212e9e737c163eeb9) - **oracle**: parse TRUNC to facilitate transpilation closes 
[#4054](https://github.com/tobymao/sqlglot/pull/4054) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`143f176`](https://github.com/tobymao/sqlglot/commit/143f176a893f060eecc1fbf4a5b5c54d35a3acc7) - **clickhouse**: transpile oracle functions chr, lag, lead *(PR [#4053](https://github.com/tobymao/sqlglot/pull/4053) by [@sleshJdev](https://github.com/sleshJdev))* - [`d89757e`](https://github.com/tobymao/sqlglot/commit/d89757e21665913d49a3ccc19deeea86ab59820c) - **postgres**: add support for the NOT VALID clause in ALTER TABLE fixes [#4077](https://github.com/tobymao/sqlglot/pull/4077) *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`3e1af21`](https://github.com/tobymao/sqlglot/commit/3e1af21787ee81df5cbb5eb8b4b7f808b404c870) - Canonicalize exp.RegexpExtract group default value *(PR [#4051](https://github.com/tobymao/sqlglot/pull/4051) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4039](https://github.com/tobymao/sqlglot/issues/4039) opened by [@hellozepp](https://github.com/hellozepp)* - [`75cafad`](https://github.com/tobymao/sqlglot/commit/75cafad45fec02eb27f42676ddca4d1777e800f7) - **starrocks**: Move the parse distribute and duplicate to Parser *(PR [#4062](https://github.com/tobymao/sqlglot/pull/4062) by [@hellozepp](https://github.com/hellozepp))* - [`74352d5`](https://github.com/tobymao/sqlglot/commit/74352d523333e5eff464f97b49a1bcfb11ec291b) - **tsql**: use plus operator for string concat to support more systems that use tsql *(PR [#4067](https://github.com/tobymao/sqlglot/pull/4067) by [@cpcloud](https://github.com/cpcloud))* - :arrow_lower_right: *fixes issue [#4066](https://github.com/tobymao/sqlglot/issues/4066) opened by [@cpcloud](https://github.com/cpcloud)* - [`c8e2eae`](https://github.com/tobymao/sqlglot/commit/c8e2eaecad0b3b0fff725512ef571de41c5be0a1) - do not canonicalize INTERVAL values to number literals *(commit by 
[@VaggelisD](https://github.com/VaggelisD))* - [`532a024`](https://github.com/tobymao/sqlglot/commit/532a024e1a1dbc422e603dc0336149362c5763df) - **snowflake, bigquery**: Remove exp.Trim generation *(PR [#4070](https://github.com/tobymao/sqlglot/pull/4070) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3095](https://github.com/TobikoData/sqlmesh/issues/3095) opened by [@plaflamme](https://github.com/plaflamme)* - [`ad7e582`](https://github.com/tobymao/sqlglot/commit/ad7e582504955c5dba84566994e81873a46d1c28) - **athena**: Apply correct quoting to queries depending on type (DML or DDL) *(PR [#4073](https://github.com/tobymao/sqlglot/pull/4073) by [@erindru](https://github.com/erindru))* - [`cc5b877`](https://github.com/tobymao/sqlglot/commit/cc5b8774c469250fd403ca3379f1c2dcab9d4017) - **parser**: Wrap column constraints in _parse_column_def() *(PR [#4078](https://github.com/tobymao/sqlglot/pull/4078) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4065](https://github.com/tobymao/sqlglot/issues/4065) opened by [@ajuszczak](https://github.com/ajuszczak)* - [`4eb384a`](https://github.com/tobymao/sqlglot/commit/4eb384a799b3ad0f152893eb6217131a3a698ff1) - **clickhouse**: Remove CURRENT_TIMESTAMP from NO_PAREN_FUNCTIONS *(PR [#4079](https://github.com/tobymao/sqlglot/pull/4079) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4076](https://github.com/tobymao/sqlglot/issues/4076) opened by [@hellozepp](https://github.com/hellozepp)* ### :recycle: Refactors - [`534f882`](https://github.com/tobymao/sqlglot/commit/534f88280a895d9f7503e48eedf600628d34aa82) - **clickhouse**: clean up chr, lag, lead generation *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`b986ebe`](https://github.com/tobymao/sqlglot/commit/b986ebe99c95ce6f76a76bfd0ea79ee4ac3757f0) - fill in more details for sqlglotrs pypi page *(PR 
[#4071](https://github.com/tobymao/sqlglot/pull/4071) by [@georgesittas](https://github.com/georgesittas))* - [`0310926`](https://github.com/tobymao/sqlglot/commit/0310926297b18714a02873b649061b50c7080ac9) - bump sqlglotrs to 0.2.11 (update pypi details) *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.19.0] - 2024-09-03 ### :boom: BREAKING CHANGES - due to [`6da9328`](https://github.com/tobymao/sqlglot/commit/6da932889c2d60c82be118842f4edee031009f8a) - refactor SET OPERATION handling to set correct defaults *(PR [#4009](https://github.com/tobymao/sqlglot/pull/4009) by [@georgesittas](https://github.com/georgesittas))*: refactor SET OPERATION handling to set correct defaults (#4009) - due to [`4b69d18`](https://github.com/tobymao/sqlglot/commit/4b69d18e8e23c9ba2b0a886be497df9c1071f26c) - use TO_GEOGRAPHY, TO_GEOMETRY instead of casts *(PR [#4017](https://github.com/tobymao/sqlglot/pull/4017) by [@georgesittas](https://github.com/georgesittas))*: use TO_GEOGRAPHY, TO_GEOMETRY instead of casts (#4017) - due to [`0985907`](https://github.com/tobymao/sqlglot/commit/098590718104b9a6e9c0340fbe05fd89759c142b) - Add UnsupportedError to unnest_to_explode transform *(PR [#4016](https://github.com/tobymao/sqlglot/pull/4016) by [@VaggelisD](https://github.com/VaggelisD))*: Add UnsupportedError to unnest_to_explode transform (#4016) - due to [`7d63d23`](https://github.com/tobymao/sqlglot/commit/7d63d235c1f8ce7a76db3d31f11050dc65c0fef1) - Support JSON_EXISTS, refactor ON handling *(PR [#4032](https://github.com/tobymao/sqlglot/pull/4032) by [@VaggelisD](https://github.com/VaggelisD))*: Support JSON_EXISTS, refactor ON handling (#4032) ### :sparkles: New Features - [`f550ba1`](https://github.com/tobymao/sqlglot/commit/f550ba1068eaa4be45c19b4a3ea11baad48b27c1) - **presto**: support [ SECURITY { DEFINER | INVOKER } ] *(PR [#4008](https://github.com/tobymao/sqlglot/pull/4008) by [@usmanovbf](https://github.com/usmanovbf))* - 
[`dedd757`](https://github.com/tobymao/sqlglot/commit/dedd75790ecc2549fa7b28b3612125ca2aaeb762) - **oracle**: Parse multitable inserts *(PR [#4000](https://github.com/tobymao/sqlglot/pull/4000) by [@usefulalgorithm](https://github.com/usefulalgorithm))* - [`0985907`](https://github.com/tobymao/sqlglot/commit/098590718104b9a6e9c0340fbe05fd89759c142b) - Add UnsupportedError to unnest_to_explode transform *(PR [#4016](https://github.com/tobymao/sqlglot/pull/4016) by [@VaggelisD](https://github.com/VaggelisD))* - [`8f5fccf`](https://github.com/tobymao/sqlglot/commit/8f5fccfca8502e0fe00420662825845cc640a1cb) - **presto**: generate non-iso DayOfWeek *(commit by [@georgesittas](https://github.com/georgesittas))* - [`7d63d23`](https://github.com/tobymao/sqlglot/commit/7d63d235c1f8ce7a76db3d31f11050dc65c0fef1) - **oracle**: Support JSON_EXISTS, refactor ON handling *(PR [#4032](https://github.com/tobymao/sqlglot/pull/4032) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4026](https://github.com/tobymao/sqlglot/issues/4026) opened by [@ashishshukla19](https://github.com/ashishshukla19)* - [`85cc7ad`](https://github.com/tobymao/sqlglot/commit/85cc7ad68599dde59ffab460d49010f167cab85d) - **duckdb**: Transpile BQ's exp.ArrayToString *(PR [#4034](https://github.com/tobymao/sqlglot/pull/4034) by [@VaggelisD](https://github.com/VaggelisD))* - [`7f2c7f1`](https://github.com/tobymao/sqlglot/commit/7f2c7f17f5d79c3ea93b43cdeacdb5339955c9a8) - Support for NORMALIZE() function *(PR [#4041](https://github.com/tobymao/sqlglot/pull/4041) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#4037](https://github.com/tobymao/sqlglot/issues/4037) opened by [@jasonrosendale](https://github.com/jasonrosendale)* - [`a1b9803`](https://github.com/tobymao/sqlglot/commit/a1b980327ff94519a4cba1e0e48066c0ea51d359) - support COMPRESS column constraint without a value *(PR [#4045](https://github.com/tobymao/sqlglot/pull/4045) by 
[@thomascjohnson](https://github.com/thomascjohnson))* ### :bug: Bug Fixes - [`8583772`](https://github.com/tobymao/sqlglot/commit/85837729e746743755294727d0394534834f4c4c) - **tsql**: Use count_big instead of count *(PR [#3996](https://github.com/tobymao/sqlglot/pull/3996) by [@colin-ho](https://github.com/colin-ho))* - :arrow_lower_right: *fixes issue [#3995](https://github.com/tobymao/sqlglot/issues/3995) opened by [@colin-ho](https://github.com/colin-ho)* - [`4b7ca2b`](https://github.com/tobymao/sqlglot/commit/4b7ca2be353e7432b84384ff9cfd43f3c43438e0) - **spark**: Custom annotation for SUBSTRING() *(PR [#4004](https://github.com/tobymao/sqlglot/pull/4004) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4002](https://github.com/tobymao/sqlglot/issues/4002) opened by [@racevedoo](https://github.com/racevedoo)* - [`cb172db`](https://github.com/tobymao/sqlglot/commit/cb172dbe4d13fd3badad352ea79d2fd6e5271576) - **clickhouse**: ensure that ALL and DISTINCT are rendered for except and intersect *(PR [#4007](https://github.com/tobymao/sqlglot/pull/4007) by [@cpcloud](https://github.com/cpcloud))* - :arrow_lower_right: *fixes issue [#4005](https://github.com/tobymao/sqlglot/issues/4005) opened by [@cpcloud](https://github.com/cpcloud)* - [`829fdcb`](https://github.com/tobymao/sqlglot/commit/829fdcb1dbe52710269823bda93e3e49c02dbf63) - **starrocks**: exp.Unnest transpilation *(PR [#3999](https://github.com/tobymao/sqlglot/pull/3999) by [@hellozepp](https://github.com/hellozepp))* - :arrow_lower_right: *fixes issue [#3962](https://github.com/tobymao/sqlglot/issues/3962) opened by [@hellozepp](https://github.com/hellozepp)* - [`6da9328`](https://github.com/tobymao/sqlglot/commit/6da932889c2d60c82be118842f4edee031009f8a) - refactor SET OPERATION handling to set correct defaults *(PR [#4009](https://github.com/tobymao/sqlglot/pull/4009) by [@georgesittas](https://github.com/georgesittas))* - 
[`23a928e`](https://github.com/tobymao/sqlglot/commit/23a928edc1d204a13516e0db38336774962a135e) - **mysql**: Preserve roundtrip of %a, %W time formats *(PR [#4014](https://github.com/tobymao/sqlglot/pull/4014) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#4011](https://github.com/tobymao/sqlglot/issues/4011) opened by [@hellozepp](https://github.com/hellozepp)* - [`2d4483c`](https://github.com/tobymao/sqlglot/commit/2d4483c0f79c5c72438a7093c938b1f178e5d48a) - don't log warning in to_json_path conditionally *(PR [#4015](https://github.com/tobymao/sqlglot/pull/4015) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4013](https://github.com/tobymao/sqlglot/issues/4013) opened by [@hellozepp](https://github.com/hellozepp)* - [`4b69d18`](https://github.com/tobymao/sqlglot/commit/4b69d18e8e23c9ba2b0a886be497df9c1071f26c) - **snowflake**: use TO_GEOGRAPHY, TO_GEOMETRY instead of casts *(PR [#4017](https://github.com/tobymao/sqlglot/pull/4017) by [@georgesittas](https://github.com/georgesittas))* - [`1108426`](https://github.com/tobymao/sqlglot/commit/1108426a0eb23bbcaec8bed946f1dae6682bc1dd) - **optimizer**: annotate unary expressions correctly *(PR [#4019](https://github.com/tobymao/sqlglot/pull/4019) by [@georgesittas](https://github.com/georgesittas))* - [`5fad18c`](https://github.com/tobymao/sqlglot/commit/5fad18c6cb5f62630cdfa2616231436586c41d67) - **presto**: exp.DayOfWeek *(PR [#4024](https://github.com/tobymao/sqlglot/pull/4024) by [@hellozepp](https://github.com/hellozepp))* - [`ea9a494`](https://github.com/tobymao/sqlglot/commit/ea9a4948a3e1619b885fc0b1522a7382d68c9cbe) - **parser**: consume STREAM in _parse_select only if it's a VAR, closes [#4029](https://github.com/tobymao/sqlglot/pull/4029) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`c239a74`](https://github.com/tobymao/sqlglot/commit/c239a741233bf858ce686d2d32a657cbedb49699) - transpile null exclusion for 
ARRAY_AGG *(PR [#4033](https://github.com/tobymao/sqlglot/pull/4033) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#4031](https://github.com/tobymao/sqlglot/issues/4031) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`60a8f16`](https://github.com/tobymao/sqlglot/commit/60a8f16b5386fe334b6e15afa967ad7bdd2a83de) - **parser**: don't consume strings in match_text_seq *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a726583`](https://github.com/tobymao/sqlglot/commit/a726583995716f815946b4c81c07a916ade727b7) - **parser**: don't consume strings in match_texts *(commit by [@georgesittas](https://github.com/georgesittas))* - [`551d32f`](https://github.com/tobymao/sqlglot/commit/551d32fdebfc78506b77e4f6e6882d2a8cbd457c) - **postgres**: Support for DROP INDEX CONCURRENTLY. *(PR [#4040](https://github.com/tobymao/sqlglot/pull/4040) by [@EdgyEdgemond](https://github.com/EdgyEdgemond))* - :arrow_lower_right: *fixes issue [#3783](https://github.com/tobymao/sqlglot/issues/3783) opened by [@EdgyEdgemond](https://github.com/EdgyEdgemond)* - [`f55647d`](https://github.com/tobymao/sqlglot/commit/f55647d9d9c088880c0c16efff23ef8d22c2be44) - **starrocks**: exp.Create transpilation *(PR [#4023](https://github.com/tobymao/sqlglot/pull/4023) by [@hellozepp](https://github.com/hellozepp))* - :arrow_lower_right: *fixes issue [#3997](https://github.com/tobymao/sqlglot/issues/3997) opened by [@hellozepp](https://github.com/hellozepp)* - [`bf0f5fa`](https://github.com/tobymao/sqlglot/commit/bf0f5fa1ab44daa74102b0f16ae16f905b175fbc) - **parser**: Ensure exp.Coalesce expressions is a list *(PR [#4050](https://github.com/tobymao/sqlglot/pull/4050) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3080](https://github.com/TobikoData/sqlmesh/issues/3080) opened by [@Ziemin](https://github.com/Ziemin)* ### :recycle: Refactors - 
[`6494776`](https://github.com/tobymao/sqlglot/commit/6494776a45ae4975cee21f70b5f383d29530d155) - simplify multi-insert generation, fix pretty mode *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b11c73e`](https://github.com/tobymao/sqlglot/commit/b11c73e38aa495715c327f44586714e19f699c9c) - clean up starrocks DISTRIBUTED BY property generation *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`c35a62c`](https://github.com/tobymao/sqlglot/commit/c35a62cdbc29c796bf0728d3f26e5ae5474881a8) - set the license for sqlglotrs *(PR [#4048](https://github.com/tobymao/sqlglot/pull/4048) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#4047](https://github.com/tobymao/sqlglot/issues/4047) opened by [@chriscc2](https://github.com/chriscc2)* - [`9b7eb2e`](https://github.com/tobymao/sqlglot/commit/9b7eb2e40e4bec4d18664f09e01c1165122dd43f) - bump sqlglotrs to v0.2.10 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.18.0] - 2024-08-28 ### :boom: BREAKING CHANGES - due to [`22bb9a0`](https://github.com/tobymao/sqlglot/commit/22bb9a0e5c64ae344c9e25ed34200ed743e7b8f0) - stop normalizing qualified anonymous functions *(PR [#3969](https://github.com/tobymao/sqlglot/pull/3969) by [@georgesittas](https://github.com/georgesittas))*: stop normalizing qualified anonymous functions (#3969) - due to [`8aec682`](https://github.com/tobymao/sqlglot/commit/8aec68253b10dcbfe7cc5b3d6e1145ae714ca346) - mysql/tsql datetime precision, formatting, exp.AtTimeZone *(PR [#3951](https://github.com/tobymao/sqlglot/pull/3951) by [@erindru](https://github.com/erindru))*: mysql/tsql datetime precision, formatting, exp.AtTimeZone (#3951) - due to [`2f3626a`](https://github.com/tobymao/sqlglot/commit/2f3626a4fc20c46411cd91bf8beda2bdd103ca4a) - Generation of exp.SHA2, exp.Transform, exp.IgnoreNulls *(PR [#3980](https://github.com/tobymao/sqlglot/pull/3980) by 
[@VaggelisD](https://github.com/VaggelisD))*: Generation of exp.SHA2, exp.Transform, exp.IgnoreNulls (#3980) - due to [`905b722`](https://github.com/tobymao/sqlglot/commit/905b7226ae4a6dc505fe303bb4df3818cb586826) - preserve each distinct CUBE/ROLLUP/GROUPING SET clause *(PR [#3985](https://github.com/tobymao/sqlglot/pull/3985) by [@georgesittas](https://github.com/georgesittas))*: preserve each distinct CUBE/ROLLUP/GROUPING SET clause (#3985) ### :sparkles: New Features - [`48b214d`](https://github.com/tobymao/sqlglot/commit/48b214da7e39d36938d12059deb827d0a5f6a5a2) - **postgres**: Support for IS JSON predicate *(PR [#3971](https://github.com/tobymao/sqlglot/pull/3971) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3965](https://github.com/tobymao/sqlglot/issues/3965) opened by [@faisal-ksolves](https://github.com/faisal-ksolves)* - [`f7e4e4a`](https://github.com/tobymao/sqlglot/commit/f7e4e4adc64aaef73d23c2550a4bfa9958d4851b) - **duckdb**: add support for the GLOB table function closes [#3973](https://github.com/tobymao/sqlglot/pull/3973) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a0d1377`](https://github.com/tobymao/sqlglot/commit/a0d137787885627aae07f11a9c18a4cc133baa0a) - **spark**: add support for table statement in INSERT *(PR [#3986](https://github.com/tobymao/sqlglot/pull/3986) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3984](https://github.com/tobymao/sqlglot/issues/3984) opened by [@madeirak](https://github.com/madeirak)* - [`f5bfd67`](https://github.com/tobymao/sqlglot/commit/f5bfd67341518d0ecb1c3693e0b41ed5c1cf0596) - **mysql**: Parse JSON_VALUE() *(PR [#3987](https://github.com/tobymao/sqlglot/pull/3987) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3983](https://github.com/tobymao/sqlglot/issues/3983) opened by [@ashishshukla19](https://github.com/ashishshukla19)* - 
[`79e92ad`](https://github.com/tobymao/sqlglot/commit/79e92ad565c42098ff7b7921fe04e6aac7859dd8) - **spark**: Default naming of STRUCT fields *(PR [#3991](https://github.com/tobymao/sqlglot/pull/3991) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3988](https://github.com/tobymao/sqlglot/issues/3988) opened by [@dor-bernstein](https://github.com/dor-bernstein)* ### :bug: Bug Fixes - [`22bb9a0`](https://github.com/tobymao/sqlglot/commit/22bb9a0e5c64ae344c9e25ed34200ed743e7b8f0) - stop normalizing qualified anonymous functions *(PR [#3969](https://github.com/tobymao/sqlglot/pull/3969) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3967](https://github.com/tobymao/sqlglot/issues/3967) opened by [@cpcloud](https://github.com/cpcloud)* - [`8aec682`](https://github.com/tobymao/sqlglot/commit/8aec68253b10dcbfe7cc5b3d6e1145ae714ca346) - mysql/tsql datetime precision, formatting, exp.AtTimeZone *(PR [#3951](https://github.com/tobymao/sqlglot/pull/3951) by [@erindru](https://github.com/erindru))* - [`d37a5bb`](https://github.com/tobymao/sqlglot/commit/d37a5bbfcd5732aa64a24bd83dde4abcac8b0bed) - **snowflake**: handle DIV0 case where dividend is null *(PR [#3975](https://github.com/tobymao/sqlglot/pull/3975) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3974](https://github.com/tobymao/sqlglot/issues/3974) opened by [@Nathan-Fenner](https://github.com/Nathan-Fenner)* - [`b2f877b`](https://github.com/tobymao/sqlglot/commit/b2f877ba5fc9ec9fdafad74196dda1631fdfc0c1) - **oracle**: Use LTRIM/RTRIM unless BOTH is specified *(PR [#3977](https://github.com/tobymao/sqlglot/pull/3977) by [@VaggelisD](https://github.com/VaggelisD))* - [`201b51a`](https://github.com/tobymao/sqlglot/commit/201b51a860d4db2b2e49e04f6534b7ad22ae287c) - **sqlite**: Make IS parser more lenient *(PR [#3981](https://github.com/tobymao/sqlglot/pull/3981) by 
[@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3978](https://github.com/tobymao/sqlglot/issues/3978) opened by [@focafull](https://github.com/focafull)* - [`2f3626a`](https://github.com/tobymao/sqlglot/commit/2f3626a4fc20c46411cd91bf8beda2bdd103ca4a) - **duckdb**: Generation of exp.SHA2, exp.Transform, exp.IgnoreNulls *(PR [#3980](https://github.com/tobymao/sqlglot/pull/3980) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3972](https://github.com/tobymao/sqlglot/issues/3972) opened by [@dor-bernstein](https://github.com/dor-bernstein)* - [`905b722`](https://github.com/tobymao/sqlglot/commit/905b7226ae4a6dc505fe303bb4df3818cb586826) - **parser**: preserve each distinct CUBE/ROLLUP/GROUPING SET clause *(PR [#3985](https://github.com/tobymao/sqlglot/pull/3985) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3979](https://github.com/tobymao/sqlglot/issues/3979) opened by [@cpcloud](https://github.com/cpcloud)* - [`ee9dc39`](https://github.com/tobymao/sqlglot/commit/ee9dc399134ad86720abe480ee2565de822336cf) - Fix binding of TABLESAMPLE to exp.Subquery instead of top-level exp.Select *(PR [#3994](https://github.com/tobymao/sqlglot/pull/3994) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3992](https://github.com/tobymao/sqlglot/issues/3992) opened by [@cpcloud](https://github.com/cpcloud)* ## [v25.17.0] - 2024-08-26 ### :boom: BREAKING CHANGES - due to [`0a9ba05`](https://github.com/tobymao/sqlglot/commit/0a9ba0536235e10aed02d4ff5e571e435a00febc) - 0 is falsey *(commit by [@tobymao](https://github.com/tobymao))*: 0 is falsey ### :bug: Bug Fixes - [`42b725e`](https://github.com/tobymao/sqlglot/commit/42b725e4821a1426fe7c93f9fecbd4ec372accc9) - flaky test closes [#3961](https://github.com/tobymao/sqlglot/pull/3961) *(commit by [@tobymao](https://github.com/tobymao))* - 
[`cc29921`](https://github.com/tobymao/sqlglot/commit/cc299217f5d31a0406ba3c4778bb1ce581fe3f4a) - Parse LTRIM/RTRIM functions as positional exp.Trim *(PR [#3958](https://github.com/tobymao/sqlglot/pull/3958) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3957](https://github.com/tobymao/sqlglot/issues/3957) opened by [@sleshJdev](https://github.com/sleshJdev)* - [`678e692`](https://github.com/tobymao/sqlglot/commit/678e6926fdbefb16efbbcaef9cd6c5ca284af54a) - make sample an arg of table, not a wrapper *(PR [#3963](https://github.com/tobymao/sqlglot/pull/3963) by [@barakalon](https://github.com/barakalon))* - [`0a9ba05`](https://github.com/tobymao/sqlglot/commit/0a9ba0536235e10aed02d4ff5e571e435a00febc) - 0 is falsey *(commit by [@tobymao](https://github.com/tobymao))* - [`c1ac987`](https://github.com/tobymao/sqlglot/commit/c1ac9872a6f77acd52546edbc9da53e350ebf080) - **starrocks**: exp.Array generation, exp.Unnest alias *(PR [#3964](https://github.com/tobymao/sqlglot/pull/3964) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3962](https://github.com/tobymao/sqlglot/issues/3962) opened by [@hellozepp](https://github.com/hellozepp)* ## [v25.16.1] - 2024-08-23 ### :bug: Bug Fixes - [`c4e5be7`](https://github.com/tobymao/sqlglot/commit/c4e5be7d3f4d7a9075d11dc56ece02774f32e749) - include dialect when parsing inside cast *(PR [#3960](https://github.com/tobymao/sqlglot/pull/3960) by [@eakmanrq](https://github.com/eakmanrq))* ### :wrench: Chores - [`794dc4c`](https://github.com/tobymao/sqlglot/commit/794dc4cea3c4298c8986ade8e0fee88479851b34) - update readme to include onboarding doc *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.16.0] - 2024-08-22 ### :boom: BREAKING CHANGES - due to [`f68d155`](https://github.com/tobymao/sqlglot/commit/f68d155c38a79a6527685c37f8de8773ce790bca) - exp.Merge, for Trino and Postgres, don't strip the target alias from the WHEN MATCHED condition to 
prevent an ambiguous column error *(PR [#3940](https://github.com/tobymao/sqlglot/pull/3940) by [@erindru](https://github.com/erindru))*: exp.Merge, for Trino and Postgres, don't strip the target alias from the WHEN MATCHED condition to prevent an ambiguous column error (#3940) - due to [`667f7d9`](https://github.com/tobymao/sqlglot/commit/667f7d9e94e14ff619998d2001b6116d363f2a1f) - attach INTERPOLATE expressions to WithFill *(PR [#3944](https://github.com/tobymao/sqlglot/pull/3944) by [@georgesittas](https://github.com/georgesittas))*: attach INTERPOLATE expressions to WithFill (#3944) - due to [`145fdbf`](https://github.com/tobymao/sqlglot/commit/145fdbf6bb02fa1c55087bfd9f6b3a15fbd4b684) - Redshift date format *(PR [#3942](https://github.com/tobymao/sqlglot/pull/3942) by [@erindru](https://github.com/erindru))*: Redshift date format (#3942) - due to [`a84a21a`](https://github.com/tobymao/sqlglot/commit/a84a21aaef0e65754e67ecebdfcbf7136c77acc7) - Add timezone support to exp.TimeStrToTime *(PR [#3938](https://github.com/tobymao/sqlglot/pull/3938) by [@erindru](https://github.com/erindru))*: Add timezone support to exp.TimeStrToTime (#3938) ### :sparkles: New Features - [`a84a21a`](https://github.com/tobymao/sqlglot/commit/a84a21aaef0e65754e67ecebdfcbf7136c77acc7) - Add timezone support to exp.TimeStrToTime *(PR [#3938](https://github.com/tobymao/sqlglot/pull/3938) by [@erindru](https://github.com/erindru))* - [`70a052a`](https://github.com/tobymao/sqlglot/commit/70a052a672d0c72a3e53b19316defb01144f2907) - transpile from_iso8601_timestamp from presto/trino to duckdb *(PR [#3956](https://github.com/tobymao/sqlglot/pull/3956) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`f68d155`](https://github.com/tobymao/sqlglot/commit/f68d155c38a79a6527685c37f8de8773ce790bca) - exp.Merge, for Trino and Postgres, don't strip the target alias from the WHEN MATCHED condition to prevent an ambiguous column error *(PR 
[#3940](https://github.com/tobymao/sqlglot/pull/3940) by [@erindru](https://github.com/erindru))* - [`0458dc0`](https://github.com/tobymao/sqlglot/commit/0458dc0fa1978388336b9fa459b28508d7b40f9e) - **optimizer**: expand alias refs recursive CTE edge case patch *(PR [#3943](https://github.com/tobymao/sqlglot/pull/3943) by [@georgesittas](https://github.com/georgesittas))* - [`145fdbf`](https://github.com/tobymao/sqlglot/commit/145fdbf6bb02fa1c55087bfd9f6b3a15fbd4b684) - Redshift date format *(PR [#3942](https://github.com/tobymao/sqlglot/pull/3942) by [@erindru](https://github.com/erindru))* - [`6233c2c`](https://github.com/tobymao/sqlglot/commit/6233c2c75ab3a3bc0dfbf28d3fa8adc1be719281) - **parser**: Support sqls with DESCRIBE partition *(PR [#3945](https://github.com/tobymao/sqlglot/pull/3945) by [@gp1105739](https://github.com/gp1105739))* - :arrow_lower_right: *fixes issue [#3941](https://github.com/tobymao/sqlglot/issues/3941) opened by [@gp1105739](https://github.com/gp1105739)* - [`85cd6e5`](https://github.com/tobymao/sqlglot/commit/85cd6e507b73be89d2d9b2c88c7370a14b813b5c) - **bigquery**: Map %e to %-d *(PR [#3946](https://github.com/tobymao/sqlglot/pull/3946) by [@VaggelisD](https://github.com/VaggelisD))* - [`1ba0f03`](https://github.com/tobymao/sqlglot/commit/1ba0f03fbfe5dadc3411c7ff26e6dfbef852491a) - **duckdb**: TIME does not support modifiers *(PR [#3947](https://github.com/tobymao/sqlglot/pull/3947) by [@georgesittas](https://github.com/georgesittas))* - [`d5d3615`](https://github.com/tobymao/sqlglot/commit/d5d361571cd463869e2243d257f9b6ad0615c070) - **optimizer**: convert TsOrDsToDate to Cast more conservatively *(PR [#3949](https://github.com/tobymao/sqlglot/pull/3949) by [@barakalon](https://github.com/barakalon))* - [`fb6edc7`](https://github.com/tobymao/sqlglot/commit/fb6edc774539704b48e7d2805ef3211636af18aa) - oracle/snowflake comments closes [#3950](https://github.com/tobymao/sqlglot/pull/3950) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`1284fd0`](https://github.com/tobymao/sqlglot/commit/1284fd0a64890d3548af7ed0a0cc05bb6166ccb2) - **oracle**: Revert NVL() being parsed into exp.Anonymous *(PR [#3954](https://github.com/tobymao/sqlglot/pull/3954) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3952](https://github.com/tobymao/sqlglot/issues/3952) opened by [@sleshJdev](https://github.com/sleshJdev)* - [`c99f8d5`](https://github.com/tobymao/sqlglot/commit/c99f8d5bda79f16fb0d71ae73127cc826860e104) - **duckdb**: Fix exp.Unnest generation for BQ's nested arrays *(PR [#3931](https://github.com/tobymao/sqlglot/pull/3931) by [@VaggelisD](https://github.com/VaggelisD))* ### :recycle: Refactors - [`f16b0e7`](https://github.com/tobymao/sqlglot/commit/f16b0e7203ad60f0ce50861c4d78176ca53eb2cf) - iteratively generate binary expressions *(PR [#3926](https://github.com/tobymao/sqlglot/pull/3926) by [@MatMoore](https://github.com/MatMoore))* - [`667f7d9`](https://github.com/tobymao/sqlglot/commit/667f7d9e94e14ff619998d2001b6116d363f2a1f) - **clickhouse**: attach INTERPOLATE expressions to WithFill *(PR [#3944](https://github.com/tobymao/sqlglot/pull/3944) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`c697357`](https://github.com/tobymao/sqlglot/commit/c6973572dfd953b5539bb4e9dcba402c0c3c6acf) - slightly refactor Generator.binary, add stress test *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6a5f619`](https://github.com/tobymao/sqlglot/commit/6a5f6199f6da0053fa4564e71a17e3b9f91f0496) - New doc - Onboarding Doc *(PR [#3902](https://github.com/tobymao/sqlglot/pull/3902) by [@VaggelisD](https://github.com/VaggelisD))* ## [v25.15.0] - 2024-08-19 ### :boom: BREAKING CHANGES - due to [`a668655`](https://github.com/tobymao/sqlglot/commit/a668655440815605a566c52b65b28decdfb551eb) - preserve SYSDATE *(PR [#3935](https://github.com/tobymao/sqlglot/pull/3935) by 
[@georgesittas](https://github.com/georgesittas))*: preserve SYSDATE (#3935) ### :sparkles: New Features - [`be11f4c`](https://github.com/tobymao/sqlglot/commit/be11f4c57c7842f69950bafc3225fb9c139af014) - **clickhouse**: add support for "@"-style parameters *(PR [#3939](https://github.com/tobymao/sqlglot/pull/3939) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`a668655`](https://github.com/tobymao/sqlglot/commit/a668655440815605a566c52b65b28decdfb551eb) - **oracle**: preserve SYSDATE *(PR [#3935](https://github.com/tobymao/sqlglot/pull/3935) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3934](https://github.com/tobymao/sqlglot/issues/3934) opened by [@Hal-H2Apps](https://github.com/Hal-H2Apps)* - [`b824f8a`](https://github.com/tobymao/sqlglot/commit/b824f8a4148ace01750db301daf4a663dc03b580) - **parser**: allow complex expressions for UNPIVOT alias *(PR [#3937](https://github.com/tobymao/sqlglot/pull/3937) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3936](https://github.com/tobymao/sqlglot/issues/3936) opened by [@dbittenbender](https://github.com/dbittenbender)* ### :recycle: Refactors - [`f4c34d3`](https://github.com/tobymao/sqlglot/commit/f4c34d37c5773c37a13437c7e0e7eb27b4e98877) - move "MINUS": TokenType.EXCEPT to hive instead of spark *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.14.0] - 2024-08-19 ### :boom: BREAKING CHANGES - due to [`605f1b2`](https://github.com/tobymao/sqlglot/commit/605f1b217d5d1de654cfe2fa1b51435a1a71ae62) - use creatable kind mapping dict for schema<-->database substitution *(PR [#3924](https://github.com/tobymao/sqlglot/pull/3924) by [@treysp](https://github.com/treysp))*: use creatable kind mapping dict for schema<-->database substitution (#3924) - due to [`f418caa`](https://github.com/tobymao/sqlglot/commit/f418caafa8ed317f9e360c6c8f01bdac596258e5) - skip nullable comparison in is_type by 
default *(PR [#3927](https://github.com/tobymao/sqlglot/pull/3927) by [@georgesittas](https://github.com/georgesittas))*: skip nullable comparison in is_type by default (#3927) ### :sparkles: New Features - [`f418caa`](https://github.com/tobymao/sqlglot/commit/f418caafa8ed317f9e360c6c8f01bdac596258e5) - skip nullable comparison in is_type by default *(PR [#3927](https://github.com/tobymao/sqlglot/pull/3927) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`605f1b2`](https://github.com/tobymao/sqlglot/commit/605f1b217d5d1de654cfe2fa1b51435a1a71ae62) - **clickhouse**: use creatable kind mapping dict for schema<-->database substitution *(PR [#3924](https://github.com/tobymao/sqlglot/pull/3924) by [@treysp](https://github.com/treysp))* ## [v25.13.0] - 2024-08-17 ### :boom: BREAKING CHANGES - due to [`102f5d4`](https://github.com/tobymao/sqlglot/commit/102f5d48279ac1a7a1851737f55a13bd08512f3d) - infer set op types more accurately *(PR [#3918](https://github.com/tobymao/sqlglot/pull/3918) by [@georgesittas](https://github.com/georgesittas))*: infer set op types more accurately (#3918) - due to [`46496a6`](https://github.com/tobymao/sqlglot/commit/46496a6af80bd49d36ef8d265800679d2b07c4db) - improve transpilation of nullable/non-nullable data types *(PR [#3921](https://github.com/tobymao/sqlglot/pull/3921) by [@georgesittas](https://github.com/georgesittas))*: improve transpilation of nullable/non-nullable data types (#3921) ### :bug: Bug Fixes - [`c74a8fd`](https://github.com/tobymao/sqlglot/commit/c74a8fd2acd859f5947f27a8f091f13fba1d39e4) - **clickhouse**: make try_cast toXXXOrNull() functions case-specific *(PR [#3917](https://github.com/tobymao/sqlglot/pull/3917) by [@treysp](https://github.com/treysp))* - [`102f5d4`](https://github.com/tobymao/sqlglot/commit/102f5d48279ac1a7a1851737f55a13bd08512f3d) - **optimizer**: infer set op types more accurately *(PR [#3918](https://github.com/tobymao/sqlglot/pull/3918) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3916](https://github.com/tobymao/sqlglot/issues/3916) opened by [@racevedoo](https://github.com/racevedoo)* ### :recycle: Refactors - [`1d436d4`](https://github.com/tobymao/sqlglot/commit/1d436d45b4469bb8195dd3597319b6fc5c3f2344) - **clickhouse**: transpile TRY_CAST(x AS T) to CAST(x AS Nullable(T)) *(PR [#3919](https://github.com/tobymao/sqlglot/pull/3919) by [@georgesittas](https://github.com/georgesittas))* - [`46496a6`](https://github.com/tobymao/sqlglot/commit/46496a6af80bd49d36ef8d265800679d2b07c4db) - **clickhouse**: improve transpilation of nullable/non-nullable data types *(PR [#3921](https://github.com/tobymao/sqlglot/pull/3921) by [@georgesittas](https://github.com/georgesittas))* ## [v25.12.0] - 2024-08-15 ### :boom: BREAKING CHANGES - due to [`e8e70f3`](https://github.com/tobymao/sqlglot/commit/e8e70f3a6cc2ca24de2afe622bbcbccb1ac8aeb3) - treat DATABASE kind as SCHEMA (and conversely) in exp.Create *(PR [#3912](https://github.com/tobymao/sqlglot/pull/3912) by [@georgesittas](https://github.com/georgesittas))*: treat DATABASE kind as SCHEMA (and conversely) in exp.Create (#3912) ### :sparkles: New Features - [`9a66903`](https://github.com/tobymao/sqlglot/commit/9a66903975f16a09d84337a8405bf70945706412) - **clickhouse**: add support for TryCast generation *(PR [#3913](https://github.com/tobymao/sqlglot/pull/3913) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`7965cac`](https://github.com/tobymao/sqlglot/commit/7965cace1d9632c865cae257781072b0932b709d) - **clickhouse**: wrap query in CTAS when COMMENT prop is present *(PR [#3911](https://github.com/tobymao/sqlglot/pull/3911) by [@georgesittas](https://github.com/georgesittas))* - [`e8e70f3`](https://github.com/tobymao/sqlglot/commit/e8e70f3a6cc2ca24de2afe622bbcbccb1ac8aeb3) - **clickhouse**: treat DATABASE kind as SCHEMA (and conversely) in exp.Create *(PR 
[#3912](https://github.com/tobymao/sqlglot/pull/3912) by [@georgesittas](https://github.com/georgesittas))* ## [v25.11.3] - 2024-08-14 ### :bug: Bug Fixes - [`57f7aa9`](https://github.com/tobymao/sqlglot/commit/57f7aa9108ed38c0e83ef5bf4fac900434fac777) - **clickhouse**: COMMENT property in CTAS needs to come last *(PR [#3910](https://github.com/tobymao/sqlglot/pull/3910) by [@georgesittas](https://github.com/georgesittas))* ## [v25.11.2] - 2024-08-14 ### :bug: Bug Fixes - [`c22f411`](https://github.com/tobymao/sqlglot/commit/c22f41129985ecfd3b3906b9594ca1692b91708c) - **clickhouse**: ensure we generate the Table in creatable_sql if it represents a db ref *(commit by [@georgesittas](https://github.com/georgesittas))* - [`19eee93`](https://github.com/tobymao/sqlglot/commit/19eee93c8027e6c612611d3b54980e193e0b6f49) - various fixups for unnest(generatedatearray) transpilation *(PR [#3906](https://github.com/tobymao/sqlglot/pull/3906) by [@georgesittas](https://github.com/georgesittas))* ## [v25.11.1] - 2024-08-13 ### :sparkles: New Features - [`790c1b1`](https://github.com/tobymao/sqlglot/commit/790c1b141d4bc2206df017c70416b589932886a4) - **clickhouse**: support PARTITION BY, SETTINGS in Insert expression *(PR [#3904](https://github.com/tobymao/sqlglot/pull/3904) by [@georgesittas](https://github.com/georgesittas))* ## [v25.11.0] - 2024-08-13 ### :boom: BREAKING CHANGES - due to [`0428c37`](https://github.com/tobymao/sqlglot/commit/0428c37e11f42be8eba352e69c1d2e7425824d38) - Support ALTER VIEW AS SELECT *(PR [#3873](https://github.com/tobymao/sqlglot/pull/3873) by [@xiaohui-sun](https://github.com/xiaohui-sun))*: Support ALTER VIEW AS SELECT (#3873) - due to [`a666117`](https://github.com/tobymao/sqlglot/commit/a666117dcb887031f5995c50d687405b9c145fbd) - parse v NOT IN (subquery) as v <> ALL (subquery) *(PR [#3891](https://github.com/tobymao/sqlglot/pull/3891) by [@georgesittas](https://github.com/georgesittas))*: parse v NOT IN (subquery) as v <> ALL (subquery) 
(#3891) - due to [`d968932`](https://github.com/tobymao/sqlglot/commit/d968932ef742e97ccf3ec6cdca0bc3319830f0a9) - treat identifiers as case-sensitive, handle EMPTY table property, generate DateStrToDate *(PR [#3895](https://github.com/tobymao/sqlglot/pull/3895) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))*: treat identifiers as case-sensitive, handle EMPTY table property, generate DateStrToDate (#3895) - due to [`1d7319a`](https://github.com/tobymao/sqlglot/commit/1d7319a8425aace6c11f59552fdd19bdbf5efd03) - transpile Unnest(GenerateDateArray(...)) to various dialects *(PR [#3899](https://github.com/tobymao/sqlglot/pull/3899) by [@georgesittas](https://github.com/georgesittas))*: transpile Unnest(GenerateDateArray(...)) to various dialects (#3899) ### :sparkles: New Features - [`0428c37`](https://github.com/tobymao/sqlglot/commit/0428c37e11f42be8eba352e69c1d2e7425824d38) - **parser**: Support ALTER VIEW AS SELECT *(PR [#3873](https://github.com/tobymao/sqlglot/pull/3873) by [@xiaohui-sun](https://github.com/xiaohui-sun))* - [`8a48458`](https://github.com/tobymao/sqlglot/commit/8a48458e20e6d0833638e750565da138bdcd5d55) - **athena**: parse UNLOAD into exp.Command closes [#3896](https://github.com/tobymao/sqlglot/pull/3896) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6f1527f`](https://github.com/tobymao/sqlglot/commit/6f1527fd3ebd16f49edb351f050a1db687824530) - **bigquery**: transpile format_datetime, datetime_trunc to duckdb *(PR [#3894](https://github.com/tobymao/sqlglot/pull/3894) by [@skadel](https://github.com/skadel))* - [`1d7319a`](https://github.com/tobymao/sqlglot/commit/1d7319a8425aace6c11f59552fdd19bdbf5efd03) - transpile Unnest(GenerateDateArray(...)) to various dialects *(PR [#3899](https://github.com/tobymao/sqlglot/pull/3899) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`2cac14f`](https://github.com/tobymao/sqlglot/commit/2cac14f480dcaf458b1eb36b694770ce24f56e61) - generate set ops 
in ALTER VIEW AS statement *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a666117`](https://github.com/tobymao/sqlglot/commit/a666117dcb887031f5995c50d687405b9c145fbd) - **snowflake**: parse v NOT IN (subquery) as v <> ALL (subquery) *(PR [#3891](https://github.com/tobymao/sqlglot/pull/3891) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3890](https://github.com/tobymao/sqlglot/issues/3890) opened by [@ajuszczak](https://github.com/ajuszczak)* - [`924a4af`](https://github.com/tobymao/sqlglot/commit/924a4af146952e84688fdccb7b63883fcd7fb255) - **oracle**: preserve function-style MOD syntax fixes [#3897](https://github.com/tobymao/sqlglot/pull/3897) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d968932`](https://github.com/tobymao/sqlglot/commit/d968932ef742e97ccf3ec6cdca0bc3319830f0a9) - **clickhouse**: treat identifiers as case-sensitive, handle EMPTY table property, generate DateStrToDate *(PR [#3895](https://github.com/tobymao/sqlglot/pull/3895) by [@jwhitaker-gridcog](https://github.com/jwhitaker-gridcog))* - [`3e5e730`](https://github.com/tobymao/sqlglot/commit/3e5e7300ec184024f871669db48d68476b3fa4df) - **clickhouse**: generate exp.Values correctly, handle `FORMAT Values` *(PR [#3900](https://github.com/tobymao/sqlglot/pull/3900) by [@georgesittas](https://github.com/georgesittas))* - [`bea3c08`](https://github.com/tobymao/sqlglot/commit/bea3c08e46a020d8545b702c77f0db18c99f1c55) - **parser**: improve performance of OUTER/CROSS APPLY parsing *(PR [#3901](https://github.com/tobymao/sqlglot/pull/3901) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3898](https://github.com/tobymao/sqlglot/issues/3898) opened by [@ewhitley](https://github.com/ewhitley)* ## [v25.10.0] - 2024-08-08 ### :boom: BREAKING CHANGES - due to [`3eb46db`](https://github.com/tobymao/sqlglot/commit/3eb46db5c429f50b5bb6c0c5517a5f7c1084b5ea) - switch off CSV file schema 
inference by default *(PR [#3879](https://github.com/tobymao/sqlglot/pull/3879) by [@georgesittas](https://github.com/georgesittas))*: switch off CSV file schema inference by default (#3879) ### :sparkles: New Features - [`3e4fcf7`](https://github.com/tobymao/sqlglot/commit/3e4fcf7e8f6a322c14470de6c5dbba152bc9b2fe) - **databricks**: Add support for STREAMING tables *(PR [#3878](https://github.com/tobymao/sqlglot/pull/3878) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3876](https://github.com/tobymao/sqlglot/issues/3876) opened by [@ericvergnaud](https://github.com/ericvergnaud)* - [`528f690`](https://github.com/tobymao/sqlglot/commit/528f6908001db2f132edfa3c61c21815f7e9dc2f) - **duckdb**: Transpile Snowflake's CONVERT_TIMEZONE 3-arg version *(PR [#3883](https://github.com/tobymao/sqlglot/pull/3883) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3875](https://github.com/tobymao/sqlglot/issues/3875) opened by [@milonimrod](https://github.com/milonimrod)* - [`411f62a`](https://github.com/tobymao/sqlglot/commit/411f62ad27f8cbe0d9a429e0cafdf4bd9eb2749f) - **bigquery**: Support for GENERATE_TIMESTAMP_ARRAY, DDB transpilation *(PR [#3888](https://github.com/tobymao/sqlglot/pull/3888) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`7169e6e`](https://github.com/tobymao/sqlglot/commit/7169e6ef52d24754059b9ee4324398d22ddff0da) - **bigquery**: ensure Funcs are preserved when used as Tables *(PR [#3877](https://github.com/tobymao/sqlglot/pull/3877) by [@georgesittas](https://github.com/georgesittas))* - [`62ceed2`](https://github.com/tobymao/sqlglot/commit/62ceed2fa3cd7b41919839d837b860f3814fa769) - **redshift**: parse first arg in DATE_PART into a Var fixes [#3882](https://github.com/tobymao/sqlglot/pull/3882) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`2ad9bfe`](https://github.com/tobymao/sqlglot/commit/2ad9bfef71ae707b83f604f16b47aa583d082c3b) 
- **snowflake**: support table qualification in USING clause *(PR [#3885](https://github.com/tobymao/sqlglot/pull/3885) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3881](https://github.com/tobymao/sqlglot/issues/3881) opened by [@dlahyani](https://github.com/dlahyani)* - [`ef16b1d`](https://github.com/tobymao/sqlglot/commit/ef16b1da6b43647a0ca08d69eaf3610e3b72671f) - Fix COLLATE's RHS parsing *(PR [#3887](https://github.com/tobymao/sqlglot/pull/3887) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3880](https://github.com/tobymao/sqlglot/issues/3880) opened by [@ewhitley](https://github.com/ewhitley)* ### :recycle: Refactors - [`3eb46db`](https://github.com/tobymao/sqlglot/commit/3eb46db5c429f50b5bb6c0c5517a5f7c1084b5ea) - **optimizer**: switch off CSV file schema inference by default *(PR [#3879](https://github.com/tobymao/sqlglot/pull/3879) by [@georgesittas](https://github.com/georgesittas))* ## [v25.9.0] - 2024-08-05 ### :boom: BREAKING CHANGES - due to [`64e187c`](https://github.com/tobymao/sqlglot/commit/64e187c52cd9725ba79e6afbd444382eba9e5827) - transpile postgres implicitly exploding GENERATE_SERIES proje… *(PR [#3853](https://github.com/tobymao/sqlglot/pull/3853) by [@georgesittas](https://github.com/georgesittas))*: transpile postgres implicitly exploding GENERATE_SERIES proje… (#3853) - due to [`e53e7cc`](https://github.com/tobymao/sqlglot/commit/e53e7cc02a224563d0a61b0a39298d606b9bac80) - Generation of exp.ArrayConcat for 2-arg based dialects *(PR [#3864](https://github.com/tobymao/sqlglot/pull/3864) by [@VaggelisD](https://github.com/VaggelisD))*: Generation of exp.ArrayConcat for 2-arg based dialects (#3864) - due to [`659b8bf`](https://github.com/tobymao/sqlglot/commit/659b8bf12e396856d1562ee4678b4f687629e081) - Support for BQ's exp.GenerateDateArray generation *(PR [#3865](https://github.com/tobymao/sqlglot/pull/3865) by 
[@VaggelisD](https://github.com/VaggelisD))*: Support for BQ's exp.GenerateDateArray generation (#3865) ### :sparkles: New Features - [`6afed2a`](https://github.com/tobymao/sqlglot/commit/6afed2aecc0ce186ff6c484b1ad32ac6a2fb61bc) - **duckdb**: Support for exp.TimeDiff generation *(PR [#3856](https://github.com/tobymao/sqlglot/pull/3856) by [@VaggelisD](https://github.com/VaggelisD))* - [`64e187c`](https://github.com/tobymao/sqlglot/commit/64e187c52cd9725ba79e6afbd444382eba9e5827) - transpile postgres implicitly exploding GENERATE_SERIES proje… *(PR [#3853](https://github.com/tobymao/sqlglot/pull/3853) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3818](https://github.com/tobymao/sqlglot/issues/3818) opened by [@wojciechowski-p](https://github.com/wojciechowski-p)* - [`8a948c8`](https://github.com/tobymao/sqlglot/commit/8a948c805f7534e266557e1aa08bee0982340685) - **teradata**: Parse RENAME TABLE as Command *(PR [#3863](https://github.com/tobymao/sqlglot/pull/3863) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3861](https://github.com/tobymao/sqlglot/issues/3861) opened by [@EdouardW](https://github.com/EdouardW)* - [`659b8bf`](https://github.com/tobymao/sqlglot/commit/659b8bf12e396856d1562ee4678b4f687629e081) - **duckdb**: Support for BQ's exp.GenerateDateArray generation *(PR [#3865](https://github.com/tobymao/sqlglot/pull/3865) by [@VaggelisD](https://github.com/VaggelisD))* - [`734f54b`](https://github.com/tobymao/sqlglot/commit/734f54bb6ec697a5213f046fbb1e8174b2c31115) - **snowflake**: add support for a couple of missing clauses in PIVOT clause *(PR [#3867](https://github.com/tobymao/sqlglot/pull/3867) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`8710763`](https://github.com/tobymao/sqlglot/commit/87107631378b0972115a01cc0bb99dbfc44a66d7) - **presto**: map %W to %A in the TIME_MAPPING *(PR 
[#3855](https://github.com/tobymao/sqlglot/pull/3855) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3854](https://github.com/tobymao/sqlglot/issues/3854) opened by [@ddelzell](https://github.com/ddelzell)* - [`532f3c8`](https://github.com/tobymao/sqlglot/commit/532f3c8714220058170790b13977cc66760841dc) - **duckdb**: Add implicit casts to DATE_DIFF *(PR [#3857](https://github.com/tobymao/sqlglot/pull/3857) by [@VaggelisD](https://github.com/VaggelisD))* - [`299c4a5`](https://github.com/tobymao/sqlglot/commit/299c4a559dd04047d5a4c4691f8965972842fe7d) - **clickhouse**: Fix SETTINGS parsing *(PR [#3859](https://github.com/tobymao/sqlglot/pull/3859) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3858](https://github.com/tobymao/sqlglot/issues/3858) opened by [@obazna](https://github.com/obazna)* - [`810d23d`](https://github.com/tobymao/sqlglot/commit/810d23d4e42f9a7de83015ec425dff9223598219) - **parser**: make assignment parsing more lenient by allowing keyword in LHS *(PR [#3866](https://github.com/tobymao/sqlglot/pull/3866) by [@georgesittas](https://github.com/georgesittas))* - [`e53e7cc`](https://github.com/tobymao/sqlglot/commit/e53e7cc02a224563d0a61b0a39298d606b9bac80) - Generation of exp.ArrayConcat for 2-arg based dialects *(PR [#3864](https://github.com/tobymao/sqlglot/pull/3864) by [@VaggelisD](https://github.com/VaggelisD))* - [`813f127`](https://github.com/tobymao/sqlglot/commit/813f127b293e7087d174f3f632b65ba7b24bc9e3) - **duckdb**: Allow DESCRIBE as a _parse_select() path *(PR [#3871](https://github.com/tobymao/sqlglot/pull/3871) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3869](https://github.com/tobymao/sqlglot/issues/3869) opened by [@cpcloud](https://github.com/cpcloud)* - [`6ff0c01`](https://github.com/tobymao/sqlglot/commit/6ff0c01a5b8b19e3090b8cf08aabbb4b27425abb) - Fixed size array parsing *(PR 
[#3870](https://github.com/tobymao/sqlglot/pull/3870) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3868](https://github.com/tobymao/sqlglot/issues/3868) opened by [@tekumara](https://github.com/tekumara)* ## [v25.8.1] - 2024-07-30 ### :bug: Bug Fixes - [`a295b3a`](https://github.com/tobymao/sqlglot/commit/a295b3adbef0eff0b3f6c3b8b97b1eaa8c13f144) - **tsql**: regression related to CTEs in CREATE VIEW AS statements *(PR [#3852](https://github.com/tobymao/sqlglot/pull/3852) by [@georgesittas](https://github.com/georgesittas))* ## [v25.8.0] - 2024-07-29 ### :sparkles: New Features - [`e37d63a`](https://github.com/tobymao/sqlglot/commit/e37d63a17d4709135c1de7876b2898cf7bd2e641) - **bigquery**: add support for BYTEINT closes [#3838](https://github.com/tobymao/sqlglot/pull/3838) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`4c912cd`](https://github.com/tobymao/sqlglot/commit/4c912cd2302874b8abeed3cafa93ff3771b8dcba) - **clickhouse**: improve parsing/transpilation of StrToDate *(PR [#3839](https://github.com/tobymao/sqlglot/pull/3839) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3837](https://github.com/tobymao/sqlglot/issues/3837) opened by [@ace-xc](https://github.com/ace-xc)* - [`45f45ea`](https://github.com/tobymao/sqlglot/commit/45f45eaaac5a9130168dddaef4713542886a83cb) - **duckdb**: add support for SUMMARIZE *(PR [#3840](https://github.com/tobymao/sqlglot/pull/3840) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3823](https://github.com/tobymao/sqlglot/issues/3823) opened by [@cpcloud](https://github.com/cpcloud)* ### :bug: Bug Fixes - [`57ecc84`](https://github.com/tobymao/sqlglot/commit/57ecc8465a3c4d1e0ab1db71dc185c80efc5d0aa) - **duckdb**: wrap left IN clause json extract arrow operand fixes [#3836](https://github.com/tobymao/sqlglot/pull/3836) *(commit by [@georgesittas](https://github.com/georgesittas))* 
- [`2ffb070`](https://github.com/tobymao/sqlglot/commit/2ffb07070952cde7ac9a1883cbf9b4c477c55abb) - **duckdb**: allow fixed length array casts closes [#3841](https://github.com/tobymao/sqlglot/pull/3841) *(PR [#3842](https://github.com/tobymao/sqlglot/pull/3842) by [@tobymao](https://github.com/tobymao))* - [`d71eb4e`](https://github.com/tobymao/sqlglot/commit/d71eb4ebc2a0f82c567b32de51298f0d82f400a1) - pretty gen for tuples *(commit by [@tobymao](https://github.com/tobymao))* - [`12ae9cd`](https://github.com/tobymao/sqlglot/commit/12ae9cdc1c1f52735f8c60488b5d98a4872bf764) - **tsql**: handle JSON_QUERY with a single argument *(PR [#3847](https://github.com/tobymao/sqlglot/pull/3847) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3843](https://github.com/tobymao/sqlglot/issues/3843) opened by [@zachary62](https://github.com/zachary62)* - [`f8ca6b4`](https://github.com/tobymao/sqlglot/commit/f8ca6b4048ee22585cd7635f83b25fe2df9bd748) - **tsql**: bubble up exp.Create CTEs to improve transpilability *(PR [#3848](https://github.com/tobymao/sqlglot/pull/3848) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3844](https://github.com/tobymao/sqlglot/issues/3844) opened by [@zachary62](https://github.com/zachary62)* - [`89976c1`](https://github.com/tobymao/sqlglot/commit/89976c1dbb61bdfe3bbb98702b18365e90a69acb) - **parser**: allow 'cube' to be used for identifiers *(PR [#3850](https://github.com/tobymao/sqlglot/pull/3850) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`d00ea9c`](https://github.com/tobymao/sqlglot/commit/d00ea9c4d39f686fabbe864e88cfe5c071fd4f66) - exclude boolean args in Generator.format_args *(PR [#3849](https://github.com/tobymao/sqlglot/pull/3849) by [@georgesittas](https://github.com/georgesittas))* ## [v25.7.1] - 2024-07-25 ### :bug: Bug Fixes - 
[`ae95c18`](https://github.com/tobymao/sqlglot/commit/ae95c18f636d34c7f92b48cd5970f4fa6ad81b08) - alter table add columns closes [#3835](https://github.com/tobymao/sqlglot/pull/3835) *(commit by [@tobymao](https://github.com/tobymao))* - [`9b5839d`](https://github.com/tobymao/sqlglot/commit/9b5839d7fb04f78c9ef50b112cd9d4d24558c912) - make ast consistent *(commit by [@tobymao](https://github.com/tobymao))* ## [v25.7.0] - 2024-07-25 ### :sparkles: New Features - [`ba0aa50`](https://github.com/tobymao/sqlglot/commit/ba0aa50072f623c299eb4d2dbb69993541fff27b) - **duckdb**: Transpile BQ's exp.DatetimeAdd, exp.DatetimeSub *(PR [#3777](https://github.com/tobymao/sqlglot/pull/3777) by [@VaggelisD](https://github.com/VaggelisD))* - [`5da91fb`](https://github.com/tobymao/sqlglot/commit/5da91fb50d0f8029ddda16040ebd316c1a651e2d) - **postgres**: Support for CREATE INDEX CONCURRENTLY *(PR [#3787](https://github.com/tobymao/sqlglot/pull/3787) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3783](https://github.com/tobymao/sqlglot/issues/3783) opened by [@EdgyEdgemond](https://github.com/EdgyEdgemond)* - [`00722eb`](https://github.com/tobymao/sqlglot/commit/00722eb41795e7454d0ecb4c3d0e1caf96a19465) - Move ANNOTATORS to Dialect for dialect-aware annotation *(PR [#3786](https://github.com/tobymao/sqlglot/pull/3786) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3778](https://github.com/tobymao/sqlglot/issues/3778) opened by [@ddelzell](https://github.com/ddelzell)* - [`a6d84fb`](https://github.com/tobymao/sqlglot/commit/a6d84fbd9b4120f42b31bb01d4bf3e6258e51562) - **postgres**: Parse TO_DATE as exp.StrToDate *(PR [#3799](https://github.com/tobymao/sqlglot/pull/3799) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3797](https://github.com/tobymao/sqlglot/issues/3797) opened by [@dioptre](https://github.com/dioptre)* - 
[`3582644`](https://github.com/tobymao/sqlglot/commit/358264478e5449b7e4ebddce1cc463d140f266f5) - **hive, spark, db**: Support for exp.GenerateSeries *(PR [#3798](https://github.com/tobymao/sqlglot/pull/3798) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3793](https://github.com/tobymao/sqlglot/issues/3793) opened by [@wojciechowski-p](https://github.com/wojciechowski-p)* - [`80b4a12`](https://github.com/tobymao/sqlglot/commit/80b4a12b779b661e42d31cf75ead8aff25257f8a) - **tsql**: Support for COLUMNSTORE option on CREATE INDEX *(PR [#3805](https://github.com/tobymao/sqlglot/pull/3805) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3801](https://github.com/tobymao/sqlglot/issues/3801) opened by [@na399](https://github.com/na399)* - [`bf6c126`](https://github.com/tobymao/sqlglot/commit/bf6c12687f3ed032ea7be40875c19fc00e5927ad) - **databricks**: Support USE CATALOG *(PR [#3812](https://github.com/tobymao/sqlglot/pull/3812) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3811](https://github.com/tobymao/sqlglot/issues/3811) opened by [@grusin-db](https://github.com/grusin-db)* - [`624d411`](https://github.com/tobymao/sqlglot/commit/624d4115e3ee4b8db2dbf2970bf0047e14b23e92) - **snowflake**: Support for OBJECT_INSERT, transpile to DDB *(PR [#3807](https://github.com/tobymao/sqlglot/pull/3807) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3802](https://github.com/tobymao/sqlglot/issues/3802) opened by [@buremba](https://github.com/buremba)* - [`5b393fb`](https://github.com/tobymao/sqlglot/commit/5b393fb4d2db47b9229ca12a03aba82cdd510615) - **postgres**: Add missing constraint options *(PR [#3816](https://github.com/tobymao/sqlglot/pull/3816) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3814](https://github.com/tobymao/sqlglot/issues/3814) opened by 
[@DTovstohan](https://github.com/DTovstohan)* ### :bug: Bug Fixes - [`898f523`](https://github.com/tobymao/sqlglot/commit/898f523a8db9f73b59055f1e38cf4acb07157f00) - **duckdb**: Wrap JSON_EXTRACT if it's subscripted *(PR [#3785](https://github.com/tobymao/sqlglot/pull/3785) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3782](https://github.com/tobymao/sqlglot/issues/3782) opened by [@egan8888](https://github.com/egan8888)* - [`db3748d`](https://github.com/tobymao/sqlglot/commit/db3748d56b138a6427d6f4fc3e32c895ffb993fa) - **mysql**: don't wrap VALUES clause *(PR [#3792](https://github.com/tobymao/sqlglot/pull/3792) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3789](https://github.com/tobymao/sqlglot/issues/3789) opened by [@stephenprater](https://github.com/stephenprater)* - [`44d6506`](https://github.com/tobymao/sqlglot/commit/44d650637d5d7a662b57ec1d8ca74dffe0f7ad73) - with as comments closes [#3794](https://github.com/tobymao/sqlglot/pull/3794) *(commit by [@tobymao](https://github.com/tobymao))* - [`8ca6a61`](https://github.com/tobymao/sqlglot/commit/8ca6a613692e7339717c449ba6966d7c2911b584) - **tsql**: Fix roundtrip of exp.Stddev *(PR [#3806](https://github.com/tobymao/sqlglot/pull/3806) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3804](https://github.com/tobymao/sqlglot/issues/3804) opened by [@JonaGeishauser](https://github.com/JonaGeishauser)* - [`8551063`](https://github.com/tobymao/sqlglot/commit/855106377c97ee313b45046041fafabb2810dab2) - **duckdb**: Fix STRUCT_PACK -> ROW due to is_struct_cast *(PR [#3809](https://github.com/tobymao/sqlglot/pull/3809) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3808](https://github.com/tobymao/sqlglot/issues/3808) opened by [@aersam](https://github.com/aersam)* - [`98f80ed`](https://github.com/tobymao/sqlglot/commit/98f80eda3863b5ff40d566330e6ab35a99f569ca) 
- **clickhouse**: allow like as an identifier closes [#3813](https://github.com/tobymao/sqlglot/pull/3813) *(commit by [@tobymao](https://github.com/tobymao))* - [`556ba35`](https://github.com/tobymao/sqlglot/commit/556ba35e4ce9efa51561ef0578bfb24a51ce4dcd) - allow parse_identifier to handle single quotes *(commit by [@tobymao](https://github.com/tobymao))* - [`f9810d2`](https://github.com/tobymao/sqlglot/commit/f9810d213f3992881fc13291a681da6553701083) - **snowflake**: Don't consume LPAREN when parsing staged file path *(PR [#3815](https://github.com/tobymao/sqlglot/pull/3815) by [@VaggelisD](https://github.com/VaggelisD))* - [`416f4a1`](https://github.com/tobymao/sqlglot/commit/416f4a1b6a04b858ff8ed94509aacd9bacca145b) - **postgres**: Fix COLLATE column constraint *(PR [#3820](https://github.com/tobymao/sqlglot/pull/3820) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3817](https://github.com/tobymao/sqlglot/issues/3817) opened by [@DTovstohan](https://github.com/DTovstohan)* - [`69b9395`](https://github.com/tobymao/sqlglot/commit/69b93953c35bd7f1d53cf15d9937117edb38f512) - Do not preemptively consume SELECT [ALL] if ALL is connected *(PR [#3822](https://github.com/tobymao/sqlglot/pull/3822) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3819](https://github.com/tobymao/sqlglot/issues/3819) opened by [@nfx](https://github.com/nfx)* - [`1c19abe`](https://github.com/tobymao/sqlglot/commit/1c19abe5b3f3187a2e0ba420cf8c5e5b5ecc788e) - **presto, trino**: Fix StrToUnix transpilation *(PR [#3824](https://github.com/tobymao/sqlglot/pull/3824) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3796](https://github.com/tobymao/sqlglot/issues/3796) opened by [@ddelzell](https://github.com/ddelzell)* ## [v25.6.1] - 2024-07-18 ### :bug: Bug Fixes - [`19370d5`](https://github.com/tobymao/sqlglot/commit/19370d5d16b555e25def503323ec3dc4e5d40e6c) - **postgres**: Decouple 
UNIQUE from DEFAULT constraints *(PR [#3775](https://github.com/tobymao/sqlglot/pull/3775) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3774](https://github.com/tobymao/sqlglot/issues/3774) opened by [@EdgyEdgemond](https://github.com/EdgyEdgemond)* - [`e99146b`](https://github.com/tobymao/sqlglot/commit/e99146b0989599772c020905f69496ea80e7e2e5) - make copy a dml statement for qualify_tables *(commit by [@tobymao](https://github.com/tobymao))* ## [v25.6.0] - 2024-07-17 ### :boom: BREAKING CHANGES - due to [`89fc63c`](https://github.com/tobymao/sqlglot/commit/89fc63c5831dc5d63feff9e39fea1e90d65e9a09) - QUALIFY comes after WINDOW clause in queries *(PR [#3745](https://github.com/tobymao/sqlglot/pull/3745) by [@georgesittas](https://github.com/georgesittas))*: QUALIFY comes after WINDOW clause in queries (#3745) - due to [`a2a6efb`](https://github.com/tobymao/sqlglot/commit/a2a6efb45dc0f380747aa4afdaa19122389f3c28) - Canonicalize struct & array inline constructor *(PR [#3751](https://github.com/tobymao/sqlglot/pull/3751) by [@VaggelisD](https://github.com/VaggelisD))*: Canonicalize struct & array inline constructor (#3751) ### :sparkles: New Features - [`e9c4bbb`](https://github.com/tobymao/sqlglot/commit/e9c4bbbb0d0a03d1b1efaad9abe0068b3b7efa9d) - Support for ORDER BY ALL *(PR [#3756](https://github.com/tobymao/sqlglot/pull/3756) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3755](https://github.com/tobymao/sqlglot/issues/3755) opened by [@Hunterlige](https://github.com/Hunterlige)* - [`4a843e6`](https://github.com/tobymao/sqlglot/commit/4a843e6cca7bcc0d9956fe975dbc77e67038f1b8) - **postgres**: Support FROM ROWS FROM (...) 
*(PR [#3753](https://github.com/tobymao/sqlglot/pull/3753) by [@VaggelisD](https://github.com/VaggelisD))* - [`321051a`](https://github.com/tobymao/sqlglot/commit/321051aef30f11f2778444040a2078633e617144) - **presto, trino**: Add support for exp.TimestampAdd *(PR [#3765](https://github.com/tobymao/sqlglot/pull/3765) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3762](https://github.com/tobymao/sqlglot/issues/3762) opened by [@ddelzell](https://github.com/ddelzell)* - [`82a1bb4`](https://github.com/tobymao/sqlglot/commit/82a1bb42856d628651bb5f1ef9aa8f440736c450) - Support for RPAD & LPAD functions *(PR [#3757](https://github.com/tobymao/sqlglot/pull/3757) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`89fc63c`](https://github.com/tobymao/sqlglot/commit/89fc63c5831dc5d63feff9e39fea1e90d65e9a09) - **duckdb, clickhouse**: QUALIFY comes after WINDOW clause in queries *(PR [#3745](https://github.com/tobymao/sqlglot/pull/3745) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3744](https://github.com/tobymao/sqlglot/issues/3744) opened by [@taylorbarstow](https://github.com/taylorbarstow)* - [`15ca924`](https://github.com/tobymao/sqlglot/commit/15ca924ac6e8a72396a882c394856e466cae9ac3) - **optimizer**: Fix expansion of SELECT * REPLACE, RENAME *(PR [#3742](https://github.com/tobymao/sqlglot/pull/3742) by [@VaggelisD](https://github.com/VaggelisD))* - [`0363fef`](https://github.com/tobymao/sqlglot/commit/0363fefd3ddd490ddddae47f7eb0192f0ff3cc5e) - attach comments to Commands *(PR [#3758](https://github.com/tobymao/sqlglot/pull/3758) by [@georgesittas](https://github.com/georgesittas))* - [`a2a6efb`](https://github.com/tobymao/sqlglot/commit/a2a6efb45dc0f380747aa4afdaa19122389f3c28) - **bigquery**: Canonicalize struct & array inline constructor *(PR [#3751](https://github.com/tobymao/sqlglot/pull/3751) by [@VaggelisD](https://github.com/VaggelisD))* - 
[`5df3f52`](https://github.com/tobymao/sqlglot/commit/5df3f5292488df6a8e21abf3b49086c823797e78) - Remove number matching from COLON placeholder parser *(PR [#3761](https://github.com/tobymao/sqlglot/pull/3761) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3759](https://github.com/tobymao/sqlglot/issues/3759) opened by [@egan8888](https://github.com/egan8888)* - [`0606af6`](https://github.com/tobymao/sqlglot/commit/0606af66dba7c290fee65926dcb74baad82c84ac) - **duckdb**: Transpile UDFs from Databricks *(PR [#3768](https://github.com/tobymao/sqlglot/pull/3768) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3764](https://github.com/tobymao/sqlglot/issues/3764) opened by [@aersam](https://github.com/aersam)* - [`dcc783a`](https://github.com/tobymao/sqlglot/commit/dcc783aad7c2e7184224e90fed7710eb08ddc76a) - **clickhouse**: Allow TokenType.SELECT as a Tuple field identifier *(PR [#3766](https://github.com/tobymao/sqlglot/pull/3766) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3763](https://github.com/tobymao/sqlglot/issues/3763) opened by [@cpcloud](https://github.com/cpcloud)* - [`b42b7ac`](https://github.com/tobymao/sqlglot/commit/b42b7ac5bb1785a9028235c1557b9842ea1d7524) - extract from time/date *(commit by [@tobymao](https://github.com/tobymao))* ## [v25.5.1] - 2024-07-05 ### :bug: Bug Fixes - [`2bdde22`](https://github.com/tobymao/sqlglot/commit/2bdde2221b8017791ce4cc619abb2706464ca408) - **optimizer**: only qualify coalesced USING columns if they exist in table schemas *(PR [#3740](https://github.com/tobymao/sqlglot/pull/3740) by [@georgesittas](https://github.com/georgesittas))* ## [v25.5.0] - 2024-07-04 ### :boom: BREAKING CHANGES - due to [`8335ba1`](https://github.com/tobymao/sqlglot/commit/8335ba10e60c7c63881d7559a6f1fada11b0e55d) - preserve EXTRACT(date_part FROM datetime) calls *(PR [#3729](https://github.com/tobymao/sqlglot/pull/3729) by 
[@georgesittas](https://github.com/georgesittas))*: preserve EXTRACT(date_part FROM datetime) calls (#3729) - due to [`fb066a6`](https://github.com/tobymao/sqlglot/commit/fb066a6167e1f887bd8c1a1369d063fe70f36a8a) - Decouple NVL() from COALESCE() *(PR [#3734](https://github.com/tobymao/sqlglot/pull/3734) by [@VaggelisD](https://github.com/VaggelisD))*: Decouple NVL() from COALESCE() (#3734) ### :sparkles: New Features - [`0c03299`](https://github.com/tobymao/sqlglot/commit/0c032992fac622ebaee114cd9f6e405be1820054) - **teradata**: random lower upper closes [#3721](https://github.com/tobymao/sqlglot/pull/3721) *(commit by [@tobymao](https://github.com/tobymao))* - [`37b6e2d`](https://github.com/tobymao/sqlglot/commit/37b6e2d806f6da1338c75803919c602f8705acac) - **snowflake**: add support for VECTOR(type, size) *(PR [#3724](https://github.com/tobymao/sqlglot/pull/3724) by [@georgesittas](https://github.com/georgesittas))* - [`1e07c4d`](https://github.com/tobymao/sqlglot/commit/1e07c4d29a43192fb57c120f3b9c1c2fa27d0fa6) - **presto, trino**: Configurable transpilation of Snowflake VARIANT *(PR [#3725](https://github.com/tobymao/sqlglot/pull/3725) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3713](https://github.com/tobymao/sqlglot/issues/3713) opened by [@Leonti](https://github.com/Leonti)* - [`e5a53aa`](https://github.com/tobymao/sqlglot/commit/e5a53aaa015806574cd3c4bbe46b5788e960903e) - **snowflake**: Support for FROM CHANGES *(PR [#3731](https://github.com/tobymao/sqlglot/pull/3731) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3730](https://github.com/tobymao/sqlglot/issues/3730) opened by [@achicoine-coveo](https://github.com/achicoine-coveo)* - [`820d664`](https://github.com/tobymao/sqlglot/commit/820d66430bb23bff88d0057b22842d313e1431c5) - **presto**: wrap md5 string arguments in to_utf8 *(PR [#3732](https://github.com/tobymao/sqlglot/pull/3732) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2855](https://github.com/TobikoData/sqlmesh/issues/2855) opened by [@MikeWallis42](https://github.com/MikeWallis42)* - [`912bc84`](https://github.com/tobymao/sqlglot/commit/912bc84791008ecce545cfbd3b0c9d4362131eb3) - **spark, databricks**: Support view schema binding options *(PR [#3739](https://github.com/tobymao/sqlglot/pull/3739) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3738](https://github.com/tobymao/sqlglot/issues/3738) opened by [@aersam](https://github.com/aersam)* ### :bug: Bug Fixes - [`3454f86`](https://github.com/tobymao/sqlglot/commit/3454f861f22f680f6b8c18cca466154d3b9fe8d1) - **teradata**: use timestamp with time zone over timestamptz *(PR [#3723](https://github.com/tobymao/sqlglot/pull/3723) by [@mtagle](https://github.com/mtagle))* - :arrow_lower_right: *fixes issue [#3722](https://github.com/tobymao/sqlglot/issues/3722) opened by [@mtagle](https://github.com/mtagle)* - [`f4a2872`](https://github.com/tobymao/sqlglot/commit/f4a28721fd33edb3178c1d99746209dadfbba487) - **clickhouse**: switch off table alias columns generation *(PR [#3727](https://github.com/tobymao/sqlglot/pull/3727) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3726](https://github.com/tobymao/sqlglot/issues/3726) opened by [@cpcloud](https://github.com/cpcloud)* - [`8335ba1`](https://github.com/tobymao/sqlglot/commit/8335ba10e60c7c63881d7559a6f1fada11b0e55d) - **clickhouse**: preserve EXTRACT(date_part FROM datetime) calls *(PR [#3729](https://github.com/tobymao/sqlglot/pull/3729) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3728](https://github.com/tobymao/sqlglot/issues/3728) opened by [@cpcloud](https://github.com/cpcloud)* - [`fb066a6`](https://github.com/tobymao/sqlglot/commit/fb066a6167e1f887bd8c1a1369d063fe70f36a8a) - **oracle**: Decouple NVL() from 
COALESCE() *(PR [#3734](https://github.com/tobymao/sqlglot/pull/3734) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3733](https://github.com/tobymao/sqlglot/issues/3733) opened by [@Hal-H2Apps](https://github.com/Hal-H2Apps)* - [`c790c3b`](https://github.com/tobymao/sqlglot/commit/c790c3b1fa274d7b0faf9f75e7dbc62bc4f55c67) - **tsql**: parse rhs of x::varchar(max) into a type *(PR [#3737](https://github.com/tobymao/sqlglot/pull/3737) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`84416d2`](https://github.com/tobymao/sqlglot/commit/84416d207a2e397aba12a4138fcbd1fab382c22d) - **teradata**: clean up CurrentTimestamp generation logic *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.4.1] - 2024-06-29 ### :bug: Bug Fixes - [`6bf9853`](https://github.com/tobymao/sqlglot/commit/6bf9853fd0b26d5a4e93e37447c3b275cd108872) - **tsql**: cast shorthand closes [#3760](https://github.com/tobymao/sqlglot/pull/3760) *(PR [#3720](https://github.com/tobymao/sqlglot/pull/3720) by [@tobymao](https://github.com/tobymao))* ## [v25.4.0] - 2024-06-28 ### :boom: BREAKING CHANGES - due to [`9fb1d79`](https://github.com/tobymao/sqlglot/commit/9fb1d79398769edb452e075eb3b6416e69f239bf) - extract unit should be a var, not a column *(PR [#3712](https://github.com/tobymao/sqlglot/pull/3712) by [@tobymao](https://github.com/tobymao))*: extract unit should be a var, not a column (#3712) - due to [`ae1816f`](https://github.com/tobymao/sqlglot/commit/ae1816fc71a5a164d1aae6644a9c3bc4cec484d2) - simplify no longer removes neg, add to_py *(PR [#3714](https://github.com/tobymao/sqlglot/pull/3714) by [@tobymao](https://github.com/tobymao))*: simplify no longer removes neg, add to_py (#3714) - due to [`beaf9cc`](https://github.com/tobymao/sqlglot/commit/beaf9cc1f07ff4223f99c84ad6645d3f29af5801) - coalesce left-hand side of join conditions produced by expanding USING *(PR 
[#3715](https://github.com/tobymao/sqlglot/pull/3715) by [@georgesittas](https://github.com/georgesittas))*: coalesce left-hand side of join conditions produced by expanding USING (#3715) ### :sparkles: New Features - [`97739fe`](https://github.com/tobymao/sqlglot/commit/97739fe692a883a45247d92b2a3efaed33c4b5bf) - add Select expression parser *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1c2279c`](https://github.com/tobymao/sqlglot/commit/1c2279c0659d5cbe30c19afee85308ef7bf4c9c5) - **duckdb**: Transpile exp.Length from other dialects *(PR [#3708](https://github.com/tobymao/sqlglot/pull/3708) by [@VaggelisD](https://github.com/VaggelisD))* - [`23dac71`](https://github.com/tobymao/sqlglot/commit/23dac7147883d559acca7d21e3600c28576ec950) - **snowflake**: add support for CONNECT_BY_ROOT expression *(PR [#3717](https://github.com/tobymao/sqlglot/pull/3717) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3716](https://github.com/tobymao/sqlglot/issues/3716) opened by [@niklaslorenzatalligator](https://github.com/niklaslorenzatalligator)* - [`4f050e0`](https://github.com/tobymao/sqlglot/commit/4f050e0aefcde8fb3c65abaf49c6aa4e2bbe5e2b) - transpile BigQuery's SAFE_CAST with FORMAT to DuckDB *(PR [#3718](https://github.com/tobymao/sqlglot/pull/3718) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2837](https://github.com/TobikoData/sqlmesh/issues/2837) opened by [@hustic](https://github.com/hustic)* ### :bug: Bug Fixes - [`3a86d7e`](https://github.com/tobymao/sqlglot/commit/3a86d7e4ec02e96326021c417dc972b64076567f) - non deterministic aggs in planner closes [#3709](https://github.com/tobymao/sqlglot/pull/3709) *(commit by [@tobymao](https://github.com/tobymao))* - [`3b8568d`](https://github.com/tobymao/sqlglot/commit/3b8568d37792c1916f05faf5df8af1841144b338) - **clickhouse**: extract closes [#3711](https://github.com/tobymao/sqlglot/pull/3711) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`9fb1d79`](https://github.com/tobymao/sqlglot/commit/9fb1d79398769edb452e075eb3b6416e69f239bf) - extract unit should be a var, not a column *(PR [#3712](https://github.com/tobymao/sqlglot/pull/3712) by [@tobymao](https://github.com/tobymao))* - [`ae1816f`](https://github.com/tobymao/sqlglot/commit/ae1816fc71a5a164d1aae6644a9c3bc4cec484d2) - simplify no longer removes neg, add to_py *(PR [#3714](https://github.com/tobymao/sqlglot/pull/3714) by [@tobymao](https://github.com/tobymao))* - [`beaf9cc`](https://github.com/tobymao/sqlglot/commit/beaf9cc1f07ff4223f99c84ad6645d3f29af5801) - **optimizer**: coalesce left-hand side of join conditions produced by expanding USING *(PR [#3715](https://github.com/tobymao/sqlglot/pull/3715) by [@georgesittas](https://github.com/georgesittas))* ## [v25.3.3] - 2024-06-26 ### :recycle: Refactors - [`972ce7d`](https://github.com/tobymao/sqlglot/commit/972ce7d27d9f083d8ef02ded9278e320da3aa0b6) - control ParseJSON generation logic with a flag *(PR [#3707](https://github.com/tobymao/sqlglot/pull/3707) by [@georgesittas](https://github.com/georgesittas))* ## [v25.3.2] - 2024-06-26 ### :sparkles: New Features - [`a1327c7`](https://github.com/tobymao/sqlglot/commit/a1327c7f4ae74ae25617cd448448ae89c915c744) - **tsql**: Add support for scope qualifier operator *(PR [#3703](https://github.com/tobymao/sqlglot/pull/3703) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#2817](https://github.com/TobikoData/sqlmesh/issues/2817) opened by [@elauser](https://github.com/elauser)* ### :bug: Bug Fixes - [`842a9f0`](https://github.com/tobymao/sqlglot/commit/842a9f0cf6fd49cf1d6ed31a5ad9b40eaa483bff) - **parser**: preserve Cast expression when it's 'safe' and has a format *(PR [#3705](https://github.com/tobymao/sqlglot/pull/3705) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2827](https://github.com/TobikoData/sqlmesh/issues/2827) 
opened by [@hustic](https://github.com/hustic)* - [`fc0411d`](https://github.com/tobymao/sqlglot/commit/fc0411dc6236c040ce12c036e1ce1165a5143fa1) - **parser**: explicitly check for identifiers in _parse_types *(PR [#3704](https://github.com/tobymao/sqlglot/pull/3704) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2826](https://github.com/TobikoData/sqlmesh/issues/2826) opened by [@plaflamme](https://github.com/plaflamme)* ### :recycle: Refactors - [`e9236e3`](https://github.com/tobymao/sqlglot/commit/e9236e36c94464af21c7e2f35a083eef316feab1) - add EXPAND_ALIAS_REFS_ONLY_IN_GROUP_BY dialect constant *(PR [#3702](https://github.com/tobymao/sqlglot/pull/3702) by [@georgesittas](https://github.com/georgesittas))* - [`92c6ebb`](https://github.com/tobymao/sqlglot/commit/92c6ebb8f703486cf3132c9d2c3c58568c10aea4) - **tsql**: make ScopeResolution round-trippable *(PR [#3706](https://github.com/tobymao/sqlglot/pull/3706) by [@georgesittas](https://github.com/georgesittas))* ## [v25.3.1] - 2024-06-25 ### :sparkles: New Features - [`4ed02b0`](https://github.com/tobymao/sqlglot/commit/4ed02b0a24eeabf813525ba09d646763970dd33b) - transpile TRY_PARSE_JSON Snowflake -> DuckDB *(PR [#3696](https://github.com/tobymao/sqlglot/pull/3696) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3690](https://github.com/tobymao/sqlglot/issues/3690) opened by [@achicoine-coveo](https://github.com/achicoine-coveo)* - [`60fa5e3`](https://github.com/tobymao/sqlglot/commit/60fa5e3f8a6eab3abb12064366a6bde907d9e9de) - **snowflake**: add support for dynamic table DDL *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`442c61d`](https://github.com/tobymao/sqlglot/commit/442c61defe05f4c168a7909d0a5fc5c043a2d2b4) - **tokenizer**: don't treat escapes in raw strings as such for some dialects *(PR [#3689](https://github.com/tobymao/sqlglot/pull/3689) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3686](https://github.com/tobymao/sqlglot/issues/3686) opened by [@aersam](https://github.com/aersam)* - [`f3e928e`](https://github.com/tobymao/sqlglot/commit/f3e928e771e1973a13afe09e4dc295ad492b783f) - **parser**: make parse_var_or_string more lenient *(PR [#3695](https://github.com/tobymao/sqlglot/pull/3695) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3693](https://github.com/tobymao/sqlglot/issues/3693) opened by [@WSKINGS](https://github.com/WSKINGS)* - [`806a7e4`](https://github.com/tobymao/sqlglot/commit/806a7e421a9b5a54a2859d7bb4c3ea131a4a8640) - remove tokenizer cache for multi-threading *(commit by [@tobymao](https://github.com/tobymao))* - [`3fba603`](https://github.com/tobymao/sqlglot/commit/3fba6035ac0263beab73ab62013a64a56dea9165) - don't treat /*+ as a HINT token in dialects that don't support hints *(PR [#3697](https://github.com/tobymao/sqlglot/pull/3697) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3692](https://github.com/tobymao/sqlglot/issues/3692) opened by [@sandband](https://github.com/sandband)* - [`e5d534c`](https://github.com/tobymao/sqlglot/commit/e5d534ce96381f42f26d43c4fcab7eff23946c90) - **optimizer**: Force early alias expansion in BQ & CH *(PR [#3699](https://github.com/tobymao/sqlglot/pull/3699) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3687](https://github.com/tobymao/sqlglot/issues/3687) opened by [@viplazylmht](https://github.com/viplazylmht)* - [`1cfb1ff`](https://github.com/tobymao/sqlglot/commit/1cfb1ff850fb4fcf69fc5962e01c879ce51bec8b) - proper parsing of unit in spark/databricks date_diff *(PR [#3701](https://github.com/tobymao/sqlglot/pull/3701) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3700](https://github.com/tobymao/sqlglot/issues/3700) opened by 
[@cheesefactory](https://github.com/cheesefactory)* ### :wrench: Chores - [`8b16199`](https://github.com/tobymao/sqlglot/commit/8b16199af3743aee292df5429e1f0087704e1cbc) - bump sqlglotrs to v0.2.8 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.3.0] - 2024-06-21 ### :boom: BREAKING CHANGES - due to [`84d820f`](https://github.com/tobymao/sqlglot/commit/84d820f96b161fdd5b00f265890b5c75c65a36f0) - Time/Datetime/Timestamp function additions *(PR [#3666](https://github.com/tobymao/sqlglot/pull/3666) by [@VaggelisD](https://github.com/VaggelisD))*: Time/Datetime/Timestamp function additions (#3666) - due to [`acbc81d`](https://github.com/tobymao/sqlglot/commit/acbc81d47a2e721c4334ac86b5e17177429cd1c6) - Preserve JSON/VARIANT path with operators *(PR [#3678](https://github.com/tobymao/sqlglot/pull/3678) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve JSON/VARIANT path with operators (#3678) ### :sparkles: New Features - [`84d820f`](https://github.com/tobymao/sqlglot/commit/84d820f96b161fdd5b00f265890b5c75c65a36f0) - **bigquery**: Time/Datetime/Timestamp function additions *(PR [#3666](https://github.com/tobymao/sqlglot/pull/3666) by [@VaggelisD](https://github.com/VaggelisD))* - [`d46ad95`](https://github.com/tobymao/sqlglot/commit/d46ad95bb623f1931d9e373d8444d9ed947362c5) - **tokenizer**: add support for nested comments *(PR [#3670](https://github.com/tobymao/sqlglot/pull/3670) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3668](https://github.com/tobymao/sqlglot/issues/3668) opened by [@aersam](https://github.com/aersam)* - [`ac0e89c`](https://github.com/tobymao/sqlglot/commit/ac0e89c4401f2f278d32c3e956670b262ab21ce7) - **snowflake**: add SECURE post table property fixes [#3677](https://github.com/tobymao/sqlglot/pull/3677) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`acbc81d`](https://github.com/tobymao/sqlglot/commit/acbc81d47a2e721c4334ac86b5e17177429cd1c6) - 
**databricks**: Preserve JSON/VARIANT path with operators *(PR [#3678](https://github.com/tobymao/sqlglot/pull/3678) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3673](https://github.com/tobymao/sqlglot/issues/3673) opened by [@aersam](https://github.com/aersam)* - [`07158c7`](https://github.com/tobymao/sqlglot/commit/07158c77ae7879aa83b7982cefb4ec9d01c11857) - **clickhouse**: Fix roundtrips of DATE/TIMESTAMP functions *(PR [#3683](https://github.com/tobymao/sqlglot/pull/3683) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3679](https://github.com/tobymao/sqlglot/issues/3679) opened by [@TacoBel42](https://github.com/TacoBel42)* ### :bug: Bug Fixes - [`79aea2a`](https://github.com/tobymao/sqlglot/commit/79aea2affece72acfac52b3ac85cf740d55ccff0) - **doris**: ensure LAG/LEAD are generated with three arguments *(commit by [@georgesittas](https://github.com/georgesittas))* - [`08fb2ec`](https://github.com/tobymao/sqlglot/commit/08fb2ecf808f25eae74b579f8e5c4369edc7c604) - **parser**: check if FROM exists when making implicit unnest explicit fixes [#3671](https://github.com/tobymao/sqlglot/pull/3671) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`049fc5a`](https://github.com/tobymao/sqlglot/commit/049fc5a430ad6fa2998fd94d6e20b23da3b545c3) - **bigquery**: handle the case-sensitive strategy in normalize_identifier *(PR [#3667](https://github.com/tobymao/sqlglot/pull/3667) by [@georgesittas](https://github.com/georgesittas))* - [`9e1b6aa`](https://github.com/tobymao/sqlglot/commit/9e1b6aa5d9e2abb141143327c835c8f3b4bbcb0f) - **parser**: handle another edge case in struct field type parser *(PR [#3682](https://github.com/tobymao/sqlglot/pull/3682) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3680](https://github.com/tobymao/sqlglot/issues/3680) opened by [@plaflamme](https://github.com/plaflamme)* - 
[`a1a0278`](https://github.com/tobymao/sqlglot/commit/a1a02782f22b471ee3c896d57f15237dc86565d1) - jsonbcontains default gen *(commit by [@tobymao](https://github.com/tobymao))* - [`bf44942`](https://github.com/tobymao/sqlglot/commit/bf44942a7d35eb83685ad3aa2b360c7105a9f5b7) - **oracle**: Fix default NULL_ORDERING *(PR [#3688](https://github.com/tobymao/sqlglot/pull/3688) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3684](https://github.com/tobymao/sqlglot/issues/3684) opened by [@ncclementi](https://github.com/ncclementi)* ### :wrench: Chores - [`7ae99fe`](https://github.com/tobymao/sqlglot/commit/7ae99fe8284cf2e60819b3992bc79a020dfd00c5) - bump sqlglotrs to 0.2.7 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.2.0] - 2024-06-17 ### :boom: BREAKING CHANGES - due to [`d331e56`](https://github.com/tobymao/sqlglot/commit/d331e56aad7784a122dc36d7bffe5cf0565e38d1) - Normalize time units in their full singular form *(PR [#3652](https://github.com/tobymao/sqlglot/pull/3652) by [@VaggelisD](https://github.com/VaggelisD))*: Normalize time units in their full singular form (#3652) - due to [`468123e`](https://github.com/tobymao/sqlglot/commit/468123e4b7612287e128529de62f3a88f4e1d579) - create SetOperation class *(PR [#3661](https://github.com/tobymao/sqlglot/pull/3661) by [@georgesittas](https://github.com/georgesittas))*: create SetOperation class (#3661) ### :sparkles: New Features - [`e7a158b`](https://github.com/tobymao/sqlglot/commit/e7a158b6f0990db00a4890dfb456de6112f50fd2) - set misc. 
dialect settings if available *(PR [#3649](https://github.com/tobymao/sqlglot/pull/3649) by [@georgesittas](https://github.com/georgesittas))* - [`ff3dabc`](https://github.com/tobymao/sqlglot/commit/ff3dabc75f9a03627caa988b85f88be04a6c70a4) - **tsql**: index on closes [#3658](https://github.com/tobymao/sqlglot/pull/3658) *(commit by [@tobymao](https://github.com/tobymao))* - [`fb4d908`](https://github.com/tobymao/sqlglot/commit/fb4d9080a042d40455bcf631ca6a0afaacb19683) - **tsql**: clustered index closes [#3659](https://github.com/tobymao/sqlglot/pull/3659) *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`88c4965`](https://github.com/tobymao/sqlglot/commit/88c49651ecc9d55967f5c8056352de0f0981989f) - **mysql**: delete redundant keywords *(PR [#3646](https://github.com/tobymao/sqlglot/pull/3646) by [@Toms1999](https://github.com/Toms1999))* - [`4c82c0d`](https://github.com/tobymao/sqlglot/commit/4c82c0d01086e0a622a1448d25f51b0e760d053f) - Parse UNNEST as a function in base dialect *(PR [#3650](https://github.com/tobymao/sqlglot/pull/3650) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3647](https://github.com/tobymao/sqlglot/issues/3647) opened by [@ronnix](https://github.com/ronnix)* - [`d331e56`](https://github.com/tobymao/sqlglot/commit/d331e56aad7784a122dc36d7bffe5cf0565e38d1) - **redshift**: Normalize time units in their full singular form *(PR [#3652](https://github.com/tobymao/sqlglot/pull/3652) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3651](https://github.com/tobymao/sqlglot/issues/3651) opened by [@vidit-wisdom](https://github.com/vidit-wisdom)* - [`a06ee36`](https://github.com/tobymao/sqlglot/commit/a06ee3695d4d23626c1ef0700b373fc84600d374) - **parser**: edge case in _parse_types *(PR [#3656](https://github.com/tobymao/sqlglot/pull/3656) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue 
[#3655](https://github.com/tobymao/sqlglot/issues/3655) opened by [@dangoldin](https://github.com/dangoldin)* - [`a739741`](https://github.com/tobymao/sqlglot/commit/a739741dca5eefd7d4a2c450dd4506cb951d7efb) - teradata warning *(commit by [@tobymao](https://github.com/tobymao))* - [`868f30d`](https://github.com/tobymao/sqlglot/commit/868f30d1ff46ec9b8a048bb79fbb511f458fd769) - improve schema error handling *(PR [#3663](https://github.com/tobymao/sqlglot/pull/3663) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3662](https://github.com/tobymao/sqlglot/issues/3662) opened by [@dexhorthy](https://github.com/dexhorthy)* ### :recycle: Refactors - [`e8cab58`](https://github.com/tobymao/sqlglot/commit/e8cab58c4c44e84ee21d11e8554ee7aed5dc5901) - clean up join mark elimination rule *(PR [#3653](https://github.com/tobymao/sqlglot/pull/3653) by [@georgesittas](https://github.com/georgesittas))* - [`468123e`](https://github.com/tobymao/sqlglot/commit/468123e4b7612287e128529de62f3a88f4e1d579) - create SetOperation class *(PR [#3661](https://github.com/tobymao/sqlglot/pull/3661) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3660](https://github.com/tobymao/sqlglot/issues/3660) opened by [@sorgfresser](https://github.com/sorgfresser)* ## [v25.1.0] - 2024-06-12 ### :boom: BREAKING CHANGES - due to [`d6cfb41`](https://github.com/tobymao/sqlglot/commit/d6cfb41d63893eadf23a81adf413952f3bd4f0ad) - Support for DATE_ADD functions *(PR [#3609](https://github.com/tobymao/sqlglot/pull/3609) by [@VaggelisD](https://github.com/VaggelisD))*: Support for DATE_ADD functions (#3609) ### :sparkles: New Features - [`d6cfb41`](https://github.com/tobymao/sqlglot/commit/d6cfb41d63893eadf23a81adf413952f3bd4f0ad) - **spark, databricks**: Support for DATE_ADD functions *(PR [#3609](https://github.com/tobymao/sqlglot/pull/3609) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue 
[#3607](https://github.com/tobymao/sqlglot/issues/3607) opened by [@justinbt21](https://github.com/justinbt21)* - [`4b30b87`](https://github.com/tobymao/sqlglot/commit/4b30b872b6db73da51e81ef72e1f3bf8763b652b) - **postgres**: Support DIV() func for integer division *(PR [#3602](https://github.com/tobymao/sqlglot/pull/3602) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3601](https://github.com/tobymao/sqlglot/issues/3601) opened by [@andrrreasss](https://github.com/andrrreasss)* - [`ee9b01d`](https://github.com/tobymao/sqlglot/commit/ee9b01d5631f8f0942b61dfaf0632ae0ac2543bb) - **mysql**: support ADD INDEX/KEY/UNIQUE in ALTER TABLE *(PR [#3621](https://github.com/tobymao/sqlglot/pull/3621) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3615](https://github.com/tobymao/sqlglot/issues/3615) opened by [@hubg398](https://github.com/hubg398)* - [`c49cefa`](https://github.com/tobymao/sqlglot/commit/c49cefafaf5e9e51778ab85499fde29600d66ed7) - **mysql**: support STRAIGHT_JOIN *(PR [#3623](https://github.com/tobymao/sqlglot/pull/3623) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3622](https://github.com/tobymao/sqlglot/issues/3622) opened by [@gabocic](https://github.com/gabocic)* - [`e998308`](https://github.com/tobymao/sqlglot/commit/e998308be079bca343af053b99e3826606811df5) - eliminate join marks *(PR [#3580](https://github.com/tobymao/sqlglot/pull/3580) by [@mrhopko](https://github.com/mrhopko))* - [`227e054`](https://github.com/tobymao/sqlglot/commit/227e0544ede5dfe3063f3497e865be6e383db524) - **oracle**: support unicode strings u'...' 
*(PR [#3641](https://github.com/tobymao/sqlglot/pull/3641) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3638](https://github.com/tobymao/sqlglot/issues/3638) opened by [@deebify](https://github.com/deebify)* - [`6df5757`](https://github.com/tobymao/sqlglot/commit/6df5757d7714269c035b3a3a015c81bde436f2bb) - bq datetime -> timestampfromparts *(PR [#3642](https://github.com/tobymao/sqlglot/pull/3642) by [@tobymao](https://github.com/tobymao))* - [`6abd2c9`](https://github.com/tobymao/sqlglot/commit/6abd2c943896e65b6c9bb5343304dcd8f01b425e) - **oracle**: Support for WITH READ ONLY / CHECK OPTION *(PR [#3639](https://github.com/tobymao/sqlglot/pull/3639) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3635](https://github.com/tobymao/sqlglot/issues/3635) opened by [@deebify](https://github.com/deebify)* ### :bug: Bug Fixes - [`514b3a5`](https://github.com/tobymao/sqlglot/commit/514b3a52384fc9164bc5c63fda6b779d68e427b0) - **redshift**: add support for Oracle style outer join markers [#3611](https://github.com/tobymao/sqlglot/pull/3611) *(PR [#3612](https://github.com/tobymao/sqlglot/pull/3612) by [@sandband](https://github.com/sandband))* - :arrow_lower_right: *fixes issue [#3611](https://github.com/tobymao/sqlglot/issues/3611) opened by [@sandband](https://github.com/sandband)* - [`6a607d3`](https://github.com/tobymao/sqlglot/commit/6a607d3fa604be7fdbd51e7de06aeedae73039b7) - unnest should also be a function *(commit by [@tobymao](https://github.com/tobymao))* - [`0e1a1fb`](https://github.com/tobymao/sqlglot/commit/0e1a1fb31de5fefc16a978162d6c6dd4141e1c4d) - **optimizer**: don't use datetrunc type for right side *(PR [#3614](https://github.com/tobymao/sqlglot/pull/3614) by [@barakalon](https://github.com/barakalon))* - [`d96459f`](https://github.com/tobymao/sqlglot/commit/d96459f18b308466fbbfd9fcbe658e33ec931f1e) - **postgres**: sha256 support *(commit by 
[@tobymao](https://github.com/tobymao))* - [`05fe847`](https://github.com/tobymao/sqlglot/commit/05fe847aeb6525836d4eadb908c65a50755dc0c5) - **snowflake**: support fqns in masking/projection policy constraint *(PR [#3620](https://github.com/tobymao/sqlglot/pull/3620) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3617](https://github.com/tobymao/sqlglot/issues/3617) opened by [@kosta-foundational](https://github.com/kosta-foundational)* - [`caa3051`](https://github.com/tobymao/sqlglot/commit/caa305161893079f87d4d51d9042b5103a850be4) - **snowflake**: Allow SELECT keyword as JSON path key *(PR [#3627](https://github.com/tobymao/sqlglot/pull/3627) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3619](https://github.com/tobymao/sqlglot/issues/3619) opened by [@kosta-foundational](https://github.com/kosta-foundational)* - [`96efb64`](https://github.com/tobymao/sqlglot/commit/96efb6458ad5c6b92990d8ea69545e60b2eaa8a5) - **tokenizer**: properly handle tags that need to be identifiers in heredocs *(PR [#3630](https://github.com/tobymao/sqlglot/pull/3630) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3618](https://github.com/tobymao/sqlglot/issues/3618) opened by [@bigluck](https://github.com/bigluck)* - [`4f8edba`](https://github.com/tobymao/sqlglot/commit/4f8edba78d070e2d4b50da56ddb5ed139120c587) - **oracle**: Allow optional format in TO_DATE *(PR [#3637](https://github.com/tobymao/sqlglot/pull/3637) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3636](https://github.com/tobymao/sqlglot/issues/3636) opened by [@deebify](https://github.com/deebify)* - [`d8c6153`](https://github.com/tobymao/sqlglot/commit/d8c61534f2b11287af22eb70948dfb735cd778bc) - **oracle**: don't apply eliminate_join_markers at parse time *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`1afe6ac`](https://github.com/tobymao/sqlglot/commit/1afe6ac62b9c827a001c5a6ab917304c5756fb09) - don't generate neq(0) if subquery predicate in ensure_bools *(commit by [@georgesittas](https://github.com/georgesittas))* - [`fc050bd`](https://github.com/tobymao/sqlglot/commit/fc050bddf937509961cfd83e9fa86ed7e931da11) - **sqlite**: Fix transpilation of GENERATED AS IDENTITY *(PR [#3634](https://github.com/tobymao/sqlglot/pull/3634) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3632](https://github.com/tobymao/sqlglot/issues/3632) opened by [@lelandbatey](https://github.com/lelandbatey)* - [`47472d9`](https://github.com/tobymao/sqlglot/commit/47472d9c0a27070fd5f4f9b8c12a8bd8c86b1de1) - **duckdb**: get rid of TEXT length to facilitate transpilation *(PR [#3633](https://github.com/tobymao/sqlglot/pull/3633) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`6194c0f`](https://github.com/tobymao/sqlglot/commit/6194c0f37fd322ee2c33ebe30dcee6c836a66943) - clean up logic related to join marker parsing/generation *(PR [#3613](https://github.com/tobymao/sqlglot/pull/3613) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`5396a8e`](https://github.com/tobymao/sqlglot/commit/5396a8e6ea29876c824b741c2812ad15f4768e4c) - fix SQLFrame casing *(PR [#3616](https://github.com/tobymao/sqlglot/pull/3616) by [@eakmanrq](https://github.com/eakmanrq))* - [`0397d6f`](https://github.com/tobymao/sqlglot/commit/0397d6f7638c658528cdfef3c85f89afc7fc8952) - bump sqlglotrs to v0.2.6 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.0.3] - 2024-06-06 ### :sparkles: New Features - [`97f8d1a`](https://github.com/tobymao/sqlglot/commit/97f8d1a05801bcd7fd237dac0470c232d3106ca4) - add materialize dialect *(PR [#3577](https://github.com/tobymao/sqlglot/pull/3577) by [@bobbyiliev](https://github.com/bobbyiliev))* - 
[`bde5a8d`](https://github.com/tobymao/sqlglot/commit/bde5a8de346125704f757ed6a2de444905fe146e) - add risingwave dialect *(PR [#3598](https://github.com/tobymao/sqlglot/pull/3598) by [@neverchanje](https://github.com/neverchanje))* ### :recycle: Refactors - [`5140817`](https://github.com/tobymao/sqlglot/commit/51408172ce940b6ab0ad783d98e632d972da6a0a) - **risingwave**: clean up initial implementation of RisingWave *(commit by [@georgesittas](https://github.com/georgesittas))* - [`f920014`](https://github.com/tobymao/sqlglot/commit/f920014709c2d3ccb7ec18fb622ecd6b6ee0afcd) - **materialize**: clean up initial implementation of Materialize *(PR [#3608](https://github.com/tobymao/sqlglot/pull/3608) by [@georgesittas](https://github.com/georgesittas))* ## [v25.0.2] - 2024-06-05 ### :sparkles: New Features - [`472058d`](https://github.com/tobymao/sqlglot/commit/472058daccf8dc2a7f7f4b7082309a06802017a5) - **bigquery**: add support for GAP_FILL function *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v25.0.0] - 2024-06-05 ### :bug: Bug Fixes - [`f7081c4`](https://github.com/tobymao/sqlglot/commit/f7081c455cf2f61af61dcfd0859a1bf272b84258) - builder other props closes [#3588](https://github.com/tobymao/sqlglot/pull/3588) *(commit by [@tobymao](https://github.com/tobymao))* - [`13009ca`](https://github.com/tobymao/sqlglot/commit/13009ca5c14d81b7a07311a38f329b967f909926) - **doris**: use CSV syntax for GROUP_CONCAT *(PR [#3592](https://github.com/tobymao/sqlglot/pull/3592) by [@Toms1999](https://github.com/Toms1999))* - [`bf7fd12`](https://github.com/tobymao/sqlglot/commit/bf7fd12f9a19bf91dd89f76cf376bf6004d83dc0) - no_ilike_sql to lower both sides *(PR [#3593](https://github.com/tobymao/sqlglot/pull/3593) by [@barakalon](https://github.com/barakalon))* - [`8d87568`](https://github.com/tobymao/sqlglot/commit/8d875681403a43282e1f414ca90f3cf955f26027) - stop normalization_distance early *(PR [#3594](https://github.com/tobymao/sqlglot/pull/3594) by 
[@barakalon](https://github.com/barakalon))* - [`3e38912`](https://github.com/tobymao/sqlglot/commit/3e38912cd0de2e3939221b6ad8ae194e68cfe288) - **duckdb**: add reserved keywords *(PR [#3597](https://github.com/tobymao/sqlglot/pull/3597) by [@georgesittas](https://github.com/georgesittas))* - [`5683d5f`](https://github.com/tobymao/sqlglot/commit/5683d5fe7eeae8f70751de962644c0981c21c7fc) - **hive**: generate TRUNC for TimestampTrunc *(PR [#3600](https://github.com/tobymao/sqlglot/pull/3600) by [@Toms1999](https://github.com/Toms1999))* - [`ff55ec1`](https://github.com/tobymao/sqlglot/commit/ff55ec1ca8c259f3c304aa7f6039c033f1fe728c) - **hive**: generate string unit for TRUNC, parse it into TimestampTrunc too *(commit by [@georgesittas](https://github.com/georgesittas))* - [`12b6aa7`](https://github.com/tobymao/sqlglot/commit/12b6aa7006bbf005c750070d9e266153057ff281) - **snowflake**: Fix COPY INTO with subquery *(PR [#3605](https://github.com/tobymao/sqlglot/pull/3605) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3604](https://github.com/tobymao/sqlglot/issues/3604) opened by [@ajuszczak](https://github.com/ajuszczak)* - [`061be9b`](https://github.com/tobymao/sqlglot/commit/061be9bda9e03b17590a0ac58fa2fec0540e2e77) - optimize absorb_and_eliminate and remove_complements *(PR [#3595](https://github.com/tobymao/sqlglot/pull/3595) by [@barakalon](https://github.com/barakalon))* ### :wrench: Chores - [`7dd244b`](https://github.com/tobymao/sqlglot/commit/7dd244b6a57e4e8cc9d07cbaf3e89c60fa665a69) - **hive**: test TRUNC roundtrip *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v24.1.2] - 2024-06-04 ### :sparkles: New Features - [`158ca97`](https://github.com/tobymao/sqlglot/commit/158ca9724c23e7a58f6782719b477f2adb57acae) - **duckdb**: transpile TIMESTAMPNTZ into TIMESTAMP *(PR [#3587](https://github.com/tobymao/sqlglot/pull/3587) by [@georgesittas](https://github.com/georgesittas))* ## [v24.1.1] - 2024-06-03 ### 
:bug: Bug Fixes - [`60d9085`](https://github.com/tobymao/sqlglot/commit/60d9085a4ec2d0c39aa904bf81b7e15b5bac8ea5) - **postgres**: collate with identifier closes [#3578](https://github.com/tobymao/sqlglot/pull/3578) *(commit by [@tobymao](https://github.com/tobymao))* - [`317e3a9`](https://github.com/tobymao/sqlglot/commit/317e3a96a49f439aa06af31abb06990f9a1b0d63) - **bigquery**: expanding positional refs with ambiguous references *(PR [#3585](https://github.com/tobymao/sqlglot/pull/3585) by [@tobymao](https://github.com/tobymao))* - [`5e321f1`](https://github.com/tobymao/sqlglot/commit/5e321f15ac4e54c78b9f90475e1bac4a94eaa48d) - div aliases closes [#3583](https://github.com/tobymao/sqlglot/pull/3583) *(PR [#3586](https://github.com/tobymao/sqlglot/pull/3586) by [@tobymao](https://github.com/tobymao))* ## [v24.1.0] - 2024-05-30 ### :boom: BREAKING CHANGES - due to [`0788c94`](https://github.com/tobymao/sqlglot/commit/0788c944a85d7323b61109ee1ccb5859e3d08404) - Expand stars on BigQuery's tbl.struct_col.* selections *(PR [#3531](https://github.com/tobymao/sqlglot/pull/3531) by [@VaggelisD](https://github.com/VaggelisD))*: Expand stars on BigQuery's tbl.struct_col.* selections (#3531) - due to [`3e71393`](https://github.com/tobymao/sqlglot/commit/3e71393cb8e201a75321fbc179289eb15b1dc6ce) - Refactor struct star expansion in BQ *(PR [#3576](https://github.com/tobymao/sqlglot/pull/3576) by [@VaggelisD](https://github.com/VaggelisD))*: Refactor struct star expansion in BQ (#3576) ### :sparkles: New Features - [`0788c94`](https://github.com/tobymao/sqlglot/commit/0788c944a85d7323b61109ee1ccb5859e3d08404) - **optimizer**: Expand stars on BigQuery's tbl.struct_col.* selections *(PR [#3531](https://github.com/tobymao/sqlglot/pull/3531) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3484](https://github.com/tobymao/sqlglot/issues/3484) opened by [@Bladieblah](https://github.com/Bladieblah)* ### :bug: Bug Fixes - 
[`14d63ee`](https://github.com/tobymao/sqlglot/commit/14d63ee8172ddc972d6677071cae3880c748c3aa) - bubble up Identifier comments to TableAliases *(PR [#3571](https://github.com/tobymao/sqlglot/pull/3571) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3570](https://github.com/tobymao/sqlglot/issues/3570) opened by [@fangxingli](https://github.com/fangxingli)* - [`ba90c22`](https://github.com/tobymao/sqlglot/commit/ba90c22921448ef6b5a0497a9a48918d0e8a9654) - **snowflake**: COPY Postfix *(PR [#3564](https://github.com/tobymao/sqlglot/pull/3564) by [@VaggelisD](https://github.com/VaggelisD))* - [`3e71393`](https://github.com/tobymao/sqlglot/commit/3e71393cb8e201a75321fbc179289eb15b1dc6ce) - **optimizer**: Refactor struct star expansion in BQ *(PR [#3576](https://github.com/tobymao/sqlglot/pull/3576) by [@VaggelisD](https://github.com/VaggelisD))* ### :recycle: Refactors - [`1e1dc3f`](https://github.com/tobymao/sqlglot/commit/1e1dc3fea8c5fc1f86fefe6af384e38c8531f2d2) - **optimizer**: minor improvements in the struct star expansion *(PR [#3568](https://github.com/tobymao/sqlglot/pull/3568) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`b12ea8c`](https://github.com/tobymao/sqlglot/commit/b12ea8c126d5debef59e9d9bcbbc6fd5ecf56682) - minor style changes related to COPY INTO *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v24.0.3] - 2024-05-29 ### :bug: Bug Fixes - [`fb8db9f`](https://github.com/tobymao/sqlglot/commit/fb8db9f2219cfd578fda5c3f51737c180d5aecc6) - **parser**: edge case where TYPE_CONVERTERS leads to type instead of column *(PR [#3566](https://github.com/tobymao/sqlglot/pull/3566) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3565](https://github.com/tobymao/sqlglot/issues/3565) opened by [@galunto](https://github.com/galunto)* - [`aac8570`](https://github.com/tobymao/sqlglot/commit/aac85705c43edfcd1ebb552573f496c14dce519b) - 
use index2 instead of self._index in _parse_type index difference *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v24.0.2] - 2024-05-28 ### :sparkles: New Features - [`078471d`](https://github.com/tobymao/sqlglot/commit/078471d3643da418c91b71dc7bfce5453b924028) - **mysql,doris**: improve transpilation of INTERVAL (plural to singular) *(PR [#3543](https://github.com/tobymao/sqlglot/pull/3543) by [@Toms1999](https://github.com/Toms1999))* - [`fe56e64`](https://github.com/tobymao/sqlglot/commit/fe56e64aff775002c52843b6b9df973d96349400) - **postgres**: add support for col int[size] column def syntax *(PR [#3548](https://github.com/tobymao/sqlglot/pull/3548) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3544](https://github.com/tobymao/sqlglot/issues/3544) opened by [@judahrand](https://github.com/judahrand)* - :arrow_lower_right: *addresses issue [#3545](https://github.com/tobymao/sqlglot/issues/3545) opened by [@judahrand](https://github.com/judahrand)* - [`188dce8`](https://github.com/tobymao/sqlglot/commit/188dce8ae98f23b5741882c698109563445f11f6) - **snowflake**: add support for WITH-prefixed column constraints *(PR [#3549](https://github.com/tobymao/sqlglot/pull/3549) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3537](https://github.com/tobymao/sqlglot/issues/3537) opened by [@barino86](https://github.com/barino86)* - [`712d247`](https://github.com/tobymao/sqlglot/commit/712d24704f1be9e54fd6385d6fdbd05173b007aa) - add support for ALTER COLUMN DROP NOT NULL *(PR [#3550](https://github.com/tobymao/sqlglot/pull/3550) by [@noklam](https://github.com/noklam))* - :arrow_lower_right: *addresses issue [#3534](https://github.com/tobymao/sqlglot/issues/3534) opened by [@barino86](https://github.com/barino86)* - [`7c323bd`](https://github.com/tobymao/sqlglot/commit/7c323bde83f1804d7a1e98fcf94e6832385a03d6) - add option in schema's find method to ensure types 
are DataTypes *(PR [#3560](https://github.com/tobymao/sqlglot/pull/3560) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`1a8a16b`](https://github.com/tobymao/sqlglot/commit/1a8a16b459c7fe20fc2c689ad601b5beac57a206) - **clickhouse**: improve struct type parsing *(PR [#3547](https://github.com/tobymao/sqlglot/pull/3547) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3546](https://github.com/tobymao/sqlglot/issues/3546) opened by [@cpcloud](https://github.com/cpcloud)* - [`970d3b0`](https://github.com/tobymao/sqlglot/commit/970d3b03750d58ec236ce205bc250616e1fb1349) - **postgres**: setting un-suffixed FLOAT as DOUBLE ([#3551](https://github.com/tobymao/sqlglot/pull/3551)) *(PR [#3552](https://github.com/tobymao/sqlglot/pull/3552) by [@sandband](https://github.com/sandband))* - :arrow_lower_right: *fixes issue [#3551](https://github.com/tobymao/sqlglot/issues/3551) opened by [@sandband](https://github.com/sandband)* - [`e1a9a8b`](https://github.com/tobymao/sqlglot/commit/e1a9a8b6b7fbd44e62cee626540f90425d22d50c) - **redshift**: add support for MINUS operator [#3553](https://github.com/tobymao/sqlglot/pull/3553) *(PR [#3555](https://github.com/tobymao/sqlglot/pull/3555) by [@sandband](https://github.com/sandband))* - [`beb0269`](https://github.com/tobymao/sqlglot/commit/beb0269b39e848897eaf56e1966d342db72e5c7c) - **tsql**: adapt TimeStrToTime to avoid superfluous casts *(PR [#3558](https://github.com/tobymao/sqlglot/pull/3558) by [@Themiscodes](https://github.com/Themiscodes))* - [`eae3c51`](https://github.com/tobymao/sqlglot/commit/eae3c5165c16b61c7b524a55776bdb1127005c7d) - use regex to split interval strings *(PR [#3556](https://github.com/tobymao/sqlglot/pull/3556) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3554](https://github.com/tobymao/sqlglot/issues/3554) opened by [@kevinjqiu](https://github.com/kevinjqiu)* ### :recycle: Refactors - 
[`a67de5f`](https://github.com/tobymao/sqlglot/commit/a67de5faaa88c1fb5d9857a69c9df06506520cbc) - get rid of redundant dict_depth check in schema find *(PR [#3561](https://github.com/tobymao/sqlglot/pull/3561) by [@georgesittas](https://github.com/georgesittas))* - [`89a8984`](https://github.com/tobymao/sqlglot/commit/89a8984b8db3817d934b4395e190f3848b1ee77a) - move UNESCAPED_SEQUENCES out of the _Dialect metaclass *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`893addf`](https://github.com/tobymao/sqlglot/commit/893addf9d07602ec3a77097f38d696b6760c6038) - add SET NOT NULL test *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v24.0.1] - 2024-05-23 ### :boom: BREAKING CHANGES - due to [`80c622e`](https://github.com/tobymao/sqlglot/commit/80c622e0c252ef3be9e469c1cf116c1cd4eaef94) - add reserved keywords fixes [#3526](https://github.com/tobymao/sqlglot/pull/3526) *(commit by [@georgesittas](https://github.com/georgesittas))*: add reserved keywords fixes #3526 ### :sparkles: New Features - [`a255610`](https://github.com/tobymao/sqlglot/commit/a2556101c8d04907ae49252def84c55d2daf78b2) - add StringToArray expression (postgres), improve its transpilation *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8f46d48`](https://github.com/tobymao/sqlglot/commit/8f46d48d4ef4e6be022aff5739992f149519c19d) - **redshift**: transpile SPLIT_TO_STRING *(commit by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`80c622e`](https://github.com/tobymao/sqlglot/commit/80c622e0c252ef3be9e469c1cf116c1cd4eaef94) - **doris**: add reserved keywords fixes [#3526](https://github.com/tobymao/sqlglot/pull/3526) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`ebf5fc7`](https://github.com/tobymao/sqlglot/commit/ebf5fc70d8936b5e1522a3ae1b9e231cefe49623) - **hive**: generate correct names for weekofyear, dayofmonth, dayofweek *(PR [#3533](https://github.com/tobymao/sqlglot/pull/3533) by 
[@oshyun](https://github.com/oshyun))* - :arrow_lower_right: *fixes issue [#3532](https://github.com/tobymao/sqlglot/issues/3532) opened by [@oshyun](https://github.com/oshyun)* - [`3fe3c2c`](https://github.com/tobymao/sqlglot/commit/3fe3c2c0a3e5f465a0c62261c5a0ba6faf8f0846) - **parser**: make _parse_type less aggressive, only parse column as last resort *(PR [#3541](https://github.com/tobymao/sqlglot/pull/3541) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3539](https://github.com/tobymao/sqlglot/issues/3539) opened by [@crash-g](https://github.com/crash-g)* - :arrow_lower_right: *fixes issue [#3540](https://github.com/tobymao/sqlglot/issues/3540) opened by [@crash-g](https://github.com/crash-g)* - [`8afff02`](https://github.com/tobymao/sqlglot/commit/8afff028977593789abe31c6168a93b7e32ac890) - **tsql**: preserve REPLICATE roundtrip *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v24.0.0] - 2024-05-21 ### :boom: BREAKING CHANGES - due to [`a077f17`](https://github.com/tobymao/sqlglot/commit/a077f17d10200980769ff69dd9044c95d6d718f2) - add reserved keywords *(PR [#3525](https://github.com/tobymao/sqlglot/pull/3525) by [@georgesittas](https://github.com/georgesittas))*: add reserved keywords (#3525) ### :sparkles: New Features - [`d958bba`](https://github.com/tobymao/sqlglot/commit/d958bba8494b8bca9cf3ffef0384690bafd78393) - **snowflake**: add support for CREATE WAREHOUSE *(PR [#3510](https://github.com/tobymao/sqlglot/pull/3510) by [@yingw787](https://github.com/yingw787))* - :arrow_lower_right: *addresses issue [#3502](https://github.com/tobymao/sqlglot/issues/3502) opened by [@yingw787](https://github.com/yingw787)* - [`2105300`](https://github.com/tobymao/sqlglot/commit/21053004dbb4c6dc3bcb078c4ab93f267e2c63b2) - **databricks**: Enable hex string literals *(PR [#3522](https://github.com/tobymao/sqlglot/pull/3522) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue 
[#3521](https://github.com/tobymao/sqlglot/issues/3521) opened by [@aersam](https://github.com/aersam)* - [`1ef3bb6`](https://github.com/tobymao/sqlglot/commit/1ef3bb6ab49eff66a50c4d3983f19292b6979e98) - **snowflake**: Add support for `CREATE STREAMLIT` *(PR [#3519](https://github.com/tobymao/sqlglot/pull/3519) by [@yingw787](https://github.com/yingw787))* - :arrow_lower_right: *addresses issue [#3516](https://github.com/tobymao/sqlglot/issues/3516) opened by [@yingw787](https://github.com/yingw787)* ### :bug: Bug Fixes - [`5cecbfa`](https://github.com/tobymao/sqlglot/commit/5cecbfa63a770c4d623f4a5f76d1a7a5f59d087d) - unnest identifier closes [#3512](https://github.com/tobymao/sqlglot/pull/3512) *(commit by [@tobymao](https://github.com/tobymao))* - [`33ab353`](https://github.com/tobymao/sqlglot/commit/33ab3536d68203f4fceee63507b5c73076d48ed7) - **snowflake**: parse certain DB_CREATABLES as identifiers *(commit by [@georgesittas](https://github.com/georgesittas))* - [`d468f92`](https://github.com/tobymao/sqlglot/commit/d468f92a16decabdf847d7de19f82d65d1939d92) - **doris**: dont generate arrows for JSONExtract* closes [#3513](https://github.com/tobymao/sqlglot/pull/3513) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`bfb9f98`](https://github.com/tobymao/sqlglot/commit/bfb9f983d35e080ec1f8c171a65d576af873c0ea) - **postgres**: parse @> into ArrayContainsAll, improve transpilation *(PR [#3515](https://github.com/tobymao/sqlglot/pull/3515) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3511](https://github.com/tobymao/sqlglot/issues/3511) opened by [@Toms1999](https://github.com/Toms1999)* - [`4def45b`](https://github.com/tobymao/sqlglot/commit/4def45bb553f6fbc65dcf0fa3d6e8c3f5ec000ea) - make UDF DDL property parsing more lenient closes [#3517](https://github.com/tobymao/sqlglot/pull/3517) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`a077f17`](https://github.com/tobymao/sqlglot/commit/a077f17d10200980769ff69dd9044c95d6d718f2) - **mysql**: add reserved keywords *(PR [#3525](https://github.com/tobymao/sqlglot/pull/3525) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3520](https://github.com/tobymao/sqlglot/issues/3520) opened by [@Toms1999](https://github.com/Toms1999)* - :arrow_lower_right: *fixes issue [#3524](https://github.com/tobymao/sqlglot/issues/3524) opened by [@Toms1999](https://github.com/Toms1999)* ### :wrench: Chores - [`358f30c`](https://github.com/tobymao/sqlglot/commit/358f30cc02959275c53a2ee9eccde04ddc6a74a5) - remove redundant postgres JSONB token mapping *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.17.0] - 2024-05-19 ### :boom: BREAKING CHANGES - due to [`77d21d9`](https://github.com/tobymao/sqlglot/commit/77d21d9379c3f130b803ea651ec3d36256bb84a4) - parse : operator as JSONExtract (similar to Snowflake) *(PR [#3508](https://github.com/tobymao/sqlglot/pull/3508) by [@georgesittas](https://github.com/georgesittas))*: parse : operator as JSONExtract (similar to Snowflake) (#3508) ### :sparkles: New Features - [`1125662`](https://github.com/tobymao/sqlglot/commit/11256629d74c4721ed13ed534509d266e260dde6) - add support for snowflake lambdas with type annotations closes … *(PR [#3506](https://github.com/tobymao/sqlglot/pull/3506) by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`77d21d9`](https://github.com/tobymao/sqlglot/commit/77d21d9379c3f130b803ea651ec3d36256bb84a4) - **databricks**: parse : operator as JSONExtract (similar to Snowflake) *(PR [#3508](https://github.com/tobymao/sqlglot/pull/3508) by [@georgesittas](https://github.com/georgesittas))* ## [v23.16.0] - 2024-05-18 ### :boom: BREAKING CHANGES - due to [`e281db8`](https://github.com/tobymao/sqlglot/commit/e281db8784682649be305e9a05c45211402f107c) - Add ALTER TABLE SET *(PR [#3485](https://github.com/tobymao/sqlglot/pull/3485) by 
[@VaggelisD](https://github.com/VaggelisD))*: Add ALTER TABLE SET (#3485) ### :sparkles: New Features - [`e281db8`](https://github.com/tobymao/sqlglot/commit/e281db8784682649be305e9a05c45211402f107c) - Add ALTER TABLE SET *(PR [#3485](https://github.com/tobymao/sqlglot/pull/3485) by [@VaggelisD](https://github.com/VaggelisD))* - [`9aee21b`](https://github.com/tobymao/sqlglot/commit/9aee21b88e73809e2cdc4e48f04e16edcf1141d7) - add RETURNS NULL ON NULL and STRICT properties *(PR [#3504](https://github.com/tobymao/sqlglot/pull/3504) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3503](https://github.com/tobymao/sqlglot/issues/3503) opened by [@krzysztof-kwitt](https://github.com/krzysztof-kwitt)* ### :wrench: Chores - [`0896d11`](https://github.com/tobymao/sqlglot/commit/0896d113b94aaea82e90dd04cdf917dfa546d08e) - lint *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.10] - 2024-05-17 ### :sparkles: New Features - [`89c1d3a`](https://github.com/tobymao/sqlglot/commit/89c1d3a4dd3387576c384413b3a8991a2dd030de) - **clickhouse**: support generate TimestampTrunc, Variance, Stddev *(PR [#3489](https://github.com/tobymao/sqlglot/pull/3489) by [@longxiaofei](https://github.com/longxiaofei))* ### :bug: Bug Fixes - [`03879bb`](https://github.com/tobymao/sqlglot/commit/03879bb3249ee83cce34d629f1016575d3b932e3) - **postgres**: date_trunc supports time zone *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6e7f37a`](https://github.com/tobymao/sqlglot/commit/6e7f37af86a4f36ec47ea4ef3519e5c97376e090) - copy into pretty printing and default dialect *(PR [#3496](https://github.com/tobymao/sqlglot/pull/3496) by [@tobymao](https://github.com/tobymao))* - [`e8600e2`](https://github.com/tobymao/sqlglot/commit/e8600e24370a131a0b375a1a9943fdf590968198) - property eq needs highest precedence *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.9] - 2024-05-17 ### :boom: BREAKING CHANGES - due to 
[`846d5cd`](https://github.com/tobymao/sqlglot/commit/846d5cd2fe85f836f5ad888e783fedfa2108d579) - set default precision / width for DECIMAL type *(PR [#3472](https://github.com/tobymao/sqlglot/pull/3472) by [@georgesittas](https://github.com/georgesittas))*: set default precision / width for DECIMAL type (#3472) - due to [`e3ff67b`](https://github.com/tobymao/sqlglot/commit/e3ff67b0327a217a0523f82e6a11940feab1a8ac) - preserve star clauses (EXCLUDE, RENAME, REPLACE) *(PR [#3477](https://github.com/tobymao/sqlglot/pull/3477) by [@georgesittas](https://github.com/georgesittas))*: preserve star clauses (EXCLUDE, RENAME, REPLACE) (#3477) - due to [`b417c80`](https://github.com/tobymao/sqlglot/commit/b417c80b4208df1b97363db53af42158aa97bbd6) - parse TINYINT into UTINYINT to improve transpilation *(PR [#3486](https://github.com/tobymao/sqlglot/pull/3486) by [@georgesittas](https://github.com/georgesittas))*: parse TINYINT into UTINYINT to improve transpilation (#3486) - due to [`54e31af`](https://github.com/tobymao/sqlglot/commit/54e31af7d86138662c9619d50b4ae2e68e04942b) - add DECLARE statement parsing *(PR [#3462](https://github.com/tobymao/sqlglot/pull/3462) by [@jlucas-fsp](https://github.com/jlucas-fsp))*: add DECLARE statement parsing (#3462) - due to [`7287bb9`](https://github.com/tobymao/sqlglot/commit/7287bb9bf578b2b3afaf25647f505b9d73040dc7) - nested cte ordering closes [#3488](https://github.com/tobymao/sqlglot/pull/3488) *(commit by [@tobymao](https://github.com/tobymao))*: nested cte ordering closes #3488 ### :sparkles: New Features - [`2c29bf3`](https://github.com/tobymao/sqlglot/commit/2c29bf3b7a163b88754c4593996bbba9b3c791b6) - **snowflake**: add support for CREATE TAG DDL statement *(PR [#3473](https://github.com/tobymao/sqlglot/pull/3473) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3468](https://github.com/tobymao/sqlglot/issues/3468) opened by [@tekumara](https://github.com/tekumara)* -
[`2433993`](https://github.com/tobymao/sqlglot/commit/24339934167e651f2afd6966024e4d96ef55c677) - **transpiler**: handle different hex behavior for dialects *(PR [#3463](https://github.com/tobymao/sqlglot/pull/3463) by [@viplazylmht](https://github.com/viplazylmht))* - :arrow_lower_right: *addresses issue [#3460](https://github.com/tobymao/sqlglot/issues/3460) opened by [@viplazylmht](https://github.com/viplazylmht)* - [`0009e09`](https://github.com/tobymao/sqlglot/commit/0009e09b1a7f94f85985670a09bb0be92c673b46) - add epoch_ms of duckdb to other dialects *(PR [#3471](https://github.com/tobymao/sqlglot/pull/3471) by [@longxiaofei](https://github.com/longxiaofei))* - [`461215b`](https://github.com/tobymao/sqlglot/commit/461215b259de98125ea6b09d7bd875edb3ccce75) - **clickhouse**: add support for PROJECTION in CREATE TABLE statement *(PR [#3465](https://github.com/tobymao/sqlglot/pull/3465) by [@GaliFFun](https://github.com/GaliFFun))* - [`54e31af`](https://github.com/tobymao/sqlglot/commit/54e31af7d86138662c9619d50b4ae2e68e04942b) - **tsql**: add DECLARE statement parsing *(PR [#3462](https://github.com/tobymao/sqlglot/pull/3462) by [@jlucas-fsp](https://github.com/jlucas-fsp))* - [`c811adb`](https://github.com/tobymao/sqlglot/commit/c811adb73e6f83265fedc26274c7d4b40f8a1c85) - snowflake array_construct_compact to spark *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`58d5f2b`](https://github.com/tobymao/sqlglot/commit/58d5f2bece42acdda5f8c08d30e6f61a5e538d4c) - **presto**: fix parsing and generating hash functions presto/trino *(PR [#3459](https://github.com/tobymao/sqlglot/pull/3459) by [@viplazylmht](https://github.com/viplazylmht))* - :arrow_lower_right: *fixes issue [#3458](https://github.com/tobymao/sqlglot/issues/3458) opened by [@viplazylmht](https://github.com/viplazylmht)* - [`065281e`](https://github.com/tobymao/sqlglot/commit/065281e28be75597f3f97cee22995423ed483660) - **optimizer**: fix multiple bugs in unnest_subqueries, clean 
up test suite *(PR [#3464](https://github.com/tobymao/sqlglot/pull/3464) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3448](https://github.com/tobymao/sqlglot/issues/3448) opened by [@yesemsanthoshkumar](https://github.com/yesemsanthoshkumar)* - [`80ba1e8`](https://github.com/tobymao/sqlglot/commit/80ba1e8786a6347b8f20f340c185a0b41d017c73) - preserve quotes for projections produced by the eliminate_qualify rule *(PR [#3470](https://github.com/tobymao/sqlglot/pull/3470) by [@aersam](https://github.com/aersam))* - :arrow_lower_right: *fixes issue [#3467](https://github.com/tobymao/sqlglot/issues/3467) opened by [@aersam](https://github.com/aersam)* - [`3bc1fbe`](https://github.com/tobymao/sqlglot/commit/3bc1fbed40d9d0d05f189ca60fdc7af19b815e8b) - make quoting of alias_or_name in eliminate_qualify more robust *(commit by [@georgesittas](https://github.com/georgesittas))* - [`1843e9b`](https://github.com/tobymao/sqlglot/commit/1843e9b825da6e97bda8c7b4fffce40baf199af1) - allow parameters in user-defined types *(PR [#3474](https://github.com/tobymao/sqlglot/pull/3474) by [@georgesittas](https://github.com/georgesittas))* - [`e004d2a`](https://github.com/tobymao/sqlglot/commit/e004d2a3d88ea77d34ecdb8290df1e73511e6b6c) - **duckdb**: preserve precedence of json extraction when converting to arrow syntax *(PR [#3478](https://github.com/tobymao/sqlglot/pull/3478) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3476](https://github.com/tobymao/sqlglot/issues/3476) opened by [@asiunov](https://github.com/asiunov)* - [`e3ff67b`](https://github.com/tobymao/sqlglot/commit/e3ff67b0327a217a0523f82e6a11940feab1a8ac) - **snowflake**: preserve star clauses (EXCLUDE, RENAME, REPLACE) *(PR [#3477](https://github.com/tobymao/sqlglot/pull/3477) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3475](https://github.com/tobymao/sqlglot/issues/3475) opened by 
[@asiunov](https://github.com/asiunov)* - [`428fd61`](https://github.com/tobymao/sqlglot/commit/428fd61574e10be9afab23ac711758b229cc174f) - **mysql**: generate CONCAT for DPipe *(PR [#3482](https://github.com/tobymao/sqlglot/pull/3482) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3481](https://github.com/tobymao/sqlglot/issues/3481) opened by [@Toms1999](https://github.com/Toms1999)* - [`b417c80`](https://github.com/tobymao/sqlglot/commit/b417c80b4208df1b97363db53af42158aa97bbd6) - **tsql**: parse TINYINT into UTINYINT to improve transpilation *(PR [#3486](https://github.com/tobymao/sqlglot/pull/3486) by [@georgesittas](https://github.com/georgesittas))* - [`a3ff49e`](https://github.com/tobymao/sqlglot/commit/a3ff49e93f2c6752f512192ca8b6b6ad18fc925a) - **presto**: fix DELETE DML statement for presto/trino *(PR [#3466](https://github.com/tobymao/sqlglot/pull/3466) by [@viplazylmht](https://github.com/viplazylmht))* - [`7287bb9`](https://github.com/tobymao/sqlglot/commit/7287bb9bf578b2b3afaf25647f505b9d73040dc7) - nested cte ordering closes [#3488](https://github.com/tobymao/sqlglot/pull/3488) *(commit by [@tobymao](https://github.com/tobymao))* - [`5b64475`](https://github.com/tobymao/sqlglot/commit/5b64475bfd2d6a0ddcb3d0adb60d06dca62421a0) - allow rollup to be used as an identifier *(PR [#3495](https://github.com/tobymao/sqlglot/pull/3495) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3494](https://github.com/tobymao/sqlglot/issues/3494) opened by [@dangoldin](https://github.com/dangoldin)* ### :recycle: Refactors - [`846d5cd`](https://github.com/tobymao/sqlglot/commit/846d5cd2fe85f836f5ad888e783fedfa2108d579) - **snowflake**: set default precision / width for DECIMAL type *(PR [#3472](https://github.com/tobymao/sqlglot/pull/3472) by [@georgesittas](https://github.com/georgesittas))* -
[`930f923`](https://github.com/tobymao/sqlglot/commit/930f923c6da182be33ad4c912b64ec052a63af30) - clean up Hex / LowerHex implementation *(PR [#3483](https://github.com/tobymao/sqlglot/pull/3483) by [@georgesittas](https://github.com/georgesittas))* - [`883fcd7`](https://github.com/tobymao/sqlglot/commit/883fcd78645539a275b66472f0bd1dfe1d3d4401) - **presto**: make DELETE transpilation more robust *(PR [#3487](https://github.com/tobymao/sqlglot/pull/3487) by [@georgesittas](https://github.com/georgesittas))* - [`49f7f85`](https://github.com/tobymao/sqlglot/commit/49f7f857634ae85547c805ac53911895407dd7cb) - **tsql**: handle TABLE more gracefully for DeclareItem *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.15.8] - 2024-05-11 ### :boom: BREAKING CHANGES - due to [`510f8b5`](https://github.com/tobymao/sqlglot/commit/510f8b5726c59a13284e9482dc47d488559e6c9e) - improve transpilation of TABLESAMPLE clause *(PR [#3457](https://github.com/tobymao/sqlglot/pull/3457) by [@georgesittas](https://github.com/georgesittas))*: improve transpilation of TABLESAMPLE clause (#3457) ### :sparkles: New Features - [`510f8b5`](https://github.com/tobymao/sqlglot/commit/510f8b5726c59a13284e9482dc47d488559e6c9e) - improve transpilation of TABLESAMPLE clause *(PR [#3457](https://github.com/tobymao/sqlglot/pull/3457) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3456](https://github.com/tobymao/sqlglot/issues/3456) opened by [@whummer](https://github.com/whummer)* - [`e28c959`](https://github.com/tobymao/sqlglot/commit/e28c959bf44208bdb3821b38c13fde59f1944fbb) - make create table cmd parsing less aggressive so that they can be used in sqlmesh @if macros *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.7] - 2024-05-11 ### :wrench: Chores - [`c3bb3da`](https://github.com/tobymao/sqlglot/commit/c3bb3da670d06cb2eef545a909635224b6e7c205) - change python-version to 3.11 for build-rs *(commit by 
[@georgesittas](https://github.com/georgesittas))* ## [v23.15.6] - 2024-05-11 ### :wrench: Chores - [`cd8f568`](https://github.com/tobymao/sqlglot/commit/cd8f568dba53efe6b9883035c48a67134016e612) - fix rust deployment workflow bug *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.15.3] - 2024-05-10 ### :wrench: Chores - [`130255e`](https://github.com/tobymao/sqlglot/commit/130255ebc927c48b3d3e479e17c38269bd7d8056) - update rust *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.2] - 2024-05-10 ### :sparkles: New Features - [`116172a`](https://github.com/tobymao/sqlglot/commit/116172a41119e72aaf618a83761f73d52f0440d2) - add support for ON property in ALTER and DROP statements *(PR [#3450](https://github.com/tobymao/sqlglot/pull/3450) by [@GaliFFun](https://github.com/GaliFFun))* - [`aa104fd`](https://github.com/tobymao/sqlglot/commit/aa104fd2ccd73a13ca60fa3de3296ed4c007e8da) - add semi colon comments *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`2c62267`](https://github.com/tobymao/sqlglot/commit/2c62267e2ae908d10d8164f080bc66a133596bf6) - **bigquery**: fix SHA1 generator *(PR [#3453](https://github.com/tobymao/sqlglot/pull/3453) by [@viplazylmht](https://github.com/viplazylmht))* - :arrow_lower_right: *fixes issue [#3451](https://github.com/tobymao/sqlglot/issues/3451) opened by [@viplazylmht](https://github.com/viplazylmht)* - [`fb3dea9`](https://github.com/tobymao/sqlglot/commit/fb3dea9a803157b4684cd62e2ef0b6a6b612f7e1) - **clickhouse**: fix parsing and generating hash functions *(PR [#3454](https://github.com/tobymao/sqlglot/pull/3454) by [@viplazylmht](https://github.com/viplazylmht))* - :arrow_lower_right: *fixes issue [#3452](https://github.com/tobymao/sqlglot/issues/3452) opened by [@viplazylmht](https://github.com/viplazylmht)* - [`b76dfda`](https://github.com/tobymao/sqlglot/commit/b76dfda7b4122a59c52bcbb445cffc6617e68b8c) - **snowflake**: COPY Subquery postfix *(PR 
[#3449](https://github.com/tobymao/sqlglot/pull/3449) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3434](https://github.com/tobymao/sqlglot/issues/3434) opened by [@whummer](https://github.com/whummer)* ### :wrench: Chores - [`684df5f`](https://github.com/tobymao/sqlglot/commit/684df5f7e11bb89def9ff71da0913de222bdaf3c) - remove unnecessary set_op *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.1] - 2024-05-10 ### :bug: Bug Fixes - [`33ac4fc`](https://github.com/tobymao/sqlglot/commit/33ac4fca3e5f162500ddde529cd69c338a6fecc5) - add create view tsql *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.15.0] - 2024-05-09 ### :boom: BREAKING CHANGES - due to [`9338ebc`](https://github.com/tobymao/sqlglot/commit/9338ebc6dc9635f12639b562ee2af140cf708b6b) - tsql drop view no catalog *(commit by [@tobymao](https://github.com/tobymao))*: tsql drop view no catalog ### :sparkles: New Features - [`80670bb`](https://github.com/tobymao/sqlglot/commit/80670bbd1e062cc476dcee17d0b9972ff7dc0424) - **snowflake**: Support for APPROX_PERCENTILE *(PR [#3426](https://github.com/tobymao/sqlglot/pull/3426) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3424](https://github.com/tobymao/sqlglot/issues/3424) opened by [@baruchoxman](https://github.com/baruchoxman)* - [`b46c5b3`](https://github.com/tobymao/sqlglot/commit/b46c5b3ddaed359fb59264f00d7033c7b36bd9a4) - **clickhouse**: add support for partition expression *(PR [#3428](https://github.com/tobymao/sqlglot/pull/3428) by [@GaliFFun](https://github.com/GaliFFun))* - [`07badc9`](https://github.com/tobymao/sqlglot/commit/07badc9d155cfd6d0c70e4419ed763b8c52b4973) - **clickhouse**: add support for ALTER TABLE REPLACE PARTITION statement *(PR [#3441](https://github.com/tobymao/sqlglot/pull/3441) by [@GaliFFun](https://github.com/GaliFFun))* - [`baf39e7`](https://github.com/tobymao/sqlglot/commit/baf39e78cdebf5478b59f83120c43b39b27d1a31) - 
**redshift**: improve ALTER TABLE .. ALTER .. support *(PR [#3444](https://github.com/tobymao/sqlglot/pull/3444) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`e8014e2`](https://github.com/tobymao/sqlglot/commit/e8014e2a479c37ef75510e7d5ca90ed30522ce60) - **mysql**: Parse REPLACE statement as Command *(PR [#3425](https://github.com/tobymao/sqlglot/pull/3425) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3423](https://github.com/tobymao/sqlglot/issues/3423) opened by [@DyCheer](https://github.com/DyCheer)* - [`273731f`](https://github.com/tobymao/sqlglot/commit/273731fd8cba4d6bda0d7ce109f25c49de0ec95c) - **snowflake**: parse CREATE SEQUENCE with commas *(PR [#3436](https://github.com/tobymao/sqlglot/pull/3436) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3435](https://github.com/tobymao/sqlglot/issues/3435) opened by [@whummer](https://github.com/whummer)* - [`761ba6f`](https://github.com/tobymao/sqlglot/commit/761ba6fb507158d4e5ea51ca396809be91c11ebf) - don't generate connector comments when comments=False closes [#3439](https://github.com/tobymao/sqlglot/pull/3439) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a2a6eaa`](https://github.com/tobymao/sqlglot/commit/a2a6eaa5d7ace2879ded7c3a4cf4192b75c07f26) - handle empty string in connector comment padding *(PR [#3437](https://github.com/tobymao/sqlglot/pull/3437) by [@uncledata](https://github.com/uncledata))* - [`1bc0ce5`](https://github.com/tobymao/sqlglot/commit/1bc0ce57eca5e401a4c39237b52ee722bdfb46af) - func to binary MOD generation *(PR [#3440](https://github.com/tobymao/sqlglot/pull/3440) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3431](https://github.com/tobymao/sqlglot/issues/3431) opened by [@daniel769](https://github.com/daniel769)* - [`5cfb29c`](https://github.com/tobymao/sqlglot/commit/5cfb29c7ff6015e39d7fd5b94ed2aa66436e33ae) - 
**bigquery**: MOD edge case *(commit by [@georgesittas](https://github.com/georgesittas))* - [`9825c4c`](https://github.com/tobymao/sqlglot/commit/9825c4cb616af07a048109c499666081bc6e4eba) - improve error handling for nested schema levels *(PR [#3445](https://github.com/tobymao/sqlglot/pull/3445) by [@tobymao](https://github.com/tobymao))* - [`c309def`](https://github.com/tobymao/sqlglot/commit/c309defa450f755dbed1d1b6f276b4b1765166e2) - **duckdb**: use name sequence instead of single _t for unnest alias *(PR [#3446](https://github.com/tobymao/sqlglot/pull/3446) by [@georgesittas](https://github.com/georgesittas))* - [`0927ae3`](https://github.com/tobymao/sqlglot/commit/0927ae3c448ebf068b89bfa5e46b8f135121b470) - **executor**: use timezone-aware object to represent datetime in UTC *(PR [#3447](https://github.com/tobymao/sqlglot/pull/3447) by [@georgesittas](https://github.com/georgesittas))* - [`9338ebc`](https://github.com/tobymao/sqlglot/commit/9338ebc6dc9635f12639b562ee2af140cf708b6b) - tsql drop view no catalog *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`30f9d30`](https://github.com/tobymao/sqlglot/commit/30f9d30d8ab3727a43b1e6f363f28631cbfa7f92) - bump ruff to 0.4.3 *(PR [#3430](https://github.com/tobymao/sqlglot/pull/3430) by [@georgesittas](https://github.com/georgesittas))* - [`91bed56`](https://github.com/tobymao/sqlglot/commit/91bed5607e442d416021a1f93e4a457fb47b6a1f) - test 3.12 *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.14.0] - 2024-05-07 ### :boom: BREAKING CHANGES - due to [`258ad3b`](https://github.com/tobymao/sqlglot/commit/258ad3bbf73f55d02ed78a93fa0f16d4630159e3) - parse column instead of identifier for SET assignment LHS *(PR [#3417](https://github.com/tobymao/sqlglot/pull/3417) by [@georgesittas](https://github.com/georgesittas))*: parse column instead of identifier for SET assignment LHS (#3417) ### :bug: Bug Fixes - 
[`258ad3b`](https://github.com/tobymao/sqlglot/commit/258ad3bbf73f55d02ed78a93fa0f16d4630159e3) - parse column instead of identifier for SET assignment LHS *(PR [#3417](https://github.com/tobymao/sqlglot/pull/3417) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3415](https://github.com/tobymao/sqlglot/issues/3415) opened by [@tekumara](https://github.com/tekumara)* - [`17c31da`](https://github.com/tobymao/sqlglot/commit/17c31da9e159dc1cdd91bd6df38c43606bdc48c9) - **lineage**: get rid of comments in Node names *(PR [#3418](https://github.com/tobymao/sqlglot/pull/3418) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3414](https://github.com/tobymao/sqlglot/issues/3414) opened by [@jaspev123](https://github.com/jaspev123)* - [`ea197ea`](https://github.com/tobymao/sqlglot/commit/ea197eae2fcdbeba395b53cf4864fc2e44134c71) - **snowflake**: ensure OBJECT_CONSTRUCT is not generated inside of VALUES *(PR [#3419](https://github.com/tobymao/sqlglot/pull/3419) by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.7] - 2024-05-04 ### :wrench: Chores - [`4dbcd4f`](https://github.com/tobymao/sqlglot/commit/4dbcd4f7147204b7bafa32d14dfe615882562b6b) - refactor publish workflow for sqlglotrs releasing *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.6] - 2024-05-04 ### :wrench: Chores - [`aa4f90a`](https://github.com/tobymao/sqlglot/commit/aa4f90acde9c022fb7f984b30763c732977c1b4c) - refactor publish workflow for sqlglotrs releasing *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.5] - 2024-05-04 ### :wrench: Chores - [`0deffd8`](https://github.com/tobymao/sqlglot/commit/0deffd89a8c6d2da90c9a654c22b78dd4c7dd8f6) - refactor publish workflow for sqlglotrs releasing *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.4] - 2024-05-04 ### :wrench: Chores - 
[`5125732`](https://github.com/tobymao/sqlglot/commit/5125732f05408750aceefba99b48aeb4def89557) - refactor publish workflow for sqlglotrs releasing *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.3] - 2024-05-04 ### :wrench: Chores - [`0a36dd8`](https://github.com/tobymao/sqlglot/commit/0a36dd85cd7de544a509f7e4ccdddf0cb0c1f697) - fix should-deploy-rs bash condition *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.2] - 2024-05-04 ### :bug: Bug Fixes - [`fc979a0`](https://github.com/tobymao/sqlglot/commit/fc979a0055c0f402cda77448d9c7dfecf45a901f) - **snowflake**: make FILE_FORMAT option always be uppercase in COPY INTO *(commit by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`1b5b77d`](https://github.com/tobymao/sqlglot/commit/1b5b77d849260589a2f7d3593c4472e47cae0280) - improve unsupported error documentation *(PR [#3406](https://github.com/tobymao/sqlglot/pull/3406) by [@georgesittas](https://github.com/georgesittas))* - [`fcb51af`](https://github.com/tobymao/sqlglot/commit/fcb51afc4631cfc5f494c9114d4aba667aa46087) - release sqlglotrs only when Cargo.toml is updated *(PR [#3408](https://github.com/tobymao/sqlglot/pull/3408) by [@georgesittas](https://github.com/georgesittas))* ## [v23.13.1] - 2024-05-04 ### :bug: Bug Fixes - [`2c2a788`](https://github.com/tobymao/sqlglot/commit/2c2a788bb3a5a46e7729a117a6e6b62d33beb020) - **snowflake**: COPY postfix *(PR [#3398](https://github.com/tobymao/sqlglot/pull/3398) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3388](https://github.com/tobymao/sqlglot/issues/3388) opened by [@dangoldin](https://github.com/dangoldin)* ## [v23.13.0] - 2024-05-03 ### :boom: BREAKING CHANGES - due to [`cc6259d`](https://github.com/tobymao/sqlglot/commit/cc6259de3d68831ded31bfb7fafe1ce654aa89dd) - Mark UDTF child scopes as ScopeType.SUBQUERY *(PR [#3390](https://github.com/tobymao/sqlglot/pull/3390) by 
[@VaggelisD](https://github.com/VaggelisD))*: Mark UDTF child scopes as ScopeType.SUBQUERY (#3390) - due to [`33bae9b`](https://github.com/tobymao/sqlglot/commit/33bae9b527b27f02dfafff3f45534f85aa9e0d9d) - get rid of superfluous "parameters" arg in RegexpReplace *(PR [#3394](https://github.com/tobymao/sqlglot/pull/3394) by [@georgesittas](https://github.com/georgesittas))*: get rid of superfluous "parameters" arg in RegexpReplace (#3394) - due to [`3768514`](https://github.com/tobymao/sqlglot/commit/3768514e3b2f256b69553e173b40f17180744ab0) - snowflake optional merge insert *(commit by [@tobymao](https://github.com/tobymao))*: snowflake optional merge insert - due to [`d1b4f1f`](https://github.com/tobymao/sqlglot/commit/d1b4f1f256cd772bec366d6bf13d9589e1fdfc4b) - Introducing TIMESTAMP_NTZ token and data type *(PR [#3386](https://github.com/tobymao/sqlglot/pull/3386) by [@VaggelisD](https://github.com/VaggelisD))*: Introducing TIMESTAMP_NTZ token and data type (#3386) ### :sparkles: New Features - [`d1b4f1f`](https://github.com/tobymao/sqlglot/commit/d1b4f1f256cd772bec366d6bf13d9589e1fdfc4b) - Introducing TIMESTAMP_NTZ token and data type *(PR [#3386](https://github.com/tobymao/sqlglot/pull/3386) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3379](https://github.com/tobymao/sqlglot/issues/3379) opened by [@aersam](https://github.com/aersam)* - [`16691f9`](https://github.com/tobymao/sqlglot/commit/16691f962822a132e233d61c2b67ec0fc3da51eb) - **prql**: add support for AGGREGATE *(PR [#3395](https://github.com/tobymao/sqlglot/pull/3395) by [@fool1280](https://github.com/fool1280))* - [`534fb80`](https://github.com/tobymao/sqlglot/commit/534fb80462370b5236061472496c35a16e9bab4a) - **postgres**: add support for anonymous index DDL syntax *(PR [#3403](https://github.com/tobymao/sqlglot/pull/3403) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - 
[`a2afcca`](https://github.com/tobymao/sqlglot/commit/a2afccafd300939eaa5a3b075820f3bf8e8dcaac) - **mysql**: don't cast into invalid numeric/text types *(PR [#3375](https://github.com/tobymao/sqlglot/pull/3375) by [@georgesittas](https://github.com/georgesittas))* - [`60b5c3b`](https://github.com/tobymao/sqlglot/commit/60b5c3b1b5dfb4aa00754f4b2473ad054b8dd14a) - **spark**: transpile presto TRY, fix JSON casting issue *(PR [#3376](https://github.com/tobymao/sqlglot/pull/3376) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3374](https://github.com/tobymao/sqlglot/issues/3374) opened by [@cploonker](https://github.com/cploonker)* - [`3e8de71`](https://github.com/tobymao/sqlglot/commit/3e8de7124b735a6ab52971a3e51702c4e7b74be5) - **postgres**: allow FOR clause without FROM in SUBSTRING closes [#3377](https://github.com/tobymao/sqlglot/pull/3377) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`b2a7e55`](https://github.com/tobymao/sqlglot/commit/b2a7e550b25fd95eb0abba63228c9e285be168e0) - **optimizer**: Remove XOR from connector simplifications *(PR [#3380](https://github.com/tobymao/sqlglot/pull/3380) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3372](https://github.com/tobymao/sqlglot/issues/3372) opened by [@colincointe](https://github.com/colincointe)* - [`477754c`](https://github.com/tobymao/sqlglot/commit/477754c72c47b6dc9dd01463b8f6fae6686cb1ac) - **trino**: bring back TRIM parsing *(PR [#3385](https://github.com/tobymao/sqlglot/pull/3385) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3384](https://github.com/tobymao/sqlglot/issues/3384) opened by [@dmelchor-stripe](https://github.com/dmelchor-stripe)* - [`cc6259d`](https://github.com/tobymao/sqlglot/commit/cc6259de3d68831ded31bfb7fafe1ce654aa89dd) - **optimizer**: Mark UDTF child scopes as ScopeType.SUBQUERY *(PR [#3390](https://github.com/tobymao/sqlglot/pull/3390) by 
[@VaggelisD](https://github.com/VaggelisD))* - [`0d23b20`](https://github.com/tobymao/sqlglot/commit/0d23b20352a8931adf8224d322da324b18e8282d) - allow joins in FROM expression parser *(PR [#3389](https://github.com/tobymao/sqlglot/pull/3389) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3387](https://github.com/tobymao/sqlglot/issues/3387) opened by [@MikeWallis42](https://github.com/MikeWallis42)* - [`e7021df`](https://github.com/tobymao/sqlglot/commit/e7021df397a1dc5e726d1e391ef6428a3190856d) - **duckdb**: Preserve DATE_SUB roundtrip *(PR [#3382](https://github.com/tobymao/sqlglot/pull/3382) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3373](https://github.com/tobymao/sqlglot/issues/3373) opened by [@zergar](https://github.com/zergar)* - [`641b296`](https://github.com/tobymao/sqlglot/commit/641b296017591b65ffc223d28b37e51886789ca7) - **postgres**: tokenize INT8 as BIGINT *(PR [#3392](https://github.com/tobymao/sqlglot/pull/3392) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3391](https://github.com/tobymao/sqlglot/issues/3391) opened by [@fuzi1996](https://github.com/fuzi1996)* - [`33bae9b`](https://github.com/tobymao/sqlglot/commit/33bae9b527b27f02dfafff3f45534f85aa9e0d9d) - get rid of superfluous "parameters" arg in RegexpReplace *(PR [#3394](https://github.com/tobymao/sqlglot/pull/3394) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3393](https://github.com/tobymao/sqlglot/issues/3393) opened by [@rzykov](https://github.com/rzykov)* - [`3768514`](https://github.com/tobymao/sqlglot/commit/3768514e3b2f256b69553e173b40f17180744ab0) - snowflake optional merge insert *(commit by [@tobymao](https://github.com/tobymao))* - [`f44cd24`](https://github.com/tobymao/sqlglot/commit/f44cd248a82f5519afd0edba5112a499b804fe8f) - make generated constraint parsing more lenient fixes 
[#3397](https://github.com/tobymao/sqlglot/pull/3397) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`00ff877`](https://github.com/tobymao/sqlglot/commit/00ff87719ab4d6e3a407334c8d811366d0c7ead5) - **tsql**: quote hash sign as well for quoted temporary tables *(PR [#3401](https://github.com/tobymao/sqlglot/pull/3401) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3399](https://github.com/tobymao/sqlglot/issues/3399) opened by [@gforsyth](https://github.com/gforsyth)* - [`84b7026`](https://github.com/tobymao/sqlglot/commit/84b7026e2fbc4e73c3b4c0c86cb764b95541841e) - **trino**: support for data type 'tdigest' *(PR [#3402](https://github.com/tobymao/sqlglot/pull/3402) by [@suryaiyer95](https://github.com/suryaiyer95))* - [`24e1115`](https://github.com/tobymao/sqlglot/commit/24e1115c957d42a5511c1c428516e3ce5426cd88) - **trino|presto**: adding cast support for "hyperloglog" column type *(PR [#3405](https://github.com/tobymao/sqlglot/pull/3405) by [@uncledata](https://github.com/uncledata))* ## [v23.12.2] - 2024-04-30 ### :sparkles: New Features - [`d2a6f16`](https://github.com/tobymao/sqlglot/commit/d2a6f16c35cbe355932d0e0eab2fc6ba096d8a97) - COPY TO/FROM statement *(PR [#3359](https://github.com/tobymao/sqlglot/pull/3359) by [@VaggelisD](https://github.com/VaggelisD))* - [`f034ea0`](https://github.com/tobymao/sqlglot/commit/f034ea0fdd7429bf6694e07b4aff06c665c10951) - **mysql**: Transpile TimestampTrunc *(PR [#3367](https://github.com/tobymao/sqlglot/pull/3367) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3366](https://github.com/tobymao/sqlglot/issues/3366) opened by [@sivpr2000](https://github.com/sivpr2000)* ### :bug: Bug Fixes - [`f697cb1`](https://github.com/tobymao/sqlglot/commit/f697cb16b6d744253febb2f83476853e63e06f88) - duckdb describe query closes [#3353](https://github.com/tobymao/sqlglot/pull/3353) *(commit by [@tobymao](https://github.com/tobymao))* - 
[`6e0fc5d`](https://github.com/tobymao/sqlglot/commit/6e0fc5dd8e1921aac1e3f9834dd6a1c0e30b9e50) - export optimizer functions explicitly in init *(PR [#3358](https://github.com/tobymao/sqlglot/pull/3358) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3354](https://github.com/tobymao/sqlglot/issues/3354) opened by [@tekumara](https://github.com/tekumara)* - [`23d45ee`](https://github.com/tobymao/sqlglot/commit/23d45eefb8b5f650d2e723499a12ac6801d5cd14) - **postgres**: don't generate CommentColumnConstraint *(PR [#3357](https://github.com/tobymao/sqlglot/pull/3357) by [@georgesittas](https://github.com/georgesittas))* - [`e87685b`](https://github.com/tobymao/sqlglot/commit/e87685b6971d6ddb7d222993b38aa224c39c5154) - **lineage**: use source names of derived table sources for laterals *(PR [#3360](https://github.com/tobymao/sqlglot/pull/3360) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3356](https://github.com/tobymao/sqlglot/issues/3356) opened by [@eliaperantoni](https://github.com/eliaperantoni)* - [`e82a30b`](https://github.com/tobymao/sqlglot/commit/e82a30b6563547daea0bb087e1b6b5bf3b0532d3) - **postgres**: don't generate SchemaCommentProperty *(PR [#3364](https://github.com/tobymao/sqlglot/pull/3364) by [@georgesittas](https://github.com/georgesittas))* - [`47dc52c`](https://github.com/tobymao/sqlglot/commit/47dc52c99ea50b55d08f2b57885eebbd577b8b46) - **mysql**: convert epoch extraction into UNIX_TIMESTAMP call *(PR [#3369](https://github.com/tobymao/sqlglot/pull/3369) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3368](https://github.com/tobymao/sqlglot/issues/3368) opened by [@FaizelK](https://github.com/FaizelK)* - [`b8f0979`](https://github.com/tobymao/sqlglot/commit/b8f0979537cf3ad9ef83f2c30d6cfb23cd4d2d1e) - **mysql**: generate GROUP_CONCAT for ArrayAgg *(PR [#3370](https://github.com/tobymao/sqlglot/pull/3370) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3368](https://github.com/tobymao/sqlglot/issues/3368) opened by [@FaizelK](https://github.com/FaizelK)* ### :recycle: Refactors - [`b928f54`](https://github.com/tobymao/sqlglot/commit/b928f542a81d299311d01bd8f1eb762a13adf5c8) - don't mutate the AST when creating DDL scopes *(PR [#3371](https://github.com/tobymao/sqlglot/pull/3371) by [@georgesittas](https://github.com/georgesittas))* ## [v23.12.1] - 2024-04-25 ### :wrench: Chores - [`719d394`](https://github.com/tobymao/sqlglot/commit/719d3949b75bcdac0d19b86d7398c5d9c4b5bdc3) - add a test for quoted aliases *(commit by [@tobymao](https://github.com/tobymao))* - [`6d7a9f4`](https://github.com/tobymao/sqlglot/commit/6d7a9f4ec0cd87efe19128dc9e55967172bf324e) - use unknown token types *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.12.0] - 2024-04-25 ### :boom: BREAKING CHANGES - due to [`c5ce47b`](https://github.com/tobymao/sqlglot/commit/c5ce47ba7863e0c536e076ea78ec27cb52324493) - Combine aggregate functions with orderby from WITHIN GROUP *(PR [#3352](https://github.com/tobymao/sqlglot/pull/3352) by [@VaggelisD](https://github.com/VaggelisD))*: Combine aggregate functions with orderby from WITHIN GROUP (#3352) ### :sparkles: New Features - [`80793cc`](https://github.com/tobymao/sqlglot/commit/80793ccdb52b1975d93c64a20380047bc6cf4479) - parse (a,) as a tuple instead of a paren *(PR [#3341](https://github.com/tobymao/sqlglot/pull/3341) by [@georgesittas](https://github.com/georgesittas))* - [`b3826f8`](https://github.com/tobymao/sqlglot/commit/b3826f873dc81adbfe4fbe35e83b71f4c37c3b16) - allow comments to be attached for identifiers used in definitions *(PR [#3340](https://github.com/tobymao/sqlglot/pull/3340) by [@georgesittas](https://github.com/georgesittas))* - [`ce7d893`](https://github.com/tobymao/sqlglot/commit/ce7d893c7e0d627b94e9225a06b83b863bd61a40) - **clickhouse**: Parse window functions in 
ParameterizedAggFuncs *(PR [#3347](https://github.com/tobymao/sqlglot/pull/3347) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3344](https://github.com/tobymao/sqlglot/issues/3344) opened by [@alesk](https://github.com/alesk)* ### :bug: Bug Fixes - [`0e54975`](https://github.com/tobymao/sqlglot/commit/0e54975bf27f8d765378f47872d372ba3817088e) - **tsql**: only use target table name when generating sp_rename *(PR [#3342](https://github.com/tobymao/sqlglot/pull/3342) by [@georgesittas](https://github.com/georgesittas))* - [`52bdd0c`](https://github.com/tobymao/sqlglot/commit/52bdd0ce104606c520ad4edf8c781ccc502d5a0e) - **tsql**: Convert TIMESTAMP to ROWVERSION, transpile both to BINARY *(PR [#3348](https://github.com/tobymao/sqlglot/pull/3348) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3345](https://github.com/tobymao/sqlglot/issues/3345) opened by [@aersam](https://github.com/aersam)* - [`c5ce47b`](https://github.com/tobymao/sqlglot/commit/c5ce47ba7863e0c536e076ea78ec27cb52324493) - **duckdb**: Combine aggregate functions with orderby from WITHIN GROUP *(PR [#3352](https://github.com/tobymao/sqlglot/pull/3352) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3350](https://github.com/tobymao/sqlglot/issues/3350) opened by [@btyuhas](https://github.com/btyuhas)* ### :wrench: Chores - [`eae2f6b`](https://github.com/tobymao/sqlglot/commit/eae2f6be8f13eb44c404dc638ec50d08f203b094) - update sqlglot logo *(commit by [@tobymao](https://github.com/tobymao))* - [`fb9a7ad`](https://github.com/tobymao/sqlglot/commit/fb9a7ad8f2af98a248e4576677b7b615b9d4c3e7) - copy png *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.11.2] - 2024-04-19 ### :bug: Bug Fixes - [`68595eb`](https://github.com/tobymao/sqlglot/commit/68595eba02ca9f3a01359566104b4315a313ec0a) - edge case *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.11.1] - 2024-04-19 ### 
:bug: Bug Fixes - [`9cf6f4e`](https://github.com/tobymao/sqlglot/commit/9cf6f4e49208d5a41bca1bd437d31b1ed894e6eb) - don't allow any_token on reserved keywords *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.11.0] - 2024-04-19 ### :boom: BREAKING CHANGES - due to [`290e408`](https://github.com/tobymao/sqlglot/commit/290e408ccf0d0eeec767d4b58bc1293878a3a3ae) - Preserve DPipe in simplify_concat *(PR [#3317](https://github.com/tobymao/sqlglot/pull/3317) by [@VaggelisD](https://github.com/VaggelisD))*: Preserve DPipe in simplify_concat (#3317) - due to [`83cff79`](https://github.com/tobymao/sqlglot/commit/83cff79633225fe3d8606ec3a5a9e8c1081edd0c) - add comprehensive reserved keywords for presto and redshift *(PR [#3322](https://github.com/tobymao/sqlglot/pull/3322) by [@tobymao](https://github.com/tobymao))*: add comprehensive reserved keywords for presto and redshift (#3322) - due to [`61f5b12`](https://github.com/tobymao/sqlglot/commit/61f5b1274cc1f3d68f0f16d4b3efcdc082f67257) - Introduce partition in exp.Table *(PR [#3327](https://github.com/tobymao/sqlglot/pull/3327) by [@VaggelisD](https://github.com/VaggelisD))*: Introduce partition in exp.Table (#3327) - due to [`1832ff1`](https://github.com/tobymao/sqlglot/commit/1832ff130da06ec905835583f101c031dc4faf1d) - dynamic styling for inline arrays *(commit by [@tobymao](https://github.com/tobymao))*: dynamic styling for inline arrays - due to [`5fb7f5b`](https://github.com/tobymao/sqlglot/commit/5fb7f5b21bc441af8d6fabaff7c3d542d96d3811) - dont double indent comments *(commit by [@tobymao](https://github.com/tobymao))*: dont double indent comments ### :sparkles: New Features - [`4f1691a`](https://github.com/tobymao/sqlglot/commit/4f1691a221f3d7395774f8c131a656a3ec531534) - allow qualify to also annotate on the fly for unnest support *(PR [#3316](https://github.com/tobymao/sqlglot/pull/3316) by [@tobymao](https://github.com/tobymao))* - 
[`83cff79`](https://github.com/tobymao/sqlglot/commit/83cff79633225fe3d8606ec3a5a9e8c1081edd0c) - add comprehensive reserved keywords for presto and redshift *(PR [#3322](https://github.com/tobymao/sqlglot/pull/3322) by [@tobymao](https://github.com/tobymao))* - [`ef3311a`](https://github.com/tobymao/sqlglot/commit/ef3311a8ece67e6300e5ff121660dea8cfd80480) - **hive**: Add 'STORED AS' option in INSERT DIRECTORY *(PR [#3326](https://github.com/tobymao/sqlglot/pull/3326) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3320](https://github.com/tobymao/sqlglot/issues/3320) opened by [@bkyryliuk](https://github.com/bkyryliuk)* - [`7f9cb2d`](https://github.com/tobymao/sqlglot/commit/7f9cb2d2fe2c09e94f9dbaafcc0a808428b5b21c) - **clickhouse**: Add support for DATE_FORMAT / formatDateTime *(PR [#3329](https://github.com/tobymao/sqlglot/pull/3329) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3324](https://github.com/tobymao/sqlglot/issues/3324) opened by [@PaienNate](https://github.com/PaienNate)* - [`61f5b12`](https://github.com/tobymao/sqlglot/commit/61f5b1274cc1f3d68f0f16d4b3efcdc082f67257) - Introduce partition in exp.Table *(PR [#3327](https://github.com/tobymao/sqlglot/pull/3327) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3319](https://github.com/tobymao/sqlglot/issues/3319) opened by [@bkyryliuk](https://github.com/bkyryliuk)* - [`31744b2`](https://github.com/tobymao/sqlglot/commit/31744b26ed97c12fd3cb1e3a0661695fac4c0736) - **prql**: handle NULL *(PR [#3331](https://github.com/tobymao/sqlglot/pull/3331) by [@fool1280](https://github.com/fool1280))* - [`1105044`](https://github.com/tobymao/sqlglot/commit/1105044fa8c5af8269eeddfe8e160f0c52de913c) - **tsql**: add alter table rename *(commit by [@tobymao](https://github.com/tobymao))* - [`1832ff1`](https://github.com/tobymao/sqlglot/commit/1832ff130da06ec905835583f101c031dc4faf1d) - dynamic 
styling for inline arrays *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`ef84f17`](https://github.com/tobymao/sqlglot/commit/ef84f177b7d76b7bf43d6ef38a89cfbe47f4e13b) - **optimizer**: don't simplify parentheses when parent is SubqueryPredicate *(PR [#3315](https://github.com/tobymao/sqlglot/pull/3315) by [@georgesittas](https://github.com/georgesittas))* - [`290e408`](https://github.com/tobymao/sqlglot/commit/290e408ccf0d0eeec767d4b58bc1293878a3a3ae) - **optimizer**: Preserve DPipe in simplify_concat *(PR [#3317](https://github.com/tobymao/sqlglot/pull/3317) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#2439](https://github.com/TobikoData/sqlmesh/issues/2439) opened by [@ma1f](https://github.com/ma1f)* - [`52b957a`](https://github.com/tobymao/sqlglot/commit/52b957a0691b09dc43628703b9b3633d7238df5b) - transform eliminate_qualify on generated columns *(PR [#3307](https://github.com/tobymao/sqlglot/pull/3307) by [@viplazylmht](https://github.com/viplazylmht))* - [`eb8d7b8`](https://github.com/tobymao/sqlglot/commit/eb8d7b80c74850d791ac51a117ed5381b3431b3b) - remove e*s mapping because it's not equivalent to %f *(commit by [@tobymao](https://github.com/tobymao))* - [`9de1494`](https://github.com/tobymao/sqlglot/commit/9de1494899bfc9ad13270a38054a8deab2fc926e) - allow bigquery udf with reserved keyword closes [#3332](https://github.com/tobymao/sqlglot/pull/3332) *(PR [#3333](https://github.com/tobymao/sqlglot/pull/3333) by [@tobymao](https://github.com/tobymao))* - [`e2b6213`](https://github.com/tobymao/sqlglot/commit/e2b62133add5a39e3a2df1d0c8e634fcab3487ff) - don't double comment unions *(commit by [@tobymao](https://github.com/tobymao))* - [`5fb7f5b`](https://github.com/tobymao/sqlglot/commit/5fb7f5b21bc441af8d6fabaff7c3d542d96d3811) - dont double indent comments *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - 
[`81b28c2`](https://github.com/tobymao/sqlglot/commit/81b28c2a7882b642069afb80cee16991542f84e3) - fix tests with latest duckdb *(commit by [@tobymao](https://github.com/tobymao))* - [`17f7eaf`](https://github.com/tobymao/sqlglot/commit/17f7eaff564790b1fe7faa414143accf362f550e) - add test *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.10.0] - 2024-04-12 ### :bug: Bug Fixes - [`506760d`](https://github.com/tobymao/sqlglot/commit/506760d2597779e287be4fffdeb1b375994320b1) - **redshift**: unqualify unnest columns *(PR [#3314](https://github.com/tobymao/sqlglot/pull/3314) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`0450521`](https://github.com/tobymao/sqlglot/commit/0450521a4470633be26ad5399247d5c9083e2afc) - get rid of 1st projection pad for leading comma formatting *(PR [#3308](https://github.com/tobymao/sqlglot/pull/3308) by [@georgesittas](https://github.com/georgesittas))* ## [v23.9.0] - 2024-04-12 ### :boom: BREAKING CHANGES - due to [`32cdc36`](https://github.com/tobymao/sqlglot/commit/32cdc3635b22e3e5d0cd5caf5a6ad171ca7c34fb) - allow unions to be limited directly and stop subquerying since… *(PR [#3301](https://github.com/tobymao/sqlglot/pull/3301) by [@tobymao](https://github.com/tobymao))*: allow unions to be limited directly and stop subquerying since… (#3301) - due to [`3c97d34`](https://github.com/tobymao/sqlglot/commit/3c97d3437ea573fd3764eab05ed619353fced580) - parse right-hand side of IN () as a Subquery *(PR [#3304](https://github.com/tobymao/sqlglot/pull/3304) by [@georgesittas](https://github.com/georgesittas))*: parse right-hand side of IN () as a Subquery (#3304) - due to [`75e0c69`](https://github.com/tobymao/sqlglot/commit/75e0c69e33922168fcadb4e457ae93815bf533e1) - cast less aggressively *(PR [#3302](https://github.com/tobymao/sqlglot/pull/3302) by [@georgesittas](https://github.com/georgesittas))*: cast less aggressively (#3302) ### :sparkles: New Features - 
[`a721923`](https://github.com/tobymao/sqlglot/commit/a72192306c8fad6253ad9a03661edcfaa15757c7) - **prql**: Add support for SORT *(PR [#3297](https://github.com/tobymao/sqlglot/pull/3297) by [@fool1280](https://github.com/fool1280))* - [`2ea438b`](https://github.com/tobymao/sqlglot/commit/2ea438b89f76a357390d657fe3f9e01d2a79e7e4) - is_negative helper method *(commit by [@tobymao](https://github.com/tobymao))* - [`b28cd89`](https://github.com/tobymao/sqlglot/commit/b28cd89823a38f3a90c57344a44719364d66d723) - improve transpilation of datetime functions to Teradata *(PR [#3295](https://github.com/tobymao/sqlglot/pull/3295) by [@maureen-daum](https://github.com/maureen-daum))* - [`32cdc36`](https://github.com/tobymao/sqlglot/commit/32cdc3635b22e3e5d0cd5caf5a6ad171ca7c34fb) - allow unions to be limited directly and stop subquerying since… *(PR [#3301](https://github.com/tobymao/sqlglot/pull/3301) by [@tobymao](https://github.com/tobymao))* - :arrow_lower_right: *addresses issue [#3300](https://github.com/tobymao/sqlglot/issues/3300) opened by [@williaster](https://github.com/williaster)* - [`1bc51df`](https://github.com/tobymao/sqlglot/commit/1bc51dfa9d8fd5d7dbea42d3d55aa1db66776ce5) - **teradata**: handle transpile of quarter function *(PR [#3303](https://github.com/tobymao/sqlglot/pull/3303) by [@maureen-daum](https://github.com/maureen-daum))* - [`4790414`](https://github.com/tobymao/sqlglot/commit/4790414b887b347cb94d810eeb3fe4713970984e) - **prql**: Handle DESC with sort *(PR [#3299](https://github.com/tobymao/sqlglot/pull/3299) by [@fool1280](https://github.com/fool1280))* ### :bug: Bug Fixes - [`3c97d34`](https://github.com/tobymao/sqlglot/commit/3c97d3437ea573fd3764eab05ed619353fced580) - parse right-hand side of IN () as a Subquery *(PR [#3304](https://github.com/tobymao/sqlglot/pull/3304) by [@georgesittas](https://github.com/georgesittas))* - [`75e0c69`](https://github.com/tobymao/sqlglot/commit/75e0c69e33922168fcadb4e457ae93815bf533e1) - cast less 
aggressively *(PR [#3302](https://github.com/tobymao/sqlglot/pull/3302) by [@georgesittas](https://github.com/georgesittas))* - [`d3472c6`](https://github.com/tobymao/sqlglot/commit/d3472c664fdfb7c9cfa9a54c6b0491b605cf4913) - Add postgres transpilation for TIME_TO_UNIX *(PR [#3305](https://github.com/tobymao/sqlglot/pull/3305) by [@crericha](https://github.com/crericha))* - [`2224881`](https://github.com/tobymao/sqlglot/commit/2224881ed378abe075ebcd3bfbc3eee901f89d71) - case when / if should ignore null types *(commit by [@tobymao](https://github.com/tobymao))* - [`5b2feb7`](https://github.com/tobymao/sqlglot/commit/5b2feb760ecd4c8ee64f8c464518e7e874f9b9bb) - allow unnesting to bring struct fields into scope *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`b523bb6`](https://github.com/tobymao/sqlglot/commit/b523bb630b28866ebab581d43e99f0b2b821ec12) - cleanup teradata to simplify first *(commit by [@tobymao](https://github.com/tobymao))* - [`6f73186`](https://github.com/tobymao/sqlglot/commit/6f73186681e8eb9f100a1fe4104c82cbae9d0f61) - refactor to use inline lambda *(commit by [@tobymao](https://github.com/tobymao))* - [`6b21bba`](https://github.com/tobymao/sqlglot/commit/6b21bba378e411797a57d3de8bd06d3efb6afa8c) - make test runnable *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.8.2] - 2024-04-10 ### :sparkles: New Features - [`eabb708`](https://github.com/tobymao/sqlglot/commit/eabb708db9ce7255f947542d57c31a6c93103985) - **prql**: add filter, set operations *(PR [#3291](https://github.com/tobymao/sqlglot/pull/3291) by [@fool1280](https://github.com/fool1280))* ### :bug: Bug Fixes - [`94c188d`](https://github.com/tobymao/sqlglot/commit/94c188d4920fd03e978253ed98711de259d6acb2) - **optimizer**: propagate recursive CTE source to children scopes early *(PR [#3294](https://github.com/tobymao/sqlglot/pull/3294) by [@georgesittas](https://github.com/georgesittas))* - 
[`281db61`](https://github.com/tobymao/sqlglot/commit/281db61009ee01d10690dcc1f2039062b2a1a58c) - replace fully qualified columns with generated table aliases since they become invalid *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.8.1] - 2024-04-09 ### :sparkles: New Features - [`942856d`](https://github.com/tobymao/sqlglot/commit/942856d1bdae43b13114a15a66c84467a0f90e75) - **postgres**: add COMMENT ON MATERIALIZED VIEW *(PR [#3293](https://github.com/tobymao/sqlglot/pull/3293) by [@l-vincent-l](https://github.com/l-vincent-l))* ### :bug: Bug Fixes - [`fd24b27`](https://github.com/tobymao/sqlglot/commit/fd24b2779fa962077e84d234b6821e67f3815551) - make exp.to_column more lenient *(PR [#3292](https://github.com/tobymao/sqlglot/pull/3292) by [@georgesittas](https://github.com/georgesittas))* ## [v23.8.0] - 2024-04-08 ### :boom: BREAKING CHANGES - due to [`6bba030`](https://github.com/tobymao/sqlglot/commit/6bba0308b590aed73e454c2c40d600c670e0ad7f) - transpile map retrieval to duckdb, transpile TRY_ELEMENT_AT *(PR [#3277](https://github.com/tobymao/sqlglot/pull/3277) by [@georgesittas](https://github.com/georgesittas))*: transpile map retrieval to duckdb, transpile TRY_ELEMENT_AT (#3277) - due to [`02218fc`](https://github.com/tobymao/sqlglot/commit/02218fc4f75d22487976572f51bd131170a728e5) - allow to_column to properly parse quoted column paths, make types simpler *(PR [#3289](https://github.com/tobymao/sqlglot/pull/3289) by [@tobymao](https://github.com/tobymao))*: allow to_column to properly parse quoted column paths, make types simpler (#3289) ### :sparkles: New Features - [`08222c2`](https://github.com/tobymao/sqlglot/commit/08222c2c626353be108347b95644660fe04dfcd1) - **clickhouse**: add support for MATERIALIZED, EPHEMERAL column constraints *(PR [#3275](https://github.com/tobymao/sqlglot/pull/3275) by [@pkit](https://github.com/pkit))* - [`6bba030`](https://github.com/tobymao/sqlglot/commit/6bba0308b590aed73e454c2c40d600c670e0ad7f) - transpile map 
retrieval to duckdb, transpile TRY_ELEMENT_AT *(PR [#3277](https://github.com/tobymao/sqlglot/pull/3277) by [@georgesittas](https://github.com/georgesittas))* - [`1726923`](https://github.com/tobymao/sqlglot/commit/17269232ea7f1f2ebf6daae7a49d55ccadc31798) - desc history databricks closes [#3280](https://github.com/tobymao/sqlglot/pull/3280) *(commit by [@tobymao](https://github.com/tobymao))* - [`0690cbc`](https://github.com/tobymao/sqlglot/commit/0690cbc14f023589f38bcceea443642c5a9cc586) - **snowflake**: FINAL/RUNNING keywords in MATCH_RECOGNIZE MEASURES *(PR [#3284](https://github.com/tobymao/sqlglot/pull/3284) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3282](https://github.com/tobymao/sqlglot/issues/3282) opened by [@galunto](https://github.com/galunto)* - [`1311ba3`](https://github.com/tobymao/sqlglot/commit/1311ba3da3b5e05f148d602885fcc34cc73c3c6f) - **presto**: add support for DISTINCT / ALL after GROUP BY *(PR [#3290](https://github.com/tobymao/sqlglot/pull/3290) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3286](https://github.com/tobymao/sqlglot/issues/3286) opened by [@bkyryliuk](https://github.com/bkyryliuk)* ### :bug: Bug Fixes - [`f65d812`](https://github.com/tobymao/sqlglot/commit/f65d8129b0ae887ff882cf5117f04f64b7e7db6f) - move EphemeralColumnConstraint generation to base generator *(commit by [@georgesittas](https://github.com/georgesittas))* - [`6d1c44d`](https://github.com/tobymao/sqlglot/commit/6d1c44d5b7ac9e3e929de84a761906ad42a07aee) - **optimizer**: unnest union subqueries *(PR [#3278](https://github.com/tobymao/sqlglot/pull/3278) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3276](https://github.com/tobymao/sqlglot/issues/3276) opened by [@khabiri](https://github.com/khabiri)* - [`a37d231`](https://github.com/tobymao/sqlglot/commit/a37d231200af1dc99fc45fc40627671ee82f6d5e) - **presto**: allow qualify to 
be an alias closes [#3287](https://github.com/tobymao/sqlglot/pull/3287) *(commit by [@tobymao](https://github.com/tobymao))* - [`02218fc`](https://github.com/tobymao/sqlglot/commit/02218fc4f75d22487976572f51bd131170a728e5) - allow to_column to properly parse quoted column paths, make types simpler *(PR [#3289](https://github.com/tobymao/sqlglot/pull/3289) by [@tobymao](https://github.com/tobymao))* - [`fe0eb57`](https://github.com/tobymao/sqlglot/commit/fe0eb57feecce413e3e2992db73424c8cf585599) - pass quoted to the identifier *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`c793629`](https://github.com/tobymao/sqlglot/commit/c79362953a5bf12278f861b8b5d39e6847b22e3b) - another test case *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.7.0] - 2024-04-04 ### :sparkles: New Features - [`e33fb01`](https://github.com/tobymao/sqlglot/commit/e33fb012b47892fab03fab7de896495951f23174) - **prql**: Add support for TAKE *(PR [#3258](https://github.com/tobymao/sqlglot/pull/3258) by [@fool1280](https://github.com/fool1280))* ### :bug: Bug Fixes - [`19302ab`](https://github.com/tobymao/sqlglot/commit/19302abe17a6828e1928075de45c1e2a4f3008ce) - **optimizer**: preserve the original type when creating a date literal *(PR [#3273](https://github.com/tobymao/sqlglot/pull/3273) by [@georgesittas](https://github.com/georgesittas))* ## [v23.6.4] - 2024-04-03 ### :bug: Bug Fixes - [`803fc9e`](https://github.com/tobymao/sqlglot/commit/803fc9e8f245e48e8b0e13760c5fa60cd596a464) - allow placeholders in units closes [#3265](https://github.com/tobymao/sqlglot/pull/3265) *(PR [#3267](https://github.com/tobymao/sqlglot/pull/3267) by [@tobymao](https://github.com/tobymao))* - [`64ae85b`](https://github.com/tobymao/sqlglot/commit/64ae85ba1344b293ba01dfa300d100ff144cdd7b) - nested cte ordering closes [#3266](https://github.com/tobymao/sqlglot/pull/3266) *(commit by [@tobymao](https://github.com/tobymao))* - 
[`09287d9`](https://github.com/tobymao/sqlglot/commit/09287d9b2a39d2476d1f72880f9d2dccfdb210ec) - amend interval unit parsing regression *(PR [#3269](https://github.com/tobymao/sqlglot/pull/3269) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3268](https://github.com/tobymao/sqlglot/issues/3268) opened by [@LilyFoote](https://github.com/LilyFoote)* - [`bc26e84`](https://github.com/tobymao/sqlglot/commit/bc26e840d171dd03e6053f22ecd785d59cbd4f80) - **optimizer**: tweaks to date simplification *(PR [#3270](https://github.com/tobymao/sqlglot/pull/3270) by [@barakalon](https://github.com/barakalon))* ## [v23.6.0] - 2024-04-02 ### :wrench: Chores - [`4eec748`](https://github.com/tobymao/sqlglot/commit/4eec748d7fd0c73d9593cb3da2b9ebc1d2440436) - deploy sqlglot and then sqlglotrs *(PR [#3264](https://github.com/tobymao/sqlglot/pull/3264) by [@georgesittas](https://github.com/georgesittas))* ## [v23.4.0] - 2024-04-02 ### :boom: BREAKING CHANGES - due to [`e148fe1`](https://github.com/tobymao/sqlglot/commit/e148fe1ace1fe647369c14f2649f428307686a2f) - describe formatted closes [#3244](https://github.com/tobymao/sqlglot/pull/3244) *(commit by [@tobymao](https://github.com/tobymao))*: describe formatted closes #3244 - due to [`2c359e7`](https://github.com/tobymao/sqlglot/commit/2c359e790a58e4df9008282401a5578d3ce9d3a4) - properly transpile escape sequences *(PR [#3256](https://github.com/tobymao/sqlglot/pull/3256) by [@georgesittas](https://github.com/georgesittas))*: properly transpile escape sequences (#3256) - due to [`9787567`](https://github.com/tobymao/sqlglot/commit/978756783b639e174f3f614f3e39382fef296640) - bump sqlglotrs to 0.2.0 *(commit by [@georgesittas](https://github.com/georgesittas))*: bump sqlglotrs to 0.2.0 ### :sparkles: New Features - [`8dba8e2`](https://github.com/tobymao/sqlglot/commit/8dba8e2508f04ccdf9b10eaa8a456478190a53a5) - **optimizer**: Support for small integer CAST elimination *(PR 
[#3234](https://github.com/tobymao/sqlglot/pull/3234) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3229](https://github.com/tobymao/sqlglot/issues/3229) opened by [@NickCrews](https://github.com/NickCrews)* - [`e148fe1`](https://github.com/tobymao/sqlglot/commit/e148fe1ace1fe647369c14f2649f428307686a2f) - describe formatted closes [#3244](https://github.com/tobymao/sqlglot/pull/3244) *(commit by [@tobymao](https://github.com/tobymao))* - [`a48d7eb`](https://github.com/tobymao/sqlglot/commit/a48d7eb9f3d1f9b3d1ffb9b3ec99b1024b7c3da9) - allow non func hints closes [#3248](https://github.com/tobymao/sqlglot/pull/3248) *(commit by [@tobymao](https://github.com/tobymao))* - [`d90ec95`](https://github.com/tobymao/sqlglot/commit/d90ec95001a9747d6066d1872c5a9402e2837f62) - add conversion of named tuples and classes to structs *(PR [#3245](https://github.com/tobymao/sqlglot/pull/3245) by [@tobymao](https://github.com/tobymao))* - [`f88640b`](https://github.com/tobymao/sqlglot/commit/f88640b8df22e29ad2fa845b580cf78ad4fb2262) - **clickhouse**: CREATE TABLE computed columns, column compression, index *(PR [#3252](https://github.com/tobymao/sqlglot/pull/3252) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3243](https://github.com/tobymao/sqlglot/issues/3243) opened by [@lksv](https://github.com/lksv)* - [`a64ec1b`](https://github.com/tobymao/sqlglot/commit/a64ec1bf60fda00e6dd7122a338c6dac80d005e4) - **snowflake**: MATCH_CONDITION in ASOF JOIN *(PR [#3255](https://github.com/tobymao/sqlglot/pull/3255) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3254](https://github.com/tobymao/sqlglot/issues/3254) opened by [@Bilbottom](https://github.com/Bilbottom)* ### :bug: Bug Fixes - [`a630c50`](https://github.com/tobymao/sqlglot/commit/a630c50737bb9deb6f44e1afd374b113612a1d24) - allow interval spans closes [#3246](https://github.com/tobymao/sqlglot/pull/3246) 
*(commit by [@tobymao](https://github.com/tobymao))* - [`28c5ee7`](https://github.com/tobymao/sqlglot/commit/28c5ee7243af9fb8aa5abf2d5d36d6fa4ef47681) - **mysql**: Duplicate parsing of ENGINE_ATTRIBUTE *(PR [#3253](https://github.com/tobymao/sqlglot/pull/3253) by [@VaggelisD](https://github.com/VaggelisD))* - [`2c359e7`](https://github.com/tobymao/sqlglot/commit/2c359e790a58e4df9008282401a5578d3ce9d3a4) - properly transpile escape sequences *(PR [#3256](https://github.com/tobymao/sqlglot/pull/3256) by [@georgesittas](https://github.com/georgesittas))* - [`6badfd1`](https://github.com/tobymao/sqlglot/commit/6badfd17b416380a4077f2ef48f1efcbed3c78d3) - Fix STRPOS for Presto & Trino *(PR [#3261](https://github.com/tobymao/sqlglot/pull/3261) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3259](https://github.com/tobymao/sqlglot/issues/3259) opened by [@amitgilad3](https://github.com/amitgilad3)* ### :wrench: Chores - [`9787567`](https://github.com/tobymao/sqlglot/commit/978756783b639e174f3f614f3e39382fef296640) - bump sqlglotrs to 0.2.0 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.3.0] - 2024-03-29 ### :boom: BREAKING CHANGES - due to [`0919be5`](https://github.com/tobymao/sqlglot/commit/0919be5eea7aba175e173dbfc0b6547e5c9473a8) - StrToUnix Hive parsing, Presto generation fixes *(PR [#3225](https://github.com/tobymao/sqlglot/pull/3225) by [@georgesittas](https://github.com/georgesittas))*: StrToUnix Hive parsing, Presto generation fixes (#3225) - due to [`163c85c`](https://github.com/tobymao/sqlglot/commit/163c85c8ed327150a6e5c79f1a4b52a8848d4408) - convert dt with isoformat sep space for better compat, trino doesn't accept T *(commit by [@tobymao](https://github.com/tobymao))*: convert dt with isoformat sep space for better compat, trino doesn't accept T ### :sparkles: New Features - [`59f1d13`](https://github.com/tobymao/sqlglot/commit/59f1d13bc5e37ebe6636b05e0381facc9725f7b0) - **oracle**: Support for
CONNECT BY [NOCYCLE] *(PR [#3238](https://github.com/tobymao/sqlglot/pull/3238) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3237](https://github.com/tobymao/sqlglot/issues/3237) opened by [@Hal-H2Apps](https://github.com/Hal-H2Apps)* - [`12563ae`](https://github.com/tobymao/sqlglot/commit/12563ae0645487d5e63343224e1016cce4be447b) - mvp for transpiling sqlite's STRFTIME *(PR [#3242](https://github.com/tobymao/sqlglot/pull/3242) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3240](https://github.com/tobymao/sqlglot/issues/3240) opened by [@markhalonen](https://github.com/markhalonen)* ### :bug: Bug Fixes - [`0919be5`](https://github.com/tobymao/sqlglot/commit/0919be5eea7aba175e173dbfc0b6547e5c9473a8) - StrToUnix Hive parsing, Presto generation fixes *(PR [#3225](https://github.com/tobymao/sqlglot/pull/3225) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3221](https://github.com/tobymao/sqlglot/issues/3221) opened by [@luhea](https://github.com/luhea)* - [`163c85c`](https://github.com/tobymao/sqlglot/commit/163c85c8ed327150a6e5c79f1a4b52a8848d4408) - convert dt with isoformat sep space for better compat, trino doesn't accept T *(commit by [@tobymao](https://github.com/tobymao))* - [`555647d`](https://github.com/tobymao/sqlglot/commit/555647d5541c2e52b40d098ee42f6454518e8401) - make property value parsing more lenient *(PR [#3230](https://github.com/tobymao/sqlglot/pull/3230) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3228](https://github.com/tobymao/sqlglot/issues/3228) opened by [@hsheth2](https://github.com/hsheth2)* - [`8325039`](https://github.com/tobymao/sqlglot/commit/83250398c7804863a6b3f339305600df39515ccc) - **duckdb**: wrap columns inside of INTERVAL expressions *(commit by [@georgesittas](https://github.com/georgesittas))* -
[`fd5783f`](https://github.com/tobymao/sqlglot/commit/fd5783f34cb0cb7052477f25a9847a5efd61c04f) - don't evaluate Rand twice when ordering by it *(PR [#3233](https://github.com/tobymao/sqlglot/pull/3233) by [@georgesittas](https://github.com/georgesittas))* - [`b097da5`](https://github.com/tobymao/sqlglot/commit/b097da5a624fa467830464427ec57bf3b303de3f) - index error when comment sql is none *(commit by [@tobymao](https://github.com/tobymao))* - [`bf94ce3`](https://github.com/tobymao/sqlglot/commit/bf94ce317497ab92e9fe0562b3034f3482601072) - > 1 nested joins closes [#3231](https://github.com/tobymao/sqlglot/pull/3231) *(commit by [@tobymao](https://github.com/tobymao))* - [`2a3a5cd`](https://github.com/tobymao/sqlglot/commit/2a3a5cdcffe39d42153b3e960a580d084a27c0eb) - properly parse/generate duckdb MAP {..} syntax, annotate MAPs *(PR [#3241](https://github.com/tobymao/sqlglot/pull/3241) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`647611e`](https://github.com/tobymao/sqlglot/commit/647611e16bdb5ecfc2eec30111cc6689200836b7) - only set vars with necessary *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.1.0] - 2024-03-26 ### :boom: BREAKING CHANGES - due to [`71b82b4`](https://github.com/tobymao/sqlglot/commit/71b82b424b9c336072b011785a0e3e9650ae1380) - allow transformations that mutate the visited node's parent *(PR [#3182](https://github.com/tobymao/sqlglot/pull/3182) by [@georgesittas](https://github.com/georgesittas))*: allow transformations that mutate the visited node's parent (#3182) ### :sparkles: New Features - [`c19878a`](https://github.com/tobymao/sqlglot/commit/c19878a329078ce6ebfbb4337316ff5e43b8c924) - transpile Snowflake's ADDTIME *(PR [#3180](https://github.com/tobymao/sqlglot/pull/3180) by [@georgesittas](https://github.com/georgesittas))* - [`66e2e49`](https://github.com/tobymao/sqlglot/commit/66e2e497626a77540b9addd35f2edb287c7b62fe) - improve lineage perf *(commit by 
[@tobymao](https://github.com/tobymao))* - [`ad23608`](https://github.com/tobymao/sqlglot/commit/ad23608f9f3724f0c35e5d517bba51f77a84f6cb) - **mysql**: Parse MODIFY COLUMN *(PR [#3189](https://github.com/tobymao/sqlglot/pull/3189) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3186](https://github.com/tobymao/sqlglot/issues/3186) opened by [@kosti-hokkanen-supermetrics](https://github.com/kosti-hokkanen-supermetrics)* - [`a18444d`](https://github.com/tobymao/sqlglot/commit/a18444dbd7ccfc05b189dcb2005c85a1048cc8de) - add expressions for CORR, COVAR_SAMP, COVAR_POP *(PR [#3193](https://github.com/tobymao/sqlglot/pull/3193) by [@ttzhou](https://github.com/ttzhou))* - [`3620b99`](https://github.com/tobymao/sqlglot/commit/3620b9974c28df7d4d189ebd5fdcb675f41a275d) - add support for converting `bytes` to sqlglot AST *(PR [#3198](https://github.com/tobymao/sqlglot/pull/3198) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3195](https://github.com/tobymao/sqlglot/issues/3195) opened by [@aersam](https://github.com/aersam)* - [`648c819`](https://github.com/tobymao/sqlglot/commit/648c819071082f7a1f2f6587336ae765d4915034) - redshift starts with support *(PR [#3194](https://github.com/tobymao/sqlglot/pull/3194) by [@eakmanrq](https://github.com/eakmanrq))* - [`c355a4a`](https://github.com/tobymao/sqlglot/commit/c355a4a821c7eaf76df510020d825a9f326068de) - **tsql**: add support for WITH in view DDL *(PR [#3203](https://github.com/tobymao/sqlglot/pull/3203) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3202](https://github.com/tobymao/sqlglot/issues/3202) opened by [@mr-miles](https://github.com/mr-miles)* - [`8622eb2`](https://github.com/tobymao/sqlglot/commit/8622eb21c89a0d7569e27b3c739592cb96946a3a) - **duckdb**: add support for heredoc string syntax *(PR [#3212](https://github.com/tobymao/sqlglot/pull/3212) by 
[@georgesittas](https://github.com/georgesittas))* - [`b50dc5e`](https://github.com/tobymao/sqlglot/commit/b50dc5ecc7d29bce43229d050da8c4e37951853c) - Support for MySQL & Redshift UnixTotime *(PR [#3223](https://github.com/tobymao/sqlglot/pull/3223) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3214](https://github.com/tobymao/sqlglot/issues/3214) opened by [@exgalibas](https://github.com/exgalibas)* - [`2f6a2f1`](https://github.com/tobymao/sqlglot/commit/2f6a2f13bbd40f3d5348b0ed1b8cf6736ef9d1c5) - **optimizer**: Support for UNION BY NAME *(PR [#3224](https://github.com/tobymao/sqlglot/pull/3224) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3222](https://github.com/tobymao/sqlglot/issues/3222) opened by [@yiyuanliu](https://github.com/yiyuanliu)* ### :bug: Bug Fixes - [`71b82b4`](https://github.com/tobymao/sqlglot/commit/71b82b424b9c336072b011785a0e3e9650ae1380) - allow transformations that mutate the visited node's parent *(PR [#3182](https://github.com/tobymao/sqlglot/pull/3182) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3181](https://github.com/tobymao/sqlglot/issues/3181) opened by [@l-vincent-l](https://github.com/l-vincent-l)* - [`6827edd`](https://github.com/tobymao/sqlglot/commit/6827edd108bbc6ecfcc0f03495f00c08022efb3b) - **postgres**: Fix ARROW/DARROW column operators *(PR [#3191](https://github.com/tobymao/sqlglot/pull/3191) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3185](https://github.com/tobymao/sqlglot/issues/3185) opened by [@ZipBrandon](https://github.com/ZipBrandon)* - [`0dd9ba5`](https://github.com/tobymao/sqlglot/commit/0dd9ba5ef57d29b6406a5d2a7e381eb6e6f56221) - Fix backtracking through try/catch exceptions *(PR [#3190](https://github.com/tobymao/sqlglot/pull/3190) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue 
[#3175](https://github.com/tobymao/sqlglot/issues/3175) opened by [@herry13](https://github.com/herry13)* - [`5cdd874`](https://github.com/tobymao/sqlglot/commit/5cdd8749bacb101711a477798ff96bace44ccfb1) - **generator**: compute csv leading comma pad length correctly *(PR [#3201](https://github.com/tobymao/sqlglot/pull/3201) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3199](https://github.com/tobymao/sqlglot/issues/3199) opened by [@giovannipcarvalho](https://github.com/giovannipcarvalho)* - [`73fc807`](https://github.com/tobymao/sqlglot/commit/73fc807a48bfadc5bbe5594b55ba45480e93be3c) - **tokenizer**: don't increment array cursor by 2 on CRLF *(PR [#3204](https://github.com/tobymao/sqlglot/pull/3204) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3200](https://github.com/tobymao/sqlglot/issues/3200) opened by [@mr-miles](https://github.com/mr-miles)* - [`af1b026`](https://github.com/tobymao/sqlglot/commit/af1b02697303160050ee32b1c89ff80f14d9d0fa) - **snowflake**: convert VALUES with invalid expressions into UNION ALL *(PR [#3213](https://github.com/tobymao/sqlglot/pull/3213) by [@georgesittas](https://github.com/georgesittas))* - [`ec4648f`](https://github.com/tobymao/sqlglot/commit/ec4648f7eb982bf48b5bf09271c1955b892867fa) - **optimizer**: don't merge ORDER BY into UNION *(PR [#3215](https://github.com/tobymao/sqlglot/pull/3215) by [@barakalon](https://github.com/barakalon))* - :arrow_lower_right: *fixes issue [#3211](https://github.com/tobymao/sqlglot/issues/3211) opened by [@rorynormaness](https://github.com/rorynormaness)* - [`e4dd052`](https://github.com/tobymao/sqlglot/commit/e4dd0526031591179156a1eea45089213b23cdf7) - allow snowflake object_construct with string keys to transpile to sqlglot dialect *(commit by [@tobymao](https://github.com/tobymao))* - [`9e39076`](https://github.com/tobymao/sqlglot/commit/9e39076b7f581dc68e10c558ff8f6c9809bfe841) - **tsql**: 
datestrtodate for tsql closes [#3216](https://github.com/tobymao/sqlglot/pull/3216) *(commit by [@tobymao](https://github.com/tobymao))* - [`e7c9158`](https://github.com/tobymao/sqlglot/commit/e7c91584ac7fb35082ebd1d4873f13307ea848af) - bq datetime to timestamp *(PR [#3220](https://github.com/tobymao/sqlglot/pull/3220) by [@eakmanrq](https://github.com/eakmanrq))* - [`e6b8d1f`](https://github.com/tobymao/sqlglot/commit/e6b8d1f0061d55bf434d1a838f858b9fa412e312) - **optimizer**: constrain UDTF scope boundary *(PR [#3226](https://github.com/tobymao/sqlglot/pull/3226) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3209](https://github.com/tobymao/sqlglot/issues/3209) opened by [@rorynormaness](https://github.com/rorynormaness)* ### :recycle: Refactors - [`4cd0e17`](https://github.com/tobymao/sqlglot/commit/4cd0e1719a55a75dac1114736fbbe48a8aa8f294) - get rid of redundant condition in Expression.replace *(PR [#3192](https://github.com/tobymao/sqlglot/pull/3192) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`3879518`](https://github.com/tobymao/sqlglot/commit/3879518f951233fed3434c493a5786573ee814fd) - bump sqlglotrs to 0.1.3 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v23.0.5] - 2024-03-20 ### :bug: Bug Fixes - [`ed2c9e1`](https://github.com/tobymao/sqlglot/commit/ed2c9e126cc7e679c543adaa2827c1f5c47b96d7) - move varchar max conversion to base *(commit by [@tobymao](https://github.com/tobymao))* - [`e3b6139`](https://github.com/tobymao/sqlglot/commit/e3b61392b1d050447f77fcf1b04efd6dcbfc311e) - move comment from window function to Window expression *(PR [#3178](https://github.com/tobymao/sqlglot/pull/3178) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2299](https://github.com/TobikoData/sqlmesh/issues/2299) opened by [@georgesittas](https://github.com/georgesittas)* - 
[`a452276`](https://github.com/tobymao/sqlglot/commit/a452276da4daaa436a9ac95566bcbb2954d149e3) - **clickhouse**: Fixing FORMAT being parsed as implicit alias *(PR [#3179](https://github.com/tobymao/sqlglot/pull/3179) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3176](https://github.com/tobymao/sqlglot/issues/3176) opened by [@mlipiev](https://github.com/mlipiev)* ## [v23.0.4] - 2024-03-20 ### :bug: Bug Fixes - [`42cf703`](https://github.com/tobymao/sqlglot/commit/42cf70351e7811a077da29af42b28662ede203ac) - redshift varchar(max) catch lower case *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`20cd803`](https://github.com/tobymao/sqlglot/commit/20cd8038268b162af7bae63d54ed2f349502042a) - cleanup redundant check *(commit by [@tobymao](https://github.com/tobymao))* - [`7e12342`](https://github.com/tobymao/sqlglot/commit/7e12342029d33ff139a3566243789f54e36f4525) - add superset to readme *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.0.3] - 2024-03-19 ### :sparkles: New Features - [`bc8bc7f`](https://github.com/tobymao/sqlglot/commit/bc8bc7f8c9a6a20a35bab8ea7b34cf6431616b50) - replace a nested child node with a list convenience *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`eaaeab0`](https://github.com/tobymao/sqlglot/commit/eaaeab088010f55ccc221a9a4968f0d4ff67d8b1) - **snowflake**: Allow non-literal expressions too in DATE functions *(PR [#3167](https://github.com/tobymao/sqlglot/pull/3167) by [@VaggelisD](https://github.com/VaggelisD))* ## [v23.0.2] - 2024-03-19 ### :sparkles: New Features - [`32cc2be`](https://github.com/tobymao/sqlglot/commit/32cc2be1b19ade551b42cc70a96f1675ac8773f4) - **postgres**: add support for materialized CTEs *(PR [#3171](https://github.com/tobymao/sqlglot/pull/3171) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3170](https://github.com/tobymao/sqlglot/issues/3170) opened by 
[@betodealmeida](https://github.com/betodealmeida)* ### :bug: Bug Fixes - [`df4ce17`](https://github.com/tobymao/sqlglot/commit/df4ce17f24bbb16a64172e351f4e27ac74de668a) - can't expand group by for nulls and bools *(commit by [@tobymao](https://github.com/tobymao))* - [`d859fc0`](https://github.com/tobymao/sqlglot/commit/d859fc0f6eeb0971dab5b22748d1e84425829444) - unnest annotation with generate_date_array *(PR [#3169](https://github.com/tobymao/sqlglot/pull/3169) by [@tobymao](https://github.com/tobymao))* ## [v23.0.1] - 2024-03-19 ### :sparkles: New Features - [`931774d`](https://github.com/tobymao/sqlglot/commit/931774dde50aa04efecd1ae9cdd6965655670d71) - iterative connector sql *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`5e18d49`](https://github.com/tobymao/sqlglot/commit/5e18d490be3990116bbacd1b09dd52542f51c151) - fill in missing implementation details for replace(None) *(PR [#3166](https://github.com/tobymao/sqlglot/pull/3166) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3165](https://github.com/tobymao/sqlglot/issues/3165) opened by [@streamnsight](https://github.com/streamnsight)* - [`a0df28f`](https://github.com/tobymao/sqlglot/commit/a0df28f4092ca84d07111cead550b9d6772993ad) - can't simplify null parens *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`eb0a6c3`](https://github.com/tobymao/sqlglot/commit/eb0a6c31c92d16abe785087271b14f7611ff24bc) - actually pop the where statement *(commit by [@tobymao](https://github.com/tobymao))* - [`f6778ef`](https://github.com/tobymao/sqlglot/commit/f6778ef039a646fb5641f0e91b28f6cbc2f52e78) - add recursion test *(commit by [@tobymao](https://github.com/tobymao))* ## [v23.0.0] - 2024-03-18 ### :sparkles: New Features - [`e838713`](https://github.com/tobymao/sqlglot/commit/e838713bdb3da8a5d04eed43b2015a9d3a71addd) - **mysql**: Support for multi arg GROUP_CONCAT *(PR 
[#3150](https://github.com/tobymao/sqlglot/pull/3150) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3142](https://github.com/tobymao/sqlglot/issues/3142) opened by [@optionals](https://github.com/optionals)* - [`7e8f134`](https://github.com/tobymao/sqlglot/commit/7e8f134fb2d78940b27f81be7a347caee371601c) - **test**: Add standard alias to some TPC-DS query *(PR [#3151](https://github.com/tobymao/sqlglot/pull/3151) by [@fool1280](https://github.com/fool1280))* - [`6d0e965`](https://github.com/tobymao/sqlglot/commit/6d0e9658733c672154ec69fd2a4140332954b466) - add skip limit token kwarg *(PR [#3149](https://github.com/tobymao/sqlglot/pull/3149) by [@z3z1ma](https://github.com/z3z1ma))* - [`3ed5845`](https://github.com/tobymao/sqlglot/commit/3ed58458f9c89a1241a6fa6bb787e236289af58d) - include table alias in bigquery unnest *(PR [#3156](https://github.com/tobymao/sqlglot/pull/3156) by [@eakmanrq](https://github.com/eakmanrq))* - [`706fac3`](https://github.com/tobymao/sqlglot/commit/706fac382fbde6c1c6af8acd277291a3f18f94ee) - add bigquery mod op *(PR [#3157](https://github.com/tobymao/sqlglot/pull/3157) by [@eakmanrq](https://github.com/eakmanrq))* - [`6ffdc25`](https://github.com/tobymao/sqlglot/commit/6ffdc25c673db33c3e9ac5a2c6970c4331a3f978) - **clickhouse**: Support for INSERT INTO TABLE FUNCTION *(PR [#3162](https://github.com/tobymao/sqlglot/pull/3162) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3161](https://github.com/tobymao/sqlglot/issues/3161) opened by [@mlipiev](https://github.com/mlipiev)* - [`021af42`](https://github.com/tobymao/sqlglot/commit/021af4206f4ff2ad4bd57d30cf1f2f78f24fc844) - **snowflake**: Adding support for DATE, TO_DATE, TRY_TO_DATE functions *(PR [#3160](https://github.com/tobymao/sqlglot/pull/3160) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3152](https://github.com/tobymao/sqlglot/issues/3152) opened by 
[@lei0zhou](https://github.com/lei0zhou)* ### :bug: Bug Fixes - [`cfde552`](https://github.com/tobymao/sqlglot/commit/cfde552005e1682d6dd1b71850e163021bf4532f) - asof identifier closes [#3153](https://github.com/tobymao/sqlglot/pull/3153) *(commit by [@tobymao](https://github.com/tobymao))* - [`b1e6eef`](https://github.com/tobymao/sqlglot/commit/b1e6eefcd4dd60f541047a10ed35c1ac733a636c) - bigquery values transpilation with no column alias *(commit by [@tobymao](https://github.com/tobymao))* - [`c0760b3`](https://github.com/tobymao/sqlglot/commit/c0760b3be11af701273e55c2c976d67d9a575cc4) - parse over any closes [#3155](https://github.com/tobymao/sqlglot/pull/3155) *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`38b931e`](https://github.com/tobymao/sqlglot/commit/38b931ebed9255ce5d0d6185414b6f01ca02b0fd) - pin ruff *(commit by [@tobymao](https://github.com/tobymao))* - [`66a6284`](https://github.com/tobymao/sqlglot/commit/66a62847342f15b8d412fb91814342951fe23247) - improve type hints of Query methods *(PR [#3148](https://github.com/tobymao/sqlglot/pull/3148) by [@georgesittas](https://github.com/georgesittas))* ## [v22.5.0] - 2024-03-14 ### :boom: BREAKING CHANGES - due to [`2b4952e`](https://github.com/tobymao/sqlglot/commit/2b4952eb151b3f20739803e7bf443b56da457b1f) - desugar LOG2 and LOG10 by converting them into LOG *(PR [#3139](https://github.com/tobymao/sqlglot/pull/3139) by [@georgesittas](https://github.com/georgesittas))*: desugar LOG2 and LOG10 by converting them into LOG (#3139) ### :sparkles: New Features - [`c01ff44`](https://github.com/tobymao/sqlglot/commit/c01ff44b036526807624ba2d1f4b247081e8c56f) - **snowflake**: Add TO_TIMESTAMP test and update env.py *(PR [#3130](https://github.com/tobymao/sqlglot/pull/3130) by [@fool1280](https://github.com/fool1280))* - [`8526c8e`](https://github.com/tobymao/sqlglot/commit/8526c8e30376c0826ab31a0a342656d5ebced662) - **tsql**: transpile LIMIT with OFFSET properly *(PR 
[#3145](https://github.com/tobymao/sqlglot/pull/3145) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3144](https://github.com/tobymao/sqlglot/issues/3144) opened by [@iMayK](https://github.com/iMayK)* ### :bug: Bug Fixes - [`a9db8ff`](https://github.com/tobymao/sqlglot/commit/a9db8ff6ac528da8c3a7a66f0b80a3f0d1a0ed7e) - don't mutate parent nested classes if undefined in a dialect *(PR [#3134](https://github.com/tobymao/sqlglot/pull/3134) by [@georgesittas](https://github.com/georgesittas))* - [`d6bac3e`](https://github.com/tobymao/sqlglot/commit/d6bac3e54c6445c52daa04015b1b2e4a6933e682) - **duckdb**: Slice + Array bug *(PR [#3137](https://github.com/tobymao/sqlglot/pull/3137) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *fixes issue [#3136](https://github.com/tobymao/sqlglot/issues/3136) opened by [@cpcloud](https://github.com/cpcloud)* - [`230a845`](https://github.com/tobymao/sqlglot/commit/230a845d82576b24ef8a3bbcc83677ed637e8247) - optimizer bugs *(commit by [@tobymao](https://github.com/tobymao))* ### :recycle: Refactors - [`2b4952e`](https://github.com/tobymao/sqlglot/commit/2b4952eb151b3f20739803e7bf443b56da457b1f) - desugar LOG2 and LOG10 by converting them into LOG *(PR [#3139](https://github.com/tobymao/sqlglot/pull/3139) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3138](https://github.com/tobymao/sqlglot/issues/3138) opened by [@baruchoxman](https://github.com/baruchoxman)* ### :wrench: Chores - [`ebbf5a1`](https://github.com/tobymao/sqlglot/commit/ebbf5a14da12b442bff84d93f8542d4322e0811d) - copy sqlglot.svg in docs/ to also display logo in website *(PR [#3147](https://github.com/tobymao/sqlglot/pull/3147) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3146](https://github.com/tobymao/sqlglot/issues/3146) opened by [@lostmygithubaccount](https://github.com/lostmygithubaccount)* ## 
[v22.4.0] - 2024-03-12 ### :boom: BREAKING CHANGES - due to [`b1c8cac`](https://github.com/tobymao/sqlglot/commit/b1c8cace6ed3e58657726fa5617a6df63d91f737) - traverse union scopes iteratively *(PR [#3112](https://github.com/tobymao/sqlglot/pull/3112) by [@georgesittas](https://github.com/georgesittas))*: traverse union scopes iteratively (#3112) ### :sparkles: New Features - [`88033da`](https://github.com/tobymao/sqlglot/commit/88033dad05550cde05dcb86cce61a621c071382c) - **test**: add more passing tpcds tests *(PR [#3110](https://github.com/tobymao/sqlglot/pull/3110) by [@fool1280](https://github.com/fool1280))* - [`804af34`](https://github.com/tobymao/sqlglot/commit/804af347a7cefac251b78fdcb8ff35b63c249d82) - **duckdb**: add support for positional joins *(PR [#3111](https://github.com/tobymao/sqlglot/pull/3111) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3109](https://github.com/tobymao/sqlglot/issues/3109) opened by [@dylanscott](https://github.com/dylanscott)* - [`c4e7bbf`](https://github.com/tobymao/sqlglot/commit/c4e7bbfd3d88f3efb1fea806f85091dbe32379cf) - improve transpilation of TO_NUMBER *(commit by [@codeDing18](https://github.com/codeDing18))* - [`80d484c`](https://github.com/tobymao/sqlglot/commit/80d484c428329fb53c905fff9f86ea0ee7bcef3b) - **postgres**: generate StrToDate *(PR [#3124](https://github.com/tobymao/sqlglot/pull/3124) by [@georgesittas](https://github.com/georgesittas))* - [`09708f5`](https://github.com/tobymao/sqlglot/commit/09708f571bb7b62e96bbfba363b00714243d1a17) - Adding EXCLUDE constraint support *(PR [#3116](https://github.com/tobymao/sqlglot/pull/3116) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3097](https://github.com/tobymao/sqlglot/issues/3097) opened by [@dezhin](https://github.com/dezhin)* - [`9b25a8e`](https://github.com/tobymao/sqlglot/commit/9b25a8e3788c4cc7a299c703fe5b4086fe86015d) - Adding BACKUP property *(PR 
[#3127](https://github.com/tobymao/sqlglot/pull/3127) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3125](https://github.com/tobymao/sqlglot/issues/3125) opened by [@hsheth2](https://github.com/hsheth2)* - [`0ea849b`](https://github.com/tobymao/sqlglot/commit/0ea849b35bd3dd980c4f851d3ea7b5bc628e6fb5) - Adding NAME data type in Postgres/Redshift *(PR [#3128](https://github.com/tobymao/sqlglot/pull/3128) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3123](https://github.com/tobymao/sqlglot/issues/3123) opened by [@hsheth2](https://github.com/hsheth2)* ### :bug: Bug Fixes - [`c333017`](https://github.com/tobymao/sqlglot/commit/c333017fe49c0645cdaa3a75d0a7cc6a5b46dddc) - correctly generate ArrayJoin in various dialects *(PR [#3120](https://github.com/tobymao/sqlglot/pull/3120) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3119](https://github.com/tobymao/sqlglot/issues/3119) opened by [@cpcloud](https://github.com/cpcloud)* - [`12d72a6`](https://github.com/tobymao/sqlglot/commit/12d72a6ff6534919979f77a5f045aa9d947d9a09) - make the lineage sources dict type covariant *(PR [#3122](https://github.com/tobymao/sqlglot/pull/3122) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3121](https://github.com/tobymao/sqlglot/issues/3121) opened by [@rexledesma](https://github.com/rexledesma)* - [`b1c8cac`](https://github.com/tobymao/sqlglot/commit/b1c8cace6ed3e58657726fa5617a6df63d91f737) - traverse union scopes iteratively *(PR [#3112](https://github.com/tobymao/sqlglot/pull/3112) by [@georgesittas](https://github.com/georgesittas))* - [`94b5a2f`](https://github.com/tobymao/sqlglot/commit/94b5a2fcba3c41d38734f045b7f1d5d4735e4828) - **athena**: Fix CREATE TABLE properties, STRING data type *(PR [#3129](https://github.com/tobymao/sqlglot/pull/3129) by [@VaggelisD](https://github.com/VaggelisD))* - 
:arrow_lower_right: *fixes issue [#3126](https://github.com/tobymao/sqlglot/issues/3126) opened by [@matthias-Q](https://github.com/matthias-Q)* ### :recycle: Refactors - [`0ce9ef1`](https://github.com/tobymao/sqlglot/commit/0ce9ef12d9c030b145d7a7a7432bfc188d6c179a) - improve parsing of storage provider setting in index params *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v22.3.1] - 2024-03-09 ### :sparkles: New Features - [`80b2320`](https://github.com/tobymao/sqlglot/commit/80b23201f9668a5845002c1c21b0a394003847f9) - no recursion dfs *(PR [#3105](https://github.com/tobymao/sqlglot/pull/3105) by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`fa84e2c`](https://github.com/tobymao/sqlglot/commit/fa84e2c2d9ae349033039ec649decc371561e421) - copy all arg keys, including those set to None *(PR [#3108](https://github.com/tobymao/sqlglot/pull/3108) by [@georgesittas](https://github.com/georgesittas))* ## [v22.3.0] - 2024-03-08 ### :sparkles: New Features - [`46c9c2c`](https://github.com/tobymao/sqlglot/commit/46c9c2c35ea5132995cb07a99b94d18d959e6172) - **snowflake**: parse CREATE SEQUENCE *(PR [#3072](https://github.com/tobymao/sqlglot/pull/3072) by [@tekumara](https://github.com/tekumara))* - :arrow_lower_right: *addresses issue [#2954](https://github.com/tobymao/sqlglot/issues/2954) opened by [@tharwan](https://github.com/tharwan)* - [`9f1e1ad`](https://github.com/tobymao/sqlglot/commit/9f1e1ad4350fb412319511825ca3da9b9af14084) - add Athena dialect *(PR [#3089](https://github.com/tobymao/sqlglot/pull/3089) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3087](https://github.com/tobymao/sqlglot/issues/3087) opened by [@sbrandtb](https://github.com/sbrandtb)* - [`efee388`](https://github.com/tobymao/sqlglot/commit/efee38858c4501ccace4b3eb3f066cb352f3ac60) - no more recursion for union generation *(PR [#3101](https://github.com/tobymao/sqlglot/pull/3101) by 
[@tobymao](https://github.com/tobymao))* - [`ddab9df`](https://github.com/tobymao/sqlglot/commit/ddab9dff663985d9473ce4b2dbe4fe266ae1bdf7) - **duckdb**: add support for exp.ArrayJoin *(PR [#3102](https://github.com/tobymao/sqlglot/pull/3102) by [@seruman](https://github.com/seruman))* - [`8d5be0c`](https://github.com/tobymao/sqlglot/commit/8d5be0cf54e77000b220cfcca0edfdeb1759b70b) - **duckdb**: make ARRAY_TO_STRING transpilable to other dialects *(commit by [@georgesittas](https://github.com/georgesittas))* - [`a38db01`](https://github.com/tobymao/sqlglot/commit/a38db014cce8ada9554c205c879ae0c0dfda1b14) - Generalizing CREATE SEQUENCE *(PR [#3090](https://github.com/tobymao/sqlglot/pull/3090) by [@VaggelisD](https://github.com/VaggelisD))* - [`18fd079`](https://github.com/tobymao/sqlglot/commit/18fd0794302a1ecaa91be9dfbc7feddd0b8a3b05) - no recursion copy *(PR [#3103](https://github.com/tobymao/sqlglot/pull/3103) by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`b101013`](https://github.com/tobymao/sqlglot/commit/b101013336d0aef6dc99b5ebef85afc12591e212) - subquery edge cases *(PR [#3076](https://github.com/tobymao/sqlglot/pull/3076) by [@tobymao](https://github.com/tobymao))* - [`8c4400b`](https://github.com/tobymao/sqlglot/commit/8c4400ba194661d1e1ee4aa4ea2649b2356a5f02) - **bigquery**: more table qualification edge cases closes [#3083](https://github.com/tobymao/sqlglot/pull/3083) *(commit by [@tobymao](https://github.com/tobymao))* - [`d898f55`](https://github.com/tobymao/sqlglot/commit/d898f559fac44789da08689e835619f978c05a3e) - **bigquery**: even more edge cases *(commit by [@tobymao](https://github.com/tobymao))* - [`4fb74ff`](https://github.com/tobymao/sqlglot/commit/4fb74ff61effd9e5fa8593cdf1c9229d5106ab7e) - dataframe optimize user input *(PR [#3092](https://github.com/tobymao/sqlglot/pull/3092) by [@eakmanrq](https://github.com/eakmanrq))* - :arrow_lower_right: *fixes issue [#3091](https://github.com/tobymao/sqlglot/issues/3091) opened 
by [@alexdemeo](https://github.com/alexdemeo)* ### :recycle: Refactors - [`cea7508`](https://github.com/tobymao/sqlglot/commit/cea7508c5f2b5838e889486d28df47ad9b263345) - **lineage**: simplify `Node.walk()` *(PR [#3098](https://github.com/tobymao/sqlglot/pull/3098) by [@rexledesma](https://github.com/rexledesma))* - [`ebe5a46`](https://github.com/tobymao/sqlglot/commit/ebe5a462ed50711d6ded18b454c5294e487e323f) - **executor**: simplify column type inference *(PR [#3104](https://github.com/tobymao/sqlglot/pull/3104) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`6c67a2b`](https://github.com/tobymao/sqlglot/commit/6c67a2b0dbe2a66ea4ce2e008101f4cf41b1c517) - reduce size of tpcds *(commit by [@tobymao](https://github.com/tobymao))* - [`21e4fca`](https://github.com/tobymao/sqlglot/commit/21e4fca2b744a22981d8ff1696986061d3344d40) - update dialect count in README to include Athena *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v22.2.1] - 2024-03-04 ### :sparkles: New Features - [`19e07f3`](https://github.com/tobymao/sqlglot/commit/19e07f39688f53fafac25655616883363f20b1cf) - initial commit prql *(commit by [@tobymao](https://github.com/tobymao))* - [`13b64fd`](https://github.com/tobymao/sqlglot/commit/13b64fdcdde35e8fe022f76f4f2a5d55d53b982f) - more prql *(commit by [@tobymao](https://github.com/tobymao))* - [`3d263aa`](https://github.com/tobymao/sqlglot/commit/3d263aafbfb8d45bc678914e1eb925592c30eaf8) - **oracle**: Support for INSERT hint *(PR [#3077](https://github.com/tobymao/sqlglot/pull/3077) by [@VaggelisD](https://github.com/VaggelisD))* - :arrow_lower_right: *addresses issue [#3074](https://github.com/tobymao/sqlglot/issues/3074) opened by [@sunrutcon](https://github.com/sunrutcon)* ### :bug: Bug Fixes - [`c51b64f`](https://github.com/tobymao/sqlglot/commit/c51b64fa6a437698fd8b347d98ffaf9fb543d2d5) - json extract precedence closes [#3068](https://github.com/tobymao/sqlglot/pull/3068) *(commit by 
[@tobymao](https://github.com/tobymao))* - [`223a475`](https://github.com/tobymao/sqlglot/commit/223a4751f88809710872fa7d757d22d9eeeb4f40) - **planner**: don't overwrite JOIN step name *(PR [#3071](https://github.com/tobymao/sqlglot/pull/3071) by [@georgesittas](https://github.com/georgesittas))* - [`2770ddc`](https://github.com/tobymao/sqlglot/commit/2770ddcc34148f85caeabf2b6f4f799b3e825a6c) - drop CLUSTER/DISTRIBUTED/SORT BY modifiers when unsupported *(PR [#3069](https://github.com/tobymao/sqlglot/pull/3069) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3066](https://github.com/tobymao/sqlglot/issues/3066) opened by [@cploonker](https://github.com/cploonker)* - [`4173ea2`](https://github.com/tobymao/sqlglot/commit/4173ea29bbd8944896c259fe45209de69fcbdc46) - handle lineage of subqueries *(PR [#3075](https://github.com/tobymao/sqlglot/pull/3075) by [@tobymao](https://github.com/tobymao))* - [`0ebce40`](https://github.com/tobymao/sqlglot/commit/0ebce40f5e524c61ece022bbf8640556e880a4bf) - **redshift**: don't transform multi-arg DISTINCT clause *(PR [#3079](https://github.com/tobymao/sqlglot/pull/3079) by [@georgesittas](https://github.com/georgesittas))* ## [v22.2.0] - 2024-03-01 ### :boom: BREAKING CHANGES - due to [`08bafbd`](https://github.com/tobymao/sqlglot/commit/08bafbd597b6a81e222832fac9f068f1290e41fa) - handle unnesting groups closes [#3056](https://github.com/tobymao/sqlglot/pull/3056) *(PR [#3058](https://github.com/tobymao/sqlglot/pull/3058) by [@tobymao](https://github.com/tobymao))*: handle unnesting groups closes #3056 (#3058) - due to [`4029fab`](https://github.com/tobymao/sqlglot/commit/4029fab81e9abcedd6321baaf5baf9aa192f643d) - expand alias refs of double aggs if it is a window func *(PR [#3059](https://github.com/tobymao/sqlglot/pull/3059) by [@tobymao](https://github.com/tobymao))*: expand alias refs of double aggs if it is a window func (#3059) ### :sparkles: New Features - 
[`8662e31`](https://github.com/tobymao/sqlglot/commit/8662e31bd115eb668c4b74377ed2985937e81510) - **postgres**: improve transpilation of JSON array unnesting *(PR [#3063](https://github.com/tobymao/sqlglot/pull/3063) by [@georgesittas](https://github.com/georgesittas))* - [`c9bde44`](https://github.com/tobymao/sqlglot/commit/c9bde44bac5f7026388ec6357a6c1e00ee760edc) - Making parse_number & parse_string more lenient *(PR [#3064](https://github.com/tobymao/sqlglot/pull/3064) by [@VaggelisD](https://github.com/VaggelisD))* ### :bug: Bug Fixes - [`08bafbd`](https://github.com/tobymao/sqlglot/commit/08bafbd597b6a81e222832fac9f068f1290e41fa) - handle unnesting groups closes [#3056](https://github.com/tobymao/sqlglot/pull/3056) *(PR [#3058](https://github.com/tobymao/sqlglot/pull/3058) by [@tobymao](https://github.com/tobymao))* - [`4029fab`](https://github.com/tobymao/sqlglot/commit/4029fab81e9abcedd6321baaf5baf9aa192f643d) - expand alias refs of double aggs if it is a window func *(PR [#3059](https://github.com/tobymao/sqlglot/pull/3059) by [@tobymao](https://github.com/tobymao))* - [`4e6e82c`](https://github.com/tobymao/sqlglot/commit/4e6e82c9d4d9ef33635446e19b4b44f3ae27160c) - **snowflake**: allow any identifier after : closes [#3061](https://github.com/tobymao/sqlglot/pull/3061) *(PR [#3062](https://github.com/tobymao/sqlglot/pull/3062) by [@georgesittas](https://github.com/georgesittas))* - [`e2becea`](https://github.com/tobymao/sqlglot/commit/e2becead1e6be12ddf8bde703d2c403220506784) - is distinct from parsing *(commit by [@tobymao](https://github.com/tobymao))* - [`c8a753b`](https://github.com/tobymao/sqlglot/commit/c8a753b488d99172db9df10616e8bd3431452ff8) - Ignore Identifier nodes in the diffing algorithm *(PR [#3065](https://github.com/tobymao/sqlglot/pull/3065) by [@izeigerman](https://github.com/izeigerman))* ## [v22.1.1] - 2024-02-29 ### :sparkles: New Features - [`1e25ec9`](https://github.com/tobymao/sqlglot/commit/1e25ec984510a1ffee76956b0dcb15bcd84f5d44) 
- **test**: handle NULL value in TPC-DS *(PR [#3052](https://github.com/tobymao/sqlglot/pull/3052) by [@fool1280](https://github.com/fool1280))* - [`ad21b6b`](https://github.com/tobymao/sqlglot/commit/ad21b6b47716d394ca6b8fb3b82d58b887d5adb3) - **test**: add more passing tpc-ds test *(PR [#3053](https://github.com/tobymao/sqlglot/pull/3053) by [@fool1280](https://github.com/fool1280))* ### :bug: Bug Fixes - [`08249af`](https://github.com/tobymao/sqlglot/commit/08249af50351a24277e1f3f1574629eb5c68d3a5) - Hive UnixToTime regression, README stale results *(PR [#3055](https://github.com/tobymao/sqlglot/pull/3055) by [@VaggelisD](https://github.com/VaggelisD))* - [`39b3813`](https://github.com/tobymao/sqlglot/commit/39b381341fe697ae54f5d3a438b4035447fe552a) - **redshift**: don't pop recursive cte table columns *(commit by [@tobymao](https://github.com/tobymao))* - [`6a9501f`](https://github.com/tobymao/sqlglot/commit/6a9501f7407be3682ce3b9cc73b7340ad9a0c2e8) - ensure UDF identifier quotes are preserved *(PR [#3057](https://github.com/tobymao/sqlglot/pull/3057) by [@georgesittas](https://github.com/georgesittas))* ## [v22.1.0] - 2024-02-29 ### :sparkles: New Features - [`6393979`](https://github.com/tobymao/sqlglot/commit/63939796b39c69b25adfc6f224ccd4761f23cb66) - **oracle**: connect_by_root closes [#3050](https://github.com/tobymao/sqlglot/pull/3050) *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`bd0a40d`](https://github.com/tobymao/sqlglot/commit/bd0a40dde2ab2ad168ada0d5bae0c99fba9d762f) - normalize column for lineage and raise if cannot find closes [#3049](https://github.com/tobymao/sqlglot/pull/3049) *(commit by [@tobymao](https://github.com/tobymao))* ## [v22.0.2] - 2024-02-28 ### :sparkles: New Features - [`51f8d58`](https://github.com/tobymao/sqlglot/commit/51f8d5897b18e6f7c0bc66881a3e36c8842ff2ff) - **tsql**: add support for OPTION clause, select only *(PR [#3025](https://github.com/tobymao/sqlglot/pull/3025) by 
[@nadav-botanica](https://github.com/nadav-botanica))* - [`c9eef99`](https://github.com/tobymao/sqlglot/commit/c9eef99b8fe3367c22a8186fb397ad550ac11386) - Support for TRUNCATE TABLE/DATABASE DDL *(PR [#3026](https://github.com/tobymao/sqlglot/pull/3026) by [@VaggelisD](https://github.com/VaggelisD))* - [`703b878`](https://github.com/tobymao/sqlglot/commit/703b87816c3e5f7b50407d2f2a14f3a9cba4e3f8) - **mysql**: add LOCK property, allow properties after ALTER TABLE *(PR [#3027](https://github.com/tobymao/sqlglot/pull/3027) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#3020](https://github.com/tobymao/sqlglot/issues/3020) opened by [@samotarnik](https://github.com/samotarnik)* ### :bug: Bug Fixes - [`bc4acb9`](https://github.com/tobymao/sqlglot/commit/bc4acb9582a80a6c3d4b491b48a68f110e399e3a) - allow trailing comma in ORDER BY list *(PR [#3031](https://github.com/tobymao/sqlglot/pull/3031) by [@georgesittas](https://github.com/georgesittas))* - [`4105639`](https://github.com/tobymao/sqlglot/commit/4105639ddbbc504d4bd4607511ac35e8ca30c774) - **bigquery**: unquoted project-0.x closes [#3029](https://github.com/tobymao/sqlglot/pull/3029) *(commit by [@tobymao](https://github.com/tobymao))* - [`f1f2aec`](https://github.com/tobymao/sqlglot/commit/f1f2aecb09c6c0d9a965d87669368945abd112cc) - bigquery edgecase *(commit by [@tobymao](https://github.com/tobymao))* - [`5c01c01`](https://github.com/tobymao/sqlglot/commit/5c01c010348271e8cfddea3ed0ac51293c3819b3) - handle falsey values for replace_placeholders kwargs *(PR [#3036](https://github.com/tobymao/sqlglot/pull/3036) by [@sarchila](https://github.com/sarchila))* - [`ccfbb22`](https://github.com/tobymao/sqlglot/commit/ccfbb2238131bda8fc7a3ad8a9c50a0f009dac52) - **clickhouse**: make CTE expression parser more flexible fixes [#3038](https://github.com/tobymao/sqlglot/pull/3038) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`30e0bb1`](https://github.com/tobymao/sqlglot/commit/30e0bb13162e75c53b031bbb69c66093f8ad4a96) - another edge case *(commit by [@tobymao](https://github.com/tobymao))* - [`0d93852`](https://github.com/tobymao/sqlglot/commit/0d938524a618b4bd7c057623a2c8755ca3afec6d) - **oracle**: handle GLOBAL/PRIVATE keyword in temp table DDL *(PR [#3045](https://github.com/tobymao/sqlglot/pull/3045) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3037](https://github.com/tobymao/sqlglot/issues/3037) opened by [@gforsyth](https://github.com/gforsyth)* - [`e89d38d`](https://github.com/tobymao/sqlglot/commit/e89d38ddd5f699f2ac09baf77238ad5fab00acb8) - **duckdb**: recognize ENUM as a type *(PR [#3044](https://github.com/tobymao/sqlglot/pull/3044) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#3043](https://github.com/tobymao/sqlglot/issues/3043) opened by [@joouha](https://github.com/joouha)* - [`4db7781`](https://github.com/tobymao/sqlglot/commit/4db77816a44652b3edc8aae5aab24242854f9a14) - avoid raising a KeyError in the lineage module, log a warning *(PR [#3048](https://github.com/tobymao/sqlglot/pull/3048) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`5337980`](https://github.com/tobymao/sqlglot/commit/53379805454f0e6f325581b839d2fcb37c10de1b) - simplify parsing of keyword sequences as Vars *(PR [#3034](https://github.com/tobymao/sqlglot/pull/3034) by [@georgesittas](https://github.com/georgesittas))* - [`bc35c59`](https://github.com/tobymao/sqlglot/commit/bc35c59004cb3fb9849f0ee8e5f06b356396c0b0) - use _parse_var_from_options for USE statement parser *(PR [#3035](https://github.com/tobymao/sqlglot/pull/3035) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`c0d355a`](https://github.com/tobymao/sqlglot/commit/c0d355a27d86539dfd95a87fea7e1bd75c4fabe4) - bump sqlglotrs to 0.1.2 *(commit by 
[@georgesittas](https://github.com/georgesittas))* ## [v22.0.1] - 2024-02-26 ### :bug: Bug Fixes - [`e2fc6e8`](https://github.com/tobymao/sqlglot/commit/e2fc6e88dc7ae52d956dd84721de197c6c698d90) - **optimizer**: fix parent mutation of new_projections in column qualifier *(PR [#3030](https://github.com/tobymao/sqlglot/pull/3030) by [@georgesittas](https://github.com/georgesittas))* ## [v22.0.0] - 2024-02-26 ### :boom: BREAKING CHANGES - due to [`2507aa2`](https://github.com/tobymao/sqlglot/commit/2507aa2dbad3304304558565f266a7f94acd9e98) - consolidate Subqueryable and Unionable into Query expression *(PR [#2992](https://github.com/tobymao/sqlglot/pull/2992) by [@georgesittas](https://github.com/georgesittas))*: consolidate Subqueryable and Unionable into Query expression (#2992) - due to [`d5eb2b1`](https://github.com/tobymao/sqlglot/commit/d5eb2b1e0907026e6e981a8f453f747cb16f44d6) - make implicit unnest syntax explicit by using UNNEST calls *(PR [#3005](https://github.com/tobymao/sqlglot/pull/3005) by [@georgesittas](https://github.com/georgesittas))*: make implicit unnest syntax explicit by using UNNEST calls (#3005) - due to [`238f9aa`](https://github.com/tobymao/sqlglot/commit/238f9aa7c32037d0c280cfe6ece77eed9c311cc5) - refactor structs to always be aliases *(PR [#3017](https://github.com/tobymao/sqlglot/pull/3017) by [@tobymao](https://github.com/tobymao))*: refactor structs to always be aliases (#3017) - due to [`06bcfcd`](https://github.com/tobymao/sqlglot/commit/06bcfcdf69f850693d941675bbcfce1aa80482f6) - select expressions not statements closes [#3022](https://github.com/tobymao/sqlglot/pull/3022), statements can be parsed without into *(commit by [@tobymao](https://github.com/tobymao))*: select expressions not statements closes #3022, statements can be parsed without into - due to [`1612e62`](https://github.com/tobymao/sqlglot/commit/1612e622bd3514d9ca366837f47452969e5267d8) - Add reference to lineage node *(PR 
[#3018](https://github.com/tobymao/sqlglot/pull/3018) by [@vchan](https://github.com/vchan))*: Add reference to lineage node (#3018) ### :sparkles: New Features - [`e50609b`](https://github.com/tobymao/sqlglot/commit/e50609b119c65407f4f7fe27f06510187dc750a0) - Supporting RANGE <-> GENERATE_SERIES between DuckDB & SQLite *(PR [#3010](https://github.com/tobymao/sqlglot/pull/3010) by [@VaggelisD](https://github.com/VaggelisD))* - [`1709ec2`](https://github.com/tobymao/sqlglot/commit/1709ec2519edc4b1a91f435d76f1b962355be326) - bigquery e6s format *(commit by [@tobymao](https://github.com/tobymao))* - [`17e34e7`](https://github.com/tobymao/sqlglot/commit/17e34e79d22e3c8211f1bf42047d4ed3557628b6) - add unnest type annotations *(PR [#3019](https://github.com/tobymao/sqlglot/pull/3019) by [@tobymao](https://github.com/tobymao))* - [`efdbc12`](https://github.com/tobymao/sqlglot/commit/efdbc127a06b1c6204327caa0d6b0cb01590da13) - clickhouse prewhere closes [#3024](https://github.com/tobymao/sqlglot/pull/3024) *(commit by [@tobymao](https://github.com/tobymao))* - [`1612e62`](https://github.com/tobymao/sqlglot/commit/1612e622bd3514d9ca366837f47452969e5267d8) - Add reference to lineage node *(PR [#3018](https://github.com/tobymao/sqlglot/pull/3018) by [@vchan](https://github.com/vchan))* - [`5c3bd10`](https://github.com/tobymao/sqlglot/commit/5c3bd1074960874b4557b13df6d30782fe7b0757) - **test**: add more passing tests of tpc-ds *(PR [#3016](https://github.com/tobymao/sqlglot/pull/3016) by [@fool1280](https://github.com/fool1280))* ### :bug: Bug Fixes - [`7f547e6`](https://github.com/tobymao/sqlglot/commit/7f547e641f7a0ecaa804d5bea14bd24abce1d346) - it's actually seconds + fraction *(commit by [@tobymao](https://github.com/tobymao))* - [`238f9aa`](https://github.com/tobymao/sqlglot/commit/238f9aa7c32037d0c280cfe6ece77eed9c311cc5) - refactor structs to always be aliases *(PR [#3017](https://github.com/tobymao/sqlglot/pull/3017) by [@tobymao](https://github.com/tobymao))* - 
:arrow_lower_right: *fixes issue [#3015](https://github.com/tobymao/sqlglot/issues/3015) opened by [@wizardxz](https://github.com/wizardxz)* - [`06bcfcd`](https://github.com/tobymao/sqlglot/commit/06bcfcdf69f850693d941675bbcfce1aa80482f6) - select expressions not statements closes [#3022](https://github.com/tobymao/sqlglot/pull/3022), statements can be parsed without into *(commit by [@tobymao](https://github.com/tobymao))* ### :recycle: Refactors - [`2507aa2`](https://github.com/tobymao/sqlglot/commit/2507aa2dbad3304304558565f266a7f94acd9e98) - consolidate Subqueryable and Unionable into Query expression *(PR [#2992](https://github.com/tobymao/sqlglot/pull/2992) by [@georgesittas](https://github.com/georgesittas))* - [`d5eb2b1`](https://github.com/tobymao/sqlglot/commit/d5eb2b1e0907026e6e981a8f453f747cb16f44d6) - make implicit unnest syntax explicit by using UNNEST calls *(PR [#3005](https://github.com/tobymao/sqlglot/pull/3005) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2996](https://github.com/tobymao/sqlglot/issues/2996) opened by [@wizardxz](https://github.com/wizardxz)* - [`8943179`](https://github.com/tobymao/sqlglot/commit/8943179dfadba4ed36740322e1e5d3611032b51e) - move limit method to Query, get rid of Subquery.subquery override *(PR [#3013](https://github.com/tobymao/sqlglot/pull/3013) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`9595240`](https://github.com/tobymao/sqlglot/commit/9595240a1c0f0e5ace9f67f31564e5d5edb9a9d2) - make prewhere clickhouse only *(commit by [@tobymao](https://github.com/tobymao))* ## [v21.2.1] - 2024-02-22 ### :sparkles: New Features - [`2a88e40`](https://github.com/tobymao/sqlglot/commit/2a88e40da89fa083bbd8fd0174082fa8e677780a) - **bigquery**: support ELSE and ELSEIF procedural statements *(PR [#3011](https://github.com/tobymao/sqlglot/pull/3011) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue 
[#3009](https://github.com/tobymao/sqlglot/issues/3009) opened by [@razvan-am](https://github.com/razvan-am)* - [`d2e15ed`](https://github.com/tobymao/sqlglot/commit/d2e15ed9b2ab2699f7105f73170b9d780293d432) - improve transpilation of Doris' MONTHS_ADD *(PR [#3012](https://github.com/tobymao/sqlglot/pull/3012) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`c23ac05`](https://github.com/tobymao/sqlglot/commit/c23ac05379e2aa5cb5681e26e2c0b8137300baa3) - bigquery group by order by rewriting with indices *(commit by [@tobymao](https://github.com/tobymao))* ## [v21.2.0] - 2024-02-22 ### :boom: BREAKING CHANGES - due to [`2940417`](https://github.com/tobymao/sqlglot/commit/2940417116761f821c913bf093759243db33c343) - simplify ADD CONSTRAINT handling *(PR [#2990](https://github.com/tobymao/sqlglot/pull/2990) by [@georgesittas](https://github.com/georgesittas))*: simplify ADD CONSTRAINT handling (#2990) ### :sparkles: New Features - [`7c48079`](https://github.com/tobymao/sqlglot/commit/7c4807918de53d18fbfe0295b2644f0ad46003a8) - support parameters in BigQuery / DuckDB *(PR [#2991](https://github.com/tobymao/sqlglot/pull/2991) by [@r1b](https://github.com/r1b))* - [`b7c2744`](https://github.com/tobymao/sqlglot/commit/b7c2744eba3df631b575e8ab35f29f46419f83ba) - **tests**: update test_executor with tpc-ds *(PR [#2983](https://github.com/tobymao/sqlglot/pull/2983) by [@fool1280](https://github.com/fool1280))* - [`c433cad`](https://github.com/tobymao/sqlglot/commit/c433cad7df383e97308ceb946d7f1dc171a5d60b) - allow more lenient bigquery wildcard parsing *(PR [#2998](https://github.com/tobymao/sqlglot/pull/2998) by [@tobymao](https://github.com/tobymao))* - [`8607247`](https://github.com/tobymao/sqlglot/commit/860724732b70b5557221998a45c3c950b39d664a) - support LEFT JOIN UNNEST in duckdb *(PR [#2999](https://github.com/tobymao/sqlglot/pull/2999) by [@r1b](https://github.com/r1b))* - 
[`64e38ed`](https://github.com/tobymao/sqlglot/commit/64e38edb32f9a66a9503e75424d0545da3dbe5df) - add support for more Snowflake SHOW commands *(PR [#3002](https://github.com/tobymao/sqlglot/pull/3002) by [@DanCardin](https://github.com/DanCardin))* ### :bug: Bug Fixes - [`bc18f56`](https://github.com/tobymao/sqlglot/commit/bc18f56a39e0034e2b285efd7a882a417c517a99) - **optimizer**: don't coerce nested arg types in annotate_by_args *(PR [#2997](https://github.com/tobymao/sqlglot/pull/2997) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2152](https://github.com/TobikoData/sqlmesh/issues/2152) opened by [@plaflamme](https://github.com/plaflamme)* - [`ccd8cc0`](https://github.com/tobymao/sqlglot/commit/ccd8cc01429d21653198edce079679e17dbb22f6) - doris to_char closes [#3001](https://github.com/tobymao/sqlglot/pull/3001) *(commit by [@tobymao](https://github.com/tobymao))* ### :recycle: Refactors - [`2940417`](https://github.com/tobymao/sqlglot/commit/2940417116761f821c913bf093759243db33c343) - simplify ADD CONSTRAINT handling *(PR [#2990](https://github.com/tobymao/sqlglot/pull/2990) by [@georgesittas](https://github.com/georgesittas))* - [`d2711f7`](https://github.com/tobymao/sqlglot/commit/d2711f717aac4a7b624225d31c7fa827f8287476) - clean up duplicative placeholder_sql implementations *(PR [#2993](https://github.com/tobymao/sqlglot/pull/2993) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`53efb58`](https://github.com/tobymao/sqlglot/commit/53efb587a642a171bdb4fb6ad4c33a83c4391908) - cleanup tests *(commit by [@tobymao](https://github.com/tobymao))* ## [v21.1.2] - 2024-02-19 ### :sparkles: New Features - [`b8cbf66`](https://github.com/tobymao/sqlglot/commit/b8cbf66471158371a27d9145b3b553b7a1384c9d) - **bigquery**: parse procedural EXCEPTION WHEN statement into a Command closes [#2981](https://github.com/tobymao/sqlglot/pull/2981) *(commit by [@georgesittas](https://github.com/georgesittas))* - 
[`906ceee`](https://github.com/tobymao/sqlglot/commit/906ceee0946c547f83177916c3f8d6aeb23023a8) - **duckdb**: implement generation logic for exp.ArrayAny *(PR [#2984](https://github.com/tobymao/sqlglot/pull/2984) by [@georgesittas](https://github.com/georgesittas))* - [`92455e4`](https://github.com/tobymao/sqlglot/commit/92455e4d4e2c8d5a874a5050d9a38f943479cdca) - **snowflake**: create storage integration *(PR [#2985](https://github.com/tobymao/sqlglot/pull/2985) by [@tekumara](https://github.com/tekumara))* - [`bedf6e9`](https://github.com/tobymao/sqlglot/commit/bedf6e9dabf9da25e1fff2f3c8ae22fbf7face0b) - improve transpilation support for ArrayAny *(PR [#2986](https://github.com/tobymao/sqlglot/pull/2986) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2707](https://github.com/tobymao/sqlglot/issues/2707) opened by [@HuashiSCNU0303](https://github.com/HuashiSCNU0303)* ### :bug: Bug Fixes - [`cc67ab2`](https://github.com/tobymao/sqlglot/commit/cc67ab2513c71a6b9574f8c3cf4c8ba2927d798f) - **tsql**: map StrPosition back to CHARINDEX fixes [#2968](https://github.com/tobymao/sqlglot/pull/2968) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`46f15ce`](https://github.com/tobymao/sqlglot/commit/46f15cef87de3159bc1d422b2620278e9e27ec16) - **postgres**: ensure json extraction can roundtrip unaltered *(PR [#2974](https://github.com/tobymao/sqlglot/pull/2974) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2971](https://github.com/tobymao/sqlglot/issues/2971) opened by [@l-vincent-l](https://github.com/l-vincent-l)* - [`7ee4fe7`](https://github.com/tobymao/sqlglot/commit/7ee4fe73b29234f2837a212b6c872efd7f5c30ea) - expand using with star except *(commit by [@tobymao](https://github.com/tobymao))* ### :recycle: Refactors - [`5a34f3d`](https://github.com/tobymao/sqlglot/commit/5a34f3d5f652ac209fd122aa25e46d99d8e5cba6) - clean up tech debt in dialect implementations *(PR 
[#2977](https://github.com/tobymao/sqlglot/pull/2977) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`ae92789`](https://github.com/tobymao/sqlglot/commit/ae92789cdac4c4f0bb3d5f542bd9fe93aee4ea70) - rephrase some sentences in the FAQ section *(PR [#2980](https://github.com/tobymao/sqlglot/pull/2980) by [@georgesittas](https://github.com/georgesittas))* - [`22ed4d0`](https://github.com/tobymao/sqlglot/commit/22ed4d0a976dbba15962670873422e86874680b0) - cleanup kv defs from brackets *(PR [#2987](https://github.com/tobymao/sqlglot/pull/2987) by [@tobymao](https://github.com/tobymao))* ## [v21.1.1] - 2024-02-14 ### :sparkles: New Features - [`1d0b3d3`](https://github.com/tobymao/sqlglot/commit/1d0b3d3a22ba5a8128505d636a2ff71d0ea03d03) - add support for multi-part interval addition syntax *(PR [#2970](https://github.com/tobymao/sqlglot/pull/2970) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2969](https://github.com/tobymao/sqlglot/issues/2969) opened by [@aersam](https://github.com/aersam)* ### :bug: Bug Fixes - [`1c67f03`](https://github.com/tobymao/sqlglot/commit/1c67f030cd9df530e26c620079b2298b1db97d50) - **parser**: enable parsing of values into Identifier for some dialects *(PR [#2962](https://github.com/tobymao/sqlglot/pull/2962) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2957](https://github.com/tobymao/sqlglot/issues/2957) opened by [@hsheth2](https://github.com/hsheth2)* - [`d8b0d4f`](https://github.com/tobymao/sqlglot/commit/d8b0d4fcc82662004056a68b05ca20f30996661f) - don't treat VALUES as a keyword in BigQuery, Redshift *(PR [#2965](https://github.com/tobymao/sqlglot/pull/2965) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2963](https://github.com/tobymao/sqlglot/issues/2963) opened by [@sean-rose](https://github.com/sean-rose)* - 
[`5b7fd10`](https://github.com/tobymao/sqlglot/commit/5b7fd107f279c2f83c9d66d4353032c6d830202c) - **optimizer**: more optimizations for qualifying wide tables *(PR [#2972](https://github.com/tobymao/sqlglot/pull/2972) by [@barakalon](https://github.com/barakalon))* - [`6cb985a`](https://github.com/tobymao/sqlglot/commit/6cb985ae1346c1a912ed6f81be30310ee1c91dfa) - pass dialect in to_table call inside replace_tables *(PR [#2973](https://github.com/tobymao/sqlglot/pull/2973) by [@georgesittas](https://github.com/georgesittas))* ## [v21.1.0] - 2024-02-12 ### :sparkles: New Features - [`e71d489`](https://github.com/tobymao/sqlglot/commit/e71d4899e6744812fdefc2704c66bbd6043b5bc9) - add array and tuple helpers *(commit by [@tobymao](https://github.com/tobymao))* - [`876e075`](https://github.com/tobymao/sqlglot/commit/876e07580bb2de06b587fc8ad40eb67604ae8507) - **postgres**: root operator closes [#2940](https://github.com/tobymao/sqlglot/pull/2940) *(commit by [@tobymao](https://github.com/tobymao))* - [`e731276`](https://github.com/tobymao/sqlglot/commit/e731276dd5490a7d294430e0887eebf19e16d28f) - **snowflake**: add support for SHOW USERS *(PR [#2948](https://github.com/tobymao/sqlglot/pull/2948) by [@DanCardin](https://github.com/DanCardin))* - [`b9d4468`](https://github.com/tobymao/sqlglot/commit/b9d44688c2b785212db635f121b686df02e2dec9) - **tableau**: identifier and quotes closes [#2950](https://github.com/tobymao/sqlglot/pull/2950) *(commit by [@tobymao](https://github.com/tobymao))* - [`f8d9dbf`](https://github.com/tobymao/sqlglot/commit/f8d9dbf6744f95bf4b7517e8bcc35dd3a6f70c5d) - **sqlite**: add support for IIF *(PR [#2951](https://github.com/tobymao/sqlglot/pull/2951) by [@georgesittas](https://github.com/georgesittas))* - [`b755551`](https://github.com/tobymao/sqlglot/commit/b7555516c6bf038dc39c4bba2b243839ceb6e3b5) - **clickhouse**: add basic support for system statement *(PR [#2953](https://github.com/tobymao/sqlglot/pull/2953) by 
[@GaliFFun](https://github.com/GaliFFun))* ### :bug: Bug Fixes - [`844018b`](https://github.com/tobymao/sqlglot/commit/844018b8d3a3398d746fdc04c966c7e19d311998) - explode_outer to unnest closes [#2941](https://github.com/tobymao/sqlglot/pull/2941) *(commit by [@tobymao](https://github.com/tobymao))* - [`159da45`](https://github.com/tobymao/sqlglot/commit/159da4523d6eb3ca6853d631bb98dc8f13c7b0fb) - posexplode_outer to unnest *(PR [#2942](https://github.com/tobymao/sqlglot/pull/2942) by [@chelsea-lin](https://github.com/chelsea-lin))* - [`76d6634`](https://github.com/tobymao/sqlglot/commit/76d66340e566bd9fa8c783f5d311101eb2e80480) - **spark**: CREATE TABLE ... PARTITIONED BY fixes *(PR [#2937](https://github.com/tobymao/sqlglot/pull/2937) by [@barakalon](https://github.com/barakalon))* - [`d07ddf9`](https://github.com/tobymao/sqlglot/commit/d07ddf9b460c1b6f672fda4f34dc9231419e6c9d) - **optimizer**: remove redundant casts *(PR [#2945](https://github.com/tobymao/sqlglot/pull/2945) by [@barakalon](https://github.com/barakalon))* - [`b70a394`](https://github.com/tobymao/sqlglot/commit/b70a394222bf209298026fd100f6b9498acf9fff) - if doesn't support different types *(commit by [@tobymao](https://github.com/tobymao))* - [`6a988e0`](https://github.com/tobymao/sqlglot/commit/6a988e0160022d33623cd036bf84bb0b222c9062) - **bigquery**: fix annotation of timestamp(x) *(PR [#2946](https://github.com/tobymao/sqlglot/pull/2946) by [@georgesittas](https://github.com/georgesittas))* - [`78e6d0d`](https://github.com/tobymao/sqlglot/commit/78e6d0de83efbff1d3b61c8550db56c1819f7c22) - **optimizer**: qualify_columns optimizations for wide tables *(PR [#2955](https://github.com/tobymao/sqlglot/pull/2955) by [@barakalon](https://github.com/barakalon))* - [`c20cc70`](https://github.com/tobymao/sqlglot/commit/c20cc70dfc7f6395af157521c7e99074d697beb4) - **redshift**: don't assume Table is an unnested Column if Join has a predicate *(PR [#2956](https://github.com/tobymao/sqlglot/pull/2956) by 
[@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2952](https://github.com/tobymao/sqlglot/issues/2952) opened by [@vidit-wisdom](https://github.com/vidit-wisdom)* ### :wrench: Chores - [`c4524ce`](https://github.com/tobymao/sqlglot/commit/c4524ce1e6a85e16db7ea0289116d0160732dc51) - fix unit test *(commit by [@tobymao](https://github.com/tobymao))* ## [v21.0.2] - 2024-02-08 ### :sparkles: New Features - [`1842c96`](https://github.com/tobymao/sqlglot/commit/1842c96611cadb0227dd3ce8f42457679ab0e08b) - **clickhouse**: add support for LIMIT BY clause *(PR [#2926](https://github.com/tobymao/sqlglot/pull/2926) by [@georgesittas](https://github.com/georgesittas))* - [`9241858`](https://github.com/tobymao/sqlglot/commit/9241858e559f089b166d9b794e3ebb395624d84a) - add typing for explode closes [#2927](https://github.com/tobymao/sqlglot/pull/2927) *(commit by [@tobymao](https://github.com/tobymao))* - [`85073d1`](https://github.com/tobymao/sqlglot/commit/85073d1538de8ceef3e5c622a901efd9e6bd38e3) - transpile multi-arg DISTINCT expression *(PR [#2936](https://github.com/tobymao/sqlglot/pull/2936) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2930](https://github.com/tobymao/sqlglot/issues/2930) opened by [@Jake-00](https://github.com/Jake-00)* ### :bug: Bug Fixes - [`b827626`](https://github.com/tobymao/sqlglot/commit/b8276262bdca57e358284fadfdd468d2bc957e84) - remove find method from Schema *(PR [#2934](https://github.com/tobymao/sqlglot/pull/2934) by [@georgesittas](https://github.com/georgesittas))* - [`08cd117`](https://github.com/tobymao/sqlglot/commit/08cd117322302f08c95889ebf8699f4171c1d504) - **postgres**: fallback to parameter parser if heredoc is untokenizable *(PR [#2935](https://github.com/tobymao/sqlglot/pull/2935) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2931](https://github.com/tobymao/sqlglot/issues/2931) opened by 
[@eric-zhu](https://github.com/eric-zhu)* ### :wrench: Chores - [`e4b5edb`](https://github.com/tobymao/sqlglot/commit/e4b5edbef42944b44d11c35aea31411ce3d79826) - bump sqlglotrs to 0.1.1 *(commit by [@georgesittas](https://github.com/georgesittas))* ## [v21.0.1] - 2024-02-07 ### :sparkles: New Features - [`3a20eac`](https://github.com/tobymao/sqlglot/commit/3a20eaccbf5d5a80bd24b95c837cca8103dfe70a) - **clickhouse**: add support for JSONExtractString, clean up some helpers *(PR [#2925](https://github.com/tobymao/sqlglot/pull/2925) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2051](https://github.com/tobymao/sqlglot/issues/2051) opened by [@BTheunissen](https://github.com/BTheunissen)* ## [v21.0.0] - 2024-02-07 ### :boom: BREAKING CHANGES - due to [`b4e8868`](https://github.com/tobymao/sqlglot/commit/b4e886877ecfbafdd64c515c765c3c54764bd987) - improve transpilation of JSON paths across dialects *(PR [#2883](https://github.com/tobymao/sqlglot/pull/2883) by [@georgesittas](https://github.com/georgesittas))*: improve transpilation of JSON paths across dialects (#2883) - due to [`aa388ea`](https://github.com/tobymao/sqlglot/commit/aa388ea64404a26550dbb0734f4d3e35111f9e2c) - ignore nulls closes [#2896](https://github.com/tobymao/sqlglot/pull/2896) *(PR [#2898](https://github.com/tobymao/sqlglot/pull/2898) by [@tobymao](https://github.com/tobymao))*: ignore nulls closes #2896 (#2898) - due to [`617a8c0`](https://github.com/tobymao/sqlglot/commit/617a8c0dfc5e9f2716f7827381af0db2e135059e) - timestamp diff for mysql and databricks *(commit by [@tobymao](https://github.com/tobymao))*: timestamp diff for mysql and databricks - due to [`b00b393`](https://github.com/tobymao/sqlglot/commit/b00b393d853ae05a3fce4ef78d7673edbcabf67d) - use raise instead of assert for assert_is *(commit by [@tobymao](https://github.com/tobymao))*: use raise instead of assert for assert_is - due to 
[`326aa31`](https://github.com/tobymao/sqlglot/commit/326aa31e32e511f4e40d3a5a7b1d599b5e2c1307) - deprecate case where transforms can be plain strs *(PR [#2919](https://github.com/tobymao/sqlglot/pull/2919) by [@georgesittas](https://github.com/georgesittas))*: deprecate case where transforms can be plain strs (#2919) ### :sparkles: New Features - [`fb450f0`](https://github.com/tobymao/sqlglot/commit/fb450f0263ecd6b7c9d0f49d84441327d50b9d83) - add tsql right left auto casting closes [#2899](https://github.com/tobymao/sqlglot/pull/2899) *(commit by [@tobymao](https://github.com/tobymao))* - [`617a8c0`](https://github.com/tobymao/sqlglot/commit/617a8c0dfc5e9f2716f7827381af0db2e135059e) - timestamp diff for mysql and databricks *(commit by [@tobymao](https://github.com/tobymao))* - [`3fa92ca`](https://github.com/tobymao/sqlglot/commit/3fa92cac285cbb2bd9d8b5724dadb77be7e12731) - **redshift**: parse GETDATE *(PR [#2904](https://github.com/tobymao/sqlglot/pull/2904) by [@erickpeirson](https://github.com/erickpeirson))* - [`d262139`](https://github.com/tobymao/sqlglot/commit/d26213998b27fa9b6a66b6d21ab5a3a15f65635e) - **snowflake**: implement parsing logic for SHOW TABLES *(PR [#2913](https://github.com/tobymao/sqlglot/pull/2913) by [@tekumara](https://github.com/tekumara))* - [`838e780`](https://github.com/tobymao/sqlglot/commit/838e7800c32ad16074efef6a188ebd89083a9717) - improve transpilation of CREATE TABLE LIKE statement *(PR [#2923](https://github.com/tobymao/sqlglot/pull/2923) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2922](https://github.com/tobymao/sqlglot/issues/2922) opened by [@tharwan](https://github.com/tharwan)* - [`cbbad1f`](https://github.com/tobymao/sqlglot/commit/cbbad1fc40b6b2ca837ddb0f798b1802ad4063da) - improve transpilation of JSON path wildcards *(PR [#2924](https://github.com/tobymao/sqlglot/pull/2924) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - 
[`aa388ea`](https://github.com/tobymao/sqlglot/commit/aa388ea64404a26550dbb0734f4d3e35111f9e2c) - ignore nulls closes [#2896](https://github.com/tobymao/sqlglot/pull/2896) *(PR [#2898](https://github.com/tobymao/sqlglot/pull/2898) by [@tobymao](https://github.com/tobymao))* - [`b00b393`](https://github.com/tobymao/sqlglot/commit/b00b393d853ae05a3fce4ef78d7673edbcabf67d) - use raise instead of assert for assert_is *(commit by [@tobymao](https://github.com/tobymao))* - [`ab97246`](https://github.com/tobymao/sqlglot/commit/ab972462b1c545b4a60bb88cb40cdb98cb64e360) - array overlaps closes [#2903](https://github.com/tobymao/sqlglot/pull/2903) *(commit by [@tobymao](https://github.com/tobymao))* - [`f3bdcb0`](https://github.com/tobymao/sqlglot/commit/f3bdcb087bb993289c4a1a5d2de54155ae2d2681) - **duckdb**: fix JSON pointer path parsing, reduce warning noise *(PR [#2911](https://github.com/tobymao/sqlglot/pull/2911) by [@georgesittas](https://github.com/georgesittas))* - [`072264f`](https://github.com/tobymao/sqlglot/commit/072264f8af25737050f7becd27af5a9331bde896) - **mysql**: SHOW SCHEMAS *(PR [#2916](https://github.com/tobymao/sqlglot/pull/2916) by [@barakalon](https://github.com/barakalon))* - [`15fdff2`](https://github.com/tobymao/sqlglot/commit/15fdff2df3363ab8d3595e7eeb8baee65e525733) - **optimizer**: don't remove NOT parenthesis *(PR [#2917](https://github.com/tobymao/sqlglot/pull/2917) by [@barakalon](https://github.com/barakalon))* - [`d20d826`](https://github.com/tobymao/sqlglot/commit/d20d826e9cc4a9b0d636a9b56b5547cd906a5903) - have table exclude this if schema target *(PR [#2921](https://github.com/tobymao/sqlglot/pull/2921) by [@eakmanrq](https://github.com/eakmanrq))* ### :recycle: Refactors - [`b4e8868`](https://github.com/tobymao/sqlglot/commit/b4e886877ecfbafdd64c515c765c3c54764bd987) - improve transpilation of JSON paths across dialects *(PR [#2883](https://github.com/tobymao/sqlglot/pull/2883) by [@georgesittas](https://github.com/georgesittas))* - 
:arrow_lower_right: *addresses issue [#2839](https://github.com/tobymao/sqlglot/issues/2839) opened by [@FlaviovLeal](https://github.com/FlaviovLeal)* - [`9481f94`](https://github.com/tobymao/sqlglot/commit/9481f946b068e43d99c9aaae6e1c59abf384eeac) - several JSON path improvements *(PR [#2914](https://github.com/tobymao/sqlglot/pull/2914) by [@georgesittas](https://github.com/georgesittas))* - [`326aa31`](https://github.com/tobymao/sqlglot/commit/326aa31e32e511f4e40d3a5a7b1d599b5e2c1307) - deprecate case where transforms can be plain strs *(PR [#2919](https://github.com/tobymao/sqlglot/pull/2919) by [@georgesittas](https://github.com/georgesittas))* - [`15582f4`](https://github.com/tobymao/sqlglot/commit/15582f40bd18da3fa7adbe454b401ef8d31a131e) - move JSON path generation logic in Generator *(PR [#2920](https://github.com/tobymao/sqlglot/pull/2920) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`afb4f9b`](https://github.com/tobymao/sqlglot/commit/afb4f9bfe074200e60b5a870267fe21aa04a87c5) - switch to ruff *(commit by [@tobymao](https://github.com/tobymao))* - [`f9fdf7b`](https://github.com/tobymao/sqlglot/commit/f9fdf7b3bb25aa7e830b70600728bb35ee1e4ff7) - switch to ruff *(PR [#2912](https://github.com/tobymao/sqlglot/pull/2912) by [@tobymao](https://github.com/tobymao))* - [`71c33fa`](https://github.com/tobymao/sqlglot/commit/71c33fa13b9c416ae50acb10a9b08dcfcfd35f92) - pandas warning *(commit by [@tobymao](https://github.com/tobymao))* ## [v20.11.0] - 2024-01-29 ### :boom: BREAKING CHANGES - due to [`eb8b40a`](https://github.com/tobymao/sqlglot/commit/eb8b40aade54eec8b34a808dda95420dcf7a7e13) - deprecate NULL, TRUE, FALSE constant expressions *(PR [#2884](https://github.com/tobymao/sqlglot/pull/2884) by [@georgesittas](https://github.com/georgesittas))*: deprecate NULL, TRUE, FALSE constant expressions (#2884) ### :sparkles: New Features - [`3a8ed85`](https://github.com/tobymao/sqlglot/commit/3a8ed8573d5562110b312586ae6fca22038e5d05) - 
add alter table alter comment closes [#2889](https://github.com/tobymao/sqlglot/pull/2889) *(commit by [@tobymao](https://github.com/tobymao))* ### :bug: Bug Fixes - [`dc2d7d7`](https://github.com/tobymao/sqlglot/commit/dc2d7d7dd4253fe6b247d534bd92327f186e9aa8) - **tsql**: len text transpilation closes [#2885](https://github.com/tobymao/sqlglot/pull/2885) *(commit by [@tobymao](https://github.com/tobymao))* - [`ad50f47`](https://github.com/tobymao/sqlglot/commit/ad50f479c47d5b4990f1b41272c69079a453cf21) - type imports *(PR [#2886](https://github.com/tobymao/sqlglot/pull/2886) by [@tobymao](https://github.com/tobymao))* - [`e4fb7f6`](https://github.com/tobymao/sqlglot/commit/e4fb7f6e1b8ab15ceb5acc6a93256c849c738740) - union should return union *(commit by [@tobymao](https://github.com/tobymao))* - [`8f795ea`](https://github.com/tobymao/sqlglot/commit/8f795ea00164b69acba093c3684ab54b62138e8e) - don't expand star except/replace refs *(commit by [@tobymao](https://github.com/tobymao))* - [`218121c`](https://github.com/tobymao/sqlglot/commit/218121c274656a1b252143a7d0fc2d73407115ca) - alter table cluster by closes [#2887](https://github.com/tobymao/sqlglot/pull/2887) *(commit by [@tobymao](https://github.com/tobymao))* - [`5cec283`](https://github.com/tobymao/sqlglot/commit/5cec2839f8ed8477821bf766025f4b5de0621fe2) - bigquery script if statement closes [#2888](https://github.com/tobymao/sqlglot/pull/2888) *(commit by [@tobymao](https://github.com/tobymao))* - [`5fc7791`](https://github.com/tobymao/sqlglot/commit/5fc7791a4d19d704c0d4fafe8924cf8f76fcb867) - all view column options without types closes [#2891](https://github.com/tobymao/sqlglot/pull/2891) *(commit by [@tobymao](https://github.com/tobymao))* - [`102304e`](https://github.com/tobymao/sqlglot/commit/102304e28f2ed7126840789837ed797a75bae44e) - **postgres**: generate CurrentUser without parentheses closes [#2893](https://github.com/tobymao/sqlglot/pull/2893) *(commit by 
[@georgesittas](https://github.com/georgesittas))* - [`43902db`](https://github.com/tobymao/sqlglot/commit/43902db25706a2434fe7e9ba39addd1c31c2aa64) - error level ignore comments closes [#2895](https://github.com/tobymao/sqlglot/pull/2895) *(commit by [@tobymao](https://github.com/tobymao))* ### :wrench: Chores - [`eb8b40a`](https://github.com/tobymao/sqlglot/commit/eb8b40aade54eec8b34a808dda95420dcf7a7e13) - deprecate NULL, TRUE, FALSE constant expressions *(PR [#2884](https://github.com/tobymao/sqlglot/pull/2884) by [@georgesittas](https://github.com/georgesittas))* - [`29cddd5`](https://github.com/tobymao/sqlglot/commit/29cddd5c3f5401033197d47e7544cedd91b8046c) - change warning message *(commit by [@tobymao](https://github.com/tobymao))* - [`9eac93e`](https://github.com/tobymao/sqlglot/commit/9eac93e0acd5ae8b034045759fc48937586cbc2e) - upgrade black *(commit by [@tobymao](https://github.com/tobymao))* - [`4f3fac7`](https://github.com/tobymao/sqlglot/commit/4f3fac7815e0d8206c80f1f255336ab630503d4d) - cleanup command parsing and warnings *(commit by [@tobymao](https://github.com/tobymao))* ## [v20.10.0] - 2024-01-24 ### :boom: BREAKING CHANGES - due to [`1f5fc39`](https://github.com/tobymao/sqlglot/commit/1f5fc39c10b92b94bd94afa5fd038fdb9afeb4b4) - jsonpath parsing *(PR [#2867](https://github.com/tobymao/sqlglot/pull/2867) by [@tobymao](https://github.com/tobymao))*: jsonpath parsing (#2867) ### :sparkles: New Features - [`89b439e`](https://github.com/tobymao/sqlglot/commit/89b439e2f93b6c3bedb4e58fe4b5014d42dd5080) - **postgres**: support the INCLUDE clause in INDEX creation *(PR [#2857](https://github.com/tobymao/sqlglot/pull/2857) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2855](https://github.com/tobymao/sqlglot/issues/2855) opened by [@dezhin](https://github.com/dezhin)* - [`90ffff8`](https://github.com/tobymao/sqlglot/commit/90ffff83266b5714b1371a576d9484dfbe4be155) - **clickhouse**: AggregateFunction data type *(PR
[#2832](https://github.com/tobymao/sqlglot/pull/2832) by [@pkit](https://github.com/pkit))* - [`326d3ae`](https://github.com/tobymao/sqlglot/commit/326d3ae7113cca4a67cca3ab3335f7b8dde91f71) - improve transpilation of Spark's TO_UTC_TIMESTAMP *(PR [#2861](https://github.com/tobymao/sqlglot/pull/2861) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2846](https://github.com/tobymao/sqlglot/issues/2846) opened by [@hdemers](https://github.com/hdemers)* - [`6d03587`](https://github.com/tobymao/sqlglot/commit/6d035871feee2b206d73e37b5f9b8c99fc40708f) - **snowflake**: SHOW SCHEMAS/OBJECTS *(PR [#2845](https://github.com/tobymao/sqlglot/pull/2845) by [@tekumara](https://github.com/tekumara))* - :arrow_lower_right: *addresses issue [#2784](https://github.com/tobymao/sqlglot/issues/2784) opened by [@tekumara](https://github.com/tekumara)* - [`d5a08b8`](https://github.com/tobymao/sqlglot/commit/d5a08b8f3a1cdd87e9560c0fe4f11b0f1586978b) - **optimizer**: improve struct type annotation support for EQ-delimited kv pairs *(PR [#2863](https://github.com/tobymao/sqlglot/pull/2863) by [@fool1280](https://github.com/fool1280))* - [`1f5fc39`](https://github.com/tobymao/sqlglot/commit/1f5fc39c10b92b94bd94afa5fd038fdb9afeb4b4) - jsonpath parsing *(PR [#2867](https://github.com/tobymao/sqlglot/pull/2867) by [@tobymao](https://github.com/tobymao))* - [`7fd9045`](https://github.com/tobymao/sqlglot/commit/7fd9045488beb88b2726ae906b8769b7963d1b37) - add support for rename column *(PR [#2866](https://github.com/tobymao/sqlglot/pull/2866) by [@gableh](https://github.com/gableh))* - [`89b781b`](https://github.com/tobymao/sqlglot/commit/89b781b991ce264cd7f8c44fa67860eb9a587b07) - **postgres**: add support for the INHERITS property closes [#2871](https://github.com/tobymao/sqlglot/pull/2871) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`970c202`](https://github.com/tobymao/sqlglot/commit/970c2022a27d4fc355fedb7af830367a5dd96009) - **postgres**: add support for the SET property *(PR
[#2873](https://github.com/tobymao/sqlglot/pull/2873) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2872](https://github.com/tobymao/sqlglot/issues/2872) opened by [@edongashi](https://github.com/edongashi)* - [`6845c37`](https://github.com/tobymao/sqlglot/commit/6845c37e749448972a231926236c08affb71a64f) - make the CREATE parser more lenient *(PR [#2875](https://github.com/tobymao/sqlglot/pull/2875) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`bf03a45`](https://github.com/tobymao/sqlglot/commit/bf03a45d8df9abd63b8102e431c13ca0eb0b0fb0) - **snowflake**: extend _parse_range to gracefully handle colon operator *(PR [#2856](https://github.com/tobymao/sqlglot/pull/2856) by [@georgesittas](https://github.com/georgesittas))* - [`ad14f4e`](https://github.com/tobymao/sqlglot/commit/ad14f4ed1ed3870ae0d7370643c67c235fc89b4b) - qualify alter table table refs in optimizer qualify *(PR [#2862](https://github.com/tobymao/sqlglot/pull/2862) by [@z3z1ma](https://github.com/z3z1ma))* - [`8599903`](https://github.com/tobymao/sqlglot/commit/859990356210f3091b1b66647c1a674fdb0f2ad9) - **optimizer**: compute external columns for union scopes correctly *(PR [#2864](https://github.com/tobymao/sqlglot/pull/2864) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2860](https://github.com/tobymao/sqlglot/issues/2860) opened by [@derekpaulsen](https://github.com/derekpaulsen)* - [`3e065f9`](https://github.com/tobymao/sqlglot/commit/3e065f9503607e6c620fc187e2bdc2d45f7fa1dd) - **optimizer**: don't copy projection in qualify_outputs when attaching alias *(PR [#2868](https://github.com/tobymao/sqlglot/pull/2868) by [@georgesittas](https://github.com/georgesittas))* - [`a642758`](https://github.com/tobymao/sqlglot/commit/a6427585e16fa4b5adc9e01cc22baeb09b2f69bb) - avoid dag cycle with unnesting subqueries closes [#2876](https://github.com/tobymao/sqlglot/pull/2876) *(commit by [@tobymao](https://github.com/tobymao))* -
[`0648453`](https://github.com/tobymao/sqlglot/commit/06484532f0e222004e844501a192b8d4aec654c7) - set div type on multiplication closes [#2878](https://github.com/tobymao/sqlglot/pull/2878) *(commit by [@tobymao](https://github.com/tobymao))* - [`b7fb800`](https://github.com/tobymao/sqlglot/commit/b7fb8006b825ec36d62135877b4e32fccad8d044) - **oracle**: generate with time zone for timestamptz fixes [#2879](https://github.com/tobymao/sqlglot/pull/2879) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`8770e3b`](https://github.com/tobymao/sqlglot/commit/8770e3b7855110a82cb3bc05f3cb6c36a88cfdb2) - **optimizer**: don't qualify CTEs for DDL/DML statements *(PR [#2880](https://github.com/tobymao/sqlglot/pull/2880) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2877](https://github.com/tobymao/sqlglot/issues/2877) opened by [@hsheth2](https://github.com/hsheth2)* ### :wrench: Chores - [`d55cfba`](https://github.com/tobymao/sqlglot/commit/d55cfba7226a61cc8419b3a83eaa0a2ead23be10) - move postgres tests *(commit by [@tobymao](https://github.com/tobymao))* - [`0e43c58`](https://github.com/tobymao/sqlglot/commit/0e43c58a38bb6af63f653062e79626b85f605b63) - **parser**: warn when parsing (>1 tokens) SQL into exp.Command *(PR [#2874](https://github.com/tobymao/sqlglot/pull/2874) by [@georgesittas](https://github.com/georgesittas))* ## [v20.9.0] - 2024-01-18 ### :boom: BREAKING CHANGES - due to [`1be93e4`](https://github.com/tobymao/sqlglot/commit/1be93e45d8347e5fa8a4e39dad625c6dd66ea461) - properly support all unix time scales *(commit by [@tobymao](https://github.com/tobymao))*: properly support all unix time scales ### :sparkles: New Features - [`816976f`](https://github.com/tobymao/sqlglot/commit/816976f52865fb8ade580c727a890a90378c8e50) - extend submodule annotate_types to handle STRUCT *(PR [#2783](https://github.com/tobymao/sqlglot/pull/2783) by [@fool1280](https://github.com/fool1280))* -
[`7bce2f6`](https://github.com/tobymao/sqlglot/commit/7bce2f6abe79dfd8064c625294d94364042207c5) - **oracle**: add support for ORDER SIBLINGS BY clause *(PR [#2821](https://github.com/tobymao/sqlglot/pull/2821) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2819](undefined) opened by [@Shweta-BI-Lead](https://github.com/Shweta-BI-Lead)* - [`ce8d254`](https://github.com/tobymao/sqlglot/commit/ce8d254305f56724982eed8e099ab1abeb8750a1) - **snowflake**: parse RM/REMOVE as commands *(PR [#2825](https://github.com/tobymao/sqlglot/pull/2825) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2824](undefined) opened by [@sfc-gh-jlambert](https://github.com/sfc-gh-jlambert)* - [`1902778`](https://github.com/tobymao/sqlglot/commit/19027786facf8ff730af49c1693149e244502cb0) - add support for multi-unit intervals *(PR [#2822](https://github.com/tobymao/sqlglot/pull/2822) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2793](undefined) opened by [@nikmalviya](https://github.com/nikmalviya)* - [`a537898`](https://github.com/tobymao/sqlglot/commit/a53789840b56be747fa5d670a9d5ea120aee371a) - add support for DESCRIBE EXTENDED *(PR [#2828](https://github.com/tobymao/sqlglot/pull/2828) by [@georgesittas](https://github.com/georgesittas))* - [`6e50759`](https://github.com/tobymao/sqlglot/commit/6e50759fb19c8d00825f626fb8c1ab6792fabd56) - **clickhouse**: support Date32 type *(PR [#2830](https://github.com/tobymao/sqlglot/pull/2830) by [@pkit](https://github.com/pkit))* - [`9560e8f`](https://github.com/tobymao/sqlglot/commit/9560e8fa93d6ac7f4f015bd55091d2fe75e85508) - add support for Heredocs in Databricks Python UDFs *(PR [#2801](https://github.com/tobymao/sqlglot/pull/2801) by [@viethungle-vt1401](https://github.com/viethungle-vt1401))* - [`52ed590`](https://github.com/tobymao/sqlglot/commit/52ed590b0fa75ef8a9f6e4cb2fb48b4fff65996f) - transpile 
SELECT .. INTO to dialects that do not support it *(PR [#2820](https://github.com/tobymao/sqlglot/pull/2820) by [@giorgosnikolaou](https://github.com/giorgosnikolaou))* - [`ea536c4`](https://github.com/tobymao/sqlglot/commit/ea536c4bd7bae0b2916d4bdf9a0ae6a7c5106135) - remove target alias in trino merge *(PR [#2852](https://github.com/tobymao/sqlglot/pull/2852) by [@eakmanrq](https://github.com/eakmanrq))* ### :bug: Bug Fixes - [`6ddbefc`](https://github.com/tobymao/sqlglot/commit/6ddbefcb08ef933454ff8501ac4a3ea4cba2fe60) - **snowflake**: apply range parser after colon, if any *(PR [#2800](https://github.com/tobymao/sqlglot/pull/2800) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2798](undefined) opened by [@mbarugelCA](https://github.com/mbarugelCA)* - [`0c4f44e`](https://github.com/tobymao/sqlglot/commit/0c4f44e8027b28613c72285313493c4683c65275) - **oracle**: regexp_replace replacement is optional closes [#2803](https://github.com/tobymao/sqlglot/pull/2803) *(commit by [@tobymao](https://github.com/tobymao))* - [`5072d5a`](https://github.com/tobymao/sqlglot/commit/5072d5af9a9f629e857071a66228317afd89b1a6) - **oracle**: improve parsing of JSON_OBJECT[AGG] functions *(PR [#2807](https://github.com/tobymao/sqlglot/pull/2807) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2806](undefined) opened by [@Shweta-BI-Lead](https://github.com/Shweta-BI-Lead)* - [`ea39f10`](https://github.com/tobymao/sqlglot/commit/ea39f10150916f2624cb6efcefb6752154c2f88c) - **optimizer**: pushdown predicates more conservatively to avoid DAG cycles *(PR [#2808](https://github.com/tobymao/sqlglot/pull/2808) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2797](undefined) opened by [@Konntroll](https://github.com/Konntroll)* - [`ea58003`](https://github.com/tobymao/sqlglot/commit/ea58003caeed17861085825c19a1a5823e065691) - **snowflake**: insert overwrite into closes 
[#2815](https://github.com/tobymao/sqlglot/pull/2815) *(commit by [@tobymao](https://github.com/tobymao))* - [`d5fa5be`](https://github.com/tobymao/sqlglot/commit/d5fa5be010a2656ead5524a0d756da6e25ab31dc) - **optimizer**: handle table alias columns for (UN)PIVOTs *(PR [#2816](https://github.com/tobymao/sqlglot/pull/2816) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2791](undefined) opened by [@billstark](https://github.com/billstark)* - [`24177f3`](https://github.com/tobymao/sqlglot/commit/24177f39b448490618d3b04f8b8ad75ec2069fd3) - unnest star expansion closes [#2811](https://github.com/tobymao/sqlglot/pull/2811) *(PR [#2818](https://github.com/tobymao/sqlglot/pull/2818) by [@tobymao](https://github.com/tobymao))* - [`3dba6c1`](https://github.com/tobymao/sqlglot/commit/3dba6c194e2fff90f93f0370255a94a1b2a2365a) - add parentheses to no_safe_divide args *(PR [#2826](https://github.com/tobymao/sqlglot/pull/2826) by [@j1ah0ng](https://github.com/j1ah0ng))* - [`ed68d6b`](https://github.com/tobymao/sqlglot/commit/ed68d6be51e8054eaf8b7bf64048f20c999c6cd2) - **clickhouse**: add ipv4/6 data type parser *(PR [#2829](https://github.com/tobymao/sqlglot/pull/2829) by [@pkit](https://github.com/pkit))* - [`70b280f`](https://github.com/tobymao/sqlglot/commit/70b280f24df0f4a0d4cc8262d72ce46412f76be3) - allow insert columns to have spaces *(commit by [@tobymao](https://github.com/tobymao))* - [`57917b8`](https://github.com/tobymao/sqlglot/commit/57917b88ddd9d7d85bc76fc8cb46ffcbf228453d) - **oracle**: TO_TIMESTAMP not parsed as StrToTime *(PR [#2833](https://github.com/tobymao/sqlglot/pull/2833) by [@pkit](https://github.com/pkit))* - :arrow_lower_right: *fixes issue [#2831](undefined) opened by [@Rhiyo](https://github.com/Rhiyo)* - [`9960e11`](https://github.com/tobymao/sqlglot/commit/9960e114818640f26aaa6d911ad3e7ee53df1842) - **optimizer**: annotate struct value without alias correctly *(PR 
[#2812](https://github.com/tobymao/sqlglot/pull/2812) by [@fool1280](https://github.com/fool1280))* - [`a6d396b`](https://github.com/tobymao/sqlglot/commit/a6d396b79cfa199a66b04af9ed62bcd7cd619096) - **doris**: add transformation of aggregation function and last_day function *(PR [#2835](https://github.com/tobymao/sqlglot/pull/2835) by [@echo-hhj](https://github.com/echo-hhj))* - [`8cc252b`](https://github.com/tobymao/sqlglot/commit/8cc252b61b418004bbd6380a8447cb383cf51282) - interval without unit alias closes [#2838](https://github.com/tobymao/sqlglot/pull/2838) *(commit by [@tobymao](https://github.com/tobymao))* - [`607817f`](https://github.com/tobymao/sqlglot/commit/607817f7e43edefe0a077bfeb81a77dd78e170e5) - schema with period name closes [#2842](https://github.com/tobymao/sqlglot/pull/2842) *(commit by [@tobymao](https://github.com/tobymao))* - [`1be93e4`](https://github.com/tobymao/sqlglot/commit/1be93e45d8347e5fa8a4e39dad625c6dd66ea461) - properly support all unix time scales *(commit by [@tobymao](https://github.com/tobymao))* - [`a657fc0`](https://github.com/tobymao/sqlglot/commit/a657fc0ea21aff7452f292fecfcb4bc08ca2e4e9) - **clickhouse,doris**: fix the transformation of ArraySum *(PR [#2843](https://github.com/tobymao/sqlglot/pull/2843) by [@echo-hhj](https://github.com/echo-hhj))* - [`c92888c`](https://github.com/tobymao/sqlglot/commit/c92888c6b49d2ba60ce789281535679fd93cd235) - **parser**: fix order of query modifier parsing for nested subqueries *(PR [#2851](https://github.com/tobymao/sqlglot/pull/2851) by [@georgesittas](https://github.com/georgesittas))* - [`7949a4f`](https://github.com/tobymao/sqlglot/commit/7949a4f295fc0a9f0becca9b6460f8517ec733f1) - **clickhouse**: ensure arraySum generation is preserved, add tests *(commit by [@georgesittas](https://github.com/georgesittas))* - [`17a6ac6`](https://github.com/tobymao/sqlglot/commit/17a6ac6f5fb96a42668842b093a823662b5850b8) - move comment in expr as alias next to the alias *(PR 
[#2853](https://github.com/tobymao/sqlglot/pull/2853) by [@georgesittas](https://github.com/georgesittas))* ### :recycle: Refactors - [`9e5ae50`](https://github.com/tobymao/sqlglot/commit/9e5ae50e02879cfb4915584df90f8dcfadbca321) - use flag instead of regex *(commit by [@tobymao](https://github.com/tobymao))* - [`5c13a1e`](https://github.com/tobymao/sqlglot/commit/5c13a1e8e2ede284d12920734cea8ff82ebaf054) - simplify merge without target transformation *(PR [#2854](https://github.com/tobymao/sqlglot/pull/2854) by [@georgesittas](https://github.com/georgesittas))* ### :wrench: Chores - [`5996a69`](https://github.com/tobymao/sqlglot/commit/5996a6949979dcfceee133f943a010ec4820e808) - **presto**: get rid of assert in ELEMENT_AT parser *(commit by [@georgesittas](https://github.com/georgesittas))* - [`f9a02ec`](https://github.com/tobymao/sqlglot/commit/f9a02ecc44f4d0139aae5edda922cc95d4a3e892) - fix table, column instantiation in schema normalization tests *(PR [#2817](https://github.com/tobymao/sqlglot/pull/2817) by [@georgesittas](https://github.com/georgesittas))* ## [v20.8.0] - 2024-01-08 ### :boom: BREAKING CHANGES - due to [`68e1214`](https://github.com/tobymao/sqlglot/commit/68e121462b2c3dc388f3ae0d1d392ee8afc63133) - column field typing *(commit by [@tobymao](https://github.com/tobymao))*: column field typing ### :sparkles: New Features - [`2d822f3`](https://github.com/tobymao/sqlglot/commit/2d822f3972bf0f77baaadb135a5e19c1bc0c4040) - improve support for Doris' TO_DATE, Oracle's SYSDATE *(PR [#2775](https://github.com/tobymao/sqlglot/pull/2775) by [@georgesittas](https://github.com/georgesittas))* - [`7187215`](https://github.com/tobymao/sqlglot/commit/71872159114b85324d08191b854ab8462a298742) - desc builder *(commit by [@tobymao](https://github.com/tobymao))* - [`ba62639`](https://github.com/tobymao/sqlglot/commit/ba62639aa6d81a062c867ebe20af64446b931b7d) - add support for CREATE FUNCTION (SQL) characteristics for MySQL and Databricks *(PR 
[#2777](https://github.com/tobymao/sqlglot/pull/2777) by [@viethungle-vt1401](https://github.com/viethungle-vt1401))* - :arrow_lower_right: *addresses issue [#1980](undefined) opened by [@xinglin-zhao](https://github.com/xinglin-zhao)* - [`963e2dc`](https://github.com/tobymao/sqlglot/commit/963e2dc9a4b699938d0477bc379e9d2da01818af) - **snowflake**: add support for SHOW COLUMNS *(PR [#2778](https://github.com/tobymao/sqlglot/pull/2778) by [@andrew-sha](https://github.com/andrew-sha))* - [`46c9733`](https://github.com/tobymao/sqlglot/commit/46c973309850d4e32b1a0f0594d7b143eb14d059) - **tsql**: round func closes [#2790](https://github.com/tobymao/sqlglot/pull/2790) *(commit by [@tobymao](https://github.com/tobymao))* - [`2dfb7e8`](https://github.com/tobymao/sqlglot/commit/2dfb7e806c52715d9e83d2201ed63974ff238ad3) - **optimizer**: add support for the UNPIVOT operator *(PR [#2771](https://github.com/tobymao/sqlglot/pull/2771) by [@georgesittas](https://github.com/georgesittas))* ### :bug: Bug Fixes - [`68e1214`](https://github.com/tobymao/sqlglot/commit/68e121462b2c3dc388f3ae0d1d392ee8afc63133) - column field typing *(commit by [@tobymao](https://github.com/tobymao))* - [`3f31706`](https://github.com/tobymao/sqlglot/commit/3f31706b913e53d13e45fe94b41ee115cc7bd5c5) - tsql exec command [#2772](https://github.com/tobymao/sqlglot/pull/2772) *(commit by [@tobymao](https://github.com/tobymao))* - [`f6cbadb`](https://github.com/tobymao/sqlglot/commit/f6cbadb7a293035720460f869dd1a6d48a707d04) - **snowflake**: add a couple of special fn types *(PR [#2774](https://github.com/tobymao/sqlglot/pull/2774) by [@georgesittas](https://github.com/georgesittas))* - [`0634f73`](https://github.com/tobymao/sqlglot/commit/0634f738d3a935cde3e7df1671c65e666c7a52b4) - **optimizer**: replace star with outer column list *(PR [#2776](https://github.com/tobymao/sqlglot/pull/2776) by [@georgesittas](https://github.com/georgesittas))* - 
[`d31ae0d`](https://github.com/tobymao/sqlglot/commit/d31ae0decb46678851744356c7b113f8c1c3e8c9) - allow string aliases closes [#2788](https://github.com/tobymao/sqlglot/pull/2788) *(commit by [@tobymao](https://github.com/tobymao))* - [`8f8f00e`](https://github.com/tobymao/sqlglot/commit/8f8f00ec66beb6dc3d90898ead29828eee8f5e32) - don't transform null ordering with positional orders closes [#2779](https://github.com/tobymao/sqlglot/pull/2779) *(commit by [@tobymao](https://github.com/tobymao))* - [`f85ce3b`](https://github.com/tobymao/sqlglot/commit/f85ce3b354366b2e206e6d2815f34a8e345d10ba) - **tsql**: gracefully handle complex formats in FORMAT *(PR [#2794](https://github.com/tobymao/sqlglot/pull/2794) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2787](undefined) opened by [@cpcloud](https://github.com/cpcloud)* - [`a2499f5`](https://github.com/tobymao/sqlglot/commit/a2499f591eeb7538db86abd8cc9341c8d91e325d) - **tsql**: generate correct TRIM syntax closes [#2786](https://github.com/tobymao/sqlglot/pull/2786) *(commit by [@georgesittas](https://github.com/georgesittas))* - [`59ecd2f`](https://github.com/tobymao/sqlglot/commit/59ecd2f17cac61b1ed7d206437d2fab4497e58fa) - **clickhouse**: allow transpilation of countIf, fix 2 arg variant parsing *(PR [#2795](https://github.com/tobymao/sqlglot/pull/2795) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2792](undefined) opened by [@cpcloud](https://github.com/cpcloud)* - [`3f7748b`](https://github.com/tobymao/sqlglot/commit/3f7748b08d3f6616d8ea7eac6ded4980b6507ddf) - don't transpile nulls last in window specs *(commit by [@tobymao](https://github.com/tobymao))* ## [v20.7.1] - 2024-01-04 ### :bug: Bug Fixes - [`9d94b9c`](https://github.com/tobymao/sqlglot/commit/9d94b9c77f5fbd22ad147789f23e971ec6bb3c72) - don't normalize schema if normalize is set to false *(PR [#2767](https://github.com/tobymao/sqlglot/pull/2767) by 
[@tobymao](https://github.com/tobymao))* ## [v20.6.0] - 2024-01-04 ### :boom: BREAKING CHANGES - due to [`4648c6a`](https://github.com/tobymao/sqlglot/commit/4648c6acbeab8d5155be3f8e53d11d8f00c33a2e) - set sample clause keyword(s) as class constant to enable transpilation *(PR [#2750](https://github.com/tobymao/sqlglot/pull/2750) by [@georgesittas](https://github.com/georgesittas))*: set sample clause keyword(s) as class constant to enable transpilation (#2750) - due to [`0b6bdc4`](https://github.com/tobymao/sqlglot/commit/0b6bdc4513aa417dae0a00565b56e78b544306e7) - improve transpilation of JSON value extraction *(PR [#2744](https://github.com/tobymao/sqlglot/pull/2744) by [@georgesittas](https://github.com/georgesittas))*: improve transpilation of JSON value extraction (#2744) - due to [`862b305`](https://github.com/tobymao/sqlglot/commit/862b305db47c0cea4a66b61211a3ed088e935ea5) - improve table sampling transpilation *(PR [#2761](https://github.com/tobymao/sqlglot/pull/2761) by [@georgesittas](https://github.com/georgesittas))*: improve table sampling transpilation (#2761) ### :sparkles: New Features - [`202f035`](https://github.com/tobymao/sqlglot/commit/202f035e88e78c15b34c9cc85c4396eab8da1d29) - clickhouse: add aggregate parsing *(PR [#2734](https://github.com/tobymao/sqlglot/pull/2734) by [@pkit](https://github.com/pkit))* - [`e772e26`](https://github.com/tobymao/sqlglot/commit/e772e262c551f6aa5bb726579253a55db5686da3) - guess the correct dialect in case we get an unknown one *(PR [#2753](https://github.com/tobymao/sqlglot/pull/2753) by [@georgesittas](https://github.com/georgesittas))* - [`7a07862`](https://github.com/tobymao/sqlglot/commit/7a07862dba744a969c98b2c1069531423673461d) - implement to_s method in Expression for verbose repr mode *(PR [#2756](https://github.com/tobymao/sqlglot/pull/2756) by [@georgesittas](https://github.com/georgesittas))* - [`4072184`](https://github.com/tobymao/sqlglot/commit/4072184cc498a509bceba2a3dfe12f43794273df) - improve 
transpilation of TIME/TIMESTAMP_FROM_PARTS *(PR [#2755](https://github.com/tobymao/sqlglot/pull/2755) by [@georgesittas](https://github.com/georgesittas))* - [`3bd811d`](https://github.com/tobymao/sqlglot/commit/3bd811d67e7001046812f2a8590bd07e23b81c88) - **optimizer**: allow star expansion to be turned off *(PR [#2762](https://github.com/tobymao/sqlglot/pull/2762) by [@georgesittas](https://github.com/georgesittas))* - [`c246285`](https://github.com/tobymao/sqlglot/commit/c24628531bbd587473e0a43ded7ba8b5e4f35cd8) - bigquery unix_date closes [#2758](https://github.com/tobymao/sqlglot/pull/2758) *(commit by [@tobymao](https://github.com/tobymao))* - [`a2abbc7`](https://github.com/tobymao/sqlglot/commit/a2abbc773fb330e669c81abc115a81e1055a060f) - improve transpilation of LAST_DAY *(PR [#2766](https://github.com/tobymao/sqlglot/pull/2766) by [@georgesittas](https://github.com/georgesittas))* - [`7e7ac65`](https://github.com/tobymao/sqlglot/commit/7e7ac65bb67ef5a45ef487859a9cff4f2d0fc07a) - **snowflake**: add support for OBJECT_CONSTRUCT_KEEP_NULL *(PR [#2769](https://github.com/tobymao/sqlglot/pull/2769) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2768](https://github.com/tobymao/sqlglot/issues/2768) opened by [@tekumara](https://github.com/tekumara)* ### :bug: Bug Fixes - [`ed972f9`](https://github.com/tobymao/sqlglot/commit/ed972f9f50fcdc612b7770739a249a918c4d4011) - alter table rename should not qualify with db in postgres *(PR [#2736](https://github.com/tobymao/sqlglot/pull/2736) by [@z3z1ma](https://github.com/z3z1ma))* - [`1ebfb36`](https://github.com/tobymao/sqlglot/commit/1ebfb3688975e420a70bac10c49ad127446c4c65) - else interval *(commit by [@tobymao](https://github.com/tobymao))* - [`a43174f`](https://github.com/tobymao/sqlglot/commit/a43174f8ebdb4a51ad12d7dc9c332372a5a0bd84) - generate `CROSS JOIN` instead of comma in `explode_to_unnest` transformation *(PR [#2739](https://github.com/tobymao/sqlglot/pull/2739) by
[@cpcloud](https://github.com/cpcloud))* - :arrow_lower_right: *fixes issue [#2735](https://github.com/tobymao/sqlglot/issues/2735) opened by [@cpcloud](https://github.com/cpcloud)* - [`e543c55`](https://github.com/tobymao/sqlglot/commit/e543c558a3efea960028bf4c9864cad48e616e82) - interval is null *(commit by [@tobymao](https://github.com/tobymao))* - [`fb3188f`](https://github.com/tobymao/sqlglot/commit/fb3188f43bfdef2fb315b8b1280aaa207bd7888a) - lineage closes [#2742](https://github.com/tobymao/sqlglot/pull/2742) *(commit by [@tobymao](https://github.com/tobymao))* - [`b608b2d`](https://github.com/tobymao/sqlglot/commit/b608b2d944934d9da4a2cb373bdf69322f25041a) - **duckdb**: percentile_cont closes [#2741](https://github.com/tobymao/sqlglot/pull/2741) *(commit by [@tobymao](https://github.com/tobymao))* - [`0b6bdc4`](https://github.com/tobymao/sqlglot/commit/0b6bdc4513aa417dae0a00565b56e78b544306e7) - improve transpilation of JSON value extraction *(PR [#2744](https://github.com/tobymao/sqlglot/pull/2744) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2738](https://github.com/tobymao/sqlglot/issues/2738) opened by [@tekumara](https://github.com/tekumara)* - [`33d6e5f`](https://github.com/tobymao/sqlglot/commit/33d6e5f2636451ffcdd914c100a446246d8031df) - **bigquery**: enable transpilation of single-argument TIME func *(PR [#2752](https://github.com/tobymao/sqlglot/pull/2752) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2749](https://github.com/tobymao/sqlglot/issues/2749) opened by [@jherrmannNetfonds](https://github.com/jherrmannNetfonds)* - [`72f8cfa`](https://github.com/tobymao/sqlglot/commit/72f8cfa2c156efb586bc91d20efd8d0cf9c18735) - **snowflake**: parse two argument version of TIMESTAMP_FROM_PARTS *(PR [#2754](https://github.com/tobymao/sqlglot/pull/2754) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2751](https://github.com/tobymao/sqlglot/issues/2751) opened by [@notgerry](https://github.com/notgerry)* -
[`2a94f2b`](https://github.com/tobymao/sqlglot/commit/2a94f2ba0d93ee1454a2d19cc8577e211fd5cbe0) - **bigquery**: fix parsing of COUNTIF *(PR [#2765](https://github.com/tobymao/sqlglot/pull/2765) by [@giovannipcarvalho](https://github.com/giovannipcarvalho))* - :arrow_lower_right: *fixes issue [#2764](https://github.com/tobymao/sqlglot/issues/2764) opened by [@giovannipcarvalho](https://github.com/giovannipcarvalho)* - [`862b305`](https://github.com/tobymao/sqlglot/commit/862b305db47c0cea4a66b61211a3ed088e935ea5) - improve table sampling transpilation *(PR [#2761](https://github.com/tobymao/sqlglot/pull/2761) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *fixes issue [#2757](https://github.com/tobymao/sqlglot/issues/2757) opened by [@tekumara](https://github.com/tekumara)* ### :recycle: Refactors - [`4648c6a`](https://github.com/tobymao/sqlglot/commit/4648c6acbeab8d5155be3f8e53d11d8f00c33a2e) - set sample clause keyword(s) as class constant to enable transpilation *(PR [#2750](https://github.com/tobymao/sqlglot/pull/2750) by [@georgesittas](https://github.com/georgesittas))* - :arrow_lower_right: *addresses issue [#2747](https://github.com/tobymao/sqlglot/issues/2747) opened by [@tekumara](https://github.com/tekumara)* [v11.6.3]: https://github.com/tobymao/sqlglot/compare/v11.6.2...v11.6.3 [v11.7.0]: https://github.com/tobymao/sqlglot/compare/v11.6.3...v11.7.0 [v12.0.0]: https://github.com/tobymao/sqlglot/compare/v11.7.1...v12.0.0 [v12.1.0]: https://github.com/tobymao/sqlglot/compare/v12.0.0...v12.1.0 [v12.2.0]: https://github.com/tobymao/sqlglot/compare/v12.1.0...v12.2.0 [v12.4.0]: https://github.com/tobymao/sqlglot/compare/v12.3.0...v12.4.0 [v13.0.0]: https://github.com/tobymao/sqlglot/compare/v12.4.0...v13.0.0 [v13.0.1]: https://github.com/tobymao/sqlglot/compare/v13.0.0...v13.0.1 [v13.0.2]: https://github.com/tobymao/sqlglot/compare/v13.0.1...v13.0.2 [v13.1.0]: https://github.com/tobymao/sqlglot/compare/v13.0.2...v13.1.0 [v13.2.0]: https://github.com/tobymao/sqlglot/compare/v13.1.0...v13.2.0 [v13.2.1]:
https://github.com/tobymao/sqlglot/compare/v13.2.0...v13.2.1 [v13.2.2]: https://github.com/tobymao/sqlglot/compare/v13.2.1...v13.2.2 [v13.3.0]: https://github.com/tobymao/sqlglot/compare/v13.2.2...v13.3.0 [v13.3.1]: https://github.com/tobymao/sqlglot/compare/v13.3.0...v13.3.1 [v14.0.0]: https://github.com/tobymao/sqlglot/compare/v13.3.1...v14.0.0 [v14.1.0]: https://github.com/tobymao/sqlglot/compare/show...v14.1.0 [v14.1.1]: https://github.com/tobymao/sqlglot/compare/v14.1.0...v14.1.1 [v15.0.0]: https://github.com/tobymao/sqlglot/compare/v14.1.1...v15.0.0 [v15.1.0]: https://github.com/tobymao/sqlglot/compare/v15.0.0...v15.1.0 [v15.2.0]: https://github.com/tobymao/sqlglot/compare/v15.1.0...v15.2.0 [v15.3.0]: https://github.com/tobymao/sqlglot/compare/v15.2.0...v15.3.0 [v16.0.0]: https://github.com/tobymao/sqlglot/compare/v15.2.0...v16.0.0 [v16.1.0]: https://github.com/tobymao/sqlglot/compare/v16.0.0...v16.1.0 [v16.1.1]: https://github.com/tobymao/sqlglot/compare/v16.1.0...v16.1.1 [v16.1.3]: https://github.com/tobymao/sqlglot/compare/v16.1.2...v16.1.3 [v16.1.4]: https://github.com/tobymao/sqlglot/compare/v16.1.3...v16.1.4 [v16.2.0]: https://github.com/tobymao/sqlglot/compare/v16.1.4...v16.2.0 [v16.2.1]: https://github.com/tobymao/sqlglot/compare/v16.2.0...v16.2.1 [v16.3.0]: https://github.com/tobymao/sqlglot/compare/v16.2.1...v16.3.0 [v16.3.1]: https://github.com/tobymao/sqlglot/compare/v16.3.0...v16.3.1 [v16.4.0]: https://github.com/tobymao/sqlglot/compare/v16.3.1...v16.4.0 [v16.4.1]: https://github.com/tobymao/sqlglot/compare/v16.4.0...v16.4.1 [v16.4.2]: https://github.com/tobymao/sqlglot/compare/v16.4.1...v16.4.2 [v16.5.0]: https://github.com/tobymao/sqlglot/compare/v16.4.2...v16.5.0 [v16.6.0]: https://github.com/tobymao/sqlglot/compare/v16.5.0...v16.6.0 [v16.7.0]: https://github.com/tobymao/sqlglot/compare/v16.6.0...v16.7.0 [v16.7.1]: https://github.com/tobymao/sqlglot/compare/v16.7.0...v16.7.1 [v16.7.2]: 
https://github.com/tobymao/sqlglot/compare/v16.7.1...v16.7.2 [v16.7.4]: https://github.com/tobymao/sqlglot/compare/v16.7.3...v16.7.4 [v16.7.6]: https://github.com/tobymao/sqlglot/compare/v16.7.5...v16.7.6 [v16.7.7]: https://github.com/tobymao/sqlglot/compare/v16.7.6...v16.7.7 [v16.8.0]: https://github.com/tobymao/sqlglot/compare/v16.7.7...v16.8.0 [v16.8.1]: https://github.com/tobymao/sqlglot/compare/v16.8.0...v16.8.1 [v17.0.0]: https://github.com/tobymao/sqlglot/compare/v16.8.1...v17.0.0 [v17.1.0]: https://github.com/tobymao/sqlglot/compare/v17.0.0...v17.1.0 [v17.2.0]: https://github.com/tobymao/sqlglot/compare/v17.1.0...v17.2.0 [v17.3.0]: https://github.com/tobymao/sqlglot/compare/v17.2.0...v17.3.0 [v17.4.0]: https://github.com/tobymao/sqlglot/compare/v17.3.0...v17.4.0 [v17.4.1]: https://github.com/tobymao/sqlglot/compare/v17.4.0...v17.4.1 [v17.5.0]: https://github.com/tobymao/sqlglot/compare/v17.4.1...v17.5.0 [v17.6.0]: https://github.com/tobymao/sqlglot/compare/v17.5.0...v17.6.0 [v17.6.1]: https://github.com/tobymao/sqlglot/compare/v17.6.0...v17.6.1 [v17.7.0]: https://github.com/tobymao/sqlglot/compare/v17.6.1...v17.7.0 [v17.8.0]: https://github.com/tobymao/sqlglot/compare/v17.7.0...v17.8.0 [v17.8.1]: https://github.com/tobymao/sqlglot/compare/v17.8.0...v17.8.1 [v17.8.2]: https://github.com/tobymao/sqlglot/compare/v17.8.1...v17.8.2 [v17.8.3]: https://github.com/tobymao/sqlglot/compare/v17.8.2...v17.8.3 [v17.8.4]: https://github.com/tobymao/sqlglot/compare/v17.8.3...v17.8.4 [v17.8.5]: https://github.com/tobymao/sqlglot/compare/v17.8.4...v17.8.5 [v17.9.0]: https://github.com/tobymao/sqlglot/compare/v17.8.6...v17.9.0 [v17.9.1]: https://github.com/tobymao/sqlglot/compare/v17.9.0...v17.9.1 [v17.10.0]: https://github.com/tobymao/sqlglot/compare/v17.9.1...v17.10.0 [v17.10.1]: https://github.com/tobymao/sqlglot/compare/v17.10.0...v17.10.1 [v17.10.2]: https://github.com/tobymao/sqlglot/compare/v17.10.1...v17.10.2 [v17.12.0]: 
https://github.com/tobymao/sqlglot/compare/v17.11.0...v17.12.0 [v17.13.0]: https://github.com/tobymao/sqlglot/compare/v17.12.0...v17.13.0 [v17.14.0]: https://github.com/tobymao/sqlglot/compare/v17.13.0...v17.14.0 [v17.14.1]: https://github.com/tobymao/sqlglot/compare/v17.14.0...v17.14.1 [v17.14.2]: https://github.com/tobymao/sqlglot/compare/v17.14.1...v17.14.2 [v17.15.0]: https://github.com/tobymao/sqlglot/compare/v17.14.2...v17.15.0 [v17.15.1]: https://github.com/tobymao/sqlglot/compare/v17.15.0...v17.15.1 [v17.16.0]: https://github.com/tobymao/sqlglot/compare/v17.15.1...v17.16.0 [v17.16.1]: https://github.com/tobymao/sqlglot/compare/v17.16.0...v17.16.1 [v17.16.2]: https://github.com/tobymao/sqlglot/compare/v17.16.1...v17.16.2 [v18.0.0]: https://github.com/tobymao/sqlglot/compare/v17.16.2...v18.0.0 [v18.0.1]: https://github.com/tobymao/sqlglot/compare/v18.0.0...v18.0.1 [v18.1.0]: https://github.com/tobymao/sqlglot/compare/v18.0.1...v18.1.0 [v18.2.0]: https://github.com/tobymao/sqlglot/compare/v18.1.0...v18.2.0 [v18.3.0]: https://github.com/tobymao/sqlglot/compare/v18.2.0...v18.3.0 [v18.4.0]: https://github.com/tobymao/sqlglot/compare/v18.3.0...v18.4.0 [v18.4.1]: https://github.com/tobymao/sqlglot/compare/v18.4.0...v18.4.1 [v18.5.0]: https://github.com/tobymao/sqlglot/compare/v18.4.1...v18.5.0 [v18.5.1]: https://github.com/tobymao/sqlglot/compare/v18.5.0...v18.5.1 [v18.6.0]: https://github.com/tobymao/sqlglot/compare/v18.5.1...v18.6.0 [v18.7.0]: https://github.com/tobymao/sqlglot/compare/v18.6.0...v18.7.0 [v18.8.0]: https://github.com/tobymao/sqlglot/compare/v18.7.0...v18.8.0 [v18.9.0]: https://github.com/tobymao/sqlglot/compare/v18.8.0...v18.9.0 [v18.10.0]: https://github.com/tobymao/sqlglot/compare/v18.9.0...v18.10.0 [v18.10.1]: https://github.com/tobymao/sqlglot/compare/v18.10.0...v18.10.1 [v18.11.0]: https://github.com/tobymao/sqlglot/compare/v18.10.1...v18.11.0 [v18.11.1]: https://github.com/tobymao/sqlglot/compare/v18.11.0...v18.11.1 [v18.11.2]: 
https://github.com/tobymao/sqlglot/compare/v18.11.1...v18.11.2 [v18.11.3]: https://github.com/tobymao/sqlglot/compare/v18.11.2...v18.11.3 [v18.11.4]: https://github.com/tobymao/sqlglot/compare/v18.11.3...v18.11.4 [v18.11.5]: https://github.com/tobymao/sqlglot/compare/v18.11.4...v18.11.5 [v18.11.6]: https://github.com/tobymao/sqlglot/compare/v18.11.5...v18.11.6 [v18.12.0]: https://github.com/tobymao/sqlglot/compare/v18.11.6...v18.12.0 [v18.13.0]: https://github.com/tobymao/sqlglot/compare/v18.12.0...v18.13.0 [v18.14.0]: https://github.com/tobymao/sqlglot/compare/v18.13.0...v18.14.0 [v18.15.0]: https://github.com/tobymao/sqlglot/compare/v18.14.0...v18.15.0 [v18.15.1]: https://github.com/tobymao/sqlglot/compare/v18.15.0...v18.15.1 [v18.16.0]: https://github.com/tobymao/sqlglot/compare/v18.15.1...v18.16.0 [v18.16.1]: https://github.com/tobymao/sqlglot/compare/v18.16.0...v18.16.1 [v18.17.0]: https://github.com/tobymao/sqlglot/compare/v18.16.1...v18.17.0 [v19.0.0]: https://github.com/tobymao/sqlglot/compare/v18.17.0...v19.0.0 [v19.0.1]: https://github.com/tobymao/sqlglot/compare/v19.0.0...v19.0.1 [v19.0.2]: https://github.com/tobymao/sqlglot/compare/v19.0.1...v19.0.2 [v19.0.3]: https://github.com/tobymao/sqlglot/compare/v19.0.2...v19.0.3 [v19.1.0]: https://github.com/tobymao/sqlglot/compare/v19.0.3...v19.1.0 [v19.1.1]: https://github.com/tobymao/sqlglot/compare/v19.1.0...v19.1.1 [v19.1.2]: https://github.com/tobymao/sqlglot/compare/v19.1.1...v19.1.2 [v19.1.3]: https://github.com/tobymao/sqlglot/compare/v19.1.2...v19.1.3 [v19.2.0]: https://github.com/tobymao/sqlglot/compare/v19.1.3...v19.2.0 [v19.3.0]: https://github.com/tobymao/sqlglot/compare/v19.2.0...v19.3.0 [v19.3.1]: https://github.com/tobymao/sqlglot/compare/v19.3.0...v19.3.1 [v19.4.0]: https://github.com/tobymao/sqlglot/compare/v19.3.1...v19.4.0 [v19.5.0]: https://github.com/tobymao/sqlglot/compare/v19.4.0...v19.5.0 [v19.5.1]: https://github.com/tobymao/sqlglot/compare/v19.5.0...v19.5.1 [v19.6.0]: 
https://github.com/tobymao/sqlglot/compare/v19.5.1...v19.6.0 [v19.7.0]: https://github.com/tobymao/sqlglot/compare/v19.6.0...v19.7.0 [v19.8.0]: https://github.com/tobymao/sqlglot/compare/v19.7.0...v19.8.0 [v19.8.1]: https://github.com/tobymao/sqlglot/compare/v19.8.0...v19.8.1 [v19.8.2]: https://github.com/tobymao/sqlglot/compare/v19.8.1...v19.8.2 [v19.8.3]: https://github.com/tobymao/sqlglot/compare/v19.8.2...v19.8.3 [v19.9.0]: https://github.com/tobymao/sqlglot/compare/v19.8.3...v19.9.0 [v20.0.0]: https://github.com/tobymao/sqlglot/compare/v19.9.0...v20.0.0 [v20.1.0]: https://github.com/tobymao/sqlglot/compare/v20.0.0...v20.1.0 [v20.2.0]: https://github.com/tobymao/sqlglot/compare/v20.1.0...v20.2.0 [v20.5.0]: https://github.com/tobymao/sqlglot/compare/v20.4.0...v20.5.0 [v20.6.0]: https://github.com/tobymao/sqlglot/compare/v20.5.0...v20.6.0 [v20.7.1]: https://github.com/tobymao/sqlglot/compare/v20.6.0...v20.7.1 [v20.8.0]: https://github.com/tobymao/sqlglot/compare/v20.7.1...v20.8.0 [v20.9.0]: https://github.com/tobymao/sqlglot/compare/v20.8.0...v20.9.0 [v20.10.0]: https://github.com/tobymao/sqlglot/compare/v20.9.0...v20.10.0 [v20.11.0]: https://github.com/tobymao/sqlglot/compare/v20.10.0...v20.11.0 [v21.0.0]: https://github.com/tobymao/sqlglot/compare/v20.11.0...v21.0.0 [v21.0.1]: https://github.com/tobymao/sqlglot/compare/v21.0.0...v21.0.1 [v21.0.2]: https://github.com/tobymao/sqlglot/compare/v21.0.1...v21.0.2 [v21.1.0]: https://github.com/tobymao/sqlglot/compare/v21.0.2...v21.1.0 [v21.1.1]: https://github.com/tobymao/sqlglot/compare/v21.1.0...v21.1.1 [v21.1.2]: https://github.com/tobymao/sqlglot/compare/v21.1.1...v21.1.2 [v21.2.0]: https://github.com/tobymao/sqlglot/compare/v21.1.2...v21.2.0 [v21.2.1]: https://github.com/tobymao/sqlglot/compare/v21.2.0...v21.2.1 [v22.0.0]: https://github.com/tobymao/sqlglot/compare/v21.2.1...v22.0.0 [v22.0.1]: https://github.com/tobymao/sqlglot/compare/v22.0.0...v22.0.1 [v22.0.2]: 
https://github.com/tobymao/sqlglot/compare/v22.0.1...v22.0.2 [v22.1.0]: https://github.com/tobymao/sqlglot/compare/v22.0.2...v22.1.0 [v22.1.1]: https://github.com/tobymao/sqlglot/compare/v22.1.0...v22.1.1 [v22.2.0]: https://github.com/tobymao/sqlglot/compare/v22.1.1...v22.2.0 [v22.2.1]: https://github.com/tobymao/sqlglot/compare/v22.2.0...v22.2.1 [v22.3.0]: https://github.com/tobymao/sqlglot/compare/v22.2.1...v22.3.0 [v22.3.1]: https://github.com/tobymao/sqlglot/compare/v22.3.0...v22.3.1 [v22.4.0]: https://github.com/tobymao/sqlglot/compare/v22.3.1...v22.4.0 [v22.5.0]: https://github.com/tobymao/sqlglot/compare/v22.4.0...v22.5.0 [v23.0.0]: https://github.com/tobymao/sqlglot/compare/v22.5.0...v23.0.0 [v23.0.1]: https://github.com/tobymao/sqlglot/compare/v23.0.0...v23.0.1 [v23.0.2]: https://github.com/tobymao/sqlglot/compare/v23.0.1...v23.0.2 [v23.0.3]: https://github.com/tobymao/sqlglot/compare/v23.0.2...v23.0.3 [v23.0.4]: https://github.com/tobymao/sqlglot/compare/v23.0.3...v23.0.4 [v23.0.5]: https://github.com/tobymao/sqlglot/compare/v23.0.4...v23.0.5 [v23.1.0]: https://github.com/tobymao/sqlglot/compare/v23.0.5...v23.1.0 [v23.3.0]: https://github.com/tobymao/sqlglot/compare/v23.2.0...v23.3.0 [v23.4.0]: https://github.com/tobymao/sqlglot/compare/v23.3.0...v23.4.0 [v23.6.0]: https://github.com/tobymao/sqlglot/compare/v23.5.0...v23.6.0 [v23.6.4]: https://github.com/tobymao/sqlglot/compare/v23.6.3...v23.6.4 [v23.7.0]: https://github.com/tobymao/sqlglot/compare/v23.6.4...v23.7.0 [v23.8.0]: https://github.com/tobymao/sqlglot/compare/v23.7.0...v23.8.0 [v23.8.1]: https://github.com/tobymao/sqlglot/compare/v23.8.0...v23.8.1 [v23.8.2]: https://github.com/tobymao/sqlglot/compare/v23.8.1...v23.8.2 [v23.9.0]: https://github.com/tobymao/sqlglot/compare/v23.8.2...v23.9.0 [v23.10.0]: https://github.com/tobymao/sqlglot/compare/v23.9.0...v23.10.0 [v23.11.0]: https://github.com/tobymao/sqlglot/compare/v23.10.0...v23.11.0 [v23.11.1]: 
https://github.com/tobymao/sqlglot/compare/v23.11.0...v23.11.1 [v23.11.2]: https://github.com/tobymao/sqlglot/compare/v23.11.1...v23.11.2 [v23.12.0]: https://github.com/tobymao/sqlglot/compare/v23.11.2...v23.12.0 [v23.12.1]: https://github.com/tobymao/sqlglot/compare/v23.12.0...v23.12.1 [v23.12.2]: https://github.com/tobymao/sqlglot/compare/v23.12.1...v23.12.2 [v23.13.0]: https://github.com/tobymao/sqlglot/compare/v23.12.2...v23.13.0 [v23.13.1]: https://github.com/tobymao/sqlglot/compare/v23.13.0...v23.13.1 [v23.13.2]: https://github.com/tobymao/sqlglot/compare/v23.13.1...v23.13.2 [v23.13.3]: https://github.com/tobymao/sqlglot/compare/v23.13.2...v23.13.3 [v23.13.4]: https://github.com/tobymao/sqlglot/compare/v23.13.3...v23.13.4 [v23.13.5]: https://github.com/tobymao/sqlglot/compare/v23.13.4...v23.13.5 [v23.13.6]: https://github.com/tobymao/sqlglot/compare/v23.13.5...v23.13.6 [v23.13.7]: https://github.com/tobymao/sqlglot/compare/v23.13.6...v23.13.7 [v23.14.0]: https://github.com/tobymao/sqlglot/compare/v23.13.7...v23.14.0 [v23.15.0]: https://github.com/tobymao/sqlglot/compare/v23.14.0...v23.15.0 [v23.15.1]: https://github.com/tobymao/sqlglot/compare/v23.15.0...v23.15.1 [v23.15.2]: https://github.com/tobymao/sqlglot/compare/v23.15.1...v23.15.2 [v23.15.3]: https://github.com/tobymao/sqlglot/compare/v23.15.2...v23.15.3 [v23.15.6]: https://github.com/tobymao/sqlglot/compare/v23.15.5...v23.15.6 [v23.15.7]: https://github.com/tobymao/sqlglot/compare/v23.15.6...v23.15.7 [v23.15.8]: https://github.com/tobymao/sqlglot/compare/v23.15.7...v23.15.8 [v23.15.9]: https://github.com/tobymao/sqlglot/compare/v23.15.8...v23.15.9 [v23.15.10]: https://github.com/tobymao/sqlglot/compare/v23.15.9...v23.15.10 [v23.16.0]: https://github.com/tobymao/sqlglot/compare/v23.15.10...v23.16.0 [v23.17.0]: https://github.com/tobymao/sqlglot/compare/v23.16.0...v23.17.0 [v24.0.0]: https://github.com/tobymao/sqlglot/compare/v23.17.0...v24.0.0 [v24.0.1]: 
https://github.com/tobymao/sqlglot/compare/v24.0.0...v24.0.1 [v24.0.2]: https://github.com/tobymao/sqlglot/compare/v24.0.1...v24.0.2 [v24.0.3]: https://github.com/tobymao/sqlglot/compare/v24.0.2...v24.0.3 [v24.1.0]: https://github.com/tobymao/sqlglot/compare/v24.0.3...v24.1.0 [v24.1.1]: https://github.com/tobymao/sqlglot/compare/v24.1.0...v24.1.1 [v24.1.2]: https://github.com/tobymao/sqlglot/compare/v24.1.1...v24.1.2 [v25.0.0]: https://github.com/tobymao/sqlglot/compare/v24.1.2...v25.0.0 [v25.0.2]: https://github.com/tobymao/sqlglot/compare/v25.0.1...v25.0.2 [v25.0.3]: https://github.com/tobymao/sqlglot/compare/v25.0.2...v25.0.3 [v25.1.0]: https://github.com/tobymao/sqlglot/compare/v25.0.3...v25.1.0 [v25.2.0]: https://github.com/tobymao/sqlglot/compare/v25.1.0...v25.2.0 [v25.3.0]: https://github.com/tobymao/sqlglot/compare/v25.2.0...v25.3.0 [v25.3.1]: https://github.com/tobymao/sqlglot/compare/v25.3.0...v25.3.1 [v25.3.2]: https://github.com/tobymao/sqlglot/compare/v25.3.1...v25.3.2 [v25.3.3]: https://github.com/tobymao/sqlglot/compare/v25.3.2...v25.3.3 [v25.4.0]: https://github.com/tobymao/sqlglot/compare/v25.3.3...v25.4.0 [v25.4.1]: https://github.com/tobymao/sqlglot/compare/v25.4.0...v25.4.1 [v25.5.0]: https://github.com/tobymao/sqlglot/compare/v25.4.1...v25.5.0 [v25.5.1]: https://github.com/tobymao/sqlglot/compare/v25.5.0...v25.5.1 [v25.6.0]: https://github.com/tobymao/sqlglot/compare/v25.5.1...v25.6.0 [v25.6.1]: https://github.com/tobymao/sqlglot/compare/v25.6.0...v25.6.1 [v25.7.0]: https://github.com/tobymao/sqlglot/compare/v25.6.1...v25.7.0 [v25.7.1]: https://github.com/tobymao/sqlglot/compare/v25.7.0...v25.7.1 [v25.8.0]: https://github.com/tobymao/sqlglot/compare/v25.7.1...v25.8.0 [v25.8.1]: https://github.com/tobymao/sqlglot/compare/v25.8.0...v25.8.1 [v25.9.0]: https://github.com/tobymao/sqlglot/compare/v25.8.1...v25.9.0 [v25.10.0]: https://github.com/tobymao/sqlglot/compare/v25.9.0...v25.10.0 [v25.11.0]: 
https://github.com/tobymao/sqlglot/compare/v25.10.0...v25.11.0 [v25.11.1]: https://github.com/tobymao/sqlglot/compare/v25.11.0...v25.11.1 [v25.11.2]: https://github.com/tobymao/sqlglot/compare/v25.11.1...v25.11.2 [v25.11.3]: https://github.com/tobymao/sqlglot/compare/v25.11.2...v25.11.3 [v25.12.0]: https://github.com/tobymao/sqlglot/compare/v25.11.3...v25.12.0 [v25.13.0]: https://github.com/tobymao/sqlglot/compare/v25.12.0...v25.13.0 [v25.14.0]: https://github.com/tobymao/sqlglot/compare/v25.13.0...v25.14.0 [v25.15.0]: https://github.com/tobymao/sqlglot/compare/v25.14.0...v25.15.0 [v25.16.0]: https://github.com/tobymao/sqlglot/compare/v25.15.0...v25.16.0 [v25.16.1]: https://github.com/tobymao/sqlglot/compare/v25.16.0...v25.16.1 [v25.17.0]: https://github.com/tobymao/sqlglot/compare/v25.16.1...v25.17.0 [v25.18.0]: https://github.com/tobymao/sqlglot/compare/v25.17.0...v25.18.0 [v25.19.0]: https://github.com/tobymao/sqlglot/compare/v25.18.0...v25.19.0 [v25.20.0]: https://github.com/tobymao/sqlglot/compare/v25.19.0...v25.20.0 [v25.20.1]: https://github.com/tobymao/sqlglot/compare/v25.20.0...v25.20.1 [v25.21.0]: https://github.com/tobymao/sqlglot/compare/v25.20.1...v25.21.0 [v25.21.1]: https://github.com/tobymao/sqlglot/compare/v25.21.0...v25.21.1 [v25.21.2]: https://github.com/tobymao/sqlglot/compare/v25.21.1...v25.21.2 [v25.21.3]: https://github.com/tobymao/sqlglot/compare/v25.21.2...v25.21.3 [v25.22.0]: https://github.com/tobymao/sqlglot/compare/v25.21.3...v25.22.0 [v25.23.0]: https://github.com/tobymao/sqlglot/compare/v25.20.2...v25.23.0 [v25.23.1]: https://github.com/tobymao/sqlglot/compare/v25.23.0...v25.23.1 [v25.23.2]: https://github.com/tobymao/sqlglot/compare/v25.23.1...v25.23.2 [v25.24.0]: https://github.com/tobymao/sqlglot/compare/v25.23.2...v25.24.0 [v25.24.1]: https://github.com/tobymao/sqlglot/compare/v25.24.0...v25.24.1 [v25.24.2]: https://github.com/tobymao/sqlglot/compare/v25.24.1...v25.24.2 [v25.24.3]: 
https://github.com/tobymao/sqlglot/compare/v25.24.2...v25.24.3 [v25.24.4]: https://github.com/tobymao/sqlglot/compare/v25.24.3...v25.24.4 [v25.24.5]: https://github.com/tobymao/sqlglot/compare/v25.24.4...v25.24.5 [v25.25.0]: https://github.com/tobymao/sqlglot/compare/v25.24.5...v25.25.0 [v25.25.1]: https://github.com/tobymao/sqlglot/compare/v25.25.0...v25.25.1 [v25.26.0]: https://github.com/tobymao/sqlglot/compare/v25.25.1...v25.26.0 [v25.27.0]: https://github.com/tobymao/sqlglot/compare/v25.26.0...v25.27.0 [v25.28.0]: https://github.com/tobymao/sqlglot/compare/v25.27.0...v25.28.0 [v25.29.0]: https://github.com/tobymao/sqlglot/compare/v25.28.0...v25.29.0 [v25.30.0]: https://github.com/tobymao/sqlglot/compare/v25.29.0...v25.30.0 [v25.31.0]: https://github.com/tobymao/sqlglot/compare/v25.30.0...v25.31.0 [v25.31.1]: https://github.com/tobymao/sqlglot/compare/v25.31.0...v25.31.1 [v25.31.2]: https://github.com/tobymao/sqlglot/compare/v25.31.1...v25.31.2 [v25.31.3]: https://github.com/tobymao/sqlglot/compare/v25.31.2...v25.31.3 [v25.31.4]: https://github.com/tobymao/sqlglot/compare/v25.31.3...v25.31.4 [v25.32.0]: https://github.com/tobymao/sqlglot/compare/v25.31.4...v25.32.0 [v25.32.1]: https://github.com/tobymao/sqlglot/compare/v25.32.0...v25.32.1 [v25.33.0]: https://github.com/tobymao/sqlglot/compare/v25.32.1...v25.33.0 [v25.34.0]: https://github.com/tobymao/sqlglot/compare/v25.33.0...v25.34.0 [v25.34.1]: https://github.com/tobymao/sqlglot/compare/v25.34.0...v25.34.1 [v26.0.0]: https://github.com/tobymao/sqlglot/compare/v25.34.1...v26.0.0 [v26.0.1]: https://github.com/tobymao/sqlglot/compare/v26.0.0...v26.0.1 [v26.1.0]: https://github.com/tobymao/sqlglot/compare/v26.0.1...v26.1.0 [v26.1.1]: https://github.com/tobymao/sqlglot/compare/v26.1.0...v26.1.1 [v26.1.2]: https://github.com/tobymao/sqlglot/compare/v26.1.1...v26.1.2 [v26.1.3]: https://github.com/tobymao/sqlglot/compare/v26.1.2...v26.1.3 [v26.2.0]: https://github.com/tobymao/sqlglot/compare/v26.1.3...v26.2.0 
[v26.2.1]: https://github.com/tobymao/sqlglot/compare/v26.2.0...v26.2.1 [v26.3.0]: https://github.com/tobymao/sqlglot/compare/v26.2.1...v26.3.0 [v26.3.1]: https://github.com/tobymao/sqlglot/compare/v26.3.0...v26.3.1 [v26.3.2]: https://github.com/tobymao/sqlglot/compare/v26.3.1...v26.3.2 [v26.3.3]: https://github.com/tobymao/sqlglot/compare/v26.3.2...v26.3.3 [v26.3.4]: https://github.com/tobymao/sqlglot/compare/v26.3.3...v26.3.4 [v26.3.5]: https://github.com/tobymao/sqlglot/compare/v26.3.4...v26.3.5 [v26.3.6]: https://github.com/tobymao/sqlglot/compare/v26.3.5...v26.3.6 [v26.3.7]: https://github.com/tobymao/sqlglot/compare/v26.3.6...v26.3.7 [v26.3.8]: https://github.com/tobymao/sqlglot/compare/v26.3.7...v26.3.8 [v26.3.9]: https://github.com/tobymao/sqlglot/compare/v26.3.8...v26.3.9 [v26.4.0]: https://github.com/tobymao/sqlglot/compare/v26.3.9...v26.4.0 [v26.4.1]: https://github.com/tobymao/sqlglot/compare/v26.4.0...v26.4.1 [v26.5.0]: https://github.com/tobymao/sqlglot/compare/v26.4.1...v26.5.0 [v26.6.0]: https://github.com/tobymao/sqlglot/compare/v26.5.0...v26.6.0 [v26.7.0]: https://github.com/tobymao/sqlglot/compare/v26.6.0...v26.7.0 [v26.8.0]: https://github.com/tobymao/sqlglot/compare/v26.7.0...v26.8.0 [v26.9.0]: https://github.com/tobymao/sqlglot/compare/v26.8.0...v26.9.0 [v26.10.0]: https://github.com/tobymao/sqlglot/compare/v26.9.0...v26.10.0 [v26.10.1]: https://github.com/tobymao/sqlglot/compare/v26.10.0...v26.10.1 [v26.11.0]: https://github.com/tobymao/sqlglot/compare/v26.10.1...v26.11.0 [v26.11.1]: https://github.com/tobymao/sqlglot/compare/v26.11.0...v26.11.1 [v26.12.0]: https://github.com/tobymao/sqlglot/compare/v26.11.1...v26.12.0 [v26.13.0]: https://github.com/tobymao/sqlglot/compare/v26.12.1...v26.13.0 [v26.13.1]: https://github.com/tobymao/sqlglot/compare/v26.13.0...v26.13.1 [v26.13.2]: https://github.com/tobymao/sqlglot/compare/v26.13.1...v26.13.2 [v26.14.0]: https://github.com/tobymao/sqlglot/compare/v26.13.2...v26.14.0 [v26.15.0]: 
https://github.com/tobymao/sqlglot/compare/v26.14.0...v26.15.0 [v26.16.0]: https://github.com/tobymao/sqlglot/compare/v26.15.0...v26.16.0 [v26.16.1]: https://github.com/tobymao/sqlglot/compare/v26.16.0...v26.16.1 [v26.16.2]: https://github.com/tobymao/sqlglot/compare/v26.16.1...v26.16.2 [v26.16.3]: https://github.com/tobymao/sqlglot/compare/v26.16.2...v26.16.3 [v26.16.4]: https://github.com/tobymao/sqlglot/compare/v26.16.3...v26.16.4 [v26.18.0]: https://github.com/tobymao/sqlglot/compare/v26.12.3...v26.18.0 [v26.18.1]: https://github.com/tobymao/sqlglot/compare/v26.18.0...v26.18.1 [v26.19.0]: https://github.com/tobymao/sqlglot/compare/v26.18.1...v26.19.0 [v26.20.0]: https://github.com/tobymao/sqlglot/compare/v26.19.0...v26.20.0 [v26.21.0]: https://github.com/tobymao/sqlglot/compare/v26.20.0...v26.21.0 [v26.22.0]: https://github.com/tobymao/sqlglot/compare/v26.21.0...v26.22.0 [v26.22.1]: https://github.com/tobymao/sqlglot/compare/v26.22.0...v26.22.1 [v26.23.0]: https://github.com/tobymao/sqlglot/compare/v26.22.1...v26.23.0 [v26.24.0]: https://github.com/tobymao/sqlglot/compare/v26.23.0...v26.24.0 [v26.25.0]: https://github.com/tobymao/sqlglot/compare/v26.24.0...v26.25.0 [v26.25.1]: https://github.com/tobymao/sqlglot/compare/v26.25.0...v26.25.1 [v26.25.2]: https://github.com/tobymao/sqlglot/compare/v26.25.1...v26.25.2 [v26.25.3]: https://github.com/tobymao/sqlglot/compare/v26.25.2...v26.25.3 [v26.26.0]: https://github.com/tobymao/sqlglot/compare/v26.25.3...v26.26.0 [v26.27.0]: https://github.com/tobymao/sqlglot/compare/v26.26.0...v26.27.0 [v26.28.1]: https://github.com/tobymao/sqlglot/compare/v26.27.1...v26.28.1 [v26.29.0]: https://github.com/tobymao/sqlglot/compare/v26.28.1...v26.29.0 [v26.30.0]: https://github.com/tobymao/sqlglot/compare/v26.29.0...v26.30.0 [v26.31.0]: https://github.com/tobymao/sqlglot/compare/v26.21.2...v26.31.0 [v26.33.0]: https://github.com/tobymao/sqlglot/compare/v26.32.0...v26.33.0 [v27.0.0]: 
https://github.com/tobymao/sqlglot/compare/v26.21.3...v27.0.0 [v27.1.0]: https://github.com/tobymao/sqlglot/compare/v26.31.2...v27.1.0 [v27.2.0]: https://github.com/tobymao/sqlglot/compare/v27.1.0...v27.2.0 [v27.3.0]: https://github.com/tobymao/sqlglot/compare/v27.0.1...v27.3.0 [v27.3.1]: https://github.com/tobymao/sqlglot/compare/v27.3.0...v27.3.1 [v27.4.0]: https://github.com/tobymao/sqlglot/compare/v27.3.1...v27.4.0 [v27.4.1]: https://github.com/tobymao/sqlglot/compare/v27.4.0...v27.4.1 [v27.5.0]: https://github.com/tobymao/sqlglot/compare/v27.4.1...v27.5.0 [v27.5.1]: https://github.com/tobymao/sqlglot/compare/v27.5.0...v27.5.1 [v27.6.0]: https://github.com/tobymao/sqlglot/compare/v27.5.1...v27.6.0 [v27.7.0]: https://github.com/tobymao/sqlglot/compare/v27.6.0...v27.7.0 [v27.8.0]: https://github.com/tobymao/sqlglot/compare/v27.7.0...v27.8.0 [v27.9.0]: https://github.com/tobymao/sqlglot/compare/v27.8.0...v27.9.0 [v27.10.0]: https://github.com/tobymao/sqlglot/compare/v27.9.0...v27.10.0 [v27.11.0]: https://github.com/tobymao/sqlglot/compare/v27.10.0...v27.11.0 [v27.12.0]: https://github.com/tobymao/sqlglot/compare/v27.11.0...v27.12.0 [v27.13.0]: https://github.com/tobymao/sqlglot/compare/v27.12.0...v27.13.0 [v27.13.1]: https://github.com/tobymao/sqlglot/compare/v27.13.0...v27.13.1 [v27.13.2]: https://github.com/tobymao/sqlglot/compare/v27.13.1...v27.13.2 [v27.14.0]: https://github.com/tobymao/sqlglot/compare/v27.13.2...v27.14.0 [v27.15.0]: https://github.com/tobymao/sqlglot/compare/v27.14.0...v27.15.0 [v27.15.1]: https://github.com/tobymao/sqlglot/compare/v27.15.0...v27.15.1 [v27.15.2]: https://github.com/tobymao/sqlglot/compare/v27.15.1...v27.15.2 [v27.15.3]: https://github.com/tobymao/sqlglot/compare/v27.15.2...v27.15.3 [v27.16.0]: https://github.com/tobymao/sqlglot/compare/v27.15.3...v27.16.0 [v27.16.1]: https://github.com/tobymao/sqlglot/compare/v27.16.0...v27.16.1 [v27.16.2]: https://github.com/tobymao/sqlglot/compare/v27.16.1...v27.16.2 [v27.16.3]: 
https://github.com/tobymao/sqlglot/compare/v27.16.2...v27.16.3 [v27.17.0]: https://github.com/tobymao/sqlglot/compare/v27.16.3...v27.17.0 [v27.18.0]: https://github.com/tobymao/sqlglot/compare/v27.17.0...v27.18.0 [v27.19.0]: https://github.com/tobymao/sqlglot/compare/v27.18.0...v27.19.0 [v27.20.0]: https://github.com/tobymao/sqlglot/compare/v27.19.0...v27.20.0 [v27.21.0]: https://github.com/tobymao/sqlglot/compare/v27.20.0...v27.21.0 [v27.22.0]: https://github.com/tobymao/sqlglot/compare/v27.21.0...v27.22.0 [v27.22.1]: https://github.com/tobymao/sqlglot/compare/v27.22.0...v27.22.1 [v27.22.2]: https://github.com/tobymao/sqlglot/compare/v27.22.1...v27.22.2 [v27.25.0]: https://github.com/tobymao/sqlglot/compare/v27.24.2...v27.25.0 [v27.26.0]: https://github.com/tobymao/sqlglot/compare/v27.25.2...v27.26.0 [v27.27.0]: https://github.com/tobymao/sqlglot/compare/v27.26.0...v27.27.0 [v27.28.0]: https://github.com/tobymao/sqlglot/compare/v27.27.0...v27.28.0 [v27.29.0]: https://github.com/tobymao/sqlglot/compare/v27.28.1...v27.29.0 [v28.0.0]: https://github.com/tobymao/sqlglot/compare/v27.29.0...v28.0.0 [v28.1.0]: https://github.com/tobymao/sqlglot/compare/v28.0.0...v28.1.0 [v28.2.0]: https://github.com/tobymao/sqlglot/compare/v28.1.0...v28.2.0 [v28.3.0]: https://github.com/tobymao/sqlglot/compare/v28.2.0...v28.3.0 [v28.4.0]: https://github.com/tobymao/sqlglot/compare/v27.6.1...v28.4.0 [v28.4.1]: https://github.com/tobymao/sqlglot/compare/v28.4.0...v28.4.1 [v28.5.0]: https://github.com/tobymao/sqlglot/compare/v28.4.1...v28.5.0 [v28.6.0]: https://github.com/tobymao/sqlglot/compare/v28.5.0...v28.6.0 [v28.7.0]: https://github.com/tobymao/sqlglot/compare/v28.6.0...v28.7.0 [v28.8.0]: https://github.com/tobymao/sqlglot/compare/v28.7.0...v28.8.0 [v28.9.0]: https://github.com/tobymao/sqlglot/compare/v28.8.0...v28.9.0 [v28.10.0]: https://github.com/tobymao/sqlglot/compare/v28.9.0...v28.10.0 [v29.0.0]: https://github.com/tobymao/sqlglot/compare/v28.10.1...v29.0.0 [v29.0.1]: 
https://github.com/tobymao/sqlglot/compare/v29.0.0...v29.0.1 [v30.0.0]: https://github.com/tobymao/sqlglot/compare/v29.0.1...v30.0.0 [v30.0.1]: https://github.com/tobymao/sqlglot/compare/v30.0.0...v30.0.1 [v30.0.2]: https://github.com/tobymao/sqlglot/compare/v30.0.1...v30.0.2 [v30.0.3]: https://github.com/tobymao/sqlglot/compare/v30.0.2...v30.0.3 ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to [SQLGlot](https://github.com/tobymao/sqlglot/blob/main/README.md) SQLGlot is open source software. We value feedback and we want to make contributing to this project as easy and transparent as possible, whether it's: - Reporting a bug - Discussing the current state of the code - Submitting a fix - Proposing new features ## We develop with GitHub We use GitHub to host code, to track issues and feature requests, as well as accept pull requests. ## Finding tasks to work on When the core SQLGlot team does not plan to work on an issue, it is usually closed as "not planned". This may happen when a request is exceptionally difficult to address, or because the team deems that it shouldn't be prioritized. These issues can be a good starting point when looking for tasks to work on. Simply filter the issue list to fetch the closed issues and then search for those marked as "not planned". If the scope of an issue is not clear or you need guidance, feel free to ask for clarifications. Before taking on a task, consider studying the [AST primer](https://github.com/tobymao/sqlglot/blob/main/posts/ast_primer.md) and the [onboarding document](https://github.com/tobymao/sqlglot/blob/main/posts/onboarding.md). ## Submitting code changes Pull requests are the best way to propose changes to the codebase, and we actively welcome them. Pull requests should be small and they need to follow the conventions of the project.
For features that require many changes, please reach out to us on [Slack](https://tobikodata.com/slack) before making a request, in order to share any relevant context and increase its chances of getting merged. 1. Fork the repo and create your branch from `main` 2. If you've added code with non-trivial changes, add tests 3. If you've changed APIs, update the documentation (docstrings) 4. Ensure the test suite & linter [checks](https://github.com/tobymao/sqlglot/blob/main/README.md#run-tests-and-lint) pass 5. Issue that pull request and wait for it to be reviewed by a maintainer or contributor Note: make sure to follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) guidelines when creating a PR. ## IMPORTANT: Keep PRs minimal in scope Each pull request should focus on a single, well-defined change. Avoid bundling multiple unrelated fixes or features in one PR. This makes code review faster and more effective, increases the likelihood of acceptance, and helps maintain a clean git history. ## Report bugs using GitHub's [issues](https://github.com/tobymao/sqlglot/issues) We use GitHub issues to track public bugs. Report a bug by opening a new issue. **Great Bug Reports** tend to have: - A quick summary and/or background - Steps to reproduce - Be specific - Give sample code if you can - What you expected would happen - What actually happens - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) - References (e.g. documentation pages related to the issue) ## Start a discussion using GitHub's [discussions](https://github.com/tobymao/sqlglot/discussions) [We use GitHub discussions](https://github.com/tobymao/sqlglot/discussions/190) to discuss the current state of the code. If you want to propose a new feature, this is the right place to do it. Just start a discussion, and let us know why you think this feature would be a good addition to SQLGlot (by possibly including some usage examples).
## [License](https://github.com/tobymao/sqlglot/blob/main/LICENSE) By contributing, you agree that your contributions will be licensed under its MIT License. ## References This document was adapted from [briandk's template](https://gist.github.com/briandk/3d2e8b3ec8daf5a27a62). ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2026 Toby Mao Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: MANIFEST.in ================================================ prune docs/ prune posts/ prune benchmarks/ prune .github/ prune pdoc/ ================================================ FILE: Makefile ================================================ .PHONY: install install-dev install-devc install-devc-release install-pre-commit bench bench-parse bench-optimize test test-fast unit testc unitc style check docs docs-serve hidec showc clean resolve-integration-conflicts update-fixtures ifdef UV PIP := uv pip else PIP := pip endif SO_BACKUP := /tmp/sqlglot_so_backup FIND_SO := find sqlglot -name "*.so" hidec: rm -rf $(SO_BACKUP) && $(FIND_SO) | tar cf $(SO_BACKUP) -T - 2>/dev/null && $(FIND_SO) -delete; true showc: tar xf $(SO_BACKUP) 2>/dev/null; rm -f $(SO_BACKUP); true clean: rm -rf build sqlglotc/build sqlglotc/dist sqlglotc/*.egg-info sqlglotc/sqlglot $(FIND_SO) -delete 2>/dev/null; true install: $(PIP) install -e . install-dev: $(PIP) install -e ".[dev]" git submodule update --init 2>/dev/null || true @if ! command -v gh >/dev/null 2>&1; then \ echo ""; \ echo "gh (GitHub CLI) is not installed. It is needed to auto-create PRs for integration tests."; \ printf "Install it via brew? [y/N] "; \ read answer; \ if [ "$$answer" = "y" ] || [ "$$answer" = "Y" ]; then \ brew install gh; \ else \ echo "Skipping. 
You can install it later: https://cli.github.com/"; \ fi; \ fi install-devc: cd sqlglotc && MYPYC_OPT=0 python setup.py build_ext --inplace install-devc-release: clean cd sqlglotc && python setup.py build_ext --inplace install-pre-commit: pre-commit install pre-commit install --hook-type post-checkout pre-commit install --hook-type pre-push pre-commit install --hook-type post-merge @printf '#!/bin/bash\n.github/scripts/integration_tests_sync.sh post-commit\n' > .git/hooks/post-commit @chmod +x .git/hooks/post-commit bench: bench-parse bench-optimize bench-parse: python -m benchmarks.parse bench-optimize: python -m benchmarks.optimize test: hidec trap '$(MAKE) showc' EXIT; python -m unittest test-fast: python -m unittest --failfast unit: hidec trap '$(MAKE) showc' EXIT; SKIP_INTEGRATION=1 python -m unittest testc: install-devc python -m unittest unitc: install-devc SKIP_INTEGRATION=1 python -m unittest style: pre-commit run --all-files check: style test testc docs: python pdoc/cli.py -o docs docs-serve: python pdoc/cli.py --port 8002 resolve-integration-conflicts: cd sqlglot-integration-tests && git pull --rebase --autostash update-fixtures: python sqlglot-integration-tests/scripts/update_dbt_fixtures.py ================================================ FILE: README.md ================================================ ![SQLGlot logo](sqlglot.png) SQLGlot is a no-dependency SQL parser, transpiler, optimizer, and engine. It can be used to format SQL or translate between [31 different dialects](https://github.com/tobymao/sqlglot/blob/main/sqlglot/dialects/__init__.py) like [DuckDB](https://duckdb.org/), [Presto](https://prestodb.io/) / [Trino](https://trino.io/), [Spark](https://spark.apache.org/) / [Databricks](https://www.databricks.com/), [Snowflake](https://www.snowflake.com/en/), and [BigQuery](https://cloud.google.com/bigquery/). It aims to read a wide variety of SQL inputs and output syntactically and semantically correct SQL in the targeted dialects. 
It is a very comprehensive generic SQL parser with a robust [test suite](https://github.com/tobymao/sqlglot/blob/main/tests/). It is also quite [performant](#benchmarks), while being written purely in Python. You can easily [customize](#custom-dialects) the parser, [analyze](#metadata) queries, traverse expression trees, and programmatically [build](#build-and-modify-sql) SQL. SQLGlot can detect a variety of [syntax errors](#parser-errors), such as unbalanced parentheses, incorrect usage of reserved keywords, and so on. These errors are highlighted and dialect incompatibilities can warn or raise depending on configurations. Learn more about SQLGlot in the API [documentation](https://sqlglot.com/) and the expression tree [primer](https://github.com/tobymao/sqlglot/blob/main/posts/ast_primer.md). Contributions are very welcome in SQLGlot; read the [contribution guide](https://github.com/tobymao/sqlglot/blob/main/CONTRIBUTING.md) and the [onboarding document](https://github.com/tobymao/sqlglot/blob/main/posts/onboarding.md) to get started! 
## Table of Contents * [Install](#install) * [Versioning](#versioning) * [Get in Touch](#get-in-touch) * [FAQ](#faq) * [Examples](#examples) * [Formatting and Transpiling](#formatting-and-transpiling) * [Metadata](#metadata) * [Parser Errors](#parser-errors) * [Unsupported Errors](#unsupported-errors) * [Build and Modify SQL](#build-and-modify-sql) * [SQL Optimizer](#sql-optimizer) * [AST Introspection](#ast-introspection) * [AST Diff](#ast-diff) * [Custom Dialects](#custom-dialects) * [SQL Execution](#sql-execution) * [Used By](#used-by) * [Documentation](#documentation) * [Run Tests and Lint](#run-tests-and-lint) * [Deployment](#deployment) * [Benchmarks](#benchmarks) * [Optional Dependencies](#optional-dependencies) * [Supported Dialects](#supported-dialects) ## Install From PyPI: ```bash # Pure python version pip3 install sqlglot # C extensions compiled with mypyc # prebuilt wheel if available for your platform, otherwise builds from source pip3 install "sqlglot[c]" ``` Or with a local checkout: ``` # Optionally prefix with UV=1 to use uv for the installation make install ``` Requirements for development (optional): ``` # Optionally prefix with UV=1 to use uv for the installation make install-dev ``` ## Versioning Given a version number `MAJOR`.`MINOR`.`PATCH`, SQLGlot uses the following versioning strategy: - The `PATCH` version is incremented when there are backwards-compatible fixes or feature additions. - The `MINOR` version is incremented when there are backwards-incompatible fixes or feature additions. - The `MAJOR` version is incremented when there are significant backwards-incompatible fixes or feature additions. ## Get in Touch We'd love to hear from you. Join our community [Slack channel](https://tobikodata.com/slack)! ## FAQ I tried to parse SQL that should be valid but it failed, why did that happen? * Most of the time, issues like this occur because the "source" dialect is omitted during parsing. 
For example, this is how to correctly parse a SQL query written in Spark SQL: `parse_one(sql, dialect="spark")` (alternatively: `read="spark"`). If no dialect is specified, `parse_one` will attempt to parse the query according to the "SQLGlot dialect", which is designed to be a superset of all supported dialects. If you tried specifying the dialect and it still doesn't work, please file an issue. I tried to output SQL but it's not in the correct dialect! * Like parsing, generating SQL also requires the target dialect to be specified, otherwise the SQLGlot dialect will be used by default. For example, to transpile a query from Spark SQL to DuckDB, do `parse_one(sql, dialect="spark").sql(dialect="duckdb")` (alternatively: `transpile(sql, read="spark", write="duckdb")`). What happened to sqlglot.dataframe? * The PySpark dataframe api was moved to a standalone library called [SQLFrame](https://github.com/eakmanrq/sqlframe) in v24. It now allows you to run queries as opposed to just generate SQL. ## Examples ### Formatting and Transpiling Easily translate from one dialect to another. 
For example, date/time functions vary between dialects and can be hard to deal with: ```python import sqlglot sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="hive")[0] ``` ```sql 'SELECT FROM_UNIXTIME(1618088028295 / POW(10, 3))' ``` SQLGlot can even translate custom time formats: ```python import sqlglot sqlglot.transpile("SELECT STRFTIME(x, '%y-%-m-%S')", read="duckdb", write="hive")[0] ``` ```sql "SELECT DATE_FORMAT(x, 'yy-M-ss')" ``` Identifier delimiters and data types can be translated as well: ```python import sqlglot # Spark SQL requires backticks (`) for delimited identifiers and uses `FLOAT` over `REAL` sql = """WITH baz AS (SELECT a, c FROM foo WHERE a = 1) SELECT f.a, b.b, baz.c, CAST("b"."a" AS REAL) d FROM foo f JOIN bar b ON f.a = b.a LEFT JOIN baz ON f.a = baz.a""" # Translates the query into Spark SQL, formats it, and delimits all of its identifiers print(sqlglot.transpile(sql, write="spark", identify=True, pretty=True)[0]) ``` ```sql WITH `baz` AS ( SELECT `a`, `c` FROM `foo` WHERE `a` = 1 ) SELECT `f`.`a`, `b`.`b`, `baz`.`c`, CAST(`b`.`a` AS FLOAT) AS `d` FROM `foo` AS `f` JOIN `bar` AS `b` ON `f`.`a` = `b`.`a` LEFT JOIN `baz` ON `f`.`a` = `baz`.`a` ``` Comments are also preserved on a best-effort basis: ```python sql = """ /* multi line comment */ SELECT tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */, CAST(x AS SIGNED), # comment 3 y -- comment 4 FROM bar /* comment 5 */, tbl # comment 6 """ # Note: MySQL-specific comments (`#`) are converted into standard syntax print(sqlglot.transpile(sql, read='mysql', pretty=True)[0]) ``` ```sql /* multi line comment */ SELECT tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */, CAST(x AS INT), /* comment 3 */ y /* comment 4 */ FROM bar /* comment 5 */, tbl /* comment 6 */ ``` ### Metadata You can explore SQL with expression helpers to do things like find columns and tables in a query: ```python from sqlglot import parse_one, exp # print all column references (a and b) for 
column in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Column): print(column.alias_or_name) # find all projections in select statements (a and c) for select in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Select): for projection in select.expressions: print(projection.alias_or_name) # find all tables (x, y, z) for table in parse_one("SELECT * FROM x JOIN y JOIN z").find_all(exp.Table): print(table.name) ``` Read the [ast primer](https://github.com/tobymao/sqlglot/blob/main/posts/ast_primer.md) to learn more about SQLGlot's internals. ### Parser Errors When the parser detects an error in the syntax, it raises a `ParseError`: ```python import sqlglot sqlglot.transpile("SELECT foo FROM (SELECT baz FROM t") ``` ``` sqlglot.errors.ParseError: Expecting ). Line 1, Col: 34. SELECT foo FROM (SELECT baz FROM t ~ ``` Structured syntax errors are accessible for programmatic use: ```python import sqlglot.errors try: sqlglot.transpile("SELECT foo FROM (SELECT baz FROM t") except sqlglot.errors.ParseError as e: print(e.errors) ``` ```python [{ 'description': 'Expecting )', 'line': 1, 'col': 34, 'start_context': 'SELECT foo FROM (SELECT baz FROM ', 'highlight': 't', 'end_context': '', 'into_expression': None }] ``` ### Unsupported Errors It may not be possible to translate some queries between certain dialects. For these cases, SQLGlot may emit a warning and will proceed to do a best-effort translation by default: ```python import sqlglot sqlglot.transpile("SELECT APPROX_DISTINCT(a, 0.1) FROM foo", read="presto", write="hive") ``` ```sql APPROX_COUNT_DISTINCT does not support accuracy 'SELECT APPROX_COUNT_DISTINCT(a) FROM foo' ``` This behavior can be changed by setting the [`unsupported_level`](https://github.com/tobymao/sqlglot/blob/b0e8dc96ba179edb1776647b5bde4e704238b44d/sqlglot/errors.py#L9) attribute. 
For example, we can set it to either `RAISE` or `IMMEDIATE` to ensure an exception is raised instead: ```python import sqlglot sqlglot.transpile("SELECT APPROX_DISTINCT(a, 0.1) FROM foo", read="presto", write="hive", unsupported_level=sqlglot.ErrorLevel.RAISE) ``` ``` sqlglot.errors.UnsupportedError: APPROX_COUNT_DISTINCT does not support accuracy ``` There are queries that require additional information to be accurately transpiled, such as the schemas of the tables referenced in them. This is because certain transformations are type-sensitive, meaning that type inference is needed in order to understand their semantics. Even though the `qualify` and `annotate_types` optimizer [rules](https://github.com/tobymao/sqlglot/tree/main/sqlglot/optimizer) can help with this, they are not used by default because they add significant overhead and complexity. Transpilation is generally a hard problem, so SQLGlot employs an "incremental" approach to solving it. This means that there may be dialect pairs that currently lack support for some inputs, but this is expected to improve over time. We highly appreciate well-documented and tested issues or PRs, so feel free to [reach out](#get-in-touch) if you need guidance! 
### Build and Modify SQL SQLGlot supports incrementally building SQL expressions: ```python from sqlglot import select, condition where = condition("x=1").and_("y=1") select("*").from_("y").where(where).sql() ``` ```sql 'SELECT * FROM y WHERE x = 1 AND y = 1' ``` It's possible to modify a parsed tree: ```python from sqlglot import parse_one parse_one("SELECT x FROM y").from_("z").sql() ``` ```sql 'SELECT x FROM z' ``` Parsed expressions can also be transformed recursively by applying a mapping function to each node in the tree: ```python from sqlglot import exp, parse_one expression_tree = parse_one("SELECT a FROM x") def transformer(node): if isinstance(node, exp.Column) and node.name == "a": return parse_one("FUN(a)") return node transformed_tree = expression_tree.transform(transformer) transformed_tree.sql() ``` ```sql 'SELECT FUN(a) FROM x' ``` ### SQL Optimizer SQLGlot can rewrite queries into an "optimized" form. It performs a variety of [techniques](https://github.com/tobymao/sqlglot/blob/main/sqlglot/optimizer/optimizer.py) to create a new canonical AST. This AST can be used to standardize queries or provide the foundations for implementing an actual engine. 
For example: ```python import sqlglot from sqlglot.optimizer import optimize print( optimize( sqlglot.parse_one(""" SELECT A OR (B OR (C AND D)) FROM x WHERE Z = date '2021-01-01' + INTERVAL '1' month OR 1 = 0 """), schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}} ).sql(pretty=True) ) ``` ```sql SELECT ( "x"."a" <> 0 OR "x"."b" <> 0 OR "x"."c" <> 0 ) AND ( "x"."a" <> 0 OR "x"."b" <> 0 OR "x"."d" <> 0 ) AS "_col_0" FROM "x" AS "x" WHERE CAST("x"."z" AS DATE) = CAST('2021-02-01' AS DATE) ``` ### AST Introspection You can see the AST version of the parsed SQL by calling `repr`: ```python from sqlglot import parse_one print(repr(parse_one("SELECT a + 1 AS z"))) ``` ```python Select( expressions=[ Alias( this=Add( this=Column( this=Identifier(this=a, quoted=False)), expression=Literal(this=1, is_string=False)), alias=Identifier(this=z, quoted=False))]) ``` ### AST Diff SQLGlot can calculate the semantic difference between two expressions and output changes in the form of a sequence of actions needed to transform a source expression into a target one: ```python from sqlglot import diff, parse_one diff(parse_one("SELECT a + b, c, d"), parse_one("SELECT c, a - b, d")) ``` ```python [ Remove(expression=Add( this=Column( this=Identifier(this=a, quoted=False)), expression=Column( this=Identifier(this=b, quoted=False)))), Insert(expression=Sub( this=Column( this=Identifier(this=a, quoted=False)), expression=Column( this=Identifier(this=b, quoted=False)))), Keep( source=Column(this=Identifier(this=a, quoted=False)), target=Column(this=Identifier(this=a, quoted=False))), ... ] ``` See also: [Semantic Diff for SQL](https://github.com/tobymao/sqlglot/blob/main/posts/sql_diff.md).
### Custom Dialects [Dialects](https://github.com/tobymao/sqlglot/tree/main/sqlglot/dialects) can be added by subclassing `Dialect`: ```python from sqlglot import exp from sqlglot.dialects.dialect import Dialect from sqlglot.generator import Generator from sqlglot.tokens import Tokenizer, TokenType class Custom(Dialect): class Tokenizer(Tokenizer): QUOTES = ["'", '"'] IDENTIFIERS = ["`"] KEYWORDS = { **Tokenizer.KEYWORDS, "INT64": TokenType.BIGINT, "FLOAT64": TokenType.DOUBLE, } class Generator(Generator): TRANSFORMS = {exp.Array: lambda self, e: f"[{self.expressions(e)}]"} TYPE_MAPPING = { exp.DataType.Type.TINYINT: "INT64", exp.DataType.Type.SMALLINT: "INT64", exp.DataType.Type.INT: "INT64", exp.DataType.Type.BIGINT: "INT64", exp.DataType.Type.DECIMAL: "NUMERIC", exp.DataType.Type.FLOAT: "FLOAT64", exp.DataType.Type.DOUBLE: "FLOAT64", exp.DataType.Type.BOOLEAN: "BOOL", exp.DataType.Type.TEXT: "STRING", } print(Dialect["custom"]) ``` ``` <class '__main__.Custom'> ``` ### SQL Execution SQLGlot is able to interpret SQL queries, where the tables are represented as Python dictionaries. The engine is not supposed to be fast, but it can be useful for unit testing and running SQL natively across Python objects. Additionally, the foundation can be easily integrated with fast compute kernels, such as [Arrow](https://arrow.apache.org/docs/index.html) and [Pandas](https://pandas.pydata.org/).
The example below showcases the execution of a query that involves aggregations and joins: ```python from sqlglot.executor import execute tables = { "sushi": [ {"id": 1, "price": 1.0}, {"id": 2, "price": 2.0}, {"id": 3, "price": 3.0}, ], "order_items": [ {"sushi_id": 1, "order_id": 1}, {"sushi_id": 1, "order_id": 1}, {"sushi_id": 2, "order_id": 1}, {"sushi_id": 3, "order_id": 2}, ], "orders": [ {"id": 1, "user_id": 1}, {"id": 2, "user_id": 2}, ], } execute( """ SELECT o.user_id, SUM(s.price) AS price FROM orders o JOIN order_items i ON o.id = i.order_id JOIN sushi s ON i.sushi_id = s.id GROUP BY o.user_id """, tables=tables ) ``` ```python user_id price 1 4.0 2 3.0 ``` See also: [Writing a Python SQL engine from scratch](https://github.com/tobymao/sqlglot/blob/main/posts/python_sql_engine.md). ## Used By * [SQLMesh](https://github.com/TobikoData/sqlmesh) * [Apache Superset](https://github.com/apache/superset) * [Dagster](https://github.com/dagster-io/dagster) * [Fugue](https://github.com/fugue-project/fugue) * [Ibis](https://github.com/ibis-project/ibis) * [dlt](https://github.com/dlt-hub/dlt) * [mysql-mimic](https://github.com/kelsin/mysql-mimic) * [Querybook](https://github.com/pinterest/querybook) * [Quokka](https://github.com/marsupialtail/quokka) * [Splink](https://github.com/moj-analytical-services/splink) * [SQLFrame](https://github.com/eakmanrq/sqlframe) ## Documentation SQLGlot uses [pdoc](https://pdoc.dev/) to serve its API documentation. 
A hosted version is on the [SQLGlot website](https://sqlglot.com/), or you can build locally with: ``` make docs-serve ``` ## Run Tests and Lint ``` make style # Only linter checks make unit # Only unit tests (pure Python) make test # Unit and integration tests (pure Python) make unitc # Only unit tests (mypyc compiled) make testc # Unit and integration tests (mypyc compiled) make check # Full test suite & linter checks make clean # Remove compiled C artifacts (.so files, build dirs) ``` ## Deployment To deploy a new SQLGlot version, follow these steps: 1. Run `git pull` to make sure the local git repo is at the head of the main branch 2. Do a `git tag` operation to bump the SQLGlot version, e.g. `git tag v28.5.0` 3. Run `git push && git push --tags` to deploy the new version ## Benchmarks [Benchmarks](https://github.com/tobymao/sqlglot/blob/main/benchmarks/parse.py) run on Python 3.14.3 in seconds. sqlglot, sqltree, sqlparse, and sqlfluff are python based whereas sqloxide and polyglot-sql are rust bindings. 
| Query | sqlglot | sqlglot[c] | sqltree | sqlparse | sqlfluff | sqloxide | polyglot-sql | | ----------------- | --------------- | --------------- | --------------- | ---------------- | ----------------- | --------------- | --------------- | | tpch | 0.002709 (1.00) | 0.000740 (0.27) | 0.002172 (0.80) | 0.014152 (5.22) | 0.241027 (88.97) | 0.000655 (0.24) | 0.000698 (0.26) | | short | 0.000226 (1.00) | 0.000075 (0.33) | 0.000184 (0.81) | 0.000938 (4.15) | 0.031542 (139.47) | 0.000041 (0.18) | 0.000174 (0.77) | | deep_arithmetic | 0.007760 (1.00) | 0.002015 (0.26) | 0.005927 (0.76) | N/A | 1.359824 (175.22) | 0.003117 (0.40) | 0.002964 (0.38) | | large_in | 0.407987 (1.00) | 0.101644 (0.25) | 0.467943 (1.15) | N/A | N/A | 0.147765 (0.36) | 0.105854 (0.26) | | values | 0.466734 (1.00) | 0.113762 (0.24) | 0.522797 (1.12) | N/A | N/A | 0.117628 (0.25) | 0.117169 (0.25) | | many_joins | 0.011943 (1.00) | 0.002701 (0.23) | 0.009887 (0.83) | 0.059303 (4.97) | 1.246253 (104.35) | 0.002918 (0.24) | 0.002964 (0.25) | | many_unions | 0.041321 (1.00) | 0.008291 (0.20) | 0.038249 (0.93) | N/A | 1.826401 (44.20) | 0.012395 (0.30) | 0.013087 (0.32) | | nested_subqueries | 0.001200 (1.00) | 0.000235 (0.20) | N/A | 0.003860 (3.22) | 0.089490 (74.56) | 0.000215 (0.18) | 0.000262 (0.22) | | many_columns | 0.011821 (1.00) | 0.002825 (0.24) | 0.012722 (1.08) | 0.238510 (20.18) | 1.050386 (88.86) | 0.002515 (0.21) | 0.003765 (0.32) | | large_case | 0.035822 (1.00) | 0.008593 (0.24) | 0.033578 (0.94) | N/A | 4.200220 (117.25) | 0.009870 (0.28) | 0.009442 (0.26) | | complex_where | 0.032710 (1.00) | 0.006602 (0.20) | N/A | 0.136203 (4.16) | 2.492927 (76.21) | 0.006002 (0.18) | 0.007787 (0.24) | | many_ctes | 0.017610 (1.00) | 0.003630 (0.21) | 0.012377 (0.70) | 0.123620 (7.02) | 0.657611 (37.34) | 0.004197 (0.24) | 0.003273 (0.19) | | many_windows | 0.020790 (1.00) | 0.005751 (0.28) | N/A | 0.203144 (9.77) | 1.421216 (68.36) | 0.003941 (0.19) | 0.004570 (0.22) | | nested_functions | 
0.000703 (1.00) | 0.000189 (0.27) | 0.000754 (1.07) | 0.005082 (7.23) | 0.091007 (129.51) | 0.000168 (0.24) | 0.000225 (0.32) | | large_strings | 0.005073 (1.00) | 0.001480 (0.29) | 0.014533 (2.86) | 0.049392 (9.74) | 0.320672 (63.22) | 0.001616 (0.32) | 0.002151 (0.42) | | many_numbers | 0.103898 (1.00) | 0.024483 (0.24) | 0.120119 (1.16) | N/A | N/A | 0.031667 (0.30) | 0.026880 (0.26) | ``` make bench # Run parsing benchmark make bench-optimize # Run optimization benchmark ``` ## Optional Dependencies SQLGlot uses [dateutil](https://github.com/dateutil/dateutil) to simplify literal timedelta expressions. The optimizer will not simplify expressions like the following if the module cannot be found: ```sql x + interval '1' month ``` ## Supported Dialects | Dialect | Support Level | |---------|---------------| | Athena | Official | | BigQuery | Official | | ClickHouse | Official | | Databricks | Official | | Doris | Community | | Dremio | Community | | Drill | Community | | Druid | Community | | DuckDB | Official | | Exasol | Community | | Fabric | Community | | Hive | Official | | Materialize | Community | | MySQL | Official | | Oracle | Official | | Postgres | Official | | Presto | Official | | PRQL | Community | | Redshift | Official | | RisingWave | Community | | SingleStore | Community | | Snowflake | Official | | Solr | Community | | Spark | Official | | SQLite | Official | | StarRocks | Official | | Tableau | Official | | Teradata | Community | | Trino | Official | | TSQL | Official | | YDB | [Plugin](https://pypi.org/project/ydb-sqlglot-plugin) | **Official Dialects** are maintained by the core SQLGlot team with higher priority for bug fixes and feature additions. **Community Dialects** are developed and maintained primarily through community contributions. These are fully functional but may receive lower priority for issue resolution compared to officially supported dialects. We welcome and encourage community contributions to improve these dialects. 
**Plugin Dialects** (supported since v28.6.0) are third-party dialects developed and maintained in external repositories by independent contributors. These dialects are not part of the SQLGlot codebase and are distributed as separate packages. The SQLGlot team does not provide support or maintenance for plugin dialects — please direct any issues or feature requests to their respective repositories. See [Creating a Dialect Plugin](#creating-a-dialect-plugin) below for information on how to build your own. ### Creating a Dialect Plugin If your database isn't supported, you can create a plugin that registers a custom dialect via entry points. Create a package with your dialect class and register it in `setup.py`: ```python from setuptools import setup setup( name="mydb-sqlglot-dialect", entry_points={ "sqlglot.dialects": [ "mydb = my_package.dialect:MyDB", ], }, ) ``` The dialect will be automatically discovered and can be used like any built-in dialect: ```python from sqlglot import transpile transpile("SELECT * FROM t", read="mydb", write="postgres") ``` See the [Custom Dialects](#custom-dialects) section for implementation details. 
================================================ FILE: benchmarks/__init__.py ================================================ ================================================ FILE: benchmarks/compare.py ================================================ #!/usr/bin/env python3 """Compare two benchmark JSON files and output a markdown table with diff indicators.""" import json import sys def _fmt_time(seconds): if seconds >= 1: return f"{seconds:.2f}s" if seconds >= 1e-3: return f"{seconds * 1e3:.1f}ms" return f"{seconds * 1e6:.0f}us" def _indicator(ratio): """Return an emoji indicator based on the speedup/slowdown ratio (pr_time / main_time).""" if ratio <= 0.95: return "\U0001f7e2\U0001f7e2" # 5%+ faster if ratio <= 0.97: return "\U0001f7e2" # 3-5% faster if ratio <= 0.99: return "\U0001f7e9" # 1-3% faster if ratio <= 1.01: return "\u26aa" # no significant change if ratio <= 1.03: return "\U0001f7e7" # 1-3% slower if ratio <= 1.05: return "\U0001f534" # 3-5% slower return "\U0001f534\U0001f534" # 5%+ slower def _diff_text(ratio): pct = (ratio - 1.0) * 100 if abs(pct) < 0.05: return "0.0%" if pct < 0: return f"{-pct:.1f}% faster" return f"{pct:.1f}% slower" def compare(main_file, pr_file): with open(main_file) as f: main = json.load(f) with open(pr_file) as f: pr = json.load(f) # Collect all query names across both parsers parsers = set() queries = [] seen_queries = set() for key in list(main.keys()) + list(pr.keys()): parser, query = key.split(":", 1) parsers.add(parser) if query not in seen_queries: queries.append(query) seen_queries.add(query) # Sort parsers: sqlglot first, then sqlglotc parser_order = sorted(parsers, key=lambda p: (p != "sqlglot", p != "sqlglotc", p)) # Build table lines = [] lines.append("## Benchmark Results\n") lines.append( "**Legend:** \U0001f7e2\U0001f7e2 = 5%+ faster | \U0001f7e2 = 3-5% faster | \U0001f7e9 = 1-3% faster | \u26aa = unchanged | \U0001f7e7 = 1-3% slower | \U0001f534 = 3-5% slower | \U0001f534\U0001f534 = 5%+ slower\n" ) for 
parser in parser_order: display = ( "sqlglot" if parser == "sqlglot" else "sqlglot[c]" if parser == "sqlglotc" else parser ) lines.append(f"\n### {display}\n") lines.append("| Query | main | PR | diff | |") lines.append("| ----- | ---: | ---: | ---: | --- |") for query in queries: key = f"{parser}:{query}" main_time = main.get(key) pr_time = pr.get(key) if main_time is None and pr_time is None: continue main_str = _fmt_time(main_time) if main_time else "N/A" pr_str = _fmt_time(pr_time) if pr_time else "N/A" if main_time and pr_time: ratio = pr_time / main_time diff_str = _diff_text(ratio) ind = _indicator(ratio) else: diff_str = "N/A" ind = "" lines.append(f"| {query} | {main_str} | {pr_str} | {diff_str} | {ind} |") lines.append("\n---\n*Comment `/benchmark` to re-run.*") return "\n".join(lines) if __name__ == "__main__": if len(sys.argv) != 3: print("Usage: compare_benchmarks.py ", file=sys.stderr) sys.exit(1) print(compare(sys.argv[1], sys.argv[2])) ================================================ FILE: benchmarks/optimize.py ================================================ import sys import os import pyperf # Add the project root to the path so we can import from tests sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from sqlglot.optimizer import optimize from sqlglot import parse_one from tests.helpers import load_sql_fixture_pairs, TPCH_SCHEMA, TPCDS_SCHEMA # Deeply nested conditions currently require a lot of recursion sys.setrecursionlimit(10000) def gen_condition(n): return parse_one(" OR ".join(f"a = {i} AND b = {i}" for i in range(n))) # Create benchmark functions that return the setup data def get_tpch_setup(): return ( [parse_one(sql) for _, sql, _ in load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql")], TPCH_SCHEMA, ) def get_tpcds_setup(): return ( [parse_one(sql) for _, sql, _ in load_sql_fixture_pairs("optimizer/tpc-ds/tpc-ds.sql")], TPCDS_SCHEMA, ) def get_condition_10_setup(): return ([gen_condition(10)], {}) def 
def get_condition_100_setup():
    """Return the (expressions, schema) inputs for the 100-term condition benchmark."""
    expressions = [gen_condition(100)]
    return (expressions, {})


def get_condition_1000_setup():
    """Return the (expressions, schema) inputs for the 1000-term condition benchmark."""
    expressions = [gen_condition(1000)]
    return (expressions, {})


# Optimizer functions that will be benchmarked
def optimize_queries(expressions, schema):
    """Run ``optimize`` on every expression in ``expressions`` with ``schema``."""
    for expression in expressions:
        optimize(expression, schema)


def run_benchmarks():
    """Register one pyperf benchmark per setup function and run them in order."""
    runner = pyperf.Runner()

    # Define benchmarks with their setup functions
    benchmarks = (
        ("tpch", get_tpch_setup),
        # ("tpcds", get_tpcds_setup),  # This is left out because it's too slow in CI
        ("condition_10", get_condition_10_setup),
        ("condition_100", get_condition_100_setup),
        ("condition_1000", get_condition_1000_setup),
    )

    for benchmark_name, benchmark_setup in benchmarks:
        expressions, schema = benchmark_setup()
        runner.bench_func(f"optimize_{benchmark_name}", optimize_queries, expressions, schema)


if __name__ == "__main__":
    run_benchmarks()
20 ) many_columns = "SELECT " + ", ".join(f"c{i}" for i in range(1000)) + " FROM t" large_case = ( "SELECT CASE " + " ".join(f"WHEN x = {i} THEN {i}" for i in range(1000)) + " ELSE -1 END FROM t" ) complex_where = "SELECT * FROM t WHERE " + " AND ".join( f"(c{i} > {i} OR c{i} LIKE '%s{i}%' OR c{i} BETWEEN {i} AND {i + 10} OR c{i} IS NULL)" for i in range(200) ) many_ctes = ( "WITH " + ", ".join(f"t{i} AS (SELECT {i} AS a FROM t{i - 1 if i else 'base'})" for i in range(200)) + " SELECT * FROM t199" ) many_windows = ( "SELECT " + ", ".join( f"SUM(c{i}) OVER (PARTITION BY p{i % 10} ORDER BY o{i % 5}) AS w{i}" for i in range(200) ) + " FROM t" ) nested_functions = "SELECT " + "COALESCE(" * 20 + "x" + ", NULL)" * 20 + " FROM t" large_strings = "SELECT " + ", ".join(f"'{'x' * 100}'" for i in range(500)) + " FROM t" many_numbers = "SELECT " + ", ".join(str(i) for i in range(10000)) + " FROM t" tpch = """ WITH "_e_0" AS ( SELECT "partsupp"."ps_partkey" AS "ps_partkey", "partsupp"."ps_suppkey" AS "ps_suppkey", "partsupp"."ps_supplycost" AS "ps_supplycost" FROM "partsupp" AS "partsupp" ), "_e_1" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' ) SELECT "supplier"."s_acctbal" AS "s_acctbal", "supplier"."s_name" AS "s_name", "nation"."n_name" AS "n_name", "part"."p_partkey" AS "p_partkey", "part"."p_mfgr" AS "p_mfgr", "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM ( SELECT "part"."p_partkey" AS "p_partkey", "part"."p_mfgr" AS "p_mfgr", "part"."p_type" AS "p_type", "part"."p_size" AS "p_size" FROM "part" AS "part" WHERE "part"."p_size" = 15 AND "part"."p_type" LIKE '%BRASS' ) AS "part" LEFT JOIN ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", "partsupp"."ps_partkey" AS "_u_1" FROM "_e_0" AS "partsupp" CROSS JOIN "_e_1" AS "region" JOIN ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_regionkey" AS 
"n_regionkey" FROM "nation" AS "nation" ) AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN ( SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_nationkey" AS "s_nationkey" FROM "supplier" AS "supplier" ) AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" GROUP BY "partsupp"."ps_partkey" ) AS "_u_0" ON "part"."p_partkey" = "_u_0"."_u_1" CROSS JOIN "_e_1" AS "region" JOIN ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_name" AS "n_name", "nation"."n_regionkey" AS "n_regionkey" FROM "nation" AS "nation" ) AS "nation" ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "_e_0" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" JOIN ( SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address", "supplier"."s_nationkey" AS "s_nationkey", "supplier"."s_phone" AS "s_phone", "supplier"."s_acctbal" AS "s_acctbal", "supplier"."s_comment" AS "s_comment" FROM "supplier" AS "supplier" ) AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" WHERE "partsupp"."ps_supplycost" = "_u_0"."_col_0" AND NOT "_u_0"."_u_1" IS NULL ORDER BY "supplier"."s_acctbal" DESC, "nation"."n_name", "supplier"."s_name", "part"."p_partkey" LIMIT 100 """ QUERIES = { "tpch": tpch, "short": short, "deep_arithmetic": deep_arithmetic, "large_in": large_in, "values": values, "many_joins": many_joins, "many_unions": many_unions, "nested_subqueries": nested_subqueries, "many_columns": many_columns, "large_case": large_case, "complex_where": complex_where, "many_ctes": many_ctes, "many_windows": many_windows, "nested_functions": nested_functions, "large_strings": large_strings, "many_numbers": many_numbers, } # --- Parser definitions --- def sqlglot_parse(sql): import sqlglot sqlglot.parse_one(sql, error_level=sqlglot.ErrorLevel.IGNORE) def sqltree_parse(sql): import 
def _check_parser(parse_fn, queries):
    """Probe which queries ``parse_fn`` can parse, one subprocess per query.

    Each query runs in its own interpreter so that a crash (e.g. a segfault in
    a native parser) or a hang only affects that single query.

    Args:
        parse_fn: Parser wrapper; its source is copied into the probe script.
        queries: Mapping of query name to SQL text.

    Returns:
        ``None`` if the parser is not installed (or no query was probed),
        otherwise the set of query names that parsed successfully.
    """
    entry_point = parse_fn.__name__
    fn_source = inspect.getsource(parse_fn)
    parsed_ok = set()
    probed = False

    for query_name, sql in queries.items():
        # The probe arms a 5s alarm in the child; the parent also enforces 10s.
        script = f"""import signal
def _timeout(signum, frame):
    raise TimeoutError()
signal.signal(signal.SIGALRM, _timeout)
signal.alarm(5)
{fn_source}
{entry_point}({repr(sql)})
"""
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf8", suffix=".py", delete=True
        ) as script_file:
            script_file.write(script)
            script_file.flush()
            try:
                outcome = subprocess.run(
                    [sys.executable, script_file.name], capture_output=True, timeout=10
                )
            except subprocess.TimeoutExpired:
                outcome = None

        if outcome is not None and b"ModuleNotFoundError" in outcome.stderr:
            return None

        # Reaching this point (even via a timeout) means the import worked.
        probed = True
        if outcome is not None and outcome.returncode == 0:
            parsed_ok.add(query_name)

    return parsed_ok if probed else None


def _discover_parsers():
    """Find installed third-party parsers and the queries each one supports.

    Returns:
        A tuple ``(available, valid_pairs)``: the names of usable parsers in
        registry order, and the set of ``(parser_name, query_name)`` pairs
        that parsed successfully.
    """
    available = []
    valid_pairs = set()

    for parser_name, parse_fn in THIRD_PARTY_PARSERS.items():
        handled = _check_parser(parse_fn, QUERIES)
        if handled is not None:
            valid_pairs.update((parser_name, query_name) for query_name in handled)
            available.append(parser_name)

    return available, valid_pairs


# --- Benchmarking ---

_quiet = False


def _bench(name, fn, *args, iterations=5):
    """Time ``fn(*args)`` up to ``iterations`` times and return the best run in seconds.

    Stops early after any run that takes longer than one second. Unless the
    module-level ``_quiet`` flag is set, the best time is printed under ``name``.
    """
    best = float("inf")
    for _ in range(iterations):
        started = time.perf_counter()
        fn(*args)
        elapsed = time.perf_counter() - started
        best = min(best, elapsed)
        if elapsed > 1:
            # Slow case: repeating it would dominate the total runtime.
            break

    if not _quiet:
        print(f" {name}: {_fmt_time(best)}")
    return best


def _bench_sqlglot(results):
    """Benchmark sqlglot on every query and store timings in ``results``.

    Keys are prefixed with ``sqlglotc`` when ``sqlglot.expressions.core`` was
    loaded from a ``.so`` file, else ``sqlglot``. Returns the prefix used.
    """
    import sqlglot.expressions.core as core_module

    loaded_from_so = core_module.__file__.endswith(".so")
    prefix = "sqlglotc" if loaded_from_so else "sqlglot"

    for query_name, sql in QUERIES.items():
        key = f"{prefix}:{query_name}"
        results[key] = _bench(key, sqlglot_parse, sql)

    return prefix
def _bench_third_party(results):
    """Benchmark every supported (parser, query) pair of the third-party parsers.

    Timings are added to ``results`` under ``"<parser>:<query>"``.
    Returns the list of available parser names.
    """
    available, valid_pairs = _discover_parsers()

    for query_name, sql in QUERIES.items():
        for parser_name, parse_fn in THIRD_PARTY_PARSERS.items():
            if (parser_name, query_name) not in valid_pairs:
                continue
            key = f"{parser_name}:{query_name}"
            results[key] = _bench(key, parse_fn, sql)

    return available


# --- Table printing ---


def _fmt_ratio(ratio):
    """Format a timing ratio with two decimals."""
    return "{:.2f}".format(ratio)


def _fmt_time(seconds):
    """Format a duration in seconds as ``sec``, ``ms`` or ``us`` text."""
    if seconds >= 1:
        text = f"{seconds:.2f} sec"
    elif seconds >= 1e-3:
        text = f"{seconds * 1e3:.2f} ms"
    else:
        text = f"{seconds * 1e6:.1f} us"
    return text


def _print_table(base_parser, all_parsers, results):
    """Print a right-aligned table of timings, one row per query.

    Args:
        base_parser: Parser whose timing is the denominator of every ratio.
        all_parsers: Parser names, one column each, in display order.
        results: Mapping of ``"<parser>:<query>"`` to seconds.

    A cell shows ``"<seconds> (<ratio to base>)"``, or ``"N/A"`` when either
    the parser's or the base parser's timing is unavailable.
    """
    query_width = max(len(q) for q in QUERIES)
    query_width = max(query_width, len("Query"))

    # Pre-compute all cells to determine column widths
    cells = {}
    for query_name in QUERIES:
        base_time = results.get(f"{base_parser}:{query_name}")
        for parser in all_parsers:
            timing = results.get(f"{parser}:{query_name}")
            if timing is None or not base_time:
                cells[(parser, query_name)] = "N/A"
                continue
            ratio = timing / base_time
            cells[(parser, query_name)] = f"{timing:.6f} ({_fmt_ratio(ratio)})"

    titles = {parser: DISPLAY_NAMES.get(parser, parser) for parser in all_parsers}
    col_widths = {
        parser: max([len(titles[parser])] + [len(cells[(parser, q)]) for q in QUERIES])
        for parser in all_parsers
    }

    header_cells = [f"{'Query':>{query_width}}"]
    rule_cells = ["-" * query_width]
    for parser in all_parsers:
        header_cells.append(f"{titles[parser]:>{col_widths[parser]}}")
        rule_cells.append("-" * col_widths[parser])

    print()
    print("| " + " | ".join(header_cells) + " |")
    print("| " + " | ".join(rule_cells) + " |")

    for query_name in QUERIES:
        row_cells = [f"{query_name:>{query_width}}"]
        row_cells.extend(
            f"{cells[(parser, query_name)]:>{col_widths[parser]}}" for parser in all_parsers
        )
        print("| " + " | ".join(row_cells) + " |")


# --- Subprocess entry point for .so mode ---


def _has_so_files():
    """Return True if any ``.so`` file exists under ``sqlglot/`` (relative to the cwd)."""
    import glob

    compiled = glob.glob("sqlglot/**/*.so", recursive=True)
    return len(compiled) > 0
def _run_subprocess():
    """Run sqlglot benchmarks and print results to stdout as key=value lines."""
    global _quiet
    _quiet = True if os.environ.get("_BENCH_QUIET") else False

    timings = {}
    _bench_sqlglot(timings)
    for key, value in timings.items():
        print(f"{key}={value}")


# --- Main ---


def _parse_args():
    """Parse the benchmark script's command-line options from ``sys.argv``."""
    options = (
        (("--json",), {"metavar": "FILE", "help": "Write results as JSON to FILE"}),
        (("--quiet",), {"action": "store_true", "help": "Suppress progress output"}),
        (
            ("--sqlglot-only",),
            {
                "action": "store_true",
                "help": "Only benchmark sqlglot/sqlglotc (skip third-party parsers)",
            },
        ),
    )

    cli = argparse.ArgumentParser(description="SQLGlot parser benchmarks")
    for flags, spec in options:
        cli.add_argument(*flags, **spec)
    return cli.parse_args()


if __name__ == "__main__":
    if os.environ.get("_BENCH_SUBPROCESS"):
        # Child process: benchmark whatever sqlglot build is importable and report it.
        _run_subprocess()
    else:
        args = _parse_args()
        _quiet = args.quiet
        results = {}

        if _has_so_files():
            # Compiled build present: time it in a child process first, then
            # hide the .so files and time pure Python in this process.
            if not _quiet:
                print("=== Running sqlglot[c] ===", flush=True)

            child_env = dict(os.environ, _BENCH_SUBPROCESS="1")
            if _quiet:
                child_env["_BENCH_QUIET"] = "1"

            proc = subprocess.run(
                [sys.executable, __file__],
                env=child_env,
                capture_output=True,
                text=True,
                check=True,
            )
            for line in proc.stdout.splitlines():
                key, has_equals, value = line.partition("=")
                if has_equals:
                    results[key] = float(value)
                elif not _quiet:
                    # Progress output from the child; pass it through.
                    print(line)

            if not _quiet:
                print("\n=== Hiding .so files ===", flush=True)
            subprocess.run(["make", "hidec"], check=True, capture_output=True)

            try:
                if not _quiet:
                    print("\n=== Running pure Python ===", flush=True)
                _bench_sqlglot(results)

                if args.sqlglot_only:
                    available = []
                else:
                    if not _quiet:
                        print("\n=== Running third-party parsers ===", flush=True)
                    available = _bench_third_party(results)
            finally:
                # Always restore the .so files, even if a benchmark failed.
                subprocess.run(["make", "showc"], capture_output=True)

            base_parser = "sqlglot"
            leading_columns = ["sqlglot", "sqlglotc"]
        else:
            base_parser = _bench_sqlglot(results)
            available = [] if args.sqlglot_only else _bench_third_party(results)
            leading_columns = [base_parser]

        if args.json:
            with open(args.json, "w") as f:
                json.dump(results, f, indent=2)
        else:
            _print_table(base_parser, leading_columns + available, results)
def mocked_import(*args, **kwargs):
    """Import a module, falling back to a ``MagicMock`` if the import raises.

    All arguments are forwarded to ``importlib.import_module``. When that call
    raises any ``Exception``, a ``MagicMock`` is returned instead, with its
    ``__name__`` set to the first positional argument.
    """
    try:
        module = import_module(*args, **kwargs)
    except Exception:
        # Deliberately broad: any import failure is replaced by a stand-in.
        module = mock.MagicMock()
        module.__name__ = args[0]
    return module
```python
from sqlglot import parse_one

ast = parse_one("SELECT a FROM (SELECT a FROM x) AS x")
```

An AST is a data structure that represents a SQL statement. The best way to glean the structure of a particular AST is Python's built-in `repr` function:

```python
repr(ast)

# Select(
#   expressions=[
#     Column(
#       this=Identifier(this=a, quoted=False))],
#   from=From(
#     this=Subquery(
#       this=Select(
#         expressions=[
#           Column(
#             this=Identifier(this=a, quoted=False))],
#         from=From(
#           this=Table(
#             this=Identifier(this=x, quoted=False)))),
#       alias=TableAlias(
#         this=Identifier(this=x, quoted=False)))))
```

This is a textual representation of the internal data structure. Here's a breakdown of some of its components:

```
`Select` is the expression type
  |
Select(
  expressions=[  ------------------------------- `expressions` is a child key of `Select`
    Column(  ----------------------------------- `Column` is the expression type of the child
      this=Identifier(this=a, quoted=False))],
  from=From(  ---------------------------------- `from` is another child key of `Select`
  ...
```

## Nodes of the tree

The nodes in this tree are instances of `sqlglot.Expression`. Nodes reference their children in `args` and their parent in `parent`:

```python
ast.args
# {
#    "expressions": [Column(this=...)],
#    "from": From(this=...),
#    ...
# }

ast.args["expressions"][0]
# Column(this=...)

ast.args["expressions"][0].args["this"]
# Identifier(this=...)

ast.args["from"]
# From(this=...)

assert ast.args["expressions"][0].args["this"].parent.parent is ast
```

Children can either be:
1. An Expression instance
2. A list of Expression instances
3. Another Python object, such as str or bool. This will always be a leaf node in the tree.

Navigating this tree requires an understanding of the different Expression types. The best way to browse Expression types is directly in the code, in the [`sqlglot/expressions/`](../sqlglot/expressions/) package.
Let's look at a simplified version of one Expression type:

```python
class Column(Expression):
    arg_types = {
      "this": True,
      "table": False,
      ...
    }
```

`Column` subclasses `Expression`. `arg_types` is a class attribute that specifies the possible children. The `args` keys of an Expression instance correspond to the `arg_types` keys of its class. The values of the `arg_types` dict are `True` if the key is required.

There are some common `arg_types` keys:
- "this": This is typically used for the primary child. In `Column`, "this" is the identifier for the column's name.
- "expression": This is typically used for the secondary child.
- "expressions": This is typically used for a primary list of children.

There aren't strict rules for when these keys are used, but they help with some of the convenience methods available on all Expression types:
- `Expression.this`: shorthand for `self.args.get("this")`
- `Expression.expression`: similarly, shorthand for the expression arg
- `Expression.expressions`: similarly, shorthand for the expressions list arg
- `Expression.name`: text name for whatever `this` is

`arg_types` doesn't specify the possible Expression types of children. This can be a challenge when you are writing code to traverse a particular AST and you don't know what to expect. A common trick is to parse an example query and print out the `repr`.

You can traverse an AST using just args, but there are some higher-order functions for programmatic traversal.

> [!NOTE]
> SQLGlot can parse and generate SQL for many different dialects. However, there is only a single set of Expression types for all dialects. We like to say that the AST can represent the _superset_ of all dialects.
>
> Sometimes, SQLGlot will parse SQL from a dialect into Expression types you didn't expect:
>
> ```python
> ast = parse_one("SELECT NOW()", dialect="postgres")
>
> repr(ast)
> # Select(
> #   expressions=[
> #     CurrentTimestamp()])
> ```
>
> This is because SQLGlot tries to converge dialects on a standard AST. This means you can often write one piece of code that handles multiple dialects.

## Traversing the AST

Analyzing a SQL statement requires traversing this data structure. There are a few ways to do this:

### Args

If you know the structure of an AST, you can use `Expression.args` just like above. However, this can be very limited if you're dealing with arbitrary SQL.

### Walk methods

The walk methods of `Expression` (`find`, `find_all`, and `walk`) are the simplest way to analyze an AST.

`find` and `find_all` search an AST for specific Expression types:

```python
from sqlglot import exp

ast.find(exp.Select)
# Select(
#   expressions=[
#     Column(
#       this=Identifier(this=a, quoted=False))],
# ...

list(ast.find_all(exp.Select))
# [Select(
#   expressions=[
#     Column(
#       this=Identifier(this=a, quoted=False))],
# ...
```

Both `find` and `find_all` are built on `walk`, which gives finer-grained control:

```python
for node in ast.walk():
    ...
```

> [!WARNING]
> Here's a common pitfall of the walk methods:
> ```python
> ast.find_all(exp.Table)
> ```
> At first glance, this seems like a great way to find all tables in a query. However, `Table` instances are not always tables in your database. Here's an example where this fails:
> ```python
> ast = parse_one("""
> WITH x AS (
>   SELECT a FROM y
> )
> SELECT a FROM x
> """)
>
> # This is NOT a good way to find all tables in the query!
> for table in ast.find_all(exp.Table):
>     print(table)
>
> # x  -- this is a common table expression, NOT an actual table
> # y
> ```
>
> For programmatic traversal of ASTs that requires deeper semantic understanding of a query, you need "scope".
### Scope Scope is a traversal module that handles more semantic context of SQL queries. It's harder to use than the `walk` methods but is more powerful: ```python from sqlglot.optimizer.scope import build_scope ast = parse_one(""" WITH x AS ( SELECT a FROM y ) SELECT a FROM x """) root = build_scope(ast) for scope in root.traverse(): print(scope) # Scope # y.c => Scope # y.b => Scope
Into: SELECT T.*, FROM
def _add_date_sql(self: Exasol.Generator, expression: DATE_ADD_OR_SUB) -> str:
    """Generate an ``ADD_<unit>S(...)`` call for a date add/subtract expression.

    The unit is read from the interval operand when the amount is an
    ``exp.Interval``, otherwise from the expression itself, and defaults to
    ``DAY``. Units outside ``DATE_UNITS`` are reported through
    ``self.unsupported`` and rendered with ``function_fallback_sql``.
    ``exp.DateSub`` is expressed by negating the offset.
    """
    amount = expression.expression
    amount_is_interval = isinstance(amount, exp.Interval)

    unit_source = amount if amount_is_interval else expression
    unit = (unit_source.text("unit") or "DAY").upper()

    if unit not in DATE_UNITS:
        self.unsupported(f"'{unit}' is not supported in Exasol.")
        return self.function_fallback_sql(expression)

    # For an interval operand, only its magnitude is passed to the function.
    offset_expr: exp.Expr = amount.this if amount_is_interval else amount
    if isinstance(expression, exp.DateSub):
        offset_expr = exp.Neg(this=offset_expr)

    return self.func(f"ADD_{unit}S", expression.this, offset_expr)


def _group_by_all(expression: exp.Expr) -> exp.Expr:
    """Rewrite ``GROUP BY ALL`` on a ``Select`` into an explicit form.

    Non-``Select`` nodes and selects without ``GROUP BY ALL`` are returned
    untouched. For a star projection the grouping is replaced by ``DISTINCT``.
    Otherwise the group lists the 1-based positions of the projections that
    contain no aggregate function, or is removed when there are none.

    Raises:
        UnsupportedError: for a star projection combined with aggregates.
    """
    if not isinstance(expression, exp.Select):
        return expression

    group = expression.args.get("group")
    if not (group and group.args.get("all")):
        return expression

    if expression.is_star:
        for projection in expression.expressions:
            if projection.find(exp.AggFunc):
                raise UnsupportedError(
                    "GROUP BY ALL with star projection and aggregates is not supported by Exasol"
                )
        expression.set("distinct", exp.Distinct())
        expression.set("group", None)
        return expression

    group_positions = []
    for position, projection in enumerate(expression.expressions, start=1):
        if not projection.find(exp.AggFunc):
            group_positions.append(exp.Literal.number(position))

    if group_positions:
        group.set("expressions", group_positions)
        group.set("all", None)
    else:
        # Every projection is an aggregate, so there is nothing to group by.
        expression.set("group", None)

    return expression
TokenType.RLIKE, "SEPARATOR": TokenType.SEPARATOR, "SYSTIMESTAMP": TokenType.SYSTIMESTAMP, } KEYWORDS.pop("DIV") Parser = ExasolParser class Generator(generator.Generator): # https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType STRING_TYPE_MAPPING = { exp.DType.BLOB: "VARCHAR", exp.DType.LONGBLOB: "VARCHAR", exp.DType.LONGTEXT: "VARCHAR", exp.DType.MEDIUMBLOB: "VARCHAR", exp.DType.MEDIUMTEXT: "VARCHAR", exp.DType.TINYBLOB: "VARCHAR", exp.DType.TINYTEXT: "VARCHAR", # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm exp.DType.TEXT: "LONG VARCHAR", exp.DType.VARBINARY: "VARCHAR", } # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, **STRING_TYPE_MAPPING, exp.DType.TINYINT: "SMALLINT", exp.DType.MEDIUMINT: "INT", exp.DType.DECIMAL32: "DECIMAL", exp.DType.DECIMAL64: "DECIMAL", exp.DType.DECIMAL128: "DECIMAL", exp.DType.DECIMAL256: "DECIMAL", exp.DType.DATETIME: "TIMESTAMP", exp.DType.TIMESTAMPTZ: "TIMESTAMP", exp.DType.TIMESTAMPLTZ: "TIMESTAMP", exp.DType.TIMESTAMPNTZ: "TIMESTAMP", } def datatype_sql(self, expression: exp.DataType) -> str: # Exasol supports a fixed default precision of 3 for TIMESTAMP WITH LOCAL TIME ZONE # and does not allow specifying a different custom precision if expression.is_type(exp.DType.TIMESTAMPLTZ): return "TIMESTAMP WITH LOCAL TIME ZONE" return super().datatype_sql(expression) TRANSFORMS = { **generator.Generator.TRANSFORMS, # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm exp.All: rename_func("EVERY"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_and.htm exp.BitwiseAnd: rename_func("BIT_AND"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_or.htm exp.BitwiseOr: rename_func("BIT_OR"), # 
https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_not.htm exp.BitwiseNot: rename_func("BIT_NOT"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_lshift.htm exp.BitwiseLeftShift: rename_func("BIT_LSHIFT"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_rshift.htm exp.BitwiseRightShift: rename_func("BIT_RSHIFT"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_xor.htm exp.BitwiseXor: rename_func("BIT_XOR"), exp.CurrentSchema: lambda *_: "CURRENT_SCHEMA", exp.DateDiff: _date_diff_sql, exp.DateAdd: _add_date_sql, exp.TsOrDsAdd: _add_date_sql, exp.DateSub: _add_date_sql, # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/div.htm#DIV exp.IntDiv: rename_func("DIV"), exp.TsOrDsDiff: _date_diff_sql, exp.DateTrunc: _date_trunc_sql, exp.DayOfWeek: lambda self, e: f"CAST(TO_CHAR({self.sql(e, 'this')}, 'D') AS INTEGER)", exp.DatetimeTrunc: timestamptrunc_sql(), exp.GroupConcat: lambda self, e: groupconcat_sql( self, e, func_name="LISTAGG", within_group=True ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/edit_distance.htm#EDIT_DISTANCE exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( rename_func("EDIT_DISTANCE") ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm exp.Mod: rename_func("MOD"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/from_posix_time.htm exp.UnixToTime: lambda self, e: self.func("FROM_POSIX_TIME", e.this), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/rank.htm exp.Rank: unsupported_args("expressions")(lambda *_: "RANK()"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/dense_rank.htm exp.DenseRank: 
unsupported_args("expressions")(lambda *_: "DENSE_RANK()"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/regexp_substr.htm exp.RegexpExtract: unsupported_args("parameters", "group")( rename_func("REGEXP_SUBSTR") ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/regexp_replace.htm exp.RegexpReplace: unsupported_args("modifiers")(rename_func("REGEXP_REPLACE")), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/var_pop.htm exp.VariancePop: rename_func("VAR_POP"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/approximate_count_distinct.htm exp.ApproxDistinct: unsupported_args("accuracy")( rename_func("APPROXIMATE_COUNT_DISTINCT") ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/to_char%20(datetime).htm exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)), exp.ToChar: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/to_date.htm exp.TsOrDsToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)), exp.TimeStrToTime: timestrtotime_sql, exp.TimestampTrunc: _timestamp_trunc_sql, exp.StrToTime: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)), exp.CurrentUser: lambda *_: "CURRENT_USER", exp.AtTimeZone: lambda self, e: self.func( "CONVERT_TZ", e.this, "'UTC'", e.args.get("zone"), ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/instr.htm exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="INSTR", supports_position=True, supports_occurrence=True ), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/hash_sha%5B1%5D.htm#HASH_SHA%5B1%5D exp.SHA: rename_func("HASH_SHA"), # 
https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/hash_sha256.htm # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/hash_sha512.htm exp.SHA2: _sha2_sql, exp.MD5: rename_func("HASH_MD5"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/hashtype_md5.htm exp.MD5Digest: rename_func("HASHTYPE_MD5"), # https://docs.exasol.com/db/latest/sql/create_view.htm exp.CommentColumnConstraint: lambda self, e: f"COMMENT IS {self.sql(e, 'this')}", exp.Select: transforms.preprocess( [ _qualify_unscoped_star, _add_local_prefix_for_aliases, _group_by_all, ] ), exp.SubstringIndex: _substring_index_sql, exp.WeekOfYear: rename_func("WEEK"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/to_date.htm exp.Date: rename_func("TO_DATE"), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/to_timestamp.htm exp.Timestamp: rename_func("TO_TIMESTAMP"), exp.Quarter: lambda self, e: f"CEIL(MONTH(TO_DATE({self.sql(e, 'this')}))/3)", exp.LastDay: no_last_day_sql, } # https://docs.exasol.com/db/7.1/sql_references/system_tables/metadata/exa_sql_keywords.htm RESERVED_KEYWORDS = { "absolute", "action", "add", "after", "all", "allocate", "alter", "and", "any", "append", "are", "array", "as", "asc", "asensitive", "assertion", "at", "attribute", "authid", "authorization", "before", "begin", "between", "bigint", "binary", "bit", "blob", "blocked", "bool", "boolean", "both", "by", "byte", "call", "called", "cardinality", "cascade", "cascaded", "case", "casespecific", "cast", "catalog", "chain", "char", "character", "character_set_catalog", "character_set_name", "character_set_schema", "characteristics", "check", "checked", "clob", "close", "coalesce", "collate", "collation", "collation_catalog", "collation_name", "collation_schema", "column", "commit", "condition", "connect_by_iscycle", "connect_by_isleaf", "connect_by_root", 
"connection", "constant", "constraint", "constraint_state_default", "constraints", "constructor", "contains", "continue", "control", "convert", "corresponding", "create", "cs", "csv", "cube", "current", "current_cluster", "current_cluster_uid", "current_date", "current_path", "current_role", "current_schema", "current_session", "current_statement", "current_time", "current_timestamp", "current_user", "cursor", "cycle", "data", "datalink", "datetime_interval_code", "datetime_interval_precision", "day", "dbtimezone", "deallocate", "dec", "decimal", "declare", "default", "default_like_escape_character", "deferrable", "deferred", "defined", "definer", "delete", "deref", "derived", "desc", "describe", "descriptor", "deterministic", "disable", "disabled", "disconnect", "dispatch", "distinct", "dlurlcomplete", "dlurlpath", "dlurlpathonly", "dlurlscheme", "dlurlserver", "dlvalue", "do", "domain", "double", "drop", "dynamic", "dynamic_function", "dynamic_function_code", "each", "else", "elseif", "elsif", "emits", "enable", "enabled", "end", "end-exec", "endif", "enforce", "equals", "errors", "escape", "except", "exception", "exec", "execute", "exists", "exit", "export", "external", "extract", "false", "fbv", "fetch", "file", "final", "first", "float", "following", "for", "forall", "force", "format", "found", "free", "from", "fs", "full", "function", "general", "generated", "geometry", "get", "global", "go", "goto", "grant", "granted", "group", "group_concat", "grouping", "groups", "hashtype", "hashtype_format", "having", "high", "hold", "hour", "identity", "if", "ifnull", "immediate", "impersonate", "implementation", "import", "in", "index", "indicator", "inner", "inout", "input", "insensitive", "insert", "instance", "instantiable", "int", "integer", "integrity", "intersect", "interval", "into", "inverse", "invoker", "is", "iterate", "join", "key_member", "key_type", "large", "last", "lateral", "ldap", "leading", "leave", "left", "level", "like", "limit", "listagg", 
"localtime", "localtimestamp", "locator", "log", "longvarchar", "loop", "low", "map", "match", "matched", "merge", "method", "minus", "minute", "mod", "modifies", "modify", "module", "month", "names", "national", "natural", "nchar", "nclob", "new", "next", "nls_date_format", "nls_date_language", "nls_first_day_of_week", "nls_numeric_characters", "nls_timestamp_format", "no", "nocycle", "nologging", "none", "not", "null", "nullif", "number", "numeric", "nvarchar", "nvarchar2", "object", "of", "off", "old", "on", "only", "open", "option", "options", "or", "order", "ordering", "ordinality", "others", "out", "outer", "output", "over", "overlaps", "overlay", "overriding", "pad", "parallel_enable", "parameter", "parameter_specific_catalog", "parameter_specific_name", "parameter_specific_schema", "parquet", "partial", "path", "permission", "placing", "plus", "preceding", "preferring", "prepare", "preserve", "prior", "privileges", "procedure", "profile", "qualify", "random", "range", "read", "reads", "real", "recovery", "recursive", "ref", "references", "referencing", "refresh", "regexp_like", "relative", "release", "rename", "repeat", "replace", "restore", "restrict", "result", "return", "returned_length", "returned_octet_length", "returns", "revoke", "right", "rollback", "rollup", "routine", "row", "rows", "rowtype", "savepoint", "schema", "scope", "scope_user", "script", "scroll", "search", "second", "section", "security", "select", "selective", "self", "sensitive", "separator", "sequence", "session", "session_user", "sessiontimezone", "set", "sets", "shortint", "similar", "smallint", "some", "source", "space", "specific", "specifictype", "sql", "sql_bigint", "sql_bit", "sql_char", "sql_date", "sql_decimal", "sql_double", "sql_float", "sql_integer", "sql_longvarchar", "sql_numeric", "sql_preprocessor_script", "sql_real", "sql_smallint", "sql_timestamp", "sql_tinyint", "sql_type_date", "sql_type_timestamp", "sql_varchar", "sqlexception", "sqlstate", "sqlwarning", 
@unsupported_args("flag")
def regexplike_sql(self, expression: exp.RegexpLike) -> str:
    """
    Generate `<this> REGEXP_LIKE <pattern>` for a regular-expression match.

    Unless the node carries a truthy `full_match` arg, the pattern is widened with a
    leading and trailing `.*` before rendering. String-literal patterns are rewritten
    into a new literal; any other pattern is wrapped in a parenthesized `Concat`.

    Note that the widened pattern is written back onto `expression` in place.
    """
    if expression.args.get("full_match"):
        return self.binary(expression, "REGEXP_LIKE")

    pattern = expression.expression
    if pattern.is_string:
        # Literal pattern: fold the wildcards straight into the string
        widened = exp.Literal.string(f".*{pattern.name}.*")
    else:
        # Dynamic pattern: concatenate the wildcards around it at query time
        pieces = [exp.Literal.string(".*"), pattern, exp.Literal.string(".*")]
        widened = exp.Paren(this=exp.Concat(expressions=pieces))

    expression.set("expression", widened)
    return self.binary(expression, "REGEXP_LIKE")
def _cap_data_type_precision(expression: exp.DataType, max_precision: int = 6) -> exp.DataType:
    """
    Build a data type whose precision is capped at `max_precision` digits.

    The first `exp.DataTypeParam` found under `expression` is treated as the precision.
    If it is an integer, the smaller of it and `max_precision` is used. If there is no
    such parameter, or it is not an integer, the precision defaults to `max_precision`.

    Args:
        expression: The data type whose precision should be capped.
        max_precision: The upper bound, also used as the default precision.

    Returns:
        A new `exp.DataType` with the same `this` and a single precision parameter.
    """
    precision = max_precision

    declared = expression.find(exp.DataTypeParam)
    if declared and declared.this.is_int:
        precision = min(declared.this.to_py(), max_precision)

    capped_param = exp.DataTypeParam(this=exp.Literal.number(precision))
    return exp.DataType(this=expression.this, expressions=[capped_param])
Key differences from T-SQL: - Case-sensitive identifiers (unlike T-SQL which is case-insensitive) - Limited data type support with mappings to supported alternatives - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents References: - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area """ # Fabric is case-sensitive unlike T-SQL which is case-insensitive NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE class Tokenizer(TSQL.Tokenizer): # Override T-SQL tokenizer to handle TIMESTAMP differently # In T-SQL, TIMESTAMP is a synonym for ROWVERSION, but in Fabric we want it to be a datetime type # Also add UTINYINT keyword mapping since T-SQL doesn't have it KEYWORDS = { **TSQL.Tokenizer.KEYWORDS, "TIMESTAMP": TokenType.TIMESTAMP, "UTINYINT": TokenType.UTINYINT, } Parser = FabricParser class Generator(TSQL.Generator): # Fabric-specific type mappings - override T-SQL types that aren't supported # Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types TYPE_MAPPING = { **TSQL.Generator.TYPE_MAPPING, exp.DType.DATETIME: "DATETIME2", exp.DType.DECIMAL: "DECIMAL", exp.DType.IMAGE: "VARBINARY", exp.DType.INT: "INT", exp.DType.JSON: "VARCHAR", exp.DType.MONEY: "DECIMAL", exp.DType.NCHAR: "CHAR", exp.DType.NVARCHAR: "VARCHAR", exp.DType.ROWVERSION: "ROWVERSION", exp.DType.SMALLDATETIME: "DATETIME2", exp.DType.SMALLMONEY: "DECIMAL", exp.DType.TIMESTAMP: "DATETIME2", exp.DType.TIMESTAMPNTZ: "DATETIME2", exp.DType.TIMESTAMPTZ: "DATETIME2", exp.DType.TINYINT: "SMALLINT", exp.DType.UTINYINT: "SMALLINT", exp.DType.UUID: "UNIQUEIDENTIFIER", exp.DType.XML: "VARCHAR", } TRANSFORMS = { **TSQL.Generator.TRANSFORMS, exp.Create: 
def cast_sql(self, expression: exp.Cast, safe_prefix: str | None = None) -> str:
    """
    Generate a CAST, emitting DATETIMEOFFSET for TIMESTAMPTZ casts under AT TIME ZONE.

    A cast to TIMESTAMPTZ whose nearest `exp.AtTimeZone` / `exp.Select` ancestor is an
    `exp.AtTimeZone` is rendered as `CAST(<operand> AS DATETIMEOFFSET(<p>))`, where `<p>`
    is the cast's precision capped at 6. Every other cast is delegated to the parent
    generator.

    https://learn.microsoft.com/en-us/sql/t-sql/data-types/datetimeoffset-transact-sql#microsoft-fabric-support

    Args:
        expression: The cast expression to generate.
        safe_prefix: Forwarded to the parent implementation for regular casts.

    Returns:
        The generated SQL string.
    """
    if not expression.is_type(exp.DType.TIMESTAMPTZ):
        return super().cast_sql(expression, safe_prefix)

    # Stop at the closest AT TIME ZONE or SELECT: hitting a SELECT first means the
    # cast is not an operand of an AT TIME ZONE in the current query scope.
    at_time_zone = expression.find_ancestor(exp.AtTimeZone, exp.Select)
    if not isinstance(at_time_zone, exp.AtTimeZone):
        return super().cast_sql(expression, safe_prefix)

    # Get the precision from the original TIMESTAMPTZ cast and cap it to 6
    capped_data_type = _cap_data_type_precision(expression.to, max_precision=6)
    precision = capped_data_type.find(exp.DataTypeParam)
    precision_value = precision.this.to_py() if precision and precision.this.is_int else 6

    # Render the operand through this generator. Interpolating the expression object
    # directly would stringify it with the default dialect and bypass the Fabric/T-SQL
    # identifier quoting, function renames and type mappings.
    operand = self.sql(expression, "this")

    # Build the cast by hand to bypass the default TIMESTAMPTZ type mapping.
    # NOTE(review): `safe_prefix` is not applied on this path (unchanged from before) -
    # confirm whether a TRY_CAST should be preserved here.
    return f"CAST({operand} AS DATETIMEOFFSET({precision_value}))"
def unixtotime_sql(self, expression: exp.UnixToTime) -> str:
    """
    Generate a unix-seconds to timestamp conversion as a DATEADD over the epoch.

    The input is multiplied by 1e6, rounded to zero decimals and cast to BIGINT, then
    added as MICROSECONDS to '1970-01-01' cast to DATETIME2(6).

    Only a missing scale or `exp.UnixToTime.SECONDS` is handled; any other scale is
    reported through `self.unsupported` and an empty string is returned.
    """
    scale = expression.args.get("scale")
    if scale not in (None, exp.UnixToTime.SECONDS):
        self.unsupported(f"UnixToTime scale {scale} is not supported by Fabric")
        return ""

    # Seconds -> whole microseconds (rounded so the BIGINT cast sees no fraction)
    microseconds = exp.cast(
        exp.func("round", expression.this * exp.Literal.number("1e6"), 0),
        exp.DType.BIGINT,
    )

    # The epoch, parsed with the Fabric dialect so the type keeps its precision
    epoch = exp.cast("'1970-01-01'", "datetime2(6)", dialect="fabric")

    return self.sql(
        exp.DateAdd(
            this=epoch,
            expression=microseconds,
            unit=exp.Literal.string("MICROSECONDS"),
        )
    )
def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    """
    Generate date addition / subtraction using DATE_ADD or ADD_MONTHS.

    The unit is looked up in `DATE_DELTA_INTERVAL` to pick the function and a multiplier
    (e.g. YEAR -> ADD_MONTHS * 12). Units missing from that table fall back to DATE_ADD
    with a multiplier of 1. `exp.DateSub` negates the multiplier. A `exp.TsOrDsAdd`
    without a unit is emitted as a plain DATE_ADD.
    """
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    func_name, factor = DATE_DELTA_INTERVAL.get(
        expression.text("unit").upper(), ("DATE_ADD", 1)
    )
    if isinstance(expression, exp.DateSub):
        factor = -factor

    amount = expression.expression
    if isinstance(amount, exp.Literal):
        # Fold the multiplier into the literal so the output holds a single number
        base = amount.to_py() if amount.is_number else int(amount.name)
        amount = exp.Literal.number(base * factor)
    elif factor != 1:
        # Non-literal amounts get an explicit multiplication in the generated SQL
        amount = amount * exp.Literal.number(factor)

    return self.func(func_name, expression.this, amount)
def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """
    Generate SQL for `exp.JSONFormat`.

    - If the operand is not a PARSE_JSON call (per `is_parse_json`), emit
      `TO_JSON(<operand>[, <options>])`.
    - If it is a PARSE_JSON call over a non-string argument, emit the operand as is.
    - If it is a PARSE_JSON call over a string literal, round-trip the literal through
      FROM_JSON / TO_JSON inside a dummy array and strip the array delimiters again.
    """
    this = expression.this

    if not is_parse_json(this):
        return self.func("TO_JSON", this, expression.args.get("options"))

    if not this.this.is_string:
        return self.sql(this)

    # Since FROM_JSON requires a nested type, we always wrap the json string with
    # an array to ensure that "naked" strings like "'a'" will be handled correctly
    wrapped_json = exp.Literal.string(f"[{this.this.name}]")

    schema = self.func("SCHEMA_OF_JSON", wrapped_json)
    parsed = self.func("FROM_JSON", wrapped_json, schema)
    serialized = self.func("TO_JSON", parsed)

    # This strips the [, ] delimiters of the dummy array printed by TO_JSON
    return self.func("REGEXP_EXTRACT", serialized, "'^.(.*).$'", "1")
def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """
    Generate SQL for `exp.TsOrDsToDate`.

    A format other than Hive's default TIME_FORMAT / DATE_FORMAT is passed on as the
    second TO_DATE argument. Otherwise, when the parent node is one of the generator's
    `TS_OR_DS_EXPRESSIONS`, only the operand is emitted without a TO_DATE wrapper; in
    all remaining cases a single-argument TO_DATE is produced.
    """
    fmt = self.format_time(expression)
    uses_custom_format = bool(fmt) and fmt not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT)
    if uses_custom_format:
        return self.func("TO_DATE", expression.this, fmt)

    if isinstance(expression.parent, self.TS_OR_DS_EXPRESSIONS):
        return self.sql(expression, "this")

    return self.func("TO_DATE", expression.this)
COERCES_TO[target_type] |= exp.DataType.TEXT_TYPES TIME_MAPPING = { "y": "%Y", "Y": "%Y", "YYYY": "%Y", "yyyy": "%Y", "YY": "%y", "yy": "%y", "MMMM": "%B", "MMM": "%b", "MM": "%m", "M": "%-m", "dd": "%d", "d": "%-d", "HH": "%H", "H": "%-H", "hh": "%I", "h": "%-I", "mm": "%M", "m": "%-M", "ss": "%S", "s": "%-S", "SSSSSS": "%f", "a": "%p", "DD": "%j", "D": "%-j", "E": "%a", "EE": "%a", "EEE": "%a", "EEEE": "%A", "z": "%Z", "Z": "%z", } DATE_FORMAT = "'yyyy-MM-dd'" DATEINT_FORMAT = "'yyyyMMdd'" TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" class JSONPathTokenizer(jsonpath.JSONPathTokenizer): VAR_TOKENS = { *jsonpath.JSONPathTokenizer.VAR_TOKENS, TokenType.DASH, } class Tokenizer(tokens.Tokenizer): QUOTES = ["'", '"'] IDENTIFIERS = ["`"] STRING_ESCAPES = ["\\"] SINGLE_TOKENS = { **tokens.Tokenizer.SINGLE_TOKENS, "$": TokenType.PARAMETER, } KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "ADD ARCHIVE": TokenType.COMMAND, "ADD ARCHIVES": TokenType.COMMAND, "ADD FILE": TokenType.COMMAND, "ADD FILES": TokenType.COMMAND, "ADD JAR": TokenType.COMMAND, "ADD JARS": TokenType.COMMAND, "MINUS": TokenType.EXCEPT, "MSCK REPAIR": TokenType.COMMAND, "REFRESH": TokenType.REFRESH, "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, "VERSION AS OF": TokenType.VERSION_SNAPSHOT, "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, } NUMERIC_LITERALS = { "L": "BIGINT", "S": "SMALLINT", "Y": "TINYINT", "D": "DOUBLE", "F": "FLOAT", "BD": "DECIMAL", } Parser = HiveParser class Generator(generator.Generator): LIMIT_FETCH = "LIMIT" TABLESAMPLE_WITH_METHOD = False JOIN_HINTS = False TABLE_HINTS = False QUERY_HINTS = False INDEX_ON = "ON TABLE" EXTRACT_ALLOWS_QUOTES = False NVL2_SUPPORTED = False LAST_DAY_SUPPORTS_DATE_PART = False JSON_PATH_SINGLE_QUOTE_ESCAPE = True SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") SUPPORTS_TO_NUMBER = False WITH_PROPERTIES_PREFIX = "TBLPROPERTIES" PARSE_JSON_NAME: t.Optional[str] = None PAD_FILL_PATTERN_IS_REQUIRED = True SUPPORTS_MEDIAN = False ARRAY_SIZE_NAME = "SIZE" 
ALTER_SET_TYPE = "" EXPRESSIONS_WITHOUT_NESTED_CTES = { exp.Insert, exp.Select, exp.Subquery, exp.SetOperation, } SUPPORTED_JSON_PATH_PARTS = { exp.JSONPathKey, exp.JSONPathRoot, exp.JSONPathSubscript, exp.JSONPathWildcard, } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.BIT: "BOOLEAN", exp.DType.BLOB: "BINARY", exp.DType.DATETIME: "TIMESTAMP", exp.DType.ROWVERSION: "BINARY", exp.DType.TEXT: "STRING", exp.DType.TIME: "TIMESTAMP", exp.DType.TIMESTAMPNTZ: "TIMESTAMP", exp.DType.TIMESTAMPTZ: "TIMESTAMP", exp.DType.UTINYINT: "SMALLINT", exp.DType.VARBINARY: "BINARY", } TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.Property: property_sql, exp.AnyValue: rename_func("FIRST"), exp.ApproxDistinct: approx_count_distinct_sql, exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]), exp.ArrayConcat: rename_func("CONCAT"), exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), exp.ArraySort: _array_sort_sql, exp.With: no_recursive_cte_sql, exp.DateAdd: _add_date_sql, exp.DateDiff: _date_diff_sql, exp.DateStrToDate: datestrtodate_sql, exp.DateSub: _add_date_sql, exp.DateToDi: lambda self, e: ( f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)" ), exp.DiToDate: lambda self, e: ( f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})" ), exp.StorageHandlerProperty: lambda self, e: f"STORED BY {self.sql(e, 'this')}", exp.FromBase64: rename_func("UNBASE64"), exp.GenerateSeries: sequence_sql, exp.GenerateDateArray: sequence_sql, exp.If: if_sql(), exp.ILike: no_ilike_sql, exp.IntDiv: lambda self, e: self.binary(e, "DIV"), exp.IsNan: rename_func("ISNAN"), exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression), exp.JSONExtractScalar: lambda self, e: self.func( "GET_JSON_OBJECT", e.this, e.expression ), exp.JSONFormat: _json_format_sql, exp.Left: 
left_to_substring_sql, exp.Map: var_map_sql, exp.Max: max_or_greatest, exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.Min: min_or_least, exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), exp.NotNullColumnConstraint: lambda _, e: ( "" if e.args.get("allow_null") else "NOT NULL" ), exp.VarMap: var_map_sql, exp.Create: preprocess( [ remove_unique_constraints, ctas_with_tmp_tables_to_create_tmp_view, move_schema_columns_to_partitioned_by, ] ), exp.Quantile: rename_func("PERCENTILE"), exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), exp.RegexpExtract: regexp_extract_sql, exp.RegexpExtractAll: regexp_extract_sql, exp.RegexpReplace: regexp_replace_sql, exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), exp.RegexpSplit: rename_func("SPLIT"), exp.Right: right_to_substring_sql, exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.ArrayUniqueAgg: rename_func("COLLECT_SET"), exp.Split: lambda self, e: self.func( "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression, "'\\\\E'") ), exp.Select: transforms.preprocess( [ transforms.eliminate_qualify, transforms.eliminate_distinct_on, partial(transforms.unnest_to_explode, unnest_using_arrays_zip=False), transforms.any_to_exists, ] ), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="LOCATE", supports_position=True ), exp.StrToDate: _str_to_date_sql, exp.StrToTime: _str_to_time_sql, exp.StrToUnix: _str_to_unix_sql, exp.StructExtract: struct_extract_sql, exp.StarMap: rename_func("MAP"), exp.Table: transforms.preprocess([transforms.unnest_generate_series]), exp.TimeStrToDate: rename_func("TO_DATE"), exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimestampTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)), exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.ToBase64: rename_func("BASE64"), exp.TsOrDiToDi: lambda self, e: ( 
f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)" ), exp.TsOrDsAdd: _add_date_sql, exp.TsOrDsDiff: _date_diff_sql, exp.TsOrDsToDate: _to_date_sql, exp.TryCast: no_trycast_sql, exp.Trim: trim_sql, exp.Unicode: rename_func("ASCII"), exp.UnixToStr: lambda self, e: self.func( "FROM_UNIXTIME", e.this, time_format("hive")(self, e) ), exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), exp.Unnest: rename_func("EXPLODE"), exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", exp.NumberToStr: rename_func("FORMAT_NUMBER"), exp.National: lambda self, e: self.national_sql(e, prefix=""), exp.ClusteredColumnConstraint: lambda self, e: ( f"({self.expressions(e, 'this', indent=False)})" ), exp.NonClusteredColumnConstraint: lambda self, e: ( f"({self.expressions(e, 'this', indent=False)})" ), exp.NotForReplicationColumnConstraint: lambda *_: "", exp.OnProperty: lambda *_: "", exp.PartitionedByBucket: lambda self, e: self.func("BUCKET", e.expression, e.this), exp.PartitionByTruncate: lambda self, e: self.func("TRUNCATE", e.expression, e.this), exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY", exp.WeekOfYear: rename_func("WEEKOFYEAR"), exp.DayOfMonth: rename_func("DAYOFMONTH"), exp.DayOfWeek: rename_func("DAYOFWEEK"), exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( rename_func("LEVENSHTEIN") ), } PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, } TS_OR_DS_EXPRESSIONS: t.Tuple[t.Type[exp.Expr], ...] 
def parameter_sql(self, expression: exp.Parameter) -> str:
    """
    Generate a parameter reference as `${name}` or `${name:expression}`.

    When the parameter is the direct child of an `exp.EQ` that itself belongs to an
    `exp.SetItem`, the bare name is returned without the `${...}` wrapper.
    """
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")
    if expression_sql:
        this = f"{this}:{expression_sql}"

    parent = expression.parent
    in_set_assignment = isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem)
    if in_set_assignment:
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """
    Generate `CHANGE COLUMN <old> <new> <type>[ COMMENT <comment>]`.

    The new name falls back to the current name when `rename_to` is absent. A truthy
    `default`, `drop`, `visible` or `allow_null` arg is reported as unsupported, as is
    a missing data type; the statement is still emitted in both cases.

    Args:
        expression: The ALTER COLUMN expression to generate.

    Returns:
        The generated CHANGE COLUMN clause.
    """
    this = self.sql(expression, "this")
    new_name = self.sql(expression, "rename_to") or this
    dtype = self.sql(expression, "dtype")

    # Render the comment once and reuse it for both the check and the output
    comment_sql = self.sql(expression, "comment")
    comment = f" COMMENT {comment_sql}" if comment_sql else ""

    default = self.sql(expression, "default")
    visible = expression.args.get("visible")
    allow_null = expression.args.get("allow_null")
    drop = expression.args.get("drop")

    # `drop` used to be listed twice here; each unsupported option is checked once
    if default or drop or visible or allow_null:
        self.unsupported("Unsupported CHANGE COLUMN syntax")

    if not dtype:
        self.unsupported("CHANGE COLUMN without a type is not supported")

    return f"CHANGE COLUMN {this} {new_name} {dtype}{comment}"
class Materialize(Postgres):
    """Materialize dialect: the Postgres dialect with its own parser and generator tweaks."""

    Parser = MaterializeParser

    class Generator(Postgres.Generator):
        """SQL generator for Materialize, layered on the Postgres generator."""

        SUPPORTS_CREATE_TABLE_LIKE = False
        SUPPORTS_BETWEEN_FLAGS = False

        # Start from the Postgres transforms. The constraint and ON CONFLICT
        # nodes below render to an empty string, and CREATE is rewritten by two
        # preprocessing transforms before generation.
        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.AutoIncrementColumnConstraint: lambda self, e: "",
            exp.Create: preprocess(
                [
                    remove_unique_constraints,
                    ctas_with_tmp_tables_to_create_tmp_view,
                ]
            ),
            exp.GeneratedAsIdentityColumnConstraint: lambda self, e: "",
            exp.OnConflict: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "",
        }
        # Drop the inherited ToMap entry (raises KeyError if it is absent).
        # NOTE(review): presumably so `tomap_sql` below is used instead - confirm
        # against the generator's dispatch rules.
        del TRANSFORMS[exp.ToMap]

        def propertyeq_sql(self, expression: exp.PropertyEQ) -> str:
            """Render a property assignment with the ``=>`` operator."""
            operator = "=>"
            return self.binary(expression, operator)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render LIST and two-argument MAP types; defer everything else to Postgres."""
            if expression.is_type(exp.DType.LIST):
                if not expression.expressions:
                    return "LIST"
                return f"{self.expressions(expression, flat=True)} LIST"

            is_map = expression.is_type(exp.DType.MAP)
            if is_map and len(expression.expressions) == 2:
                key_type, value_type = expression.expressions
                return f"MAP[{self.sql(key_type)} => {self.sql(value_type)}]"

            return super().datatype_sql(expression)

        def list_sql(self, expression: exp.List) -> str:
            """Render ``LIST(<subquery>)`` or ``LIST[<items>]``.

            The function-call form is used when the first item is a SELECT.
            """
            first_item = seq_get(expression.expressions, 0)
            if isinstance(first_item, exp.Select):
                return self.func("LIST", first_item)

            items = self.expressions(expression, flat=True)
            return f"{self.normalize_func('LIST')}[{items}]"

        def tomap_sql(self, expression: exp.ToMap) -> str:
            """Render ``MAP(<subquery>)`` or ``MAP[<entries>]``.

            The function-call form is used when the operand is a SELECT.
            """
            operand = expression.this
            if isinstance(operand, exp.Select):
                return self.func("MAP", operand)

            entries = self.expressions(operand)
            return f"{self.normalize_func('MAP')}[{entries}]"
def date_add_sql(
    kind: str,
) -> t.Callable[[generator.Generator, exp.Expr], str]:
    """Build a generator callback that renders ``DATE_<kind>(this, INTERVAL ...)``.

    Args:
        kind: Suffix appended to ``DATE_`` to form the function name.

    Returns:
        A callable taking ``(generator, expression)`` and returning SQL.
    """

    def func(self: generator.Generator, expression: exp.Expr) -> str:
        function_name = f"DATE_{kind}"
        # The interval's amount is the node's `expression` argument; its unit
        # comes from `unit_to_var`.
        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
        return self.func(function_name, expression.this, interval)

    return func


def _ts_or_ds_to_date_sql(self: MySQL.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render ``exp.TsOrDsToDate``.

    Uses ``_str_to_date_sql`` when a ``format`` argument is present and
    ``DATE(value)`` otherwise.
    """
    if expression.args.get("format"):
        return _str_to_date_sql(self, expression)
    return self.func("DATE", expression.this)


def _remove_ts_or_ds_to_date(
    to_sql: t.Optional[t.Callable[[MySQL.Generator, exp.Expr], str]] = None,
    args: t.Tuple[str, ...] = ("this",),
) -> t.Callable[[MySQL.Generator, exp.Func], str]:
    """Build a callback that unwraps format-less TsOrDsToDate/TsOrDsToTimestamp arguments.

    Args:
        to_sql: Renderer applied after unwrapping. When omitted, the node is
            rendered with ``function_fallback_sql``.
        args: Names of the arguments to inspect on the node.

    Returns:
        A callable taking ``(generator, expression)`` and returning SQL. It
        rewrites the inspected arguments on ``expression`` in place.
    """

    def func(self: MySQL.Generator, expression: exp.Func) -> str:
        unwrappable = (exp.TsOrDsToDate, exp.TsOrDsToTimestamp)

        for arg_key in args:
            arg = expression.args.get(arg_key)
            if not isinstance(arg, unwrappable):
                continue
            if arg.args.get("format"):
                # A wrapper that carries a format is kept as-is.
                continue
            expression.set(arg_key, arg.this)

        if to_sql:
            return to_sql(self, expression)
        return self.function_fallback_sql(expression)

    return func
class Tokenizer(tokens.Tokenizer):
    # MySQL-specific tokenizer settings layered on `tokens.Tokenizer`.
    # NOTE(review): how each table below is interpreted is defined by the base
    # tokenizer, which is not visible here; the comments only restate the values.

    # String delimiters: single and double quotes.
    QUOTES = ["'", '"']
    # Comment openers: `--`, `#`, and the `/* ... */` pair.
    COMMENTS = ["--", "#", ("/*", "*/")]
    # Identifier delimiter: backtick.
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["'", '"', "\\"]
    # (prefix, suffix) pairs; `0b` / `0x` have an empty suffix.
    BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
    HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
    # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
    ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

    NESTED_COMMENTS = False

    # Base keywords plus MySQL additions; entries here override same-named base entries.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BLOB": TokenType.BLOB,
        "CHARSET": TokenType.CHARACTER_SET,
        "DISTINCTROW": TokenType.DISTINCT,
        "EXPLAIN": TokenType.DESCRIBE,
        "FORCE": TokenType.FORCE,
        "IGNORE": TokenType.IGNORE,
        "KEY": TokenType.KEY,
        "LOCK TABLES": TokenType.COMMAND,
        "LONGBLOB": TokenType.LONGBLOB,
        "LONGTEXT": TokenType.LONGTEXT,
        "MEDIUMBLOB": TokenType.MEDIUMBLOB,
        "MEDIUMINT": TokenType.MEDIUMINT,
        "MEDIUMTEXT": TokenType.MEDIUMTEXT,
        "MEMBER OF": TokenType.MEMBER_OF,
        "MOD": TokenType.MOD,
        "SEPARATOR": TokenType.SEPARATOR,
        "SERIAL": TokenType.SERIAL,
        # SIGNED / UNSIGNED (with or without INTEGER) map to BIGINT / UBIGINT.
        "SIGNED": TokenType.BIGINT,
        "SIGNED INTEGER": TokenType.BIGINT,
        "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
        "START": TokenType.BEGIN,
        # TIMESTAMP is tokenized as TIMESTAMPTZ.
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
        "TINYBLOB": TokenType.TINYBLOB,
        "TINYTEXT": TokenType.TINYTEXT,
        "UNLOCK TABLES": TokenType.COMMAND,
        "UNSIGNED": TokenType.UBIGINT,
        "UNSIGNED INTEGER": TokenType.UBIGINT,
        "YEAR": TokenType.YEAR,
        # Underscore-prefixed character-set names, all tokenized as INTRODUCER.
        "_ARMSCII8": TokenType.INTRODUCER,
        "_ASCII": TokenType.INTRODUCER,
        "_BIG5": TokenType.INTRODUCER,
        "_BINARY": TokenType.INTRODUCER,
        "_CP1250": TokenType.INTRODUCER,
        "_CP1251": TokenType.INTRODUCER,
        "_CP1256": TokenType.INTRODUCER,
        "_CP1257": TokenType.INTRODUCER,
        "_CP850": TokenType.INTRODUCER,
        "_CP852": TokenType.INTRODUCER,
        "_CP866": TokenType.INTRODUCER,
        "_CP932": TokenType.INTRODUCER,
        "_DEC8": TokenType.INTRODUCER,
        "_EUCJPMS": TokenType.INTRODUCER,
        "_EUCKR": TokenType.INTRODUCER,
        "_GB18030": TokenType.INTRODUCER,
        "_GB2312": TokenType.INTRODUCER,
        "_GBK": TokenType.INTRODUCER,
        "_GEOSTD8": TokenType.INTRODUCER,
        "_GREEK": TokenType.INTRODUCER,
        "_HEBREW": TokenType.INTRODUCER,
        "_HP8": TokenType.INTRODUCER,
        "_KEYBCS2": TokenType.INTRODUCER,
        "_KOI8R": TokenType.INTRODUCER,
        "_KOI8U": TokenType.INTRODUCER,
        "_LATIN1": TokenType.INTRODUCER,
        "_LATIN2": TokenType.INTRODUCER,
        "_LATIN5": TokenType.INTRODUCER,
        "_LATIN7": TokenType.INTRODUCER,
        "_MACCE": TokenType.INTRODUCER,
        "_MACROMAN": TokenType.INTRODUCER,
        "_SJIS": TokenType.INTRODUCER,
        "_SWE7": TokenType.INTRODUCER,
        "_TIS620": TokenType.INTRODUCER,
        "_UCS2": TokenType.INTRODUCER,
        "_UJIS": TokenType.INTRODUCER,
        # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
        "_UTF8": TokenType.INTRODUCER,
        "_UTF16": TokenType.INTRODUCER,
        "_UTF16LE": TokenType.INTRODUCER,
        "_UTF32": TokenType.INTRODUCER,
        "_UTF8MB3": TokenType.INTRODUCER,
        "_UTF8MB4": TokenType.INTRODUCER,
        "@@": TokenType.SESSION_PARAMETER,
    }

    # Base command tokens with REPLACE added and SHOW removed.
    COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE} - {TokenType.SHOW}
DUPLICATE_KEY_UPDATE_WITH_SET = False QUERY_HINT_SEP = " " VALUES_AS_TABLE = False NVL2_SUPPORTED = False LAST_DAY_SUPPORTS_DATE_PART = False JSON_TYPE_REQUIRED_FOR_EXTRACTION = True JSON_PATH_BRACKETED_KEY_SUPPORTED = False JSON_KEY_VALUE_PAIR_SEP = "," SUPPORTS_TO_NUMBER = False PARSE_JSON_NAME: t.Optional[str] = None PAD_FILL_PATTERN_IS_REQUIRED = True WRAP_DERIVED_VALUES = False VARCHAR_REQUIRES_SIZE = True SUPPORTS_MEDIAN = False UPDATE_STATEMENT_SUPPORTS_FROM = False TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.ArrayAgg: rename_func("GROUP_CONCAT"), exp.BitwiseAndAgg: rename_func("BIT_AND"), exp.BitwiseOrAgg: rename_func("BIT_OR"), exp.BitwiseXorAgg: rename_func("BIT_XOR"), exp.BitwiseCount: rename_func("BIT_COUNT"), exp.Chr: lambda self, e: self.chr_sql(e, "CHAR"), exp.CurrentDate: no_paren_current_date_sql, exp.CurrentVersion: rename_func("VERSION"), exp.DateDiff: _remove_ts_or_ds_to_date( lambda self, e: self.func("DATEDIFF", e.this, e.expression), ("this", "expression") ), exp.DateAdd: _remove_ts_or_ds_to_date(date_add_sql("ADD")), exp.DateStrToDate: datestrtodate_sql, exp.DateSub: _remove_ts_or_ds_to_date(date_add_sql("SUB")), exp.DateTrunc: _date_trunc_sql, exp.Day: _remove_ts_or_ds_to_date(), exp.DayOfMonth: _remove_ts_or_ds_to_date(rename_func("DAYOFMONTH")), exp.DayOfWeek: _remove_ts_or_ds_to_date(rename_func("DAYOFWEEK")), exp.DayOfYear: _remove_ts_or_ds_to_date(rename_func("DAYOFYEAR")), exp.GroupConcat: lambda self, e: ( f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""" ), exp.ILike: no_ilike_sql, exp.JSONExtractScalar: arrow_json_extract_sql, exp.Length: length_or_char_length_sql, exp.LogicalOr: rename_func("MAX"), exp.LogicalAnd: rename_func("MIN"), exp.Max: max_or_greatest, exp.Min: min_or_least, exp.Month: _remove_ts_or_ds_to_date(), exp.NullSafeEQ: lambda self, e: self.binary(e, "<=>"), exp.NullSafeNEQ: lambda self, e: f"NOT {self.binary(e, '<=>')}", exp.NumberToStr: rename_func("FORMAT"), 
exp.Pivot: no_pivot_sql, exp.Select: transforms.preprocess( [ transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins, transforms.eliminate_qualify, transforms.eliminate_full_outer_join, transforms.unnest_generate_date_array_using_recursive_cte, ] ), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="LOCATE", supports_position=True ), exp.StrToDate: _str_to_date_sql, exp.StrToTime: _str_to_date_sql, exp.Stuff: rename_func("INSERT"), exp.SessionUser: lambda *_: "SESSION_USER()", exp.TableSample: no_tablesample_sql, exp.TimeFromParts: rename_func("MAKETIME"), exp.TimestampAdd: date_add_interval_sql("DATE", "ADD"), exp.TimestampDiff: lambda self, e: self.func( "TIMESTAMPDIFF", unit_to_var(e), e.expression, e.this ), exp.TimestampSub: date_add_interval_sql("DATE", "SUB"), exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeStrToTime: lambda self, e: timestrtotime_sql( self, e, include_precision=not e.args.get("zone"), ), exp.TimeToStr: _remove_ts_or_ds_to_date( lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)) ), exp.Trim: trim_sql, exp.Trunc: rename_func("TRUNCATE"), exp.TryCast: no_trycast_sql, exp.TsOrDsAdd: date_add_sql("ADD"), exp.TsOrDsDiff: lambda self, e: self.func("DATEDIFF", e.this, e.expression), exp.TsOrDsToDate: _ts_or_ds_to_date_sql, exp.Unicode: lambda self, e: f"ORD(CONVERT({self.sql(e.this)} USING utf32))", exp.UnixToTime: _unix_to_time_sql, exp.Week: _remove_ts_or_ds_to_date(), exp.WeekOfYear: _remove_ts_or_ds_to_date(rename_func("WEEKOFYEAR")), exp.Year: _remove_ts_or_ds_to_date(), exp.UtcTimestamp: rename_func("UTC_TIMESTAMP"), exp.UtcTime: rename_func("UTC_TIME"), } UNSIGNED_TYPE_MAPPING = { exp.DType.UBIGINT: "BIGINT", exp.DType.UINT: "INT", exp.DType.UMEDIUMINT: "MEDIUMINT", exp.DType.USMALLINT: "SMALLINT", exp.DType.UTINYINT: "TINYINT", exp.DType.UDECIMAL: "DECIMAL", exp.DType.UDOUBLE: "DOUBLE", } TIMESTAMP_TYPE_MAPPING = { exp.DType.DATETIME2: "DATETIME", exp.DType.SMALLDATETIME: 
"DATETIME", exp.DType.TIMESTAMP: "DATETIME", exp.DType.TIMESTAMPNTZ: "DATETIME", exp.DType.TIMESTAMPTZ: "TIMESTAMP", exp.DType.TIMESTAMPLTZ: "TIMESTAMP", } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, **UNSIGNED_TYPE_MAPPING, **TIMESTAMP_TYPE_MAPPING, } TYPE_MAPPING.pop(exp.DType.MEDIUMTEXT) TYPE_MAPPING.pop(exp.DType.LONGTEXT) TYPE_MAPPING.pop(exp.DType.TINYTEXT) TYPE_MAPPING.pop(exp.DType.BLOB) TYPE_MAPPING.pop(exp.DType.MEDIUMBLOB) TYPE_MAPPING.pop(exp.DType.LONGBLOB) TYPE_MAPPING.pop(exp.DType.TINYBLOB) PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.TransientProperty: exp.Properties.Location.UNSUPPORTED, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, exp.PartitionedByProperty: exp.Properties.Location.UNSUPPORTED, exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA, exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA, } LIMIT_FETCH = "LIMIT" LIMIT_ONLY_LITERALS = True CHAR_CAST_MAPPING = dict.fromkeys( ( exp.DType.LONGTEXT, exp.DType.LONGBLOB, exp.DType.MEDIUMBLOB, exp.DType.MEDIUMTEXT, exp.DType.TEXT, exp.DType.TINYBLOB, exp.DType.TINYTEXT, exp.DType.VARCHAR, ), "CHAR", ) SIGNED_CAST_MAPPING = dict.fromkeys( ( exp.DType.BIGINT, exp.DType.BOOLEAN, exp.DType.INT, exp.DType.SMALLINT, exp.DType.TINYINT, exp.DType.MEDIUMINT, ), "SIGNED", ) # MySQL doesn't support many datatypes in cast. 
# https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast CAST_MAPPING = { **CHAR_CAST_MAPPING, **SIGNED_CAST_MAPPING, exp.DType.UBIGINT: "UNSIGNED", } TIMESTAMP_FUNC_TYPES = { exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMPLTZ, } # https://dev.mysql.com/doc/refman/8.0/en/keywords.html RESERVED_KEYWORDS = { "accessible", "add", "all", "alter", "analyze", "and", "as", "asc", "asensitive", "before", "between", "bigint", "binary", "blob", "both", "by", "call", "cascade", "case", "change", "char", "character", "check", "collate", "column", "condition", "constraint", "continue", "convert", "create", "cross", "cube", "cume_dist", "current_date", "current_time", "current_timestamp", "current_user", "cursor", "database", "databases", "day_hour", "day_microsecond", "day_minute", "day_second", "dec", "decimal", "declare", "default", "delayed", "delete", "dense_rank", "desc", "describe", "deterministic", "distinct", "distinctrow", "div", "double", "drop", "dual", "each", "else", "elseif", "empty", "enclosed", "escaped", "except", "exists", "exit", "explain", "false", "fetch", "first_value", "float", "float4", "float8", "for", "force", "foreign", "from", "fulltext", "function", "generated", "get", "grant", "group", "grouping", "groups", "having", "high_priority", "hour_microsecond", "hour_minute", "hour_second", "if", "ignore", "in", "index", "infile", "inner", "inout", "insensitive", "insert", "int", "int1", "int2", "int3", "int4", "int8", "integer", "intersect", "interval", "into", "io_after_gtids", "io_before_gtids", "is", "iterate", "join", "json_table", "key", "keys", "kill", "lag", "last_value", "lateral", "lead", "leading", "leave", "left", "like", "limit", "linear", "lines", "load", "localtime", "localtimestamp", "lock", "long", "longblob", "longtext", "loop", "low_priority", "master_bind", "master_ssl_verify_server_cert", "match", "maxvalue", "mediumblob", "mediumint", "mediumtext", "middleint", "minute_microsecond", "minute_second", "mod", "modifies", 
def locate_properties(self, properties: exp.Properties) -> t.DefaultDict:
    """Group properties by location, relocating SQL SECURITY for views.

    Starts from the parent generator's grouping. When the properties belong
    to a ``CREATE`` whose kind is ``VIEW``, the first ``SqlSecurityProperty``
    found in the POST_SCHEMA bucket is moved to the bucket named by
    ``self.SQL_SECURITY_VIEW_LOCATION``.
    """
    located = super().locate_properties(properties)

    owner = properties.parent
    if not (isinstance(owner, exp.Create) and owner.kind == "VIEW"):
        return located

    # MySQL puts SQL SECURITY before VIEW but after the schema for functions/procedures
    schema_props = located[exp.Properties.Location.POST_SCHEMA]
    index = next(
        (i for i, prop in enumerate(schema_props) if isinstance(prop, exp.SqlSecurityProperty)),
        None,
    )
    if index is not None:
        security = schema_props.pop(index)
        located[self.SQL_SECURITY_VIEW_LOCATION].append(security)

    return located
def array_sql(self, expression: exp.Array) -> str:
    """Report arrays as unsupported, then render the node as a plain function call."""
    message = "Arrays are not supported by MySQL"
    self.unsupported(message)
    return self.function_fallback_sql(expression)


def arraycontainsall_sql(self, expression: exp.ArrayContainsAll) -> str:
    """Report array operations as unsupported, then render the node as a plain function call."""
    message = "Array operations are not supported by MySQL"
    self.unsupported(message)
    return self.function_fallback_sql(expression)


def dpipe_sql(self, expression: exp.DPipe) -> str:
    """Render a ``||`` chain as one ``CONCAT(...)`` call over its flattened operands."""
    operands = expression.flatten()
    return self.func("CONCAT", *operands)


def extract_sql(self, expression: exp.Extract) -> str:
    """Render EXTRACT; the ``epoch`` unit (any letter case) becomes ``UNIX_TIMESTAMP``."""
    unit_name = expression.name
    wants_epoch = bool(unit_name) and unit_name.lower() == "epoch"
    if not wants_epoch:
        return super().extract_sql(expression)
    return self.func("UNIX_TIMESTAMP", expression.expression)


def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type.

    A size-less VARCHAR becomes ``TEXT`` when ``VARCHAR_REQUIRES_SIZE`` is set.
    Types listed in ``UNSIGNED_TYPE_MAPPING`` get an ``UNSIGNED`` suffix.
    """
    if self.VARCHAR_REQUIRES_SIZE and expression.is_type(exp.DType.VARCHAR):
        if not expression.expressions:
            # `VARCHAR` must always have a size - if it doesn't, we always generate `TEXT`
            return "TEXT"

    # https://dev.mysql.com/doc/refman/8.0/en/numeric-type-syntax.html
    rendered = super().datatype_sql(expression)
    is_unsigned = expression.this in self.UNSIGNED_TYPE_MAPPING
    return f"{rendered} UNSIGNED" if is_unsigned else rendered


def jsonarraycontains_sql(self, expression: exp.JSONArrayContains) -> str:
    """Render ``<this> MEMBER OF(<expression>)``."""
    needle = self.sql(expression, "this")
    haystack = self.sql(expression, "expression")
    return f"{needle} MEMBER OF({haystack})"


def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render CAST with MySQL's restricted set of target types.

    Targets in ``TIMESTAMP_FUNC_TYPES`` are rendered as ``TIMESTAMP(value)``.
    Targets found in ``CAST_MAPPING`` are rewritten in place on
    ``expression.to`` before delegating to the parent generator.
    """
    target_type = expression.to.this
    if target_type in self.TIMESTAMP_FUNC_TYPES:
        return self.func("TIMESTAMP", expression.this)

    replacement = self.CAST_MAPPING.get(target_type)
    if replacement:
        expression.to.set("this", replacement)

    # NOTE(review): `safe_prefix` is accepted but not forwarded to the parent -
    # confirm that dropping it is intentional.
    return super().cast_sql(expression)


def show_sql(self, expression: exp.Show) -> str:
    """Render a ``SHOW`` statement from the arguments set on ``expression``.

    Each optional piece renders to an empty string when its argument is
    absent. The final join fixes the order of the pieces in the output.
    """
    kind = expression.name
    args = expression.args

    kind_sql = f" {kind}"
    full_sql = " FULL" if args.get("full") else ""
    global_sql = " GLOBAL" if args.get("global_") else ""

    target_sql = self.sql(expression, "target")
    if target_sql:
        target_sql = f" {target_sql}"
    else:
        target_sql = ""

    # The keyword in front of the target depends on what is being shown.
    if kind in ("COLUMNS", "INDEX"):
        target_sql = f" FROM{target_sql}"
    elif kind == "GRANTS":
        target_sql = f" FOR{target_sql}"
    elif kind in ("LINKS", "PARTITIONS"):
        if target_sql:
            target_sql = f" ON{target_sql}"
    elif kind == "PROJECTIONS":
        if target_sql:
            target_sql = f" ON TABLE{target_sql}"

    db_sql = self._prefixed_sql("FROM", expression, "db")
    like_sql = self._prefixed_sql("LIKE", expression, "like")
    where_sql = self.sql(expression, "where")

    types_sql = self.expressions(expression, key="types")
    if types_sql:
        types_sql = f" {types_sql}"

    query_sql = self._prefixed_sql("FOR QUERY", expression, "query")

    # PROFILE takes separate OFFSET / LIMIT clauses; everything else goes
    # through `_oldstyle_limit_sql`.
    if kind == "PROFILE":
        offset_sql = self._prefixed_sql("OFFSET", expression, "offset")
        limit_sql = self._prefixed_sql("LIMIT", expression, "limit")
    else:
        offset_sql = ""
        limit_sql = self._oldstyle_limit_sql(expression)

    log_sql = self._prefixed_sql("IN", expression, "log")
    position_sql = self._prefixed_sql("FROM", expression, "position")
    channel_sql = self._prefixed_sql("FOR CHANNEL", expression, "channel")

    engine_sql = ""
    if kind == "ENGINE":
        engine_sql = " MUTEX" if args.get("mutex") else " STATUS"

    for_table_sql = self._prefixed_sql("FOR TABLE", expression, "for_table")
    for_group_sql = self._prefixed_sql("FOR GROUP", expression, "for_group")
    for_user_sql = self._prefixed_sql("FOR USER", expression, "for_user")
    for_role_sql = self._prefixed_sql("FOR ROLE", expression, "for_role")
    outfile_sql = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile")
    json_sql = " JSON" if args.get("json") else ""

    pieces = (
        "SHOW",
        full_sql,
        global_sql,
        kind_sql,
        json_sql,
        target_sql,
        for_table_sql,
        types_sql,
        db_sql,
        query_sql,
        log_sql,
        position_sql,
        channel_sql,
        engine_sql,
        like_sql,
        where_sql,
        offset_sql,
        limit_sql,
        for_group_sql,
        for_user_sql,
        for_role_sql,
        outfile_sql,
    )
    return "".join(f"{piece}" for piece in pieces)
def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
    """Render ``MODIFY COLUMN <name> <type>`` when a type is given.

    Without a ``dtype`` argument the parent generator renders the node.
    """
    new_type = self.sql(expression, "dtype")
    if new_type:
        column = self.sql(expression, "this")
        return f"MODIFY COLUMN {column} {new_type}"
    return super().altercolumn_sql(expression)


def _prefixed_sql(self, prefix: str, expression: exp.Expr, arg: str) -> str:
    """Render argument ``arg`` as `` <prefix> <sql>``, or ``""`` when it renders empty."""
    rendered = self.sql(expression, arg)
    if not rendered:
        return ""
    return f" {prefix} {rendered}"


def _oldstyle_limit_sql(self, expression: exp.Show) -> str:
    """Render `` LIMIT [<offset>, ]<limit>``; empty when there is no limit."""
    limit_sql = self.sql(expression, "limit")
    offset_sql = self.sql(expression, "offset")

    if not limit_sql:
        return ""
    if offset_sql:
        return f" LIMIT {offset_sql}, {limit_sql}"
    return f" LIMIT {limit_sql}"


def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
    """Render timestamp truncation as a diff from a fixed anchor added back to it.

    The number of whole ``unit`` steps between the anchor and the value is
    computed with a TimestampDiff node, then added to the anchor with a
    DateAdd node.
    """
    trunc_unit = expression.args.get("unit")

    # Pick an old-enough date to avoid negative timestamp diffs
    anchor = "'0000-01-01 00:00:00'"

    # Source: https://stackoverflow.com/a/32955740
    make_diff = build_date_delta(exp.TimestampDiff)
    steps = make_diff([trunc_unit, anchor, expression.this])
    step_interval = exp.Interval(this=steps, unit=trunc_unit)

    make_add = build_date_delta_with_interval(exp.DateAdd)
    return self.sql(make_add([anchor, step_interval]))


def converttimezone_sql(self, expression: exp.ConvertTimezone) -> str:
    """Render ``CONVERT_TZ(timestamp, source_tz, target_tz)``."""
    args = expression.args
    source = args.get("source_tz")
    target = args.get("target_tz")
    value = args.get("timestamp")
    return self.func("CONVERT_TZ", value, source, target)


def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Report AT TIME ZONE as unsupported and render only the operand."""
    message = "AT TIME ZONE is not supported by MySQL"
    self.unsupported(message)
    return self.sql(expression.this)


def isascii_sql(self, expression: exp.IsAscii) -> str:
    """Render an ASCII check as a ``REGEXP_LIKE`` call against the ``[:ascii:]`` class."""
    operand = self.sql(expression.this)
    return f"REGEXP_LIKE({operand}, '^[[:ascii:]]*$')"


def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    """Report IGNORE NULLS as unsupported and render only the operand."""
    # https://dev.mysql.com/doc/refman/8.4/en/window-function-descriptions.html
    message = "MySQL does not support IGNORE NULLS."
    self.unsupported(message)
    return self.sql(expression.this)
def partition_sql(self, expression: exp.Partition) -> str:
    """Render a partition node.

    Directly under a PARTITION BY RANGE/LIST property only the flat
    expression list is emitted; otherwise the parent generator renders it.
    """
    by_property_types = (exp.PartitionByRangeProperty, exp.PartitionByListProperty)
    if not isinstance(expression.parent, by_property_types):
        return super().partition_sql(expression)
    return self.expressions(expression, flat=True)


def _partition_by_sql(
    self, expression: exp.PartitionByRangeProperty | exp.PartitionByListProperty, kind: str
) -> str:
    """Render ``PARTITION BY <kind> (<partition_expressions>) (<create_expressions>)``."""
    partition_exprs = self.expressions(expression, key="partition_expressions", flat=True)
    definitions = self.expressions(expression, key="create_expressions", flat=True)
    return f"PARTITION BY {kind} ({partition_exprs}) ({definitions})"


def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
    """Render a ``PARTITION BY RANGE`` property."""
    kind = "RANGE"
    return self._partition_by_sql(expression, kind)


def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
    """Render a ``PARTITION BY LIST`` property."""
    kind = "LIST"
    return self._partition_by_sql(expression, kind)


def partitionlist_sql(self, expression: exp.PartitionList) -> str:
    """Render ``PARTITION <name> VALUES IN (<values>)``."""
    partition_name = self.sql(expression, "this")
    value_list = self.expressions(expression, flat=True)
    return f"PARTITION {partition_name} VALUES IN ({value_list})"


def partitionrange_sql(self, expression: exp.PartitionRange) -> str:
    """Render ``PARTITION <name> VALUES LESS THAN (<values>)``."""
    partition_name = self.sql(expression, "this")
    bound_list = self.expressions(expression, flat=True)
    return f"PARTITION {partition_name} VALUES LESS THAN ({bound_list})"
def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
    """Decide whether ``identifier`` may be quoted.

    An unquoted identifier whose parent is a ``Pseudocolumn`` is never
    quoted. Every other identifier is decided by the parent dialect.
    """
    # Disable quoting for pseudocolumns as it may break queries e.g
    # `WHERE "ROWNUM" = ...` does not work but `WHERE ROWNUM = ...` does
    if not identifier.quoted and isinstance(identifier.parent, exp.Pseudocolumn):
        return False

    return super().can_quote(identifier, identify=identify)
**tokens.Tokenizer.KEYWORDS, "(+)": TokenType.JOIN_MARKER, "BINARY_DOUBLE": TokenType.DOUBLE, "BINARY_FLOAT": TokenType.FLOAT, "BULK COLLECT INTO": TokenType.BULK_COLLECT_INTO, "COLUMNS": TokenType.COLUMN, "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, "MINUS": TokenType.EXCEPT, "NVARCHAR2": TokenType.NVARCHAR, "ORDER SIBLINGS BY": TokenType.ORDER_SIBLINGS_BY, "SAMPLE": TokenType.TABLE_SAMPLE, "START": TokenType.BEGIN, "TOP": TokenType.TOP, "VARCHAR2": TokenType.VARCHAR, "SYSTIMESTAMP": TokenType.SYSTIMESTAMP, } Parser = OracleParser class Generator(generator.Generator): LOCKING_READS_SUPPORTED = True JOIN_HINTS = False TABLE_HINTS = False DATA_TYPE_SPECIFIERS_ALLOWED = True ALTER_TABLE_INCLUDE_COLUMN_KEYWORD = False LIMIT_FETCH = "FETCH" TABLESAMPLE_KEYWORDS = "SAMPLE" LAST_DAY_SUPPORTS_DATE_PART = False SUPPORTS_SELECT_INTO = True TZ_TO_WITH_TIME_ZONE = True SUPPORTS_WINDOW_EXCLUDE = True QUERY_HINT_SEP = " " SUPPORTS_DECODE_CASE = True TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.TINYINT: "SMALLINT", exp.DType.SMALLINT: "SMALLINT", exp.DType.INT: "INT", exp.DType.BIGINT: "INT", exp.DType.DECIMAL: "NUMBER", exp.DType.DOUBLE: "DOUBLE PRECISION", exp.DType.VARCHAR: "VARCHAR2", exp.DType.NVARCHAR: "NVARCHAR2", exp.DType.NCHAR: "NCHAR", exp.DType.TEXT: "CLOB", exp.DType.TIMETZ: "TIME", exp.DType.TIMESTAMPNTZ: "TIMESTAMP", exp.DType.TIMESTAMPTZ: "TIMESTAMP", exp.DType.BINARY: "BLOB", exp.DType.VARBINARY: "BLOB", exp.DType.ROWVERSION: "BLOB", } TYPE_MAPPING.pop(exp.DType.BLOB) TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.DateStrToDate: lambda self, e: self.func( "TO_DATE", e.this, exp.Literal.string("YYYY-MM-DD") ), exp.DateTrunc: lambda self, e: self.func("TRUNC", e.this, e.unit), exp.EuclideanDistance: rename_func("L2_DISTANCE"), exp.ILike: no_ilike_sql, exp.LogicalOr: rename_func("MAX"), exp.LogicalAnd: rename_func("MIN"), exp.Mod: rename_func("MOD"), exp.Rand: rename_func("DBMS_RANDOM.VALUE"), exp.Select: transforms.preprocess( [ 
transforms.eliminate_distinct_on, transforms.eliminate_qualify, ] ), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="INSTR", supports_position=True, supports_occurrence=True ), exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)), exp.Subquery: lambda self, e: self.subquery_sql(e, sep=" "), exp.Substring: rename_func("SUBSTR"), exp.Table: lambda self, e: self.table_sql(e, sep=" "), exp.TableSample: lambda self, e: self.tablesample_sql(e), exp.TemporaryProperty: lambda _, e: f"{e.name or 'GLOBAL'} TEMPORARY", exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)), exp.ToChar: lambda self, e: self.function_fallback_sql(e), exp.ToNumber: to_number_with_nls_param, exp.Trim: _trim_sql, exp.Unicode: lambda self, e: f"ASCII(UNISTR({self.sql(e.this)}))", exp.UnixToTime: lambda self, e: ( f"TO_DATE('1970-01-01', 'YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)" ), exp.UtcTimestamp: rename_func("UTC_TIMESTAMP"), exp.UtcTime: rename_func("UTC_TIME"), exp.Systimestamp: lambda self, e: "SYSTIMESTAMP", } PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, } def currenttimestamp_sql(self, expression: exp.CurrentTimestamp) -> str: if expression.args.get("sysdate"): return "SYSDATE" this = expression.this return self.func("CURRENT_TIMESTAMP", this) if this else "CURRENT_TIMESTAMP" def offset_sql(self, expression: exp.Offset) -> str: return f"{super().offset_sql(expression)} ROWS" def add_column_sql(self, expression: exp.Expr) -> str: return f"ADD {self.sql(expression)}" def queryoption_sql(self, expression: exp.QueryOption) -> str: option = self.sql(expression, "this") value = self.sql(expression, "expression") value = f" CONSTRAINT {value}" if value else "" return f"{option}{value}" def coalesce_sql(self, expression: exp.Coalesce) -> str: func_name 
def hint_sql(self, expression: exp.Hint) -> str:
    """Render a hint node as a `` /*+ ... */`` comment.

    Each child that is an ``exp.Anonymous`` is written as ``name(args)`` with
    its arguments formatted using a single space as separator; every other
    child is rendered with ``self.sql``. The pieces are joined through
    ``self.expressions`` using ``self.QUERY_HINT_SEP`` and stripped.
    """

    def _render(hint: exp.Expr) -> str:
        if not isinstance(hint, exp.Anonymous):
            return self.sql(hint)

        formatted_args = self.format_args(*hint.expressions, sep=" ")
        return f"{self.sql(hint, 'this')}({formatted_args})"

    # A distinct loop name keeps the `expression` parameter intact.
    rendered = [_render(hint) for hint in expression.expressions]
    joined = self.expressions(sqls=rendered, sep=self.QUERY_HINT_SEP).strip()
    return f" /*+ {joined} */"
def _date_add_sql(kind: str) -> t.Callable[[Postgres.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator callback that renders ``<this> <kind> <interval>``.

    Args:
        kind: Operator text placed between the operand and the interval.

    Returns:
        A function taking the generator and a date add/sub expression and
        returning the SQL string.
    """

    def func(self: Postgres.Generator, expression: DATE_ADD_OR_SUB) -> str:
        node = expression
        if isinstance(node, exp.TsOrDsAdd):
            node = ts_or_ds_add_cast(node)

        operand = self.sql(node, "this")
        unit = node.args.get("unit")

        amount = self._simplify_unless_literal(node.expression)
        if isinstance(amount, exp.Literal):
            # Mark the existing literal as a string, in place.
            amount.set("is_string", True)
        elif amount.is_number:
            amount = exp.Literal.string(amount.to_py())
        else:
            # Still falls through and renders whatever `amount` is.
            self.unsupported("Cannot add non literal")

        interval = self.sql(exp.Interval(this=amount, unit=unit))
        return f"{operand} {kind} {interval}"

    return func
def _serial_to_generated(expression: exp.Expr) -> exp.Expr:
    """Rewrite a SERIAL-family column definition as an integer identity column.

    For a ``ColumnDef`` whose kind is SERIAL, SMALLSERIAL or BIGSERIAL, the kind
    is replaced by INT, SMALLINT or BIGINT respectively, and a NOT NULL
    constraint plus a ``GeneratedAsIdentityColumnConstraint(this=False)``
    constraint are inserted at the front of the constraint list unless an equal
    constraint is already present. Any other input is returned untouched.

    Args:
        expression: The node to inspect; it is modified in place.

    Returns:
        The same ``expression`` object.
    """
    if not isinstance(expression, exp.ColumnDef):
        return expression

    kind = expression.kind
    if not kind:
        return expression

    integer_type = None
    for serial_type, replacement in (
        (exp.DType.SERIAL, exp.DType.INT),
        (exp.DType.SMALLSERIAL, exp.DType.SMALLINT),
        (exp.DType.BIGSERIAL, exp.DType.BIGINT),
    ):
        if kind.this == serial_type:
            integer_type = replacement
            break

    if integer_type is None:
        return expression

    kind.replace(exp.DataType(this=integer_type))

    # A ColumnDef built without a "constraints" arg (or with None) used to
    # raise KeyError / AttributeError here; create the list on demand instead.
    constraints = expression.args.get("constraints")
    if constraints is None:
        constraints = expression.args["constraints"] = []

    generated = exp.ColumnConstraint(kind=exp.GeneratedAsIdentityColumnConstraint(this=False))
    notnull = exp.ColumnConstraint(kind=exp.NotNullColumnConstraint())

    # NOT NULL goes in first so that the identity constraint ends up ahead of it.
    if notnull not in constraints:
        constraints.insert(0, notnull)
    if generated not in constraints:
        constraints.insert(0, generated)

    return expression
def _unix_to_time_sql(self: Postgres.Generator, expression: exp.UnixToTime) -> str:
    """Render a unix-time conversion as a ``TO_TIMESTAMP`` call.

    When the node's ``scale`` is missing or equals ``exp.UnixToTime.SECONDS``
    the operand is passed through as is; otherwise it is divided by
    ``POW(10, scale)`` first. The formatted time is passed as the last argument.
    """
    scale = expression.args.get("scale")
    epoch = expression.this

    if scale not in (None, exp.UnixToTime.SECONDS):
        epoch = exp.Div(this=epoch, expression=exp.func("POW", 10, scale))

    return self.func("TO_TIMESTAMP", epoch, self.format_time(expression))
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for this dialect, layered on ``tokens.Tokenizer``."""

    # (prefix, terminator) pairs for the prefixed string-literal forms.
    BIT_STRINGS = [("b'", "'"), ("B'", "'")]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
    BYTE_STRING_ESCAPES = ["'", "\\"]
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    # Base keywords extended with operator spellings, type names and
    # statements that are tokenized as TokenType.COMMAND.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "~": TokenType.RLIKE,
        "@@": TokenType.DAT,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "?&": TokenType.QMARK_AMP,
        "?|": TokenType.QMARK_PIPE,
        "#-": TokenType.HASH_DASH,
        "|/": TokenType.PIPE_SLASH,
        "||/": TokenType.DPIPE_SLASH,
        "BEGIN": TokenType.BEGIN,
        "BIGSERIAL": TokenType.BIGSERIAL,
        "CSTRING": TokenType.PSEUDO_TYPE,
        "DECLARE": TokenType.COMMAND,
        "DO": TokenType.COMMAND,
        "EXEC": TokenType.COMMAND,
        "HSTORE": TokenType.HSTORE,
        "INT8": TokenType.BIGINT,
        "MONEY": TokenType.MONEY,
        "NAME": TokenType.NAME,
        "OID": TokenType.OBJECT_IDENTIFIER,
        "ONLY": TokenType.ONLY,
        "POINT": TokenType.POINT,
        "REFRESH": TokenType.COMMAND,
        "REINDEX": TokenType.COMMAND,
        "RESET": TokenType.COMMAND,
        "SERIAL": TokenType.SERIAL,
        "SMALLSERIAL": TokenType.SMALLSERIAL,
        "TEMP": TokenType.TEMPORARY,
        # The REG* names all share the OBJECT_IDENTIFIER token type.
        "REGCLASS": TokenType.OBJECT_IDENTIFIER,
        "REGCOLLATION": TokenType.OBJECT_IDENTIFIER,
        "REGCONFIG": TokenType.OBJECT_IDENTIFIER,
        "REGDICTIONARY": TokenType.OBJECT_IDENTIFIER,
        "REGNAMESPACE": TokenType.OBJECT_IDENTIFIER,
        "REGOPER": TokenType.OBJECT_IDENTIFIER,
        "REGOPERATOR": TokenType.OBJECT_IDENTIFIER,
        "REGPROC": TokenType.OBJECT_IDENTIFIER,
        "REGPROCEDURE": TokenType.OBJECT_IDENTIFIER,
        "REGROLE": TokenType.OBJECT_IDENTIFIER,
        "REGTYPE": TokenType.OBJECT_IDENTIFIER,
        # FLOAT is tokenized as the DOUBLE type token here.
        "FLOAT": TokenType.DOUBLE,
        "XML": TokenType.XML,
        "VARIADIC": TokenType.VARIADIC,
        "INOUT": TokenType.INOUT,
    }
    # Remove two inherited keyword entries; pop() without a default requires
    # that both keys exist in the base mapping.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("DIV")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }

    VAR_SINGLE_TOKENS = {"$"}
# Expression type -> SQL renderer overrides for this dialect, layered on top of
# the base generator's table. Each value is called as ``fn(generator, node)``.
TRANSFORMS = {
    **generator.Generator.TRANSFORMS,
    exp.AnyValue: _versioned_anyvalue_sql,
    exp.ArrayConcat: array_concat_sql("ARRAY_CAT"),
    exp.ArrayFilter: filter_array_using_unnest,
    exp.ArrayAppend: array_append_sql("ARRAY_APPEND"),
    exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND", swap_params=True),
    exp.BitwiseAndAgg: rename_func("BIT_AND"),
    exp.BitwiseOrAgg: rename_func("BIT_OR"),
    exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
    exp.BitwiseXorAgg: rename_func("BIT_XOR"),
    exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]),
    exp.CurrentDate: no_paren_current_date_sql,
    exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
    exp.CurrentUser: lambda *_: "CURRENT_USER",
    exp.CurrentVersion: rename_func("VERSION"),
    exp.DateAdd: _date_add_sql("+"),
    exp.DateDiff: _date_diff_sql,
    exp.DateStrToDate: datestrtodate_sql,
    exp.DateSub: _date_add_sql("-"),
    exp.Explode: rename_func("UNNEST"),
    exp.ExplodingGenerateSeries: rename_func("GENERATE_SERIES"),
    exp.GenerateSeries: generate_series_sql("GENERATE_SERIES"),
    exp.Getbit: getbit_sql,
    exp.GroupConcat: lambda self, e: groupconcat_sql(
        self, e, func_name="STRING_AGG", within_group=False
    ),
    exp.IntDiv: rename_func("DIV"),
    exp.JSONArrayAgg: lambda self, e: self.func(
        "JSON_AGG",
        self.sql(e, "this"),
        suffix=f"{self.sql(e, 'order')})",
    ),
    exp.JSONExtract: _json_extract_sql("JSON_EXTRACT_PATH", "->"),
    exp.JSONExtractScalar: _json_extract_sql("JSON_EXTRACT_PATH_TEXT", "->>"),
    exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
    exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
    exp.JSONBContains: lambda self, e: self.binary(e, "?"),
    exp.ParseJSON: lambda self, e: self.sql(exp.cast(e.this, exp.DType.JSON)),
    exp.JSONPathKey: json_path_key_only_name,
    exp.JSONPathRoot: lambda *_: "",
    exp.JSONPathSubscript: lambda self, e: self.json_path_part(e.this),
    exp.LastDay: no_last_day_sql,
    exp.LogicalOr: rename_func("BOOL_OR"),
    exp.LogicalAnd: rename_func("BOOL_AND"),
    exp.Max: max_or_greatest,
    exp.MapFromEntries: no_map_from_entries_sql,
    exp.Min: min_or_least,
    exp.Merge: merge_without_target_sql,
    exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
    exp.PercentileCont: transforms.preprocess(
        [transforms.add_within_group_for_percentiles]
    ),
    exp.PercentileDisc: transforms.preprocess(
        [transforms.add_within_group_for_percentiles]
    ),
    exp.Pivot: no_pivot_sql,
    exp.Rand: rename_func("RANDOM"),
    exp.RegexpLike: lambda self, e: self.binary(e, "~"),
    exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
    exp.RegexpReplace: lambda self, e: self.func(
        "REGEXP_REPLACE",
        e.this,
        e.expression,
        e.args.get("replacement"),
        e.args.get("position"),
        e.args.get("occurrence"),
        regexp_replace_global_modifier(e),
    ),
    exp.Round: _round_sql,
    exp.Select: transforms.preprocess(
        [
            transforms.eliminate_semi_and_anti_joins,
            transforms.eliminate_qualify,
        ]
    ),
    exp.SHA2: sha256_sql,
    exp.SHA2Digest: sha2_digest_sql,
    exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"),
    exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)),
    exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
    exp.StructExtract: struct_extract_sql,
    exp.Substring: _substring_sql,
    exp.TimeFromParts: rename_func("MAKE_TIME"),
    exp.TimestampFromParts: rename_func("MAKE_TIMESTAMP"),
    exp.TimestampTrunc: timestamptrunc_sql(zone=True),
    exp.TimeStrToTime: timestrtotime_sql,
    exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
    exp.ToChar: lambda self, e: (
        self.function_fallback_sql(e) if e.args.get("format") else self.tochar_sql(e)
    ),
    exp.Trim: trim_sql,
    exp.TryCast: no_trycast_sql,
    exp.TsOrDsAdd: _date_add_sql("+"),
    exp.TsOrDsDiff: _date_diff_sql,
    exp.Uuid: lambda *_: "GEN_RANDOM_UUID()",
    exp.TimeToUnix: lambda self, e: self.func(
        "DATE_PART", exp.Literal.string("epoch"), e.this
    ),
    exp.VariancePop: rename_func("VAR_POP"),
    exp.Variance: rename_func("VAR_SAMP"),
    exp.Xor: bool_xor_sql,
    exp.Unicode: rename_func("ASCII"),
    # Sole exp.UnixToTime entry: an earlier duplicate key mapping to a plain
    # TO_TIMESTAMP lambda was always overwritten by this one and was removed.
    exp.UnixToTime: _unix_to_time_sql,
    exp.Levenshtein: _levenshtein_sql,
    exp.JSONObjectAgg: rename_func("JSON_OBJECT_AGG"),
    exp.JSONBObjectAgg: rename_func("JSONB_OBJECT_AGG"),
    exp.CountIf: count_if_to_sum,
}
# Drop the inherited transform so the lookup no longer finds a table entry for
# column comments (presumably deferring to commentcolumnconstraint_sql — confirm).
TRANSFORMS.pop(exp.CommentColumnConstraint)
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render the SET part of an ALTER statement.

    The result is ``"SET "`` followed, without separators, by the
    parenthesised expression list, an ``ACCESS METHOD`` clause, a
    ``TABLESPACE`` clause and the option SQL; each piece is emitted only when
    it renders to a non-empty string.
    """
    pieces = ["SET "]

    items = self.expressions(expression, flat=True)
    if items:
        pieces.append(f"({items})")

    method = self.sql(expression, "access_method")
    if method:
        pieces.append(f"ACCESS METHOD {method}")

    space = self.sql(expression, "tablespace")
    if space:
        pieces.append(f"TABLESPACE {space}")

    pieces.append(self.sql(expression, "option"))
    return "".join(pieces)
def interval_sql(self, expression: exp.Interval) -> str:
    """Render an interval, converting literal quarter intervals to months.

    When the unit text starts with "quarter" (case-insensitively) and the
    amount is a literal, the amount is replaced in place by a string literal
    holding three times its integer value and the unit is replaced by
    ``MONTH``. Rendering is then delegated to the parent implementation.
    """
    amount = expression.this
    is_quarter = expression.text("unit").lower().startswith("quarter")

    if is_quarter and isinstance(amount, exp.Literal):
        months = int(amount.to_py()) * 3
        amount.replace(exp.Literal.string(months))
        expression.args["unit"].replace(exp.var("MONTH"))

    return super().interval_sql(expression)
def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a schema node, with special handling for partition schemas.

    * If the schema's parent is a ``PartitionedByProperty``, identifiers are
      replaced in place by their bare names and the result is an array of
      string literals, one per schema entry.
    * Otherwise, column definitions found in other schemas under the same
      parent (those whose own parent is a ``Property``) are appended to this
      schema's expressions before delegating to ``self.schema_sql``.

    Args:
        expression: The schema to render; it may be modified in place.

    Returns:
        The generated SQL string.
    """
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            # find_all() yields lazily, so its result is always truthy; the
            # former `column_defs and ...` guard tested nothing. Extending
            # with an empty iterator is already a no-op.
            if isinstance(schema.parent, exp.Property):
                expression.expressions.extend(schema.find_all(exp.ColumnDef))

    return self.schema_sql(expression)
def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator callback rendering ``name(unit, amount, this)``.

    Args:
        name: Function name to emit.
        negate_interval: When true, the amount is multiplied by ``-1``.

    Returns:
        A function taking the generator and a date add/sub expression and
        returning the SQL string.
    """

    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        amount = _to_int(self, expression.expression)
        unit = unit_to_str(expression)

        if negate_interval:
            amount = amount * (-1)

        return self.func(name, unit, amount, expression.this)

    return _delta_sql
def amend_exploded_column_table(expression: exp.Expr) -> exp.Expr:
    """Re-point column references that use an exploded column as a qualifier.

    Only applies to a typed ``Select``. For every lateral whose ``this`` is an
    ``Explode`` and whose alias is a ``TableAlias`` with exactly one column,
    columns in scope that mention that alias column (as db, as table, or as the
    column name beneath a ``Dot``) are rewritten in place.

    Returns:
        The same ``expression`` object.
    """
    # We check for expression.type because the columns can be amended only if types were inferred
    if not (isinstance(expression, exp.Select) and expression.type):
        return expression

    for lateral in expression.args.get("laterals") or []:
        alias = lateral.args.get("alias")
        eligible = (
            isinstance(lateral.this, exp.Explode)
            and isinstance(alias, exp.TableAlias)
            and len(alias.columns) == 1
        )
        if not eligible:
            continue

        new_table = alias.this
        old_table = alias.columns[0].name.lower()

        # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
        # with the struct column, resulting in invalid Presto references that need to be amended
        for column in find_all_in_scope(expression, exp.Column):
            if column.db.lower() == old_table:
                # Move the db part into the table slot.
                column.set("table", column.args["db"].pop())
            elif column.table.lower() == old_table:
                column.set("table", new_table.copy())
            elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                dot = column.parent
                dot.replace(exp.column(dot.expression, table=new_table))

    return expression
("X'", "'")] UNICODE_STRINGS = [ (prefix + q, q) for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) for prefix in ("U&", "u&") ] NESTED_COMMENTS = False KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "DEALLOCATE PREPARE": TokenType.COMMAND, "DESCRIBE INPUT": TokenType.COMMAND, "DESCRIBE OUTPUT": TokenType.COMMAND, "RESET SESSION": TokenType.COMMAND, "START": TokenType.BEGIN, "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, "ROW": TokenType.STRUCT, "IPADDRESS": TokenType.IPADDRESS, "IPPREFIX": TokenType.IPPREFIX, "TDIGEST": TokenType.TDIGEST, "HYPERLOGLOG": TokenType.HLLSKETCH, } KEYWORDS.pop("/*+") KEYWORDS.pop("QUALIFY") Parser = PrestoParser class Generator(generator.Generator): INTERVAL_ALLOWS_PLURAL_FORM = False JOIN_HINTS = False TABLE_HINTS = False QUERY_HINTS = False IS_BOOL_ALLOWED = False TZ_TO_WITH_TIME_ZONE = True NVL2_SUPPORTED = False STRUCT_DELIMITER = ("(", ")") LIMIT_ONLY_LITERALS = True SUPPORTS_SINGLE_ARG_CONCAT = False LIKE_PROPERTY_INSIDE_SCHEMA = True MULTI_ARG_DISTINCT = False SUPPORTS_TO_NUMBER = False HEX_FUNC = "TO_HEX" PARSE_JSON_NAME = "JSON_PARSE" PAD_FILL_PATTERN_IS_REQUIRED = True EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False SUPPORTS_MEDIAN = False ARRAY_SIZE_NAME = "CARDINALITY" PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.BINARY: "VARBINARY", exp.DType.BIT: "BOOLEAN", exp.DType.DATETIME: "TIMESTAMP", exp.DType.DATETIME64: "TIMESTAMP", exp.DType.FLOAT: "REAL", exp.DType.HLLSKETCH: "HYPERLOGLOG", exp.DType.INT: "INTEGER", exp.DType.STRUCT: "ROW", exp.DType.TEXT: "VARCHAR", exp.DType.TIMESTAMPTZ: "TIMESTAMP", exp.DType.TIMESTAMPNTZ: "TIMESTAMP", exp.DType.TIMETZ: "TIME", } TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.AnyValue: rename_func("ARBITRARY"), exp.ApproxQuantile: lambda self, e: self.func( "APPROX_PERCENTILE", e.this, 
e.args.get("weight"), e.args.get("quantile"), e.args.get("accuracy"), ), exp.ArgMax: rename_func("MAX_BY"), exp.ArgMin: rename_func("MIN_BY"), exp.Array: transforms.preprocess( [transforms.inherit_struct_field_names], generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", ), exp.ArrayAny: rename_func("ANY_MATCH"), exp.ArrayConcat: rename_func("CONCAT"), exp.ArrayContains: rename_func("CONTAINS"), exp.ArrayToString: rename_func("ARRAY_JOIN"), exp.ArrayUniqueAgg: rename_func("SET_AGG"), exp.ArraySlice: rename_func("SLICE"), exp.AtTimeZone: rename_func("AT_TIMEZONE"), exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), exp.BitwiseLeftShift: lambda self, e: self.func( "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression ), exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), exp.BitwiseRightShift: lambda self, e: self.func( "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression ), exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), exp.CurrentTime: lambda *_: "CURRENT_TIME", exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.CurrentUser: lambda *_: "CURRENT_USER", exp.DateAdd: _date_delta_sql("DATE_ADD"), exp.DateDiff: lambda self, e: self.func( "DATE_DIFF", unit_to_str(e), e.expression, e.this ), exp.DateStrToDate: datestrtodate_sql, exp.DateToDi: lambda self, e: ( f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)" ), exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), exp.DiToDate: lambda self, e: ( f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)" ), exp.Encode: 
lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), exp.FileFormatProperty: lambda self, e: ( f"format={self.sql(exp.Literal.string(e.name))}" ), exp.First: _first_last_sql, exp.FromTimeZone: lambda self, e: ( f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'" ), exp.GenerateSeries: sequence_sql, exp.GenerateDateArray: sequence_sql, exp.If: if_sql(), exp.ILike: no_ilike_sql, exp.Initcap: _initcap_sql, exp.Last: _first_last_sql, exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), exp.Lateral: _explode_to_unnest_sql, exp.Left: left_to_substring_sql, exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( rename_func("LEVENSHTEIN_DISTANCE") ), exp.LogicalAnd: rename_func("BOOL_AND"), exp.LogicalOr: rename_func("BOOL_OR"), exp.Pivot: no_pivot_sql, exp.Quantile: _quantile_sql, exp.RegexpExtract: regexp_extract_sql, exp.RegexpExtractAll: regexp_extract_sql, exp.Right: right_to_substring_sql, exp.Schema: _schema_sql, exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), exp.Select: transforms.preprocess( [ transforms.eliminate_window_clause, transforms.eliminate_qualify, transforms.eliminate_distinct_on, transforms.explode_projection_to_unnest(1), transforms.eliminate_semi_and_anti_joins, amend_exploded_column_table, ] ), exp.SortArray: _no_sort_array, exp.SqlSecurityProperty: lambda self, e: f"SECURITY {self.sql(e.this)}", exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", exp.StrToMap: rename_func("SPLIT_TO_MAP"), exp.StrToTime: _str_to_time_sql, exp.StructExtract: struct_extract_sql, exp.Table: transforms.preprocess([transforms.unnest_generate_series]), exp.Timestamp: no_timestamp_sql, exp.TimestampAdd: _date_delta_sql("DATE_ADD"), exp.TimestampTrunc: timestamptrunc_sql(), exp.TimeStrToDate: timestrtotime_sql, exp.TimeStrToTime: timestrtotime_sql, exp.TimeStrToUnix: 
lambda self, e: self.func( "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) ), exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), exp.TimeToUnix: rename_func("TO_UNIXTIME"), exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), exp.TsOrDiToDi: lambda self, e: ( f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)" ), exp.TsOrDsAdd: _ts_or_ds_add_sql, exp.TsOrDsDiff: _ts_or_ds_diff_sql, exp.TsOrDsToDate: _ts_or_ds_to_date_sql, exp.Unhex: rename_func("FROM_HEX"), exp.UnixToStr: lambda self, e: ( f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})" ), exp.UnixToTime: _unix_to_time_sql, exp.UnixToTimeStr: lambda self, e: ( f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)" ), exp.VariancePop: rename_func("VAR_POP"), exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), exp.WithinGroup: transforms.preprocess( [transforms.remove_within_group_for_percentiles] ), # Note: Presto's TRUNCATE always returns DOUBLE, even with decimals=0, whereas # most dialects return INT (SQLite also returns REAL, see sqlite.py). This creates # a bidirectional transpilation gap: Presto→Other may change float division to int # division, and vice versa. Modeling precisely would require exp.FloatTrunc or # similar, deemed overengineering for this subtle semantic difference. 
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
    """Generate the fallback function SQL for ``exp.JSONFormat``, casting the operand to JSON.

    If the operand is not flagged as JSON (``is_json``) and is not already of JSON
    type, it is replaced in the tree by a cast to JSON before generation. Types are
    inferred on demand when the operand carries no type yet.

    Args:
        expression: The ``exp.JSONFormat`` node. Its ``this`` operand may be absent,
            in which case no inspection or cast is attempted.

    Returns:
        The SQL produced by ``self.function_fallback_sql`` for the (possibly amended) node.
    """
    this = expression.this
    is_json = expression.args.get("is_json")

    # The operand is optional: only inspect or cast it when it exists. Previously the
    # type check ran unconditionally and failed on a missing operand.
    if this and not is_json:
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not this.is_type(exp.DType.JSON):
            this.replace(exp.cast(this, exp.DType.JSON))

    return self.function_fallback_sql(expression)
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Generate ``TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(...)), PARSE_DATETIME(...)))``.

    `TO_UNIXTIME` requires a `TIMESTAMP`, so the argument has to be parsed into one.
    `DATE_PARSE` is attempted first; because it can fail when a timezone is involved,
    it is wrapped in `TRY` and `PARSE_DATETIME` is used as the fallback. The latter
    seems to be using the same time mapping as Hive, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    operand = expression.this
    as_text = exp.cast(operand, exp.DType.TEXT)

    # Only string operands are cast to TIMESTAMP; anything else is used as-is
    as_timestamp = operand
    if operand.is_string:
        as_timestamp = exp.cast(operand, exp.DType.TIMESTAMP)

    # Primary attempt: parse the textual value with the dialect's own format
    direct_parse = self.func("DATE_PARSE", as_text, self.format_time(expression))

    # Fallback: re-format the value, then parse it using Hive's inverse time mapping
    tz_aware_parse = self.func(
        "PARSE_DATETIME",
        self.func("DATE_FORMAT", as_timestamp, self.format_time(expression)),
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    return self.func(
        "TO_UNIXTIME",
        self.func("COALESCE", self.func("TRY", direct_parse), tz_aware_parse),
    )
def transaction_sql(self, expression: exp.Transaction) -> str:
    """Render a transaction start as ``START TRANSACTION`` plus optional modes.

    Args:
        expression: Node whose optional ``modes`` arg is a sequence of mode strings.

    Returns:
        ``START TRANSACTION`` followed, when modes are present, by a space and the
        modes joined with ``", "``.
    """
    modes = expression.args.get("modes")
    if not modes:
        return "START TRANSACTION"

    return "START TRANSACTION " + ", ".join(modes)
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Generate either ``JSON_EXTRACT(...)`` or a dotted ROW access for a JSON extraction.

    ``JSON_EXTRACT`` is generated unless both of the following hold: the node is
    flagged as a ``variant_extract`` (e.g. a Snowflake / Databricks ``col:x.y``) and
    the dialect setting ``variant_extract_is_json_extract`` (default ``True``) has
    been turned off. In that case the JSONPath keys are rendered as dot notation
    (``col.x.y``).

    Path segments that are not plain keys (subscripts, wildcards etc.) cannot be
    expressed in dot notation; they are reported through ``self.unsupported`` and
    left out of the result.

    Returns:
        The generated SQL string.
    """
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access `col.x.y`.
    # The first path element is skipped (the original slices it off before iterating).
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue

        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            # Delimit the key, doubling any embedded double quote so that it cannot
            # terminate the identifier early and corrupt the generated SQL
            key = '"' + key.replace('"', '""') + '"'

        segments.append(f".{key}")

    return this + "".join(segments)
class Tokenizer(Postgres.Tokenizer):
    # Redshift tokenizer: overrides layered on top of Postgres.Tokenizer.

    # No delimiters are registered for bit-string or hex-string literals
    BIT_STRINGS = []
    HEX_STRINGS = []
    # NOTE(review): presumably both a backslash and a single quote act as escape
    # characters inside string literals - confirm against the base Tokenizer
    STRING_ESCAPES = ["\\", "'"]

    # Start from the Postgres keyword table and add / remap the entries below
    KEYWORDS = {
        **Postgres.Tokenizer.KEYWORDS,
        "(+)": TokenType.JOIN_MARKER,
        "HLLSKETCH": TokenType.HLLSKETCH,
        "MINUS": TokenType.EXCEPT,
        "SUPER": TokenType.SUPER,
        "TOP": TokenType.TOP,
        "UNLOAD": TokenType.COMMAND,
        "VARBYTE": TokenType.VARBINARY,
        "BINARY VARYING": TokenType.VARBINARY,
    }
    # Drop the inherited VALUES entry; KEYWORDS is a fresh dict, so the Postgres
    # table itself is not affected
    KEYWORDS.pop("VALUES")

    # Redshift allows # to appear as a table identifier prefix
    # (copy first so that the Postgres mapping is left unmodified)
    SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("#")
# Lower-case Redshift reserved words. Entries must not carry surrounding whitespace:
# the set is used for exact membership lookups, so a padded entry (as "encrypt" used
# to be) can never match.
RESERVED_KEYWORDS = {
    "aes128", "aes256", "all", "allowoverwrite", "analyse", "analyze", "and", "any",
    "array", "as", "asc", "authorization", "az64", "backup", "between", "binary",
    "blanksasnull", "both", "bytedict", "bzip2", "case", "cast", "check", "collate",
    "column", "constraint", "create", "credentials", "cross", "current_date",
    "current_time", "current_timestamp", "current_user", "current_user_id", "default",
    "deferrable", "deflate", "defrag", "delta", "delta32k", "desc", "disable",
    "distinct", "do", "else", "emptyasnull", "enable", "encode", "encrypt",
    "encryption", "end", "except", "explicit", "false", "for", "foreign", "freeze",
    "from", "full", "globaldict256", "globaldict64k", "grant", "group", "gzip",
    "having", "identity", "ignore", "ilike", "in", "initially", "inner", "intersect",
    "interval", "into", "is", "isnull", "join", "leading", "left", "like", "limit",
    "localtime", "localtimestamp", "lun", "luns", "lzo", "lzop", "minus", "mostly16",
    "mostly32", "mostly8", "natural", "new", "not", "notnull", "null", "nulls", "off",
    "offline", "offset", "oid", "old", "on", "only", "open", "or", "order", "outer",
    "overlaps", "parallel", "partition", "percent", "permissions", "pivot", "placing",
    "primary", "raw", "readratio", "recover", "references", "rejectlog", "resort",
    "respect", "restore", "right", "select", "session_user", "similar", "snapshot",
    "some", "sysdate", "system", "table", "tag", "tdes", "text255", "text32k", "then",
    "timestamp", "to", "top", "trailing", "true", "truncatecolumns", "type", "union",
    "unique", "unnest", "unpivot", "user", "using", "verbose", "wallet", "when",
    "where", "with", "without",
}
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Assemble a ``SET`` clause from the node's optional parts.

    Each part is emitted only when its generated SQL is non-empty, in this order:
    `` TABLE PROPERTIES (<expressions>)``, `` LOCATION <location>``,
    `` FILE FORMAT <file_format>`` (file format items separated by a space).

    Returns:
        ``SET`` followed by whichever parts are present.
    """
    pieces = ["SET"]

    properties = self.expressions(expression, flat=True)
    if properties:
        pieces.append(f" TABLE PROPERTIES ({properties})")

    location = self.sql(expression, "location")
    if location:
        pieces.append(f" LOCATION {location}")

    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    if file_format:
        pieces.append(f" FILE FORMAT {file_format}")

    return "".join(pieces)
class RisingWave(Postgres):
    # RisingWave dialect, expressed as a set of overrides on top of Postgres.
    REQUIRES_PARENTHESIZED_STRUCT_ACCESS = True
    SUPPORTS_STRUCT_STAR_EXPANSION = True

    class Tokenizer(Postgres.Tokenizer):
        # Postgres keywords plus the SINK / SOURCE tokens
        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "SINK": TokenType.SINK,
            "SOURCE": TokenType.SOURCE,
        }

    Parser = RisingWaveParser

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        SUPPORTS_BETWEEN_FLAGS = False

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.FileFormatProperty: lambda self, e: f"FORMAT {self.sql(e, 'this')}",
        }

        PROPERTIES_LOCATION = {
            **Postgres.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_EXPRESSION,
        }

        EXPRESSION_PRECEDES_PROPERTIES_CREATABLES = {"SINK"}

        def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
            """Delegate to the base generator's implementation.

            ``Generator`` here resolves to the module-level import from
            ``sqlglot.generator`` (method bodies do not see class-scope names), so
            this deliberately skips whatever ``Postgres.Generator`` defines.
            """
            return Generator.computedcolumnconstraint_sql(self, expression)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a two-argument MAP type as ``MAP(<key>, <value>)``.

            Every other data type is handed to the parent implementation.
            """
            type_args = expression.expressions
            if not (expression.is_type(exp.DType.MAP) and len(type_args) == 2):
                return super().datatype_sql(expression)

            key_sql, value_sql = (self.sql(arg) for arg in type_args)
            return f"MAP({key_sql}, {value_sql})"
@staticmethod
def _unicode_substitute(m: re.Match[str]) -> str:
    """Return the character whose code point is the hex number captured in group 1 of ``m``."""
    hex_digits = m.group(1)
    code_point = int(hex_digits, 16)
    return chr(code_point)
_unicode_substitute SUPPORTED_JSON_PATH_PARTS = { exp.JSONPathKey, exp.JSONPathRoot, exp.JSONPathSubscript, } TRANSFORMS = { **MySQL.Generator.TRANSFORMS, exp.TsOrDsToDate: lambda self, e: ( self.func("TO_DATE", e.this, self.format_time(e)) if e.args.get("format") else self.func("DATE", e.this) ), exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), exp.ToChar: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)), exp.StrToDate: lambda self, e: self.func( "STR_TO_DATE", e.this, self.format_time( e, inverse_time_mapping=MySQL.INVERSE_TIME_MAPPING, inverse_time_trie=MySQL.INVERSE_TIME_TRIE, ), ), exp.TimeToStr: lambda self, e: self.func( "DATE_FORMAT", e.this, self.format_time( e, inverse_time_mapping=MySQL.INVERSE_TIME_MAPPING, inverse_time_trie=MySQL.INVERSE_TIME_TRIE, ), ), exp.Date: unsupported_args("zone", "expressions")(rename_func("DATE")), exp.Cast: unsupported_args("format", "action", "default")( lambda self, e: f"{self.sql(e, 'this')} :> {self.sql(e, 'to')}" ), exp.TryCast: unsupported_args("format", "action", "default")( lambda self, e: f"{self.sql(e, 'this')} !:> {self.sql(e, 'to')}" ), exp.CastToStrType: lambda self, e: self.sql( exp.cast(e.this, DataType.build(e.args["to"].name)) ), exp.StrToUnix: unsupported_args("format")(rename_func("UNIX_TIMESTAMP")), exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), exp.UnixSeconds: rename_func("UNIX_TIMESTAMP"), exp.UnixToStr: lambda self, e: self.func( "FROM_UNIXTIME", e.this, self.format_time( e, inverse_time_mapping=MySQL.INVERSE_TIME_MAPPING, inverse_time_trie=MySQL.INVERSE_TIME_TRIE, ), ), exp.UnixToTime: unsupported_args("scale", "zone", "hours", "minutes")( lambda self, e: self.func( "FROM_UNIXTIME", e.this, self.format_time( e, inverse_time_mapping=MySQL.INVERSE_TIME_MAPPING, inverse_time_trie=MySQL.INVERSE_TIME_TRIE, ), ), ), exp.UnixToTimeStr: lambda self, e: f"FROM_UNIXTIME({self.sql(e, 'this')}) :> TEXT", 
exp.DateBin: unsupported_args("unit", "zone")( lambda self, e: self.func("TIME_BUCKET", e.this, e.expression, e.args.get("origin")) ), exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DType.DATE)), exp.FromTimeZone: lambda self, e: self.func( "CONVERT_TZ", e.this, e.args.get("zone"), "'UTC'" ), exp.DiToDate: lambda self, e: ( f"STR_TO_DATE({self.sql(e, 'this')}, {SingleStore.DATEINT_FORMAT})" ), exp.DateToDi: lambda self, e: ( f"(DATE_FORMAT({self.sql(e, 'this')}, {SingleStore.DATEINT_FORMAT}) :> INT)" ), exp.TsOrDiToDi: lambda self, e: ( f"(DATE_FORMAT({self.sql(e, 'this')}, {SingleStore.DATEINT_FORMAT}) :> INT)" ), exp.Time: unsupported_args("zone")(lambda self, e: f"{self.sql(e, 'this')} :> TIME"), exp.DatetimeAdd: _remove_ts_or_ds_to_date(date_add_sql("ADD")), exp.DatetimeTrunc: unsupported_args("zone")(timestamptrunc_sql()), exp.DatetimeSub: date_add_interval_sql("DATE", "SUB"), exp.DatetimeDiff: timestampdiff_sql, exp.DateTrunc: unsupported_args("zone")(timestamptrunc_sql()), exp.DateDiff: unsupported_args("zone")( lambda self, e: ( timestampdiff_sql(self, e) if e.unit is not None else self.func("DATEDIFF", e.this, e.expression) ) ), exp.TsOrDsDiff: lambda self, e: ( timestampdiff_sql(self, e) if e.unit is not None else self.func("DATEDIFF", e.this, e.expression) ), exp.TimestampTrunc: unsupported_args("zone")(timestamptrunc_sql()), exp.CurrentDatetime: lambda self, e: self.sql( cast_to_time6(exp.CurrentTimestamp(this=exp.Literal.number(6)), exp.DType.DATETIME) ), exp.JSONExtract: unsupported_args( "only_json_types", "expressions", "variant_extract", "json_query", "option", "quote", "on_condition", "requires_json", )(json_extract_segments("JSON_EXTRACT_JSON")), exp.JSONBExtract: json_extract_segments("BSON_EXTRACT_BSON"), exp.JSONPathKey: json_path_key_only_name, exp.JSONPathSubscript: lambda self, e: self.json_path_part(e.this), exp.JSONPathRoot: lambda *_: "", exp.JSONFormat: unsupported_args("options", 
"is_json")(rename_func("JSON_PRETTY")), exp.JSONArrayAgg: unsupported_args("null_handling", "return_type", "strict")( lambda self, e: self.func("JSON_AGG", e.this, suffix=f"{self.sql(e, 'order')})") ), exp.JSONArray: unsupported_args("null_handling", "return_type", "strict")( rename_func("JSON_BUILD_ARRAY") ), exp.JSONBExists: lambda self, e: self.func( "BSON_MATCH_ANY_EXISTS", e.this, e.args.get("path") ), exp.JSONExists: lambda self, e: ( f"{self.sql(e.this)}::?{self.sql(e.args.get('path'))}" if e.args.get("from_dcolonqmark") else self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path")) ), exp.JSONObject: unsupported_args( "null_handling", "unique_keys", "return_type", "encoding" )(rename_func("JSON_BUILD_OBJECT")), exp.DayOfWeekIso: lambda self, e: f"(({self.func('DAYOFWEEK', e.this)} % 7) + 1)", exp.DayOfMonth: rename_func("DAY"), exp.Hll: rename_func("APPROX_COUNT_DISTINCT"), exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), exp.CountIf: count_if_to_sum, exp.LogicalOr: lambda self, e: f"MAX(ABS({self.sql(e, 'this')}))", exp.LogicalAnd: lambda self, e: f"MIN(ABS({self.sql(e, 'this')}))", exp.ApproxQuantile: unsupported_args("accuracy", "weight")( lambda self, e: self.func( "APPROX_PERCENTILE", e.this, e.args.get("quantile"), e.args.get("error_tolerance"), ) ), exp.Variance: rename_func("VAR_SAMP"), exp.VariancePop: rename_func("VAR_POP"), exp.Xor: bool_xor_sql, exp.Cbrt: lambda self, e: self.sql( exp.Pow(this=e.this, expression=exp.Literal.number(1) / exp.Literal.number(3)) ), exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), exp.Repeat: lambda self, e: self.func( "LPAD", exp.Literal.string(""), exp.Mul(this=self.func("LENGTH", e.this), expression=e.args.get("times")), e.this, ), exp.IsAscii: lambda self, e: f"({self.sql(e, 'this')} RLIKE '^[\x00-\x7f]*$')", exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), exp.Contains: rename_func("INSTR"), exp.RegexpExtractAll: unsupported_args("position", "occurrence", 
"group")( lambda self, e: self.func( "REGEXP_MATCH", e.this, e.expression, e.args.get("parameters"), ) ), exp.RegexpExtract: unsupported_args("group")( lambda self, e: self.func( "REGEXP_SUBSTR", e.this, e.expression, e.args.get("position"), e.args.get("occurrence"), e.args.get("parameters"), ) ), exp.StartsWith: lambda self, e: self.func( "REGEXP_INSTR", e.this, self.func("CONCAT", exp.Literal.string("^"), e.expression) ), exp.FromBase: lambda self, e: self.func( "CONV", e.this, e.expression, exp.Literal.number(10) ), exp.RegexpILike: lambda self, e: self.binary( exp.RegexpLike( this=exp.Lower(this=e.this), expression=exp.Lower(this=e.expression), ), "RLIKE", ), exp.Stuff: lambda self, e: self.func( "CONCAT", self.func("SUBSTRING", e.this, exp.Literal.number(1), e.args.get("start") - 1), e.expression, self.func("SUBSTRING", e.this, e.args.get("start") + e.args.get("length")), ), exp.National: lambda self, e: self.national_sql(e, prefix=""), exp.Reduce: unsupported_args("finish")( lambda self, e: self.func( "REDUCE", e.args.get("initial"), e.this, e.args.get("merge") ) ), exp.MatchAgainst: unsupported_args("modifier")( lambda self, e: super().matchagainst_sql(e) ), exp.Show: unsupported_args( "history", "terse", "offset", "starts_with", "limit", "from_", "scope", "scope_kind", "mutex", "query", "channel", "log", "types", "privileges", )(lambda self, e: super().show_sql(e)), exp.Describe: unsupported_args( "style", "kind", "expressions", "partition", "format", )(lambda self, e: super().describe_sql(e)), } TRANSFORMS.pop(exp.JSONExtractScalar) TRANSFORMS.pop(exp.CurrentDate) UNSUPPORTED_TYPES = { exp.DType.ARRAY, exp.DType.AGGREGATEFUNCTION, exp.DType.SIMPLEAGGREGATEFUNCTION, exp.DType.BIGSERIAL, exp.DType.BPCHAR, exp.DType.DATEMULTIRANGE, exp.DType.DATERANGE, exp.DType.DYNAMIC, exp.DType.HLLSKETCH, exp.DType.HSTORE, exp.DType.IMAGE, exp.DType.INET, exp.DType.INT128, exp.DType.INT256, exp.DType.INT4MULTIRANGE, exp.DType.INT4RANGE, exp.DType.INT8MULTIRANGE, 
exp.DType.INT8RANGE, exp.DType.INTERVAL, exp.DType.IPADDRESS, exp.DType.IPPREFIX, exp.DType.IPV4, exp.DType.IPV6, exp.DType.LIST, exp.DType.MAP, exp.DType.LOWCARDINALITY, exp.DType.MONEY, exp.DType.MULTILINESTRING, exp.DType.NAME, exp.DType.NESTED, exp.DType.NOTHING, exp.DType.NULL, exp.DType.NUMMULTIRANGE, exp.DType.NUMRANGE, exp.DType.OBJECT, exp.DType.RANGE, exp.DType.ROWVERSION, exp.DType.SERIAL, exp.DType.SMALLSERIAL, exp.DType.SMALLMONEY, exp.DType.SUPER, exp.DType.TIMETZ, exp.DType.TIMESTAMPNTZ, exp.DType.TIMESTAMPLTZ, exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMP_NS, exp.DType.TSMULTIRANGE, exp.DType.TSRANGE, exp.DType.TSTZMULTIRANGE, exp.DType.TSTZRANGE, exp.DType.UINT128, exp.DType.UINT256, exp.DType.UNION, exp.DType.UNKNOWN, exp.DType.USERDEFINED, exp.DType.UUID, exp.DType.VARIANT, exp.DType.XML, exp.DType.TDIGEST, } TYPE_MAPPING = { **MySQL.Generator.TYPE_MAPPING, exp.DType.BIGDECIMAL: "DECIMAL", exp.DType.BIT: "BOOLEAN", exp.DType.DATE32: "DATE", exp.DType.DATETIME64: "DATETIME", exp.DType.DECIMAL32: "DECIMAL", exp.DType.DECIMAL64: "DECIMAL", exp.DType.DECIMAL128: "DECIMAL", exp.DType.DECIMAL256: "DECIMAL", exp.DType.ENUM8: "ENUM", exp.DType.ENUM16: "ENUM", exp.DType.FIXEDSTRING: "TEXT", exp.DType.GEOMETRY: "GEOGRAPHY", exp.DType.POINT: "GEOGRAPHYPOINT", exp.DType.RING: "GEOGRAPHY", exp.DType.LINESTRING: "GEOGRAPHY", exp.DType.POLYGON: "GEOGRAPHY", exp.DType.MULTIPOLYGON: "GEOGRAPHY", exp.DType.STRUCT: "RECORD", exp.DType.JSONB: "BSON", exp.DType.TIMESTAMP: "TIMESTAMP", exp.DType.TIMESTAMP_S: "TIMESTAMP", exp.DType.TIMESTAMP_MS: "TIMESTAMP(6)", } # https://docs.singlestore.com/cloud/reference/sql-reference/restricted-keywords/list-of-restricted-keywords/ RESERVED_KEYWORDS = { "abs", "absolute", "access", "account", "acos", "action", "add", "adddate", "addtime", "admin", "aes_decrypt", "aes_encrypt", "after", "against", "aggregate", "aggregates", "aggregator", "aggregator_id", "aggregator_plan_hash", "aggregators", "algorithm", "all", "also", "alter", 
"always", "analyse", "analyze", "and", "anti_join", "any", "any_value", "approx_count_distinct", "approx_count_distinct_accumulate", "approx_count_distinct_combine", "approx_count_distinct_estimate", "approx_geography_intersects", "approx_percentile", "arghistory", "arrange", "arrangement", "array", "as", "asc", "ascii", "asensitive", "asin", "asm", "assertion", "assignment", "ast", "asymmetric", "async", "at", "atan", "atan2", "attach", "attribute", "authorization", "auto", "auto_increment", "auto_reprovision", "autostats", "autostats_cardinality_mode", "autostats_enabled", "autostats_histogram_mode", "autostats_sampling", "availability", "avg", "avg_row_length", "avro", "azure", "background", "_background_threads_for_cleanup", "backup", "backup_history", "backup_id", "backward", "batch", "batches", "batch_interval", "_batch_size_limit", "before", "begin", "between", "bigint", "bin", "binary", "_binary", "bit", "bit_and", "bit_count", "bit_or", "bit_xor", "blob", "bool", "boolean", "bootstrap", "both", "_bt", "btree", "bucket_count", "by", "byte", "byte_length", "cache", "call", "call_for_pipeline", "called", "capture", "cascade", "cascaded", "case", "cast", "catalog", "ceil", "ceiling", "chain", "change", "char", "character", "characteristics", "character_length", "char_length", "charset", "check", "checkpoint", "_check_can_connect", "_check_consistency", "checksum", "_checksum", "class", "clear", "client", "client_found_rows", "close", "cluster", "clustered", "cnf", "coalesce", "coercibility", "collate", "collation", "collect", "column", "columnar", "columns", "columnstore", "columnstore_segment_rows", "comment", "comments", "commit", "committed", "_commit_log_tail", "committed", "compact", "compile", "compressed", "compression", "concat", "concat_ws", "concurrent", "concurrently", "condition", "configuration", "connection", "connection_id", "connections", "config", "constraint", "constraints", "content", "continue", "_continue_replay", "conv", "conversion", 
"convert", "convert_tz", "copy", "_core", "cos", "cost", "cot", "count", "create", "credentials", "cross", "cube", "csv", "cume_dist", "curdate", "current", "current_catalog", "current_date", "current_role", "current_schema", "current_security_groups", "current_security_roles", "current_time", "current_timestamp", "current_user", "cursor", "curtime", "cycle", "data", "database", "databases", "date", "date_add", "datediff", "date_format", "date_sub", "date_trunc", "datetime", "day", "day_hour", "day_microsecond", "day_minute", "dayname", "dayofmonth", "dayofweek", "dayofyear", "day_second", "deallocate", "dec", "decimal", "declare", "decode", "default", "defaults", "deferrable", "deferred", "defined", "definer", "degrees", "delayed", "delay_key_write", "delete", "delimiter", "delimiters", "dense_rank", "desc", "describe", "detach", "deterministic", "dictionary", "differential", "directory", "disable", "discard", "_disconnect", "disk", "distinct", "distinctrow", "distributed_joins", "div", "do", "document", "domain", "dot_product", "double", "drop", "_drop_profile", "dual", "dump", "duplicate", "dynamic", "earliest", "each", "echo", "election", "else", "elseif", "elt", "enable", "enclosed", "encoding", "encrypted", "end", "engine", "engines", "enum", "errors", "escape", "escaped", "estimate", "euclidean_distance", "event", "events", "except", "exclude", "excluding", "exclusive", "execute", "exists", "exit", "exp", "explain", "extended", "extension", "external", "external_host", "external_port", "extract", "extractor", "extractors", "extra_join", "_failover", "failed_login_attempts", "failure", "false", "family", "fault", "fetch", "field", "fields", "file", "files", "fill", "first", "first_value", "fix_alter", "fixed", "float", "float4", "float8", "floor", "flush", "following", "for", "force", "force_compiled_mode", "force_interpreter_mode", "foreground", "foreign", "format", "forward", "found_rows", "freeze", "from", "from_base64", "from_days", "from_unixtime", "fs", 
"_fsync", "full", "fulltext", "function", "functions", "gc", "gcs", "get_format", "_gc", "_gcx", "generate", "geography", "geography_area", "geography_contains", "geography_distance", "geography_intersects", "geography_latitude", "geography_length", "geography_longitude", "geographypoint", "geography_point", "geography_within_distance", "geometry", "geometry_area", "geometry_contains", "geometry_distance", "geometry_filter", "geometry_intersects", "geometry_length", "geometrypoint", "geometry_point", "geometry_within_distance", "geometry_x", "geometry_y", "global", "_global_version_timestamp", "grant", "granted", "grants", "greatest", "group", "grouping", "groups", "group_concat", "gzip", "handle", "handler", "hard_cpu_limit_percentage", "hash", "has_temp_tables", "having", "hdfs", "header", "heartbeat_no_logging", "hex", "highlight", "high_priority", "hold", "holding", "host", "hosts", "hour", "hour_microsecond", "hour_minute", "hour_second", "identified", "identity", "if", "ifnull", "ignore", "ilike", "immediate", "immutable", "implicit", "import", "in", "including", "increment", "incremental", "index", "indexes", "inet_aton", "inet_ntoa", "inet6_aton", "inet6_ntoa", "infile", "inherit", "inherits", "_init_profile", "init", "initcap", "initialize", "initially", "inject", "inline", "inner", "inout", "input", "insensitive", "insert", "insert_method", "instance", "instead", "instr", "int", "int1", "int2", "int3", "int4", "int8", "integer", "_internal_dynamic_typecast", "interpreter_mode", "intersect", "interval", "into", "invoker", "is", "isnull", "isolation", "iterate", "join", "json", "json_agg", "json_array_contains_double", "json_array_contains_json", "json_array_contains_string", "json_array_push_double", "json_array_push_json", "json_array_push_string", "json_delete_key", "json_extract_double", "json_extract_json", "json_extract_string", "json_extract_bigint", "json_get_type", "json_length", "json_set_double", "json_set_json", "json_set_string", 
"json_splice_double", "json_splice_json", "json_splice_string", "kafka", "key", "key_block_size", "keys", "kill", "killall", "label", "lag", "language", "large", "last", "last_day", "last_insert_id", "last_value", "lateral", "latest", "lc_collate", "lc_ctype", "lcase", "lead", "leading", "leaf", "leakproof", "least", "leave", "leaves", "left", "length", "level", "license", "like", "limit", "lines", "listen", "llvm", "ln", "load", "loaddata_where", "_load", "local", "localtime", "localtimestamp", "locate", "location", "lock", "log", "log10", "log2", "long", "longblob", "longtext", "loop", "lower", "low_priority", "lpad", "_ls", "ltrim", "lz4", "management", "_management_thread", "mapping", "master", "match", "materialized", "max", "maxvalue", "max_concurrency", "max_errors", "max_partitions_per_batch", "max_queue_depth", "max_retries_per_batch_partition", "max_rows", "mbc", "md5", "mpl", "median", "mediumblob", "mediumint", "mediumtext", "member", "memory", "memory_percentage", "_memsql_table_id_lookup", "memsql", "memsql_deserialize", "memsql_imitating_kafka", "memsql_serialize", "merge", "metadata", "microsecond", "middleint", "min", "min_rows", "minus", "minute", "minute_microsecond", "minute_second", "minvalue", "mod", "mode", "model", "modifies", "modify", "month", "monthname", "months_between", "move", "mpl", "names", "named", "namespace", "national", "natural", "nchar", "next", "no", "node", "none", "no_query_rewrite", "noparam", "not", "nothing", "notify", "now", "nowait", "no_write_to_binlog", "no_query_rewrite", "norely", "nth_value", "ntile", "null", "nullcols", "nullif", "nulls", "numeric", "nvarchar", "object", "octet_length", "of", "off", "offline", "offset", "offsets", "oids", "on", "online", "only", "open", "operator", "optimization", "optimize", "optimizer", "optimizer_state", "option", "options", "optionally", "or", "order", "ordered_serialize", "orphan", "out", "out_of_order", "outer", "outfile", "over", "overlaps", "overlay", "owned", "owner", 
"pack_keys", "paired", "parser", "parquet", "partial", "partition", "partition_id", "partitioning", "partitions", "passing", "password", "password_lock_time", "parser", "pause", "_pause_replay", "percent_rank", "percentile_cont", "percentile_disc", "periodic", "persisted", "pi", "pipeline", "pipelines", "pivot", "placing", "plan", "plans", "plancache", "plugins", "pool", "pools", "port", "position", "pow", "power", "preceding", "precision", "prepare", "prepared", "preserve", "primary", "prior", "privileges", "procedural", "procedure", "procedures", "process", "processlist", "profile", "profiles", "program", "promote", "proxy", "purge", "quarter", "queries", "query", "query_timeout", "queue", "quote", "radians", "rand", "range", "rank", "read", "_read", "reads", "real", "reassign", "rebalance", "recheck", "record", "recursive", "redundancy", "redundant", "ref", "reference", "references", "refresh", "regexp", "reindex", "relative", "release", "reload", "rely", "remote", "remove", "rename", "repair", "_repair_table", "repeat", "repeatable", "_repl", "_reprovisioning", "replace", "replica", "replicate", "replicating", "replication", "durability", "require", "resource", "resource_pool", "reset", "restart", "restore", "restrict", "result", "_resurrect", "retry", "return", "returning", "returns", "reverse", "revoke", "rg_pool", "right", "right_anti_join", "right_semi_join", "right_straight_join", "rlike", "role", "roles", "rollback", "rollup", "round", "routine", "row", "row_count", "row_format", "row_number", "rows", "rowstore", "rule", "rpad", "_rpc", "rtrim", "running", "s3", "safe", "save", "savepoint", "scalar", "schema", "schemas", "schema_binding", "scroll", "search", "second", "second_microsecond", "sec_to_time", "security", "select", "semi_join", "_send_threads", "sensitive", "separator", "sequence", "sequences", "serial", "serializable", "series", "service_user", "server", "session", "session_user", "set", "setof", "security_lists_intersect", "sha", "sha1", 
"sha2", "shard", "sharded", "sharded_id", "share", "show", "shutdown", "sigmoid", "sign", "signal", "similar", "simple", "site", "signed", "sin", "skip", "skipped_batches", "sleep", "_sleep", "smallint", "snapshot", "_snapshot", "_snapshots", "soft_cpu_limit_percentage", "some", "soname", "sparse", "spatial", "spatial_check_index", "specific", "split", "sql", "sql_big_result", "sql_buffer_result", "sql_cache", "sql_calc_found_rows", "sqlexception", "sql_mode", "sql_no_cache", "sql_no_logging", "sql_small_result", "sqlstate", "sqlwarning", "sqrt", "ssl", "stable", "standalone", "start", "starting", "state", "statement", "statistics", "stats", "status", "std", "stddev", "stddev_pop", "stddev_samp", "stdin", "stdout", "stop", "storage", "str_to_date", "straight_join", "strict", "string", "strip", "subdate", "substr", "substring", "substring_index", "success", "sum", "super", "symmetric", "sync_snapshot", "sync", "_sync", "_sync2", "_sync_partitions", "_sync_snapshot", "synchronize", "sysid", "system", "table", "table_checksum", "tables", "tablespace", "tags", "tan", "target_size", "task", "temp", "template", "temporary", "temptable", "_term_bump", "terminate", "terminated", "test", "text", "then", "time", "timediff", "time_bucket", "time_format", "timeout", "timestamp", "timestampadd", "timestampdiff", "timezone", "time_to_sec", "tinyblob", "tinyint", "tinytext", "to", "to_base64", "to_char", "to_date", "to_days", "to_json", "to_number", "to_seconds", "to_timestamp", "tracelogs", "traditional", "trailing", "transform", "transaction", "_transactions_experimental", "treat", "trigger", "triggers", "trim", "true", "trunc", "truncate", "trusted", "two_phase", "_twopcid", "type", "types", "ucase", "unbounded", "uncommitted", "undefined", "undo", "unencrypted", "unenforced", "unhex", "unhold", "unicode", "union", "unique", "_unittest", "unix_timestamp", "unknown", "unlisten", "_unload", "unlock", "unlogged", "unpivot", "unsigned", "until", "update", "upgrade", "upper", 
"usage", "use", "user", "users", "using", "utc_date", "utc_time", "utc_timestamp", "_utf8", "vacuum", "valid", "validate", "validator", "value", "values", "varbinary", "varchar", "varcharacter", "variables", "variadic", "variance", "var_pop", "var_samp", "varying", "vector_sub", "verbose", "version", "view", "void", "volatile", "voting", "wait", "_wake", "warnings", "week", "weekday", "weekofyear", "when", "where", "while", "whitespace", "window", "with", "without", "within", "_wm_heartbeat", "work", "workload", "wrapper", "write", "xact_id", "xor", "year", "year_month", "yes", "zerofill", "zone", } def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str: json_type = expression.args.get("json_type") func_name = "JSON_EXTRACT_JSON" if json_type is None else f"JSON_EXTRACT_{json_type}" return json_extract_segments(func_name)(self, expression) def jsonbextractscalar_sql(self, expression: exp.JSONBExtractScalar) -> str: json_type = expression.args.get("json_type") func_name = "BSON_EXTRACT_BSON" if json_type is None else f"BSON_EXTRACT_{json_type}" return json_extract_segments(func_name)(self, expression) def jsonextractarray_sql(self, expression: exp.JSONExtractArray) -> str: self.unsupported("Arrays are not supported in SingleStore") return self.function_fallback_sql(expression) @unsupported_args("on_condition") def jsonvalue_sql(self, expression: exp.JSONValue) -> str: res: exp.Expr = exp.JSONExtractScalar( this=expression.this, expression=expression.args.get("path"), json_type="STRING", ) returning = expression.args.get("returning") if returning is not None: res = exp.Cast(this=res, to=returning) return self.sql(res) def all_sql(self, expression: exp.All) -> str: self.unsupported("ALL subquery predicate is not supported in SingleStore") return super().all_sql(expression) def jsonarraycontains_sql(self, expression: exp.JSONArrayContains) -> str: json_type = expression.text("json_type").upper() if json_type: return self.func( 
f"JSON_ARRAY_CONTAINS_{json_type}", expression.expression, expression.this ) return self.func( "JSON_ARRAY_CONTAINS_JSON", expression.expression, self.func("TO_JSON", expression.this), ) @unsupported_args("kind", "values") def datatype_sql(self, expression: exp.DataType) -> str: if expression.args.get("nested") and not expression.is_type(exp.DType.STRUCT): self.unsupported( f"Argument 'nested' is not supported for representation of '{expression.this.value}' in SingleStore" ) if expression.is_type(exp.DType.VARBINARY) and not expression.expressions: # `VARBINARY` must always have a size - if it doesn't, we always generate `BLOB` return "BLOB" if expression.is_type( exp.DType.DECIMAL32, exp.DType.DECIMAL64, exp.DType.DECIMAL128, exp.DType.DECIMAL256, ): scale = self.expressions(expression, flat=True) if expression.is_type(exp.DType.DECIMAL32): precision = "9" elif expression.is_type(exp.DType.DECIMAL64): precision = "18" elif expression.is_type(exp.DType.DECIMAL128): precision = "38" else: # 65 is a maximum precision supported in SingleStore precision = "65" if scale is not None: return f"DECIMAL({precision}, {scale[0]})" else: return f"DECIMAL({precision})" if expression.is_type(exp.DType.VECTOR): expressions = expression.expressions if len(expressions) == 2: type_name = self.sql(expressions[0]) if type_name in self.dialect.INVERSE_VECTOR_TYPE_ALIASES: type_name = self.dialect.INVERSE_VECTOR_TYPE_ALIASES[type_name] return f"VECTOR({self.sql(expressions[1])}, {type_name})" return super().datatype_sql(expression) def collate_sql(self, expression: exp.Collate) -> str: # SingleStore does not support setting a collation for column in the SELECT query, # so we cast column to a LONGTEXT type with specific collation return self.binary(expression, ":> LONGTEXT COLLATE") def currentdate_sql(self, expression: exp.CurrentDate) -> str: timezone = expression.this if timezone: if isinstance(timezone, exp.Literal) and timezone.name.lower() == "utc": return self.func("UTC_DATE") 
self.unsupported("CurrentDate with timezone is not supported in SingleStore") return self.func("CURRENT_DATE") def currenttime_sql(self, expression: exp.CurrentTime) -> str: arg = expression.this if arg: if isinstance(arg, exp.Literal) and arg.name.lower() == "utc": return self.func("UTC_TIME") if isinstance(arg, exp.Literal) and arg.is_number: return self.func("CURRENT_TIME", arg) self.unsupported("CurrentTime with timezone is not supported in SingleStore") return self.func("CURRENT_TIME") def currenttimestamp_sql(self, expression: exp.CurrentTimestamp) -> str: arg = expression.this if arg: if isinstance(arg, exp.Literal) and arg.name.lower() == "utc": return self.func("UTC_TIMESTAMP") if isinstance(arg, exp.Literal) and arg.is_number: return self.func("CURRENT_TIMESTAMP", arg) self.unsupported("CurrentTimestamp with timezone is not supported in SingleStore") return self.func("CURRENT_TIMESTAMP") def standardhash_sql(self, expression: exp.StandardHash) -> str: hash_function = expression.expression if hash_function is None: return self.func("SHA", expression.this) if isinstance(hash_function, exp.Literal): if hash_function.name.lower() == "sha": return self.func("SHA", expression.this) if hash_function.name.lower() == "md5": return self.func("MD5", expression.this) self.unsupported( f"{hash_function.this} hash method is not supported in SingleStore" ) return self.func("SHA", expression.this) self.unsupported("STANDARD_HASH function is not supported in SingleStore") return self.func("SHA", expression.this) @unsupported_args("is_database", "exists", "cluster", "identity", "option", "partition") def truncatetable_sql(self, expression: exp.TruncateTable) -> str: statements = [] for expression in expression.expressions: statements.append(f"TRUNCATE {self.sql(expression)}") return "; ".join(statements) @unsupported_args("exists") def renamecolumn_sql(self, expression: exp.RenameColumn) -> str: old_column = self.sql(expression, "this") new_column = self.sql(expression, 
"to") return f"CHANGE {old_column} {new_column}" @unsupported_args("drop", "comment", "allow_null", "visible", "using") def altercolumn_sql(self, expression: exp.AlterColumn) -> str: alter = super().altercolumn_sql(expression) collate = self.sql(expression, "collate") collate = f" COLLATE {collate}" if collate else "" return f"{alter}{collate}" def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str: this = self.sql(expression, "this") not_null = " NOT NULL" if expression.args.get("not_null") else "" type = self.sql(expression, "data_type") or "AUTO" return f"AS {this} PERSISTED {type}{not_null}" ================================================ FILE: sqlglot/dialects/snowflake.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, generator, jsonpath, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, NormalizationStrategy, array_append_sql, array_concat_sql, date_delta_sql, datestrtodate_sql, groupconcat_sql, if_sql, inline_array_sql, map_date_part, max_or_greatest, min_or_least, no_make_interval_sql, no_timestamp_sql, rename_func, strposition_sql, timestampdiff_sql, timestamptrunc_sql, timestrtotime_sql, unit_to_str, var_map_sql, ) from sqlglot.generator import unsupported_args from sqlglot.helper import find_new_name, flatten, seq_get from sqlglot.optimizer.scope import build_scope, find_all_in_scope from sqlglot.parsers.snowflake import ( RANKING_WINDOW_FUNCTIONS_WITH_FRAME, TIMESTAMP_TYPES, SnowflakeParser, build_object_construct, ) from sqlglot.tokens import TokenType from sqlglot.typing.snowflake import EXPRESSION_METADATA if t.TYPE_CHECKING: from sqlglot._typing import E def _build_datediff(args: t.List) -> exp.DateDiff: return exp.DateDiff( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), date_part_boundary=True, ) def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: def 
_builder(args: t.List) -> E: return expr_type( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), ) return _builder def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: flag = expression.text("flag") if "i" not in flag: flag += "i" return self.func( "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) ) def _unqualify_pivot_columns(expression: exp.Expr) -> exp.Expr: """ Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, so we need to unqualify them. Same goes for ANY ORDER BY . Example: >>> from sqlglot import parse_one >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake")) SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) """ if isinstance(expression, exp.Pivot): if expression.unpivot: expression = transforms.unqualify_columns(expression) else: for field in expression.fields: field_expr = seq_get(field.expressions if field else [], 0) if isinstance(field_expr, exp.PivotAny): unqualified_field_expr = transforms.unqualify_columns(field_expr) t.cast(exp.Expr, field).set("expressions", unqualified_field_expr, 0) return expression def _flatten_structured_types_unless_iceberg(expression: exp.Expr) -> exp.Expr: assert isinstance(expression, exp.Create) def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: if expression.this in exp.DataType.NESTED_TYPES: expression.set("expressions", None) return expression props = expression.args.get("properties") if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): for schema_expression in expression.this.expressions: if isinstance(schema_expression, exp.ColumnDef): column_type = schema_expression.kind if isinstance(column_type, exp.DataType): column_type.transform(_flatten_structured_type, copy=False) return expression def 
_unnest_generate_date_array(unnest: exp.Unnest) -> None:
    """Rewrite, in place, an `UNNEST` over a date-array generator into an integer sequence.

    The unnested expression is replaced by an `ARRAY_GENERATE_RANGE` from 0 to
    `DATEDIFF(unit, start, end) + 1`, and each date is recomputed as `start` plus the
    sequence value. Nothing is changed unless `start` and `end` are set and the step
    is an interval whose value is exactly "1".
    """
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    # Only a step of one unit can be expressed with the integer sequence below
    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        # Work on a copy of the alias; its first column (if any) names the sequence value
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            # `sequence_value_name` is either the "value" default or an alias column node
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                # Swap every reference to the sequence column for the date computation;
                # direct children of the SELECT keep the column name via an alias
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

            # Replace the original join with a join over a LATERAL wrapping its operand
            lateral = exp.Lateral(this=unnest_parent.this.pop())
            unnest_parent.replace(exp.Join(this=lateral))
    else:
        # Not under a JOIN: wrap the rewritten UNNEST in a subquery projecting the dates
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )


def _transform_generate_date_array(expression: exp.Expr) -> exp.Expr:
    """Rewrite every `GenerateDateArray` found under a `SELECT`; other nodes pass through.

    Occurrences not directly under an `UNNEST` are first wrapped in an
    `ARRAY_AGG(*)` subquery over an `UNNEST`; single-expression `UNNEST`s sitting in a
    `FROM` or `JOIN` are handed to `_unnest_generate_date_array`.
    """
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    """Render `RegexpExtract` as `REGEXP_SUBSTR` and `RegexpExtractAll` as `REGEXP_SUBSTR_ALL`.

    Arguments are emitted in the order position, occurrence, parameters, group. When a
    later argument is present, the earlier ones default to position 1, occurrence 1
    and parameters 'c'.
    """
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also default value) which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    # Each default is only filled in when the argument that follows it is set
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_SUBSTR_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Render a JSON array extraction as `TRANSFORM(<json extract>, x -> ...)`.

    For `JSONValueArray` the lambda casts each element to VARCHAR; otherwise it wraps
    `TO_JSON(x)` in a `ParseJSON` node.
    """
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expr = exp.cast(ident, to=exp.DType.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return
self.func("TRANSFORM", json_extract, transform_lambda)


def _qualify_unnested_columns(expression: exp.Expr) -> exp.Expr:
    """Qualify unqualified columns of a `SELECT` with the identifier of an `UNNEST` source.

    Unaliased `UNNEST`s in a `FROM`/`JOIN` receive a fresh alias based on "value".
    Column names are inferred from unnested array literals made of key/value structs
    and mapped to the owning unnest's identifier. An untouched copy of the input is
    returned when an unnest's identifier cannot be resolved to an `exp.Identifier`.
    Non-`SELECT` expressions are returned unchanged.
    """
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        # Lower-cased inferred column name -> identifier of the unnest that produces it
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        # After the loop below this holds the identifier of the last processed unnest
        unnest_identifier: t.Optional[exp.Identifier] = None
        # Pristine copy, returned if we have to bail out after mutating `expression`
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    # Only non-empty structs made solely of `key = value` entries qualify
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    # The first qualifying struct is enough to learn the column names
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                # Prefer the alias name itself, falling back to its first column
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            # Columns that are already qualified are left alone
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                # Skip columns that sit inside the very unnest they would be qualified with
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                if isinstance(unnest_ancestor, exp.Unnest):
                    ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                    if (
                        ancestor_identifier
                        and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                    ):
                        continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression


def _eliminate_dot_variant_lookup(expression: exp.Expr) -> exp.Expr:
    """Turn `alias.col` references to column-only `UNNEST` aliases into `alias['col']`.

    Only applies to `SELECT` nodes; direct projections keep their column name through
    an alias on the generated bracket expression.
    """
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    # Build `<table identifier>['<column name>']`
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        "T": "T",  # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_ internal
def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
    """Tell whether `identifier` may be quoted, refusing for a bare `DUAL` table name.

    The parent implementation is consulted first and its verdict is returned
    unchanged when it is negative.
    """
    allowed = super().can_quote(identifier, identify)
    if not allowed:
        return allowed

    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    is_unquoted_dual_table = (
        isinstance(identifier.parent, exp.Table)
        and not identifier.quoted
        and identifier.name.lower() == "dual"
    )
    return not is_unquoted_dual_table
PARAMETER_TOKEN = "$" MATCHED_BY_SOURCE = False SINGLE_STRING_INTERVAL = True JOIN_HINTS = False TABLE_HINTS = False QUERY_HINTS = False AGGREGATE_FILTER_SUPPORTED = False SUPPORTS_TABLE_COPY = False COLLATE_IS_FUNC = True LIMIT_ONLY_LITERALS = True JSON_KEY_VALUE_PAIR_SEP = "," INSERT_OVERWRITE = " OVERWRITE INTO" STRUCT_DELIMITER = ("(", ")") COPY_PARAMS_ARE_WRAPPED = False COPY_PARAMS_EQ_REQUIRED = True STAR_EXCEPT = "EXCLUDE" SUPPORTS_EXPLODING_PROJECTIONS = False ARRAY_CONCAT_IS_VAR_LEN = False SUPPORTS_CONVERT_TIMEZONE = True EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False SUPPORTS_MEDIAN = True ARRAY_SIZE_NAME = "ARRAY_SIZE" SUPPORTS_DECODE_CASE = True IS_BOOL_ALLOWED = False DIRECTED_JOINS = True TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), exp.ArgMax: rename_func("MAX_BY"), exp.ArgMin: rename_func("MIN_BY"), exp.Array: transforms.preprocess([transforms.inherit_struct_field_names]), exp.ArrayConcat: array_concat_sql("ARRAY_CAT"), exp.ArrayAppend: array_append_sql("ARRAY_APPEND"), exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"), exp.ArrayContains: lambda self, e: self.func( "ARRAY_CONTAINS", e.expression if e.args.get("ensure_variant") is False else exp.cast(e.expression, exp.DType.VARIANT, copy=False), e.this, ), exp.ArrayPosition: lambda self, e: self.func( "ARRAY_POSITION", e.expression, e.this, ), exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), exp.ArrayOverlaps: rename_func("ARRAYS_OVERLAP"), exp.AtTimeZone: lambda self, e: self.func( "CONVERT_TIMEZONE", e.args.get("zone"), e.this ), exp.BitwiseOr: rename_func("BITOR"), exp.BitwiseXor: rename_func("BITXOR"), exp.BitwiseAnd: rename_func("BITAND"), exp.BitwiseAndAgg: rename_func("BITANDAGG"), exp.BitwiseOrAgg: rename_func("BITORAGG"), exp.BitwiseXorAgg: rename_func("BITXORAGG"), exp.BitwiseNot: rename_func("BITNOT"), exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), exp.Create: 
transforms.preprocess([_flatten_structured_types_unless_iceberg]), exp.CurrentTimestamp: lambda self, e: ( self.func("SYSDATE") if e.args.get("sysdate") else self.function_fallback_sql(e) ), exp.CurrentSchemas: lambda self, e: self.func("CURRENT_SCHEMAS"), exp.Localtime: lambda self, e: ( self.func("CURRENT_TIME", e.this) if e.this else "CURRENT_TIME" ), exp.Localtimestamp: lambda self, e: ( self.func("CURRENT_TIMESTAMP", e.this) if e.this else "CURRENT_TIMESTAMP" ), exp.DateAdd: date_delta_sql("DATEADD"), exp.DateDiff: date_delta_sql("DATEDIFF"), exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), exp.DatetimeDiff: timestampdiff_sql, exp.DateStrToDate: datestrtodate_sql, exp.Decrypt: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}DECRYPT", e.this, e.args.get("passphrase"), e.args.get("aad"), e.args.get("encryption_method"), ), exp.DecryptRaw: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}DECRYPT_RAW", e.this, e.args.get("key"), e.args.get("iv"), e.args.get("aad"), e.args.get("encryption_method"), e.args.get("aead"), ), exp.DayOfMonth: rename_func("DAYOFMONTH"), exp.DayOfWeek: rename_func("DAYOFWEEK"), exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), exp.DayOfYear: rename_func("DAYOFYEAR"), exp.DotProduct: rename_func("VECTOR_INNER_PRODUCT"), exp.Explode: rename_func("FLATTEN"), exp.Extract: lambda self, e: self.func( "DATE_PART", map_date_part(e.this, self.dialect), e.expression ), exp.CosineDistance: rename_func("VECTOR_COSINE_SIMILARITY"), exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), exp.FileFormatProperty: lambda self, e: ( f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})" ), exp.FromTimeZone: lambda self, e: self.func( "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this ), exp.GenerateSeries: lambda self, e: self.func( "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] if e.args.get("is_end_exclusive") else e.args["end"] + 1, e.args.get("step"), ), exp.GetExtract: rename_func("GET"), 
exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), exp.If: if_sql(name="IFF", false_value="NULL"), exp.JSONExtractArray: _json_extract_value_array_sql, exp.JSONExtractScalar: lambda self, e: self.func( "JSON_EXTRACT_PATH_TEXT", e.this, e.expression ), exp.JSONKeys: rename_func("OBJECT_KEYS"), exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), exp.JSONPathRoot: lambda *_: "", exp.JSONValueArray: _json_extract_value_array_sql, exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( rename_func("EDITDISTANCE") ), exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", exp.LogicalAnd: rename_func("BOOLAND_AGG"), exp.LogicalOr: rename_func("BOOLOR_AGG"), exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), exp.ManhattanDistance: rename_func("VECTOR_L1_DISTANCE"), exp.MakeInterval: no_make_interval_sql, exp.Max: max_or_greatest, exp.Min: min_or_least, exp.ParseJSON: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}PARSE_JSON", e.this ), exp.ToBinary: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}TO_BINARY", e.this, e.args.get("format") ), exp.ToBoolean: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}TO_BOOLEAN", e.this ), exp.ToDouble: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}TO_DOUBLE", e.this, e.args.get("format") ), exp.ToFile: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}TO_FILE", e.this, e.args.get("path") ), exp.ToNumber: lambda self, e: self.func( f"{'TRY_' if e.args.get('safe') else ''}TO_NUMBER", e.this, e.args.get("format"), e.args.get("precision"), e.args.get("scale"), ), exp.JSONFormat: rename_func("TO_JSON"), exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", exp.PercentileCont: transforms.preprocess( [transforms.add_within_group_for_percentiles] ), exp.PercentileDisc: transforms.preprocess( 
[transforms.add_within_group_for_percentiles] ), exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), exp.RegexpExtract: _regexpextract_sql, exp.RegexpExtractAll: _regexpextract_sql, exp.RegexpILike: _regexpilike_sql, exp.Select: transforms.preprocess( [ transforms.eliminate_window_clause, transforms.eliminate_distinct_on, transforms.explode_projection_to_unnest(), transforms.eliminate_semi_and_anti_joins, _transform_generate_date_array, _qualify_unnested_columns, _eliminate_dot_variant_lookup, ] ), exp.SHA: rename_func("SHA1"), exp.SHA1Digest: rename_func("SHA1_BINARY"), exp.MD5Digest: rename_func("MD5_BINARY"), exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), exp.LowerHex: rename_func("TO_CHAR"), exp.Skewness: rename_func("SKEW"), exp.StarMap: rename_func("OBJECT_CONSTRUCT"), exp.StartsWith: rename_func("STARTSWITH"), exp.EndsWith: rename_func("ENDSWITH"), exp.Rand: lambda self, e: self.func("RANDOM", e.this), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="CHARINDEX", supports_position=True ), exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), exp.Stuff: rename_func("INSERT"), exp.StPoint: rename_func("ST_MAKEPOINT"), exp.TimeAdd: date_delta_sql("TIMEADD"), exp.TimeSlice: lambda self, e: self.func( "TIME_SLICE", e.this, e.expression, unit_to_str(e), e.args.get("kind"), ), exp.Timestamp: no_timestamp_sql, exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), exp.TimestampDiff: lambda self, e: self.func( "TIMESTAMPDIFF", e.unit, e.expression, e.this ), exp.TimestampTrunc: timestamptrunc_sql(), exp.TimeStrToTime: timestrtotime_sql, exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", exp.ToArray: rename_func("TO_ARRAY"), exp.ToChar: lambda self, e: self.function_fallback_sql(e), exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), exp.TsOrDsDiff: 
def sortarray_sql(self, expression: exp.SortArray) -> str:
    """Render `ARRAY_SORT(<array>, <asc>, <nulls_first>)`.

    When `asc` equals the FALSE literal and `nulls_first` equals the TRUE literal,
    `None` is passed in place of `nulls_first`.
    """
    options = expression.args
    ascending = options.get("asc")
    nulls_first = options.get("nulls_first")

    # NOTE(review): presumably "descending, nulls first" is already the default and
    # therefore need not be spelled out -- confirm against the ARRAY_SORT docs
    descending_nulls_first = ascending == exp.false() and nulls_first == exp.true()

    return self.func(
        "ARRAY_SORT",
        expression.this,
        ascending,
        None if descending_nulls_first else nulls_first,
    )


def nthvalue_sql(self, expression: exp.NthValue) -> str:
    """Render `NTH_VALUE(<this>, <offset>)`, followed by `FROM FIRST` / `FROM LAST` if set.

    Nothing is appended when the `from_first` arg is missing (`None`).
    """
    call = self.func("NTH_VALUE", expression.this, expression.args.get("offset"))

    from_first = expression.args.get("from_first")
    if from_first is None:
        return call

    direction = " FROM FIRST" if from_first else " FROM LAST"
    return call + direction
def with_properties(self, properties: exp.Properties) -> str:
    """Render the properties unwrapped and space-separated, prefixed by `self.sep("")`."""
    leading = self.sep("")
    return self.properties(properties, wrapped=False, prefix=leading, sep=" ")


def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    """Render VALUES, forcing `values_as_table=False` for unsupported contents.

    The override kicks in when the node contains any of the types listed in
    `UNSUPPORTED_VALUES_EXPRESSIONS`; otherwise the caller's flag is forwarded.
    """
    offending_node = expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS)
    as_table = False if offending_node else values_as_table
    return super().values_sql(expression, values_as_table=as_table)


def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render `TIMESTAMP_FROM_PARTS`, converting a `milli` arg into `nano` first.

    The `milli` node is detached from the expression, multiplied by 1000000 and
    stored under `nano`, so the expression is mutated in place.
    """
    millis = expression.args.get("milli")
    if millis is not None:
        detached = millis.pop()
        expression.set("nano", detached * exp.Literal.number(1000000))

    render = rename_func("TIMESTAMP_FROM_PARTS")
    return render(self, expression)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST when the operand is known to be a string, else a plain cast.

    The operand is type-annotated on the fly when it carries no type yet.
    """
    operand = expression.this
    if operand.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        operand = annotate_types(operand, dialect=self.dialect)

    # Snowflake requires that TRY_CAST's value be a string
    # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
    # if we can deduce that the value is a string, then we can generate TRY_CAST
    try_cast_allowed = expression.args.get("requires_string") or operand.is_type(
        *exp.DataType.TEXT_TYPES
    )
    if not try_cast_allowed:
        return self.cast_sql(expression)

    return super().trycast_sql(expression)


def log_sql(self, expression: exp.Log) -> str:
    """Render a single-argument log as `LN(<this>)`; defer to the parent otherwise."""
    if expression.expression:
        return super().log_sql(expression)

    return self.func("LN", expression.this)


def greatest_sql(self, expression: exp.Greatest) -> str:
    """Render `GREATEST`, or `GREATEST_IGNORE_NULLS` when `ignore_nulls` is set."""
    if expression.args.get("ignore_nulls"):
        func_name = "GREATEST_IGNORE_NULLS"
    else:
        func_name = "GREATEST"

    return self.func(func_name, expression.this, *expression.expressions)


def least_sql(self, expression: exp.Least) -> str:
    """Render `LEAST`, or `LEAST_IGNORE_NULLS` when `ignore_nulls` is set."""
    if expression.args.get("ignore_nulls"):
        func_name = "LEAST_IGNORE_NULLS"
    else:
        func_name = "LEAST"

    return self.func(func_name, expression.this, *expression.expressions)


def generator_sql(self, expression: exp.Generator) -> str:
    """Render `GENERATOR(...)` with ROWCOUNT and TIMELIMIT keyword args, in that order.

    A keyword is emitted only when the matching arg is present and truthy.
    """
    candidates = (
        ("ROWCOUNT", expression.args.get("rowcount")),
        ("TIMELIMIT", expression.args.get("timelimit")),
    )
    keyword_args = [
        exp.Kwarg(this=exp.var(label), expression=value) for label, value in candidates if value
    ]
    return self.func("GENERATOR", *keyword_args)
def show_sql(self, expression: exp.Show) -> str:
    """Render a `SHOW` statement from the optional parts stored on the node.

    The parts are emitted in this order: TERSE, the object name, HISTORY, LIKE,
    IN <scope kind>, scope, STARTS WITH, limit, FROM and WITH PRIVILEGES. Parts
    that render to an empty string contribute nothing.
    """

    def clause(keyword: str, rendered: str) -> str:
        # An empty rendering means the clause is absent, so its keyword is dropped too
        return f"{keyword}{rendered}" if rendered else ""

    flags = expression.args
    terse = "TERSE " if flags.get("terse") else ""
    history = " HISTORY" if flags.get("history") else ""

    like = clause(" LIKE ", self.sql(expression, "like"))
    scope = clause(" ", self.sql(expression, "scope"))
    scope_kind = clause(" IN ", self.sql(expression, "scope_kind"))
    starts_with = clause(" STARTS WITH ", self.sql(expression, "starts_with"))
    limit = self.sql(expression, "limit")
    from_ = clause(" FROM ", self.sql(expression, "from_"))
    privileges = clause(
        " WITH PRIVILEGES ", self.expressions(expression, key="privileges", flat=True)
    )

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render `AUTOINCREMENT` with optional START, INCREMENT and ORDER / NOORDER parts.

    START and INCREMENT appear only when their args are truthy. The ordering
    keyword appears only when the `order` arg is not `None`.
    """
    options = expression.args

    start = options.get("start")
    start_clause = f" START {start}" if start else ""

    increment = options.get("increment")
    increment_clause = f" INCREMENT {increment}" if increment else ""

    order = options.get("order")
    if order is None:
        order_clause = ""
    elif order:
        order_clause = " ORDER"
    else:
        order_clause = " NOORDER"

    return f"AUTOINCREMENT{start_clause}{increment_clause}{order_clause}"


def cluster_sql(self, expression: exp.Cluster) -> str:
    """Render `CLUSTER BY (<expressions>)` with the expressions on a single line."""
    keys = self.expressions(expression, flat=True)
    return f"CLUSTER BY ({keys})"


def struct_sql(self, expression: exp.Struct) -> str:
    """Render a struct as `OBJECT_CONSTRUCT(key, value, ...)` or as `{<wildcard>}`.

    Fields given as `key = value` keep their key (identifier keys become string
    literals); any other field is keyed by `_<position>`.
    """
    fields = expression.expressions

    if len(fields) == 1:
        only = fields[0]
        if only.is_star or (isinstance(only, exp.ILike) and only.left.is_star):
            # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
            return f"{{{self.sql(only)}}}"

    pairs = []
    for position, field in enumerate(fields):
        if not isinstance(field, exp.PropertyEQ):
            pairs.append((exp.Literal.string(f"_{position}"), field))
            continue

        if isinstance(field.this, exp.Identifier):
            key = exp.Literal.string(field.name)
        else:
            key = field.this
        pairs.append((key, field.expression))

    return self.func("OBJECT_CONSTRUCT", *flatten(pairs))


@unsupported_args("weight", "accuracy")
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Render `APPROX_PERCENTILE(<this>, <quantile>)`."""
    quantile = expression.args.get("quantile")
    return self.func("APPROX_PERCENTILE", expression.this, quantile)
def strtotime_sql(self, expression: exp.StrToTime):
    """Render the string-to-timestamp conversion function that matches the target type.

    The type comes from the `target_type` arg when it is a `DataType`, else from
    the node's type annotation, else it defaults to TIMESTAMP. The function name
    is looked up in `TIMESTAMP_TYPES` (fallback `TO_TIMESTAMP`) and gets a `TRY_`
    prefix when the `safe` arg is set.
    """
    declared = expression.args.get("target_type")

    if isinstance(declared, exp.DataType):
        type_enum = declared.this
    else:
        annotated = expression.type
        type_enum = annotated.this if annotated else exp.DType.TIMESTAMP

    base_name = TIMESTAMP_TYPES.get(type_enum, "TO_TIMESTAMP")
    prefix = "TRY_" if expression.args.get("safe") else ""

    return self.func(
        f"{prefix}{base_name}",
        expression.this,
        self.format_time(expression),
    )


def timestampsub_sql(self, expression: exp.TimestampSub):
    """Render a timestamp subtraction as a `TimestampAdd` of the negated amount."""
    target = expression.this
    negated_amount = expression.expression * -1
    addition = exp.TimestampAdd(this=target, expression=negated_amount, unit=expression.unit)
    return self.sql(addition)


def jsonextract_sql(self, expression: exp.JSONExtract):
    """Render `GET_PATH(<json>, <path>)`, wrapping the input in `ParseJSON` when needed.

    No wrapping happens when the input is already a `ParseJSON` / `JSONExtract`
    node or when the `requires_json` arg is set.
    """
    subject = expression.this

    # JSON strings are valid coming from other dialects such as BQ so
    # for these cases we PARSE_JSON preemptively
    skip_parse = isinstance(subject, (exp.ParseJSON, exp.JSONExtract)) or expression.args.get(
        "requires_json"
    )
    if not skip_parse:
        subject = exp.ParseJSON(this=subject)

    return self.func("GET_PATH", subject, expression.expression)


def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Render `TO_CHAR(<value>, <format>)`, casting string inputs to TIMESTAMP first."""
    operand = expression.this
    value = exp.cast(operand, exp.DType.TIMESTAMP) if operand.is_string else operand
    return self.func("TO_CHAR", value, self.format_time(expression))
def select_sql(self, expression: exp.Select) -> str:
    """Render a SELECT, adding `LIMIT NULL` in place when only an OFFSET is present."""
    clauses = expression.args
    has_limit = clauses.get("limit")
    has_offset = clauses.get("offset")

    if has_offset and not has_limit:
        expression.limit(exp.Null(), copy=False)

    return super().select_sql(expression)


def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Render the target of a CREATE, placing COPY GRANTS early for materialized views.

    For a `VIEW` that has both a materialized property and a copy-grants property,
    the copy-grants property is removed from the POST_SCHEMA bucket of `locations`
    and emitted between the view name and its column list. Every other case is
    handled by the parent implementation.
    """
    materialized_property = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    needs_early_copy_grants = (
        expression.kind == "VIEW" and materialized_property and copy_grants_property
    )
    if not needs_early_copy_grants:
        return super().createable_sql(expression, locations)

    # For materialized views, COPY GRANTS is located *before* the columns list
    # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
    # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
    # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
    # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
    post_schema = locations[exp.Properties.Location.POST_SCHEMA]
    del post_schema[post_schema.index(copy_grants_property)]

    name_sql = self.sql(expression.this, "this")
    copy_grants_sql = self.sql(copy_grants_property)

    columns_sql = self.schema_columns_sql(expression.this)
    if columns_sql:
        columns_sql = f"{self.sep()}{columns_sql}"
    else:
        columns_sql = ""

    return f"{name_sql}{self.sep()}{copy_grants_sql}{columns_sql}"


def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    """Render ARRAY_AGG, moving an inner ORDER BY into a `WITHIN GROUP` clause.

    When the argument is an `Order` node, its operand is detached and becomes the
    new argument, so the expression is mutated in place.
    """
    argument = expression.this

    # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
    # and add it later as part of the WITHIN GROUP clause
    ordering = argument if isinstance(argument, exp.Order) else None
    if not ordering:
        return super().arrayagg_sql(expression)

    expression.set("this", ordering.this.pop())
    aggregate_sql = super().arrayagg_sql(expression)
    return self.sql(exp.WithinGroup(this=aggregate_sql, expression=ordering))
def array_sql(self, expression: exp.Array) -> str:
    """Render an array, turning `ARRAY(SELECT AS STRUCT ...)` into an aggregating subquery.

    Every other array is rendered inline. In the STRUCT case the inner SELECT is
    mutated: its `kind` is cleared and its projections are replaced by a single
    `ARRAY_AGG` over an object built from the original projections.
    """
    head = seq_get(expression.expressions, 0)

    is_struct_select = isinstance(head, exp.Select) and head.text("kind").upper() == "STRUCT"
    if not is_struct_select:
        return inline_array_sql(self, expression)

    # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
    key_value_args = []
    for projection in head.expressions:
        # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
        # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
        value = projection.this if isinstance(projection, exp.Alias) else projection
        key_value_args += [exp.Literal.string(projection.alias_or_name), value]

    aggregated = exp.ArrayAgg(this=build_object_construct(args=key_value_args))

    head.set("kind", None)
    head.set("expressions", [aggregated])

    return self.sql(head.subquery())


def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """Render CURRENT_DATE; with a zone, cast the zone-converted current timestamp to DATE."""
    zone = self.sql(expression, "this")
    if not zone:
        return super().currentdate_sql(expression)

    zoned_now = exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp())
    as_date = exp.Cast(this=zoned_now, to=exp.DataType(this=exp.DType.DATE))
    return self.sql(as_date)


def dot_sql(self, expression: exp.Dot) -> str:
    """Render `<struct>:<field>` for a top-level STRUCT access, else defer to the parent.

    The left-hand side is type-annotated on the fly when it carries no type yet.
    """
    base = expression.this
    if not base.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        base = annotate_types(base, dialect=self.dialect)

    if isinstance(base, exp.Dot) or not base.is_type(exp.DType.STRUCT):
        return super().dot_sql(expression)

    # Generate colon notation for the top level STRUCT
    return f"{self.sql(base)}:{self.sql(expression, 'expression')}"


def modelattribute_sql(self, expression: exp.ModelAttribute) -> str:
    """Render `<this>!<expression>`."""
    owner = self.sql(expression, "this")
    attribute = self.sql(expression, "expression")
    return f"{owner}!{attribute}"
def splitpart_sql(self, expression: exp.SplitPart) -> str:
    """Render `SPLIT_PART`, filling in missing arguments on the node first.

    A missing `delimiter` defaults to the string `' '` and a missing `part_index`
    defaults to the number 1; both are stored on the expression in place.
    """
    fallbacks = (
        ("delimiter", lambda: exp.Literal.string(" ")),
        ("part_index", lambda: exp.Literal.number(1)),
    )
    for arg_name, make_default in fallbacks:
        if not expression.args.get(arg_name):
            expression.set(arg_name, make_default())

    render = rename_func("SPLIT_PART")
    return render(self, expression)


def uniform_sql(self, expression: exp.Uniform) -> str:
    """Render `UNIFORM(<min>, <max>, <gen>)`, synthesizing the generator if needed."""
    supplied_gen = expression.args.get("gen")
    seed = expression.args.get("seed")

    # From Databricks UNIFORM(min, max, seed) -> Wrap gen in RANDOM(seed)
    source = exp.Rand(this=seed) if seed else supplied_gen

    # No gen argument (from Databricks 2-arg UNIFORM(min, max)) -> Add RANDOM()
    if not source:
        source = exp.Rand()

    return self.func("UNIFORM", expression.this, expression.expression, source)


def window_sql(self, expression: exp.Window) -> str:
    """Render a window, dropping the full-partition ROWS frame from certain functions.

    The frame `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` is removed
    (in place) when the windowed function, possibly wrapped in RESPECT / IGNORE
    NULLS, is one of `RANKING_WINDOW_FUNCTIONS_WITH_FRAME`.
    """
    frame = expression.args.get("spec")
    target = expression.this

    is_ranking_function = isinstance(target, RANKING_WINDOW_FUNCTIONS_WITH_FRAME) or (
        isinstance(target, (exp.RespectNulls, exp.IgnoreNulls))
        and isinstance(target.this, RANKING_WINDOW_FUNCTIONS_WITH_FRAME)
    )

    if is_ranking_function and frame:
        default_frame = {
            "kind": "ROWS",
            "start": "UNBOUNDED",
            "start_side": "PRECEDING",
            "end": "UNBOUNDED",
            "end_side": "FOLLOWING",
        }
        if all(frame.text(part).upper() == wanted for part, wanted in default_frame.items()):
            # omit the default window from window ranking functions
            expression.set("spec", None)

    return super().window_sql(expression)
def _normalize_partition(e: exp.Expr) -> exp.Expr:
    """Normalize one entry of a `PARTITIONED BY (...)` list.

    Plain strings and literals are converted to identifiers (a literal contributes
    its `name`); anything else is returned untouched.
    """
    if isinstance(e, str):
        identifier_name = e
    elif isinstance(e, exp.Literal):
        identifier_name = e.name
    else:
        return e

    return exp.to_identifier(identifier_name)


def _dateadd_sql(self: Spark.Generator, expression: exp.TsOrDsAdd | exp.TimestampAdd) -> str:
    """Render `DATE_ADD` in its 2-argument or 3-argument (unit-first) form.

    The 2-argument form is used when there is no unit, or when a `TsOrDsAdd` has
    the unit DAY. Otherwise the 3-argument form is produced; for `TsOrDsAdd` it is
    wrapped in a CAST to the node's return type unless that type is TIMESTAMP or
    DATETIME.
    """
    is_ts_or_ds_add = isinstance(expression, exp.TsOrDsAdd)

    if not expression.unit or (is_ts_or_ds_add and expression.text("unit").upper() == "DAY"):
        # Coming from Hive/Spark2 DATE_ADD or roundtripping the 2-arg version of Spark3/DB
        return self.func("DATE_ADD", expression.this, expression.expression)

    rendered = self.func(
        "DATE_ADD",
        unit_to_var(expression),
        expression.expression,
        expression.this,
    )
    if not is_ts_or_ds_add:
        return rendered

    # The 3 arg version of DATE_ADD produces a timestamp in Spark3/DB but possibly not
    # in other dialects
    return_type = expression.return_type
    if return_type.is_type(exp.DType.TIMESTAMP, exp.DType.DATETIME):
        return rendered

    return f"CAST({rendered} AS {return_type})"
this=exp.ArrayAgg(this=expression.this), expression=expression.args.get("separator") or exp.Literal.string(""), ) return self.sql(expr) return groupconcat_sql(self, expression) class Spark(Spark2): SUPPORTS_ORDER_BY_ALL = True SUPPORTS_NULL_TYPE = True ARRAY_FUNCS_PROPAGATES_NULLS = True EXPRESSION_METADATA = EXPRESSION_METADATA.copy() class Tokenizer(Spark2.Tokenizer): STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS = False RAW_STRINGS = [ (prefix + q, q) for q in t.cast(t.List[str], Spark2.Tokenizer.QUOTES) for prefix in ("r", "R") ] KEYWORDS = { **Spark2.Tokenizer.KEYWORDS, "DECLARE": TokenType.DECLARE, } Parser = SparkParser class Generator(Spark2.Generator): SUPPORTS_TO_NUMBER = True PAD_FILL_PATTERN_IS_REQUIRED = False SUPPORTS_CONVERT_TIMEZONE = True SUPPORTS_MEDIAN = True SUPPORTS_UNIX_SECONDS = True SUPPORTS_DECODE_CASE = True SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True TYPE_MAPPING = { **Spark2.Generator.TYPE_MAPPING, exp.DType.MONEY: "DECIMAL(15, 4)", exp.DType.SMALLMONEY: "DECIMAL(6, 4)", exp.DType.UUID: "STRING", exp.DType.TIMESTAMPLTZ: "TIMESTAMP_LTZ", exp.DType.TIMESTAMPNTZ: "TIMESTAMP_NTZ", } TRANSFORMS = { **Spark2.Generator.TRANSFORMS, exp.ArrayConstructCompact: lambda self, e: self.func( "ARRAY_COMPACT", self.func("ARRAY", *e.expressions) ), exp.ArrayInsert: lambda self, e: self.func( "ARRAY_INSERT", e.this, e.args.get("position"), e.expression ), exp.ArrayAppend: array_append_sql("ARRAY_APPEND"), exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND"), exp.BitwiseAndAgg: rename_func("BIT_AND"), exp.BitwiseOrAgg: rename_func("BIT_OR"), exp.BitwiseXorAgg: rename_func("BIT_XOR"), exp.BitwiseCount: rename_func("BIT_COUNT"), exp.Create: preprocess( [ remove_unique_constraints, lambda e: ctas_with_tmp_tables_to_create_tmp_view( e, temporary_storage_provider ), move_partitioned_by_to_schema_columns, ] ), exp.CurrentVersion: rename_func("VERSION"), exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), exp.DatetimeAdd: 
date_delta_to_binary_interval_op(cast=False), exp.DatetimeSub: date_delta_to_binary_interval_op(cast=False), exp.GroupConcat: _groupconcat_sql, exp.EndsWith: rename_func("ENDSWITH"), exp.JSONKeys: rename_func("JSON_OBJECT_KEYS"), exp.PartitionedByProperty: lambda self, e: ( f"PARTITIONED BY {self.wrap(self.expressions(sqls=[_normalize_partition(e) for e in e.this.expressions], skip_first=True))}" ), exp.SafeAdd: rename_func("TRY_ADD"), exp.SafeMultiply: rename_func("TRY_MULTIPLY"), exp.SafeSubtract: rename_func("TRY_SUBTRACT"), exp.StartsWith: rename_func("STARTSWITH"), exp.TimeAdd: date_delta_to_binary_interval_op(cast=False), exp.TimeSub: date_delta_to_binary_interval_op(cast=False), exp.TsOrDsAdd: _dateadd_sql, exp.TimestampAdd: _dateadd_sql, exp.TimestampFromParts: rename_func("MAKE_TIMESTAMP"), exp.TimestampSub: date_delta_to_binary_interval_op(cast=False), exp.DatetimeDiff: timestampdiff_sql, exp.TimestampDiff: timestampdiff_sql, exp.TryCast: lambda self, e: ( self.trycast_sql(e) if e.args.get("safe") else self.cast_sql(e) ), } TRANSFORMS.pop(exp.AnyValue) TRANSFORMS.pop(exp.DateDiff) TRANSFORMS.pop(exp.With) def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: return generator.Generator.ignorenulls_sql(self, expression) def bracket_sql(self, expression: exp.Bracket) -> str: if expression.args.get("safe"): key = seq_get(self.bracket_offset_expressions(expression, index_offset=1), 0) return self.func("TRY_ELEMENT_AT", expression.this, key) return super().bracket_sql(expression) def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str: return f"GENERATED ALWAYS AS ({self.sql(expression, 'this')})" def anyvalue_sql(self, expression: exp.AnyValue) -> str: return self.function_fallback_sql(expression) def datediff_sql(self, expression: exp.DateDiff) -> str: end = self.sql(expression, "this") start = self.sql(expression, "expression") if expression.unit: return self.func("DATEDIFF", unit_to_var(expression), start, end) 
return self.func("DATEDIFF", end, start) def placeholder_sql(self, expression: exp.Placeholder) -> str: if not expression.args.get("widget"): return super().placeholder_sql(expression) return f"{{{expression.name}}}" def readparquet_sql(self, expression: exp.ReadParquet) -> str: if len(expression.expressions) != 1: self.unsupported("READ_PARQUET with multiple arguments is not supported") return "" parquet_file = expression.expressions[0] return f"parquet.`{parquet_file.name}`" def ifblock_sql(self, expression: exp.IfBlock) -> str: condition = expression.this true_block = expression.args.get("true") condition_expr = None if isinstance(condition, exp.Not): inner = condition.this if isinstance(inner, exp.Is) and isinstance(inner.expression, exp.Null): condition_expr = inner.this if isinstance(condition_expr, exp.ObjectId): object_type = condition_expr.expression if ( (object_type is None or object_type.name.upper() == "U") and isinstance(true_block, exp.Block) and isinstance(drop := true_block.expressions[0], exp.Drop) ): drop.set("exists", True) return self.sql(drop) return super().ifblock_sql(expression) ================================================ FILE: sqlglot/dialects/spark2.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, transforms from sqlglot.dialects.dialect import ( bracket_to_element_at_sql, is_parse_json, rename_func, unit_to_str, ) from sqlglot.dialects.hive import Hive from sqlglot.parsers.spark2 import Spark2Parser from sqlglot.tokens import TokenType from sqlglot.transforms import ( preprocess, remove_unique_constraints, ctas_with_tmp_tables_to_create_tmp_view, move_schema_columns_to_partitioned_by, ) from sqlglot.typing.spark2 import EXPRESSION_METADATA def _map_sql(self: Spark2.Generator, expression: exp.Map) -> str: keys = expression.args.get("keys") values = expression.args.get("values") if not keys or not values: return self.func("MAP") return 
self.func("MAP_FROM_ARRAYS", keys, values) def _str_to_date(self: Spark2.Generator, expression: exp.StrToDate) -> str: time_format = self.format_time(expression) if time_format == Hive.DATE_FORMAT: return self.func("TO_DATE", expression.this) return self.func("TO_DATE", expression.this, time_format) def _unix_to_time_sql(self: Spark2.Generator, expression: exp.UnixToTime) -> str: scale = expression.args.get("scale") timestamp = expression.this if scale is None: return self.sql(exp.cast(exp.func("from_unixtime", timestamp), exp.DType.TIMESTAMP)) if scale == exp.UnixToTime.SECONDS: return self.func("TIMESTAMP_SECONDS", timestamp) if scale == exp.UnixToTime.MILLIS: return self.func("TIMESTAMP_MILLIS", timestamp) if scale == exp.UnixToTime.MICROS: return self.func("TIMESTAMP_MICROS", timestamp) unix_seconds = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)) return self.func("TIMESTAMP_SECONDS", unix_seconds) def _unalias_pivot(expression: exp.Expr) -> exp.Expr: """ Spark doesn't allow PIVOT aliases, so we need to remove them and possibly wrap a pivoted source in a subquery with the same alias to preserve the query's semantics. Example: >>> from sqlglot import parse_one >>> expr = parse_one("SELECT piv.x FROM tbl PIVOT (SUM(a) FOR b IN ('x')) piv") >>> print(_unalias_pivot(expr).sql(dialect="spark")) SELECT piv.x FROM (SELECT * FROM tbl PIVOT(SUM(a) FOR b IN ('x'))) AS piv """ if isinstance(expression, exp.From) and expression.this.args.get("pivots"): pivot = expression.this.args["pivots"][0] if pivot.alias: alias = pivot.args["alias"].pop() return exp.From( this=expression.this.replace( exp.select("*") .from_(expression.this.copy(), copy=False) .subquery(alias=alias, copy=False) ) ) return expression def _unqualify_pivot_columns(expression: exp.Expr) -> exp.Expr: """ Spark doesn't allow the column referenced in the PIVOT's field to be qualified, so we need to unqualify it. 
Example: >>> from sqlglot import parse_one >>> expr = parse_one("SELECT * FROM tbl PIVOT (SUM(tbl.sales) FOR tbl.quarter IN ('Q1', 'Q2'))") >>> print(_unqualify_pivot_columns(expr).sql(dialect="spark")) SELECT * FROM tbl PIVOT(SUM(tbl.sales) FOR quarter IN ('Q1', 'Q2')) """ if isinstance(expression, exp.Pivot): expression.set( "fields", [transforms.unqualify_columns(field) for field in expression.fields] ) return expression def temporary_storage_provider(expression: exp.Expr) -> exp.Expr: # spark2, spark, Databricks require a storage provider for temporary tables provider = exp.FileFormatProperty(this=exp.Literal.string("parquet")) expression.args["properties"].append("expressions", provider) return expression class Spark2(Hive): ALTER_TABLE_SUPPORTS_CASCADE = False EXPRESSION_METADATA = EXPRESSION_METADATA.copy() # https://spark.apache.org/docs/latest/api/sql/index.html#initcap # https://docs.databricks.com/aws/en/sql/language-manual/functions/initcap # https://github.com/apache/spark/blob/master/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java#L859-L905 INITCAP_DEFAULT_DELIMITER_CHARS = " " class Tokenizer(Hive.Tokenizer): HEX_STRINGS = [("X'", "'"), ("x'", "'")] KEYWORDS = { **Hive.Tokenizer.KEYWORDS, "TIMESTAMP": TokenType.TIMESTAMPTZ, } Parser = Spark2Parser class Generator(Hive.Generator): QUERY_HINTS = True NVL2_SUPPORTED = True CAN_IMPLEMENT_ARRAY_ANY = True ALTER_SET_TYPE = "TYPE" PROPERTIES_LOCATION = { **Hive.Generator.PROPERTIES_LOCATION, exp.EngineProperty: exp.Properties.Location.UNSUPPORTED, exp.AutoIncrementProperty: exp.Properties.Location.UNSUPPORTED, exp.CharacterSetProperty: exp.Properties.Location.UNSUPPORTED, exp.CollateProperty: exp.Properties.Location.UNSUPPORTED, } TS_OR_DS_EXPRESSIONS = ( *Hive.Generator.TS_OR_DS_EXPRESSIONS, exp.DayOfMonth, exp.DayOfWeek, exp.DayOfYear, exp.WeekOfYear, ) TRANSFORMS = { **Hive.Generator.TRANSFORMS, exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), exp.ArraySum: lambda self, 
e: ( f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)" ), exp.ArrayToString: rename_func("ARRAY_JOIN"), exp.ArraySlice: rename_func("SLICE"), exp.AtTimeZone: lambda self, e: self.func( "FROM_UTC_TIMESTAMP", e.this, e.args.get("zone") ), exp.BitwiseLeftShift: rename_func("SHIFTLEFT"), exp.BitwiseRightShift: rename_func("SHIFTRIGHT"), exp.Create: preprocess( [ remove_unique_constraints, lambda e: ctas_with_tmp_tables_to_create_tmp_view( e, temporary_storage_provider ), move_schema_columns_to_partitioned_by, ] ), exp.DateFromParts: rename_func("MAKE_DATE"), exp.DateTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)), exp.DayOfMonth: rename_func("DAYOFMONTH"), exp.DayOfWeek: rename_func("DAYOFWEEK"), # (DAY_OF_WEEK(datetime) % 7) + 1 is equivalent to DAYOFWEEK_ISO(datetime) exp.DayOfWeekIso: lambda self, e: f"(({self.func('DAYOFWEEK', e.this)} % 7) + 1)", exp.DayOfYear: rename_func("DAYOFYEAR"), exp.Format: rename_func("FORMAT_STRING"), exp.From: transforms.preprocess([_unalias_pivot]), exp.FromTimeZone: lambda self, e: self.func( "TO_UTC_TIMESTAMP", e.this, e.args.get("zone") ), exp.LogicalAnd: rename_func("BOOL_AND"), exp.LogicalOr: rename_func("BOOL_OR"), exp.Map: _map_sql, exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), exp.Reduce: rename_func("AGGREGATE"), exp.RegexpReplace: lambda self, e: self.func( "REGEXP_REPLACE", e.this, e.expression, e.args["replacement"], e.args.get("position"), ), exp.Select: transforms.preprocess( [ transforms.eliminate_qualify, transforms.eliminate_distinct_on, transforms.unnest_to_explode, transforms.any_to_exists, ] ), exp.SHA2Digest: lambda self, e: self.func( "SHA2", e.this, e.args.get("length") or exp.Literal.number(256) ), exp.StrToDate: _str_to_date, exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this), exp.UnixToTime: _unix_to_time_sql, exp.VariancePop: 
rename_func("VAR_POP"), exp.WeekOfYear: rename_func("WEEKOFYEAR"), exp.WithinGroup: transforms.preprocess( [transforms.remove_within_group_for_percentiles] ), } TRANSFORMS.pop(exp.ArraySort) TRANSFORMS.pop(exp.ILike) TRANSFORMS.pop(exp.Left) TRANSFORMS.pop(exp.MonthsBetween) TRANSFORMS.pop(exp.Right) WRAP_DERIVED_VALUES = False CREATE_FUNCTION_RETURN_AS = False def struct_sql(self, expression: exp.Struct) -> str: from sqlglot.generator import Generator return Generator.struct_sql(self, expression) def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: arg = expression.this is_json_extract = isinstance( arg, (exp.JSONExtract, exp.JSONExtractScalar) ) and not arg.args.get("variant_extract") # We can't use a non-nested type (eg. STRING) as a schema if expression.to.args.get("nested") and (is_parse_json(arg) or is_json_extract): schema = f"'{self.sql(expression, 'to')}'" return self.func("FROM_JSON", arg if is_json_extract else arg.this, schema) if is_parse_json(expression): return self.func("TO_JSON", arg) return super(Hive.Generator, self).cast_sql(expression, safe_prefix=safe_prefix) def fileformatproperty_sql(self, expression: exp.FileFormatProperty) -> str: if expression.args.get("hive_format"): return super().fileformatproperty_sql(expression) return f"USING {expression.name.upper()}" def altercolumn_sql(self, expression: exp.AlterColumn) -> str: this = self.sql(expression, "this") new_name = self.sql(expression, "rename_to") or this comment = self.sql(expression, "comment") if new_name == this: if comment: return f"ALTER COLUMN {this} COMMENT {comment}" return super(Hive.Generator, self).altercolumn_sql(expression) return f"RENAME COLUMN {this} TO {new_name}" def renamecolumn_sql(self, expression: exp.RenameColumn) -> str: return super(Hive.Generator, self).renamecolumn_sql(expression) def bracket_sql(self, expression: exp.Bracket) -> str: if expression.args.get("safe") is False: return bracket_to_element_at_sql(self, expression) 
return super().bracket_sql(expression) ================================================ FILE: sqlglot/dialects/sqlite.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, generator, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, NormalizationStrategy, any_value_to_max_sql, arrow_json_extract_sql, concat_to_dpipe_sql, count_if_to_sum, no_ilike_sql, no_pivot_sql, no_tablesample_sql, no_trycast_sql, rename_func, strposition_sql, ) from sqlglot.generator import unsupported_args from sqlglot.parsers.sqlite import SQLiteParser from sqlglot.tokens import TokenType def _transform_create(expression: exp.Expr) -> exp.Expr: """Move primary key to a column and enforce auto_increment on primary keys.""" schema = expression.this if isinstance(expression, exp.Create) and isinstance(schema, exp.Schema): defs = {} primary_key = None for e in schema.expressions: if isinstance(e, exp.ColumnDef): defs[e.name] = e elif isinstance(e, exp.PrimaryKey): primary_key = e if primary_key and len(primary_key.expressions) == 1: column = defs[primary_key.expressions[0].name] column.append( "constraints", exp.ColumnConstraint(kind=exp.PrimaryKeyColumnConstraint()) ) schema.expressions.remove(primary_key) else: for column in defs.values(): auto_increment = None for constraint in column.constraints: if isinstance(constraint.kind, exp.PrimaryKeyColumnConstraint): break if isinstance(constraint.kind, exp.AutoIncrementColumnConstraint): auto_increment = constraint if auto_increment: column.constraints.remove(auto_increment) return expression def _generated_to_auto_increment(expression: exp.Expr) -> exp.Expr: if not isinstance(expression, exp.ColumnDef): return expression generated = expression.find(exp.GeneratedAsIdentityColumnConstraint) if generated: t.cast(exp.ColumnConstraint, generated.parent).pop() not_null = expression.find(exp.NotNullColumnConstraint) if not_null: t.cast(exp.ColumnConstraint, 
not_null.parent).pop() expression.append( "constraints", exp.ColumnConstraint(kind=exp.AutoIncrementColumnConstraint()) ) return expression class SQLite(Dialect): # https://sqlite.org/forum/forumpost/5e575586ac5c711b?raw NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE TYPED_DIVISION = True SAFE_DIVISION = True SAFE_TO_ELIMINATE_DOUBLE_NEGATION = False class Tokenizer(tokens.Tokenizer): IDENTIFIERS = ['"', ("[", "]"), "`"] HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", ""), ("0X", "")] NESTED_COMMENTS = False KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "ATTACH": TokenType.ATTACH, "DETACH": TokenType.DETACH, "INDEXED BY": TokenType.INDEXED_BY, "MATCH": TokenType.MATCH, } KEYWORDS.pop("/*+") COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE} Parser = SQLiteParser class Generator(generator.Generator): JOIN_HINTS = False TABLE_HINTS = False QUERY_HINTS = False NVL2_SUPPORTED = False JSON_PATH_BRACKETED_KEY_SUPPORTED = False SUPPORTS_CREATE_TABLE_LIKE = False SUPPORTS_TABLE_ALIAS_COLUMNS = False SUPPORTS_TO_NUMBER = False SUPPORTS_WINDOW_EXCLUDE = True EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False SUPPORTS_MEDIAN = False JSON_KEY_VALUE_PAIR_SEP = "," PARSE_JSON_NAME: t.Optional[str] = None SUPPORTED_JSON_PATH_PARTS = { exp.JSONPathKey, exp.JSONPathRoot, exp.JSONPathSubscript, } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.BOOLEAN: "INTEGER", exp.DType.TINYINT: "INTEGER", exp.DType.SMALLINT: "INTEGER", exp.DType.INT: "INTEGER", exp.DType.BIGINT: "INTEGER", exp.DType.FLOAT: "REAL", exp.DType.DOUBLE: "REAL", exp.DType.DECIMAL: "REAL", exp.DType.CHAR: "TEXT", exp.DType.NCHAR: "TEXT", exp.DType.VARCHAR: "TEXT", exp.DType.NVARCHAR: "TEXT", exp.DType.BINARY: "BLOB", exp.DType.VARBINARY: "BLOB", } TYPE_MAPPING.pop(exp.DType.BLOB) TOKEN_MAPPING = { TokenType.AUTO_INCREMENT: "AUTOINCREMENT", } TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.AnyValue: any_value_to_max_sql, exp.Chr: rename_func("CHAR"), exp.Concat: concat_to_dpipe_sql, 
exp.CountIf: count_if_to_sum, exp.Create: transforms.preprocess([_transform_create]), exp.CurrentDate: lambda *_: "CURRENT_DATE", exp.CurrentTime: lambda *_: "CURRENT_TIME", exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", exp.CurrentVersion: lambda *_: "SQLITE_VERSION()", exp.ColumnDef: transforms.preprocess([_generated_to_auto_increment]), exp.DateStrToDate: lambda self, e: self.sql(e, "this"), exp.If: rename_func("IIF"), exp.ILike: no_ilike_sql, exp.JSONArrayAgg: unsupported_args("order", "null_handling", "return_type", "strict")( rename_func("JSON_GROUP_ARRAY") ), exp.JSONExtractScalar: arrow_json_extract_sql, exp.JSONObjectAgg: lambda self, e: self._jsonobject_sql(e, name="JSON_GROUP_OBJECT"), exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( rename_func("EDITDIST3") ), exp.LogicalOr: rename_func("MAX"), exp.LogicalAnd: rename_func("MIN"), exp.Pivot: no_pivot_sql, exp.Rand: rename_func("RANDOM"), exp.Select: transforms.preprocess( [ transforms.eliminate_distinct_on, transforms.eliminate_qualify, transforms.eliminate_semi_and_anti_joins, ] ), exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="INSTR"), exp.TableSample: no_tablesample_sql, exp.TimeStrToTime: lambda self, e: self.sql(e, "this"), exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.args.get("format"), e.this), exp.TryCast: no_trycast_sql, exp.TsOrDsToTimestamp: lambda self, e: self.sql(e, "this"), } # SQLite doesn't generally support CREATE TABLE .. properties # https://www.sqlite.org/lang_createtable.html PROPERTIES_LOCATION = { prop: exp.Properties.Location.UNSUPPORTED for prop in generator.Generator.PROPERTIES_LOCATION } # There are a few exceptions (e.g. 
temporary tables) which are supported or # can be transpiled to SQLite, so we explicitly override them accordingly PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE LIMIT_FETCH = "LIMIT" def bitwiseandagg_sql(self, expression: exp.BitwiseAndAgg) -> str: self.unsupported("BITWISE_AND aggregation is not supported in SQLite") return self.function_fallback_sql(expression) def bitwiseoragg_sql(self, expression: exp.BitwiseOrAgg) -> str: self.unsupported("BITWISE_OR aggregation is not supported in SQLite") return self.function_fallback_sql(expression) def bitwisexoragg_sql(self, expression: exp.BitwiseXorAgg) -> str: self.unsupported("BITWISE_XOR aggregation is not supported in SQLite") return self.function_fallback_sql(expression) def jsonextract_sql(self, expression: exp.JSONExtract) -> str: if expression.expressions: return self.function_fallback_sql(expression) return arrow_json_extract_sql(self, expression) def dateadd_sql(self, expression: exp.DateAdd) -> str: modifier = expression.expression modifier = modifier.name if modifier.is_string else self.sql(modifier) unit = expression.args.get("unit") modifier = f"'{modifier} {unit.name}'" if unit else f"'{modifier}'" return self.func("DATE", expression.this, modifier) def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: if expression.is_type("date"): return self.func("DATE", expression.this) return super().cast_sql(expression) # Note: SQLite's TRUNC always returns REAL (e.g., trunc(10.99) -> 10.0), not INTEGER. # This creates a transpilation gap affecting division semantics, similar to Presto. # Unlike Presto where this only affects decimals=0, SQLite has no decimals parameter # so every use of TRUNC is affected. Modeling precisely would require exp.FloatTrunc. 
@unsupported_args("decimals") def trunc_sql(self, expression: exp.Trunc) -> str: return self.func("TRUNC", expression.this) def generateseries_sql(self, expression: exp.GenerateSeries) -> str: parent = expression.parent alias = parent and parent.args.get("alias") if isinstance(alias, exp.TableAlias) and alias.columns: column_alias = alias.columns[0] alias.set("columns", None) sql = self.sql( exp.select(exp.alias_("value", column_alias)).from_(expression).subquery() ) else: sql = self.function_fallback_sql(expression) return sql def datediff_sql(self, expression: exp.DateDiff) -> str: unit = expression.args.get("unit") unit = unit.name.upper() if unit else "DAY" sql = f"(JULIANDAY({self.sql(expression, 'this')}) - JULIANDAY({self.sql(expression, 'expression')}))" if unit == "MONTH": sql = f"{sql} / 30.0" elif unit == "YEAR": sql = f"{sql} / 365.0" elif unit == "HOUR": sql = f"{sql} * 24.0" elif unit == "MINUTE": sql = f"{sql} * 1440.0" elif unit == "SECOND": sql = f"{sql} * 86400.0" elif unit == "MILLISECOND": sql = f"{sql} * 86400000.0" elif unit == "MICROSECOND": sql = f"{sql} * 86400000000.0" elif unit == "NANOSECOND": sql = f"{sql} * 8640000000000.0" else: self.unsupported(f"DATEDIFF unsupported for '{unit}'.") return f"CAST({sql} AS INTEGER)" # https://www.sqlite.org/lang_aggfunc.html#group_concat def groupconcat_sql(self, expression: exp.GroupConcat) -> str: this = expression.this distinct = expression.find(exp.Distinct) if distinct: this = distinct.expressions[0] distinct_sql = "DISTINCT " else: distinct_sql = "" if isinstance(expression.this, exp.Order): self.unsupported("SQLite GROUP_CONCAT doesn't support ORDER BY.") if expression.this.this and not distinct: this = expression.this.this separator = expression.args.get("separator") return f"GROUP_CONCAT({distinct_sql}{self.format_args(this, separator)})" def least_sql(self, expression: exp.Least) -> str: if expression.expressions: return rename_func("MIN")(self, expression) return self.sql(expression, 
"this") def greatest_sql(self, expression: exp.Greatest) -> str: if expression.expressions: return rename_func("MAX")(self, expression) return self.sql(expression, "this") def transaction_sql(self, expression: exp.Transaction) -> str: this = expression.this this = f" {this}" if this else "" return f"BEGIN{this} TRANSACTION" def isascii_sql(self, expression: exp.IsAscii) -> str: return f"(NOT {self.sql(expression.this)} GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))" @unsupported_args("this") def currentschema_sql(self, expression: exp.CurrentSchema) -> str: return "'main'" def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: self.unsupported("SQLite does not support IGNORE NULLS.") return self.sql(expression.this) def respectnulls_sql(self, expression: exp.RespectNulls) -> str: return self.sql(expression.this) def windowspec_sql(self, expression: exp.WindowSpec) -> str: if ( expression.text("kind").upper() == "RANGE" and expression.text("start").upper() == "CURRENT ROW" ): return "RANGE CURRENT ROW" return super().windowspec_sql(expression) ================================================ FILE: sqlglot/dialects/starrocks.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, transforms from sqlglot.dialects.dialect import ( approx_count_distinct_sql, arrow_json_extract_sql, rename_func, unit_to_str, inline_array_sql, property_sql, ) from sqlglot.dialects.mysql import MySQL from sqlglot.parsers.starrocks import StarRocksParser from sqlglot.tokens import TokenType def _eliminate_between_in_delete(expression: exp.Expr) -> exp.Expr: """ StarRocks doesn't support BETWEEN in DELETE statements, so we convert BETWEEN expressions to explicit comparisons. 
https://docs.starrocks.io/docs/sql-reference/sql-statements/table_bucket_part_index/DELETE/#parameters Example: >>> from sqlglot import parse_one >>> expr = parse_one("DELETE FROM t WHERE x BETWEEN 1 AND 10") >>> print(_eliminate_between_in_delete(expr).sql(dialect="starrocks")) DELETE FROM t WHERE x >= 1 AND x <= 10 """ if where := expression.args.get("where"): for between in where.find_all(exp.Between): between.replace( exp.and_( exp.GTE(this=between.this.copy(), expression=between.args["low"]), exp.LTE(this=between.this.copy(), expression=between.args["high"]), copy=False, ) ) return expression # https://docs.starrocks.io/docs/sql-reference/sql-functions/spatial-functions/st_distance_sphere/ def st_distance_sphere(self, expression: exp.StDistance) -> str: point1 = expression.this point2 = expression.expression point1_x = self.func("ST_X", point1) point1_y = self.func("ST_Y", point1) point2_x = self.func("ST_X", point2) point2_y = self.func("ST_Y", point2) return self.func("ST_Distance_Sphere", point1_x, point1_y, point2_x, point2_y) class StarRocks(MySQL): STRICT_JSON_PATH_SYNTAX = False INDEX_OFFSET = 1 class Tokenizer(MySQL.Tokenizer): KEYWORDS = { **MySQL.Tokenizer.KEYWORDS, "LARGEINT": TokenType.INT128, } Parser = StarRocksParser class Generator(MySQL.Generator): EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False JSON_TYPE_REQUIRED_FOR_EXTRACTION = False VARCHAR_REQUIRES_SIZE = False PARSE_JSON_NAME: t.Optional[str] = "PARSE_JSON" WITH_PROPERTIES_PREFIX = "PROPERTIES" UPDATE_STATEMENT_SUPPORTS_FROM = True INSERT_OVERWRITE = " OVERWRITE" # StarRocks doesn't support "IS TRUE/FALSE" syntax. IS_BOOL_ALLOWED = False # StarRocks doesn't support renaming a table with a database. 
# StarRocks.Generator class body, continued; the class header is on an earlier line.
        RENAME_TABLE_WITH_DB = False

        CAST_MAPPING = {}

        TYPE_MAPPING = {
            **MySQL.Generator.TYPE_MAPPING,
            exp.DType.INT128: "LARGEINT",
            exp.DType.TEXT: "STRING",
            exp.DType.TIMESTAMP: "DATETIME",
            exp.DType.TIMESTAMPTZ: "DATETIME",
        }

        SQL_SECURITY_VIEW_LOCATION = exp.Properties.Location.POST_SCHEMA

        PROPERTIES_LOCATION = {
            **MySQL.Generator.PROPERTIES_LOCATION,
            exp.PrimaryKey: exp.Properties.Location.POST_SCHEMA,
            exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
            exp.RollupProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        }

        TRANSFORMS = {
            **MySQL.Generator.TRANSFORMS,
            exp.Array: inline_array_sql,
            exp.ArrayAgg: rename_func("ARRAY_AGG"),
            exp.ArrayFilter: rename_func("ARRAY_FILTER"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.CurrentVersion: lambda *_: "CURRENT_VERSION()",
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.this, e.expression
            ),
            exp.Delete: transforms.preprocess([_eliminate_between_in_delete]),
            exp.Flatten: rename_func("ARRAY_FLATTEN"),
            exp.JSONExtractScalar: arrow_json_extract_sql,
            exp.JSONExtract: arrow_json_extract_sql,
            exp.Property: property_sql,
            exp.RegexpLike: rename_func("REGEXP"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SqlSecurityProperty: lambda self, e: f"SECURITY {self.sql(e.this)}",
            exp.StDistance: st_distance_sphere,
            exp.StrToUnix: lambda self, e: self.func("UNIX_TIMESTAMP", e.this, self.format_time(e)),
            exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", unit_to_str(e), e.this),
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.UnixToStr: lambda self, e: self.func("FROM_UNIXTIME", e.this, self.format_time(e)),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        }

        TRANSFORMS.pop(exp.DateTrunc)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/keywords/#reserved-keywords
        RESERVED_KEYWORDS = {
            "add",
            "all",
            "alter",
            "analyze",
            "and",
            "array",
            "as",
            "asc",
            "between",
            "bigint",
            "bitmap",
            "both",
            "by",
            "case",
            "char",
            "character",
            "check",
            "collate",
            "column",
            "compaction",
            "convert",
            "create",
            "cross",
            "cube",
            "current_date",
            "current_role",
            "current_time",
            "current_timestamp",
            "current_user",
            "database",
            "databases",
            "decimal",
            "decimalv2",
            "decimal32",
            "decimal64",
            "decimal128",
            "default",
            "deferred",
            "delete",
            "dense_rank",
            "desc",
            "describe",
            "distinct",
            "double",
            "drop",
            "dual",
            "else",
            "except",
            "exists",
            "explain",
            "false",
            "first_value",
            "float",
            "for",
            "force",
            "from",
            "full",
            "function",
            "grant",
            "group",
            "grouping",
            "grouping_id",
            "groups",
            "having",
            "hll",
            "host",
            "if",
            "ignore",
            "immediate",
            "in",
            "index",
            "infile",
            "inner",
            "insert",
            "int",
            "integer",
            "intersect",
            "into",
            "is",
            "join",
            "json",
            "key",
            "keys",
            "kill",
            "lag",
            "largeint",
            "last_value",
            "lateral",
            "lead",
            "left",
            "like",
            "limit",
            "load",
            "localtime",
            "localtimestamp",
            "maxvalue",
            "minus",
            "mod",
            "not",
            "ntile",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "outfile",
            "over",
            "partition",
            "percentile",
            "primary",
            "procedure",
            "qualify",
            "range",
            "rank",
            "read",
            "regexp",
            "release",
            "rename",
            "replace",
            "revoke",
            "right",
            "rlike",
            "row",
            "row_number",
            "rows",
            "schema",
            "schemas",
            "select",
            "set",
            "set_var",
            "show",
            "smallint",
            "system",
            "table",
            "terminated",
            "text",
            "then",
            "tinyint",
            "to",
            "true",
            "union",
            "unique",
            "unsigned",
            "update",
            "use",
            "using",
            "values",
            "varchar",
            "when",
            "where",
            "with",
        }

        def create_sql(self, expression: exp.Create) -> str:
            """Render CREATE after moving a schema-level PRIMARY KEY into the properties.

            The expression is modified in place before delegating to the base class.
            """
            # StarRocks' primary key is defined outside of the schema, so we need to move it there
            schema = expression.this
            if isinstance(schema, exp.Schema):
                primary_key = schema.find(exp.PrimaryKey)

                if primary_key:
                    props = expression.args.get("properties")

                    if not props:
                        props = exp.Properties(expressions=[])
                        expression.set("properties", props)

                    # If there is an engine property, insert the primary key right after it;
                    # otherwise insert it at the beginning
                    engine = props.find(exp.EngineProperty)
                    engine_index = (engine.index or 0) if engine else -1
                    props.set("expressions", primary_key.pop(), engine_index + 1, overwrite=False)

            return super().create_sql(expression)

        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
            """Render PARTITION BY, parenthesizing a schema's expressions for views or when
            every entry is a plain column/identifier."""
            this = expression.this
            if isinstance(this, exp.Schema):
                # For MVs, StarRocks needs outer parentheses.
                create = expression.find_ancestor(exp.Create)

                sql = self.expressions(this, flat=True)
                if (create and create.kind == "VIEW") or all(
                    isinstance(col, (exp.Column, exp.Identifier)) for col in this.expressions
                ):
                    sql = f"({sql})"

                return f"PARTITION BY {sql}"

            return f"PARTITION BY {self.sql(this)}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            """Generate StarRocks ORDER BY clause for clustering."""
            expressions = self.expressions(expression, flat=True)
            return f"ORDER BY ({expressions})" if expressions else ""

        def refreshtriggerproperty_sql(self, expression: exp.RefreshTriggerProperty) -> str:
            """Generate StarRocks REFRESH clause for materialized views.

            The syntax differs slightly between StarRocks and Doris. Each of the
            method, kind, START and EVERY parts is emitted only when present; EVERY
            additionally requires both an interval value and a unit.
            """
            method = self.sql(expression, "method")
            method = f" {method}" if method else ""
            kind = self.sql(expression, "kind")
            kind = f" {kind}" if kind else ""
            starts = self.sql(expression, "starts")
            starts = f" START ({starts})" if starts else ""
            every = self.sql(expression, "every")
            unit = self.sql(expression, "unit")
            every = f" EVERY (INTERVAL {every} {unit})" if every and unit else ""

            return f"REFRESH{method}{kind}{starts}{every}"


================================================
FILE: sqlglot/dialects/tableau.py
================================================
from __future__ import annotations

from sqlglot import exp, generator, tokens, transforms
from sqlglot.dialects.dialect import Dialect, rename_func, strposition_sql as _strposition_sql
from sqlglot.parsers.tableau import TableauParser


class Tableau(Dialect):
    LOG_BASE_FIRST = False

    class Tokenizer(tokens.Tokenizer):
        IDENTIFIERS = [("[", "]")]
        QUOTES = ["'", '"']

    class Generator(generator.Generator):
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Coalesce: rename_func("IFNULL"),
            exp.Select: transforms.preprocess([transforms.eliminate_distinct_on]),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def if_sql(self, expression: exp.If) -> str:
            """Render IF as ``IF <cond> THEN <true> ELSE <false> END``."""
            this = self.sql(expression, "this")
            true = self.sql(expression, "true")
            false = self.sql(expression, "false")
            return f"IF {this} THEN {true} ELSE {false} END"

        def count_sql(self, expression: exp.Count) -> str:
            """Render COUNT, mapping ``COUNT(DISTINCT ...)`` onto ``COUNTD(...)``."""
            this = expression.this
            if isinstance(this, exp.Distinct):
                return self.func("COUNTD", *this.expressions)
            return self.func("COUNT", this)

        def strposition_sql(self, expression: exp.StrPosition) -> str:
            """Render STR_POSITION as FINDNTH when an ``occurrence`` arg is set, else FIND."""
            has_occurrence = "occurrence" in expression.args
            return _strposition_sql(
                self,
                expression,
                func_name="FINDNTH" if has_occurrence else "FIND",
                supports_occurrence=has_occurrence,
            )

    Parser = TableauParser
================================================ FILE: sqlglot/dialects/teradata.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, generator, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, max_or_greatest, min_or_least, rename_func, strposition_sql, to_number_with_nls_param, ) from sqlglot.parsers.teradata import TeradataParser from sqlglot.tokens import TokenType def _date_add_sql( kind: t.Literal["+", "-"], ) -> t.Callable[[Teradata.Generator, exp.DateAdd | exp.DateSub], str]: def func(self: Teradata.Generator, expression: exp.DateAdd | exp.DateSub) -> str: this = self.sql(expression, "this") unit = expression.args.get("unit") value = self._simplify_unless_literal(expression.expression) if not isinstance(value, exp.Literal): self.unsupported("Cannot add non literal") if isinstance(value, exp.Neg): kind_to_op = {"+": "-", "-": "+"} value = exp.Literal.string(value.this.to_py()) else: kind_to_op = {"+": "+", "-": "-"} value.set("is_string", True) return f"{this} {kind_to_op[kind]} {self.sql(exp.Interval(this=value, unit=unit))}" return func class Teradata(Dialect): TYPED_DIVISION = True TIME_MAPPING = { "YY": "%y", "Y4": "%Y", "YYYY": "%Y", "M4": "%B", "M3": "%b", "M": "%-M", "MI": "%M", "MM": "%m", "MMM": "%b", "MMMM": "%B", "D": "%-d", "DD": "%d", "D3": "%j", "DDD": "%j", "H": "%-H", "HH": "%H", "HH24": "%H", "S": "%-S", "SS": "%S", "SSSSSS": "%f", "E": "%a", "EE": "%a", "E3": "%a", "E4": "%A", "EEE": "%a", "EEEE": "%A", } class Tokenizer(tokens.Tokenizer): # Tested each of these and they work, although there is no # Teradata documentation explicitly mentioning them. 
HEX_STRINGS = [("X'", "'"), ("x'", "'"), ("0x", "")] # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Exprs-and-Predicates/March-2017/Comparison-Operators-and-Functions/Comparison-Operators/ANSI-Compliance # https://docs.teradata.com/r/SQL-Functions-Operators-Exprs-and-Predicates/June-2017/Arithmetic-Trigonometric-Hyperbolic-Operators/Functions KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "**": TokenType.DSTAR, "^=": TokenType.NEQ, "BYTEINT": TokenType.SMALLINT, "COLLECT": TokenType.COMMAND, "DEL": TokenType.DELETE, "EQ": TokenType.EQ, "GE": TokenType.GTE, "GT": TokenType.GT, "HELP": TokenType.COMMAND, "INS": TokenType.INSERT, "LE": TokenType.LTE, "LOCKING": TokenType.LOCK, "LT": TokenType.LT, "MINUS": TokenType.EXCEPT, "MOD": TokenType.MOD, "NE": TokenType.NEQ, "NOT=": TokenType.NEQ, "SAMPLE": TokenType.TABLE_SAMPLE, "SEL": TokenType.SELECT, "ST_GEOMETRY": TokenType.GEOMETRY, "TOP": TokenType.TOP, "UPD": TokenType.UPDATE, } KEYWORDS.pop("/*+") # Teradata does not support % as a modulo operator SINGLE_TOKENS = {**tokens.Tokenizer.SINGLE_TOKENS} SINGLE_TOKENS.pop("%") Parser = TeradataParser class Generator(generator.Generator): LIMIT_IS_TOP = True JOIN_HINTS = False TABLE_HINTS = False QUERY_HINTS = False TABLESAMPLE_KEYWORDS = "SAMPLE" LAST_DAY_SUPPORTS_DATE_PART = False CAN_IMPLEMENT_ARRAY_ANY = True TZ_TO_WITH_TIME_ZONE = True ARRAY_SIZE_NAME = "CARDINALITY" TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.GEOMETRY: "ST_GEOMETRY", exp.DType.DOUBLE: "DOUBLE PRECISION", exp.DType.TIMESTAMPTZ: "TIMESTAMP", } PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.OnCommitProperty: exp.Properties.Location.POST_INDEX, exp.PartitionedByProperty: exp.Properties.Location.POST_EXPRESSION, exp.StabilityProperty: exp.Properties.Location.POST_CREATE, } TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.ArgMax: rename_func("MAX_BY"), exp.ArgMin: rename_func("MIN_BY"), exp.Max: max_or_greatest, exp.Min: min_or_least, exp.Pow: 
lambda self, e: self.binary(e, "**"), exp.Rand: lambda self, e: self.func("RANDOM", e.args.get("lower"), e.args.get("upper")), exp.Select: transforms.preprocess( [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] ), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="INSTR", supports_position=True, supports_occurrence=True ), exp.StrToDate: lambda self, e: ( f"CAST({self.sql(e, 'this')} AS DATE FORMAT {self.format_time(e)})" ), exp.ToChar: lambda self, e: self.function_fallback_sql(e), exp.ToNumber: to_number_with_nls_param, exp.Use: lambda self, e: f"DATABASE {self.sql(e, 'this')}", exp.DateAdd: _date_add_sql("+"), exp.DateSub: _date_add_sql("-"), exp.Quarter: lambda self, e: self.sql(exp.Extract(this="QUARTER", expression=e.this)), } def currenttimestamp_sql(self, expression: exp.CurrentTimestamp) -> str: prefix, suffix = ("(", ")") if expression.this else ("", "") return self.func("CURRENT_TIMESTAMP", expression.this, prefix=prefix, suffix=suffix) def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: if expression.to.this == exp.DType.UNKNOWN and expression.args.get("format"): # We don't actually want to print the unknown type in CAST( AS FORMAT ) expression.to.pop() return super().cast_sql(expression, safe_prefix=safe_prefix) def trycast_sql(self, expression: exp.TryCast) -> str: return self.cast_sql(expression, safe_prefix="TRY") def tablesample_sql( self, expression: exp.TableSample, tablesample_keyword: t.Optional[str] = None, ) -> str: return f"{self.sql(expression, 'this')} SAMPLE {self.expressions(expression)}" def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str: return f"PARTITION BY {self.sql(expression, 'this')}" # FROM before SET in Teradata UPDATE syntax # https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause def 
update_sql(self, expression: exp.Update) -> str: this = self.sql(expression, "this") from_sql = self.sql(expression, "from_") set_sql = self.expressions(expression, flat=True) where_sql = self.sql(expression, "where") sql = f"UPDATE {this}{from_sql} SET {set_sql}{where_sql}" return self.prepend_ctes(expression, sql) def mod_sql(self, expression: exp.Mod) -> str: return self.binary(expression, "MOD") def rangen_sql(self, expression: exp.RangeN) -> str: this = self.sql(expression, "this") expressions_sql = self.expressions(expression) each_sql = self.sql(expression, "each") each_sql = f" EACH {each_sql}" if each_sql else "" return f"RANGE_N({this} BETWEEN {expressions_sql}{each_sql})" def lockingstatement_sql(self, expression: exp.LockingStatement) -> str: """Generate SQL for LOCKING statement""" locking_clause = self.sql(expression, "this") query_sql = self.sql(expression, "expression") return f"{locking_clause} {query_sql}" def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: kind = self.sql(expression, "kind").upper() if kind == "TABLE" and locations.get(exp.Properties.Location.POST_NAME): this_name = self.sql(expression.this, "this") this_properties = self.properties( exp.Properties(expressions=locations[exp.Properties.Location.POST_NAME]), wrapped=False, prefix=",", ) this_schema = self.schema_columns_sql(expression.this) return f"{this_name}{this_properties}{self.sep()}{this_schema}" return super().createable_sql(expression, locations) def extract_sql(self, expression: exp.Extract) -> str: this = self.sql(expression, "this") if this.upper() != "QUARTER": return super().extract_sql(expression) to_char = exp.func("to_char", expression.expression, exp.Literal.string("Q")) return self.sql(exp.cast(to_char, exp.DType.INT)) def interval_sql(self, expression: exp.Interval) -> str: multiplier = 0 unit = expression.text("unit") if unit.startswith("WEEK"): multiplier = 7 elif unit.startswith("QUARTER"): multiplier = 90 if multiplier: return 
f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" return super().interval_sql(expression) ================================================ FILE: sqlglot/dialects/trino.py ================================================ from __future__ import annotations from sqlglot import exp, transforms from sqlglot.dialects.dialect import ( merge_without_target_sql, trim_sql, timestrtotime_sql, groupconcat_sql, rename_func, ) from sqlglot.dialects.presto import amend_exploded_column_table, Presto from sqlglot.parsers.trino import TrinoParser from sqlglot.tokens import TokenType class Trino(Presto): SUPPORTS_USER_DEFINED_TYPES = False LOG_BASE_FIRST = True class Tokenizer(Presto.Tokenizer): KEYWORDS = { **Presto.Tokenizer.KEYWORDS, "REFRESH": TokenType.REFRESH, } Parser = TrinoParser class Generator(Presto.Generator): EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = True PROPERTIES_LOCATION = { **Presto.Generator.PROPERTIES_LOCATION, exp.LocationProperty: exp.Properties.Location.POST_WITH, } TRANSFORMS = { **Presto.Generator.TRANSFORMS, exp.ArraySum: lambda self, e: ( f"REDUCE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)" ), exp.ArrayUniqueAgg: lambda self, e: f"ARRAY_AGG(DISTINCT {self.sql(e, 'this')})", exp.CurrentVersion: rename_func("VERSION"), exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, on_overflow=True), exp.LocationProperty: lambda self, e: self.property_sql(e), exp.Merge: merge_without_target_sql, exp.Select: transforms.preprocess( [ transforms.eliminate_qualify, transforms.eliminate_distinct_on, transforms.explode_projection_to_unnest(1), transforms.eliminate_semi_and_anti_joins, amend_exploded_column_table, ] ), exp.TimeStrToTime: lambda self, e: timestrtotime_sql(self, e, include_precision=True), exp.Trim: trim_sql, } SUPPORTED_JSON_PATH_PARTS = { exp.JSONPathKey, exp.JSONPathRoot, exp.JSONPathSubscript, } def jsonextract_sql(self, expression: exp.JSONExtract) -> str: if not 
expression.args.get("json_query"): return super().jsonextract_sql(expression) json_path = self.sql(expression, "expression") option = self.sql(expression, "option") option = f" {option}" if option else "" quote = self.sql(expression, "quote") quote = f" {quote}" if quote else "" on_condition = self.sql(expression, "on_condition") on_condition = f" {on_condition}" if on_condition else "" return self.func( "JSON_QUERY", expression.this, json_path + option + quote + on_condition, ) ================================================ FILE: sqlglot/dialects/tsql.py ================================================ from __future__ import annotations import typing as t from functools import reduce from sqlglot import exp, generator, tokens, transforms from sqlglot.dialects.dialect import ( Dialect, NormalizationStrategy, any_value_to_max_sql, date_delta_sql, datestrtodate_sql, generatedasidentitycolumnconstraint_sql, max_or_greatest, min_or_least, rename_func, strposition_sql, timestrtotime_sql, trim_sql, ) from sqlglot.helper import seq_get from sqlglot.parsers.tsql import OPTIONS_THAT_REQUIRE_EQUAL, TSQLParser from sqlglot.time import format_time from sqlglot.tokens import TokenType from sqlglot.typing.tsql import EXPRESSION_METADATA DATE_PART_UNMAPPING = { "WEEKISO": "ISO_WEEK", "DAYOFWEEK": "WEEKDAY", "TIMEZONE_MINUTE": "TZOFFSET", } BIT_TYPES = {exp.EQ, exp.NEQ, exp.Is, exp.In, exp.Select, exp.Alias} def _format_sql(self: TSQL.Generator, expression: exp.NumberToStr | exp.TimeToStr) -> str: fmt = expression.args["format"] if not isinstance(expression, exp.NumberToStr): if fmt.is_string: mapped_fmt = format_time(fmt.name, TSQL.INVERSE_TIME_MAPPING) fmt_sql = self.sql(exp.Literal.string(mapped_fmt)) else: fmt_sql = self.format_time(expression) or self.sql(fmt) else: fmt_sql = self.sql(fmt) return self.func("FORMAT", expression.this, fmt_sql, expression.args.get("culture")) def _string_agg_sql(self: TSQL.Generator, expression: exp.GroupConcat) -> str: this = expression.this 
distinct = expression.find(exp.Distinct) if distinct: # exp.Distinct can appear below an exp.Order or an exp.GroupConcat expression self.unsupported("T-SQL STRING_AGG doesn't support DISTINCT.") this = distinct.pop().expressions[0] order = "" if isinstance(expression.this, exp.Order): if expression.this.this: this = expression.this.this.pop() # Order has a leading space order = f" WITHIN GROUP ({self.sql(expression.this)[1:]})" separator = expression.args.get("separator") or exp.Literal.string(",") return f"STRING_AGG({self.format_args(this, separator)}){order}" def qualify_derived_table_outputs(expression: exp.Expr) -> exp.Expr: """Ensures all (unnamed) output columns are aliased for CTEs and Subqueries.""" alias = expression.args.get("alias") if ( isinstance(expression, (exp.CTE, exp.Subquery)) and isinstance(alias, exp.TableAlias) and not alias.columns ): from sqlglot.optimizer.qualify_columns import qualify_outputs # We keep track of the unaliased column projection indexes instead of the expressions # themselves, because the latter are going to be replaced by new nodes when the aliases # are added and hence we won't be able to reach these newly added Alias parents query = expression.this unaliased_column_indexes = ( i for i, c in enumerate(query.selects) if isinstance(c, exp.Column) and not c.alias ) qualify_outputs(query) # Preserve the quoting information of columns for newly added Alias nodes query_selects = query.selects for select_index in unaliased_column_indexes: alias = query_selects[select_index] column = alias.this if isinstance(column.this, exp.Identifier): alias.args["alias"].set("quoted", column.this.quoted) return expression def _json_extract_sql( self: TSQL.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar ) -> str: json_query = self.func("JSON_QUERY", expression.this, expression.expression) json_value = self.func("JSON_VALUE", expression.this, expression.expression) return self.func("ISNULL", json_query, json_value) def 
_timestrtotime_sql(self: TSQL.Generator, expression: exp.TimeStrToTime): sql = timestrtotime_sql(self, expression) if expression.args.get("zone"): # If there is a timezone, produce an expression like: # CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC' # If you dont have AT TIME ZONE 'UTC', wrapping that expression in another cast back to DATETIME2 just drops the timezone information return self.sql(exp.AtTimeZone(this=sql, zone=exp.Literal.string("UTC"))) return sql class TSQL(Dialect): LOG_BASE_FIRST = False TYPED_DIVISION = True CONCAT_COALESCE = True NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False TIME_FORMAT = "'yyyy-mm-dd hh:mm:ss'" EXPRESSION_METADATA = EXPRESSION_METADATA.copy() DATE_PART_MAPPING = { **Dialect.DATE_PART_MAPPING, "QQ": "QUARTER", "M": "MONTH", "Y": "DAYOFYEAR", "WW": "WEEK", "N": "MINUTE", "SS": "SECOND", "MCS": "MICROSECOND", "TZOFFSET": "TIMEZONE_MINUTE", "TZ": "TIMEZONE_MINUTE", "ISO_WEEK": "WEEKISO", "ISOWK": "WEEKISO", "ISOWW": "WEEKISO", } TIME_MAPPING = { "year": "%Y", "dayofyear": "%j", "day": "%d", "dy": "%d", "y": "%Y", "week": "%W", "ww": "%W", "wk": "%W", "isowk": "%V", "isoww": "%V", "iso_week": "%V", "hour": "%h", "hh": "%I", "minute": "%M", "mi": "%M", "n": "%M", "second": "%S", "ss": "%S", "s": "%-S", "millisecond": "%f", "ms": "%f", "weekday": "%w", "dw": "%w", "month": "%m", "mm": "%M", "m": "%-M", "Y": "%Y", "YYYY": "%Y", "YY": "%y", "MMMM": "%B", "MMM": "%b", "MM": "%m", "M": "%-m", "dddd": "%A", "dd": "%d", "d": "%-d", "HH": "%H", "H": "%-H", "h": "%-I", "ffffff": "%f", "yyyy": "%Y", "yy": "%y", } CONVERT_FORMAT_MAPPING = { "0": "%b %d %Y %-I:%M%p", "1": "%m/%d/%y", "2": "%y.%m.%d", "3": "%d/%m/%y", "4": "%d.%m.%y", "5": "%d-%m-%y", "6": "%d %b %y", "7": "%b %d, %y", "8": "%H:%M:%S", "9": "%b %d %Y %-I:%M:%S:%f%p", "10": "mm-dd-yy", "11": "yy/mm/dd", "12": "yymmdd", "13": "%d %b %Y %H:%M:ss:%f", "14": "%H:%M:%S:%f", "20": "%Y-%m-%d 
%H:%M:%S", "21": "%Y-%m-%d %H:%M:%S.%f", "22": "%m/%d/%y %-I:%M:%S %p", "23": "%Y-%m-%d", "24": "%H:%M:%S", "25": "%Y-%m-%d %H:%M:%S.%f", "100": "%b %d %Y %-I:%M%p", "101": "%m/%d/%Y", "102": "%Y.%m.%d", "103": "%d/%m/%Y", "104": "%d.%m.%Y", "105": "%d-%m-%Y", "106": "%d %b %Y", "107": "%b %d, %Y", "108": "%H:%M:%S", "109": "%b %d %Y %-I:%M:%S:%f%p", "110": "%m-%d-%Y", "111": "%Y/%m/%d", "112": "%Y%m%d", "113": "%d %b %Y %H:%M:%S:%f", "114": "%H:%M:%S:%f", "120": "%Y-%m-%d %H:%M:%S", "121": "%Y-%m-%d %H:%M:%S.%f", "126": "%Y-%m-%dT%H:%M:%S.%f", } FORMAT_TIME_MAPPING = { "y": "%B %Y", "d": "%m/%d/%Y", "H": "%-H", "h": "%-I", "s": "%Y-%m-%d %H:%M:%S", "D": "%A,%B,%Y", "f": "%A,%B,%Y %-I:%M %p", "F": "%A,%B,%Y %-I:%M:%S %p", "g": "%m/%d/%Y %-I:%M %p", "G": "%m/%d/%Y %-I:%M:%S %p", "M": "%B %-d", "m": "%B %-d", "O": "%Y-%m-%dT%H:%M:%S", "u": "%Y-%M-%D %H:%M:%S%z", "U": "%A, %B %D, %Y %H:%M:%S%z", "T": "%-I:%M:%S %p", "t": "%-I:%M", "Y": "%a %Y", } class Tokenizer(tokens.Tokenizer): IDENTIFIERS = [("[", "]"), '"'] QUOTES = ["'", '"'] HEX_STRINGS = [("0x", ""), ("0X", "")] VAR_SINGLE_TOKENS = {"@", "$", "#"} KEYWORDS = { **tokens.Tokenizer.KEYWORDS, "CLUSTERED INDEX": TokenType.INDEX, "DATETIME2": TokenType.DATETIME2, "DATETIMEOFFSET": TokenType.TIMESTAMPTZ, "DECLARE": TokenType.DECLARE, "EXEC": TokenType.EXECUTE, "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, "GO": TokenType.COMMAND, "IMAGE": TokenType.IMAGE, "MONEY": TokenType.MONEY, "NONCLUSTERED INDEX": TokenType.INDEX, "NTEXT": TokenType.TEXT, "OPTION": TokenType.OPTION, "OUTPUT": TokenType.RETURNING, "PRINT": TokenType.COMMAND, "PROC": TokenType.PROCEDURE, "REAL": TokenType.FLOAT, "ROWVERSION": TokenType.ROWVERSION, "SMALLDATETIME": TokenType.SMALLDATETIME, "SMALLMONEY": TokenType.SMALLMONEY, "SQL_VARIANT": TokenType.VARIANT, "SYSTEM_USER": TokenType.CURRENT_USER, "TOP": TokenType.TOP, "TIMESTAMP": TokenType.ROWVERSION, "TINYINT": TokenType.UTINYINT, "UNIQUEIDENTIFIER": TokenType.UUID, "UPDATE STATISTICS": 
TokenType.COMMAND, "XML": TokenType.XML, } KEYWORDS.pop("/*+") COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.END} - {TokenType.EXECUTE} Parser = TSQLParser class Generator(generator.Generator): LIMIT_IS_TOP = True QUERY_HINTS = False RETURNING_END = False NVL2_SUPPORTED = False ALTER_TABLE_INCLUDE_COLUMN_KEYWORD = False LIMIT_FETCH = "FETCH" COMPUTED_COLUMN_WITH_TYPE = False CTE_RECURSIVE_KEYWORD_REQUIRED = False ENSURE_BOOLS = True NULL_ORDERING_SUPPORTED = None SUPPORTS_SINGLE_ARG_CONCAT = False TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" SUPPORTS_SELECT_INTO = True JSON_PATH_BRACKETED_KEY_SUPPORTED = False SUPPORTS_TO_NUMBER = False SET_OP_MODIFIERS = False COPY_PARAMS_EQ_REQUIRED = True PARSE_JSON_NAME = None EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False ALTER_SET_WRAPPED = True ALTER_SET_TYPE = "" EXPRESSIONS_WITHOUT_NESTED_CTES = { exp.Create, exp.Delete, exp.Insert, exp.Intersect, exp.Except, exp.Merge, exp.Select, exp.Subquery, exp.Union, exp.Update, } SUPPORTED_JSON_PATH_PARTS = { exp.JSONPathKey, exp.JSONPathRoot, exp.JSONPathSubscript, } TYPE_MAPPING = { **generator.Generator.TYPE_MAPPING, exp.DType.BOOLEAN: "BIT", exp.DType.DATETIME2: "DATETIME2", exp.DType.DECIMAL: "NUMERIC", exp.DType.DOUBLE: "FLOAT", exp.DType.INT: "INTEGER", exp.DType.ROWVERSION: "ROWVERSION", exp.DType.TEXT: "VARCHAR(MAX)", exp.DType.TIMESTAMP: "DATETIME2", exp.DType.TIMESTAMPNTZ: "DATETIME2", exp.DType.TIMESTAMPTZ: "DATETIMEOFFSET", exp.DType.SMALLDATETIME: "SMALLDATETIME", exp.DType.UTINYINT: "TINYINT", exp.DType.VARIANT: "SQL_VARIANT", exp.DType.UUID: "UNIQUEIDENTIFIER", } TYPE_MAPPING.pop(exp.DType.NCHAR) TYPE_MAPPING.pop(exp.DType.NVARCHAR) TRANSFORMS = { **generator.Generator.TRANSFORMS, exp.AnyValue: any_value_to_max_sql, exp.Atan2: rename_func("ATN2"), exp.ArrayToString: rename_func("STRING_AGG"), exp.AutoIncrementColumnConstraint: lambda *_: "IDENTITY", exp.Ceil: rename_func("CEILING"), exp.Chr: rename_func("CHAR"), exp.DateAdd: date_delta_sql("DATEADD"), exp.CTE: 
transforms.preprocess([qualify_derived_table_outputs]), exp.CurrentDate: rename_func("GETDATE"), exp.CurrentTimestamp: rename_func("GETDATE"), exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"), exp.DateStrToDate: datestrtodate_sql, exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, exp.GroupConcat: _string_agg_sql, exp.If: rename_func("IIF"), exp.JSONExtract: _json_extract_sql, exp.JSONExtractScalar: _json_extract_sql, exp.LastDay: lambda self, e: self.func("EOMONTH", e.this), exp.Ln: rename_func("LOG"), exp.Max: max_or_greatest, exp.MD5: lambda self, e: self.func("HASHBYTES", exp.Literal.string("MD5"), e.this), exp.Min: min_or_least, exp.NumberToStr: _format_sql, exp.Repeat: rename_func("REPLICATE"), exp.CurrentSchema: rename_func("SCHEMA_NAME"), exp.Select: transforms.preprocess( [ transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins, transforms.eliminate_qualify, transforms.unnest_generate_date_array_using_recursive_cte, ] ), exp.Stddev: rename_func("STDEV"), exp.StrPosition: lambda self, e: strposition_sql( self, e, func_name="CHARINDEX", supports_position=True ), exp.Subquery: transforms.preprocess([qualify_derived_table_outputs]), exp.SHA: lambda self, e: self.func("HASHBYTES", exp.Literal.string("SHA1"), e.this), exp.SHA1Digest: lambda self, e: self.func( "HASHBYTES", exp.Literal.string("SHA1"), e.this ), exp.SHA2: lambda self, e: self.func( "HASHBYTES", exp.Literal.string(f"SHA2_{e.args.get('length', 256)}"), e.this ), exp.TemporaryProperty: lambda self, e: "", exp.TimeStrToTime: _timestrtotime_sql, exp.TimeToStr: _format_sql, exp.Trim: trim_sql, exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), exp.TimestampTrunc: lambda self, e: self.func("DATETRUNC", e.unit, e.this), exp.Trunc: lambda self, e: self.func( "ROUND", e.this, e.args.get("decimals") or exp.Literal.number(0), exp.Literal.number(1), ), exp.Uuid: lambda *_: "NEWID()", exp.DateFromParts: 
rename_func("DATEFROMPARTS"), } TRANSFORMS.pop(exp.ReturnsProperty) PROPERTIES_LOCATION = { **generator.Generator.PROPERTIES_LOCATION, exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, } def scope_resolution(self, rhs: str, scope_name: str) -> str: return f"{scope_name}::{rhs}" def select_sql(self, expression: exp.Select) -> str: limit = expression.args.get("limit") offset = expression.args.get("offset") if isinstance(limit, exp.Fetch) and not offset: # Dialects like Oracle can FETCH directly from a row set but # T-SQL requires an ORDER BY + OFFSET clause in order to FETCH offset = exp.Offset(expression=exp.Literal.number(0)) expression.set("offset", offset) if offset: if not expression.args.get("order"): # ORDER BY is required in order to use OFFSET in a query, so we use # a noop order by, since we don't really care about the order. # See: https://www.microsoftpressstore.com/articles/article.aspx?p=2314819 expression.order_by(exp.select(exp.null()).subquery(), copy=False) if isinstance(limit, exp.Limit): # TOP and OFFSET can't be combined, we need use FETCH instead of TOP # we replace here because otherwise TOP would be generated in select_sql limit.replace(exp.Fetch(direction="FIRST", count=limit.expression)) return super().select_sql(expression) def convert_sql(self, expression: exp.Convert) -> str: name = "TRY_CONVERT" if expression.args.get("safe") else "CONVERT" return self.func( name, expression.this, expression.expression, expression.args.get("style") ) def queryoption_sql(self, expression: exp.QueryOption) -> str: option = self.sql(expression, "this") value = self.sql(expression, "expression") if value: optional_equal_sign = "= " if option in OPTIONS_THAT_REQUIRE_EQUAL else "" return f"{option} {optional_equal_sign}{value}" return option def lateral_op(self, expression: exp.Lateral) -> str: cross_apply = expression.args.get("cross_apply") if cross_apply is True: return "CROSS APPLY" if cross_apply is False: return "OUTER APPLY" # TODO: perhaps we 
can check if the parent is a Join and transpile it appropriately self.unsupported("LATERAL clause is not supported.") return "LATERAL" def splitpart_sql(self: TSQL.Generator, expression: exp.SplitPart) -> str: this = expression.this split_count = len(this.name.split(".")) delimiter = expression.args.get("delimiter") part_index = expression.args.get("part_index") if ( not all(isinstance(arg, exp.Literal) for arg in (this, delimiter, part_index)) or (delimiter and delimiter.name != ".") or not part_index or split_count > 4 ): self.unsupported( "SPLIT_PART can be transpiled to PARSENAME only for '.' delimiter and literal values" ) return "" return self.func( "PARSENAME", this, exp.Literal.number(split_count + 1 - part_index.to_py()) ) def extract_sql(self, expression: exp.Extract) -> str: part = expression.this name = DATE_PART_UNMAPPING.get(part.name.upper()) or part return self.func("DATEPART", name, expression.expression) def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: nano = expression.args.get("nano") if nano is not None: nano.pop() self.unsupported("Specifying nanoseconds is not supported in TIMEFROMPARTS.") if expression.args.get("fractions") is None: expression.set("fractions", exp.Literal.number(0)) if expression.args.get("precision") is None: expression.set("precision", exp.Literal.number(0)) return rename_func("TIMEFROMPARTS")(self, expression) def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: zone = expression.args.get("zone") if zone is not None: zone.pop() self.unsupported("Time zone is not supported in DATETIMEFROMPARTS.") nano = expression.args.get("nano") if nano is not None: nano.pop() self.unsupported("Specifying nanoseconds is not supported in DATETIMEFROMPARTS.") if expression.args.get("milli") is None: expression.set("milli", exp.Literal.number(0)) return rename_func("DATETIMEFROMPARTS")(self, expression) def setitem_sql(self, expression: exp.SetItem) -> str: this = expression.this if isinstance(this, 
exp.EQ) and not isinstance(this.left, exp.Parameter): # T-SQL does not use '=' in SET command, except when the LHS is a variable. return f"{self.sql(this.left)} {self.sql(this.right)}" return super().setitem_sql(expression) def boolean_sql(self, expression: exp.Boolean) -> str: if type(expression.parent) in BIT_TYPES or isinstance( expression.find_ancestor(exp.Values, exp.Select), exp.Values ): return "1" if expression.this else "0" return "(1 = 1)" if expression.this else "(1 = 0)" def is_sql(self, expression: exp.Is) -> str: if isinstance(expression.expression, exp.Boolean): return self.binary(expression, "=") return self.binary(expression, "IS") def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: sql = self.sql(expression, "this") properties = expression.args.get("properties") if sql[:1] != "#" and any( isinstance(prop, exp.TemporaryProperty) for prop in (properties.expressions if properties else []) ): sql = f"[#{sql[1:]}" if sql.startswith("[") else f"#{sql}" return sql def create_sql(self, expression: exp.Create) -> str: kind = expression.kind exists = expression.args.get("exists") expression.set("exists", None) like_property = expression.find(exp.LikeProperty) if like_property: ctas_expression = like_property.this else: ctas_expression = expression.expression if kind == "VIEW": expression.this.set("catalog", None) with_ = expression.args.get("with_") if ctas_expression and with_: # We've already preprocessed the Create expression to bubble up any nested CTEs, # but CREATE VIEW actually requires the WITH clause to come after it so we need # to amend the AST by moving the CTEs to the CREATE VIEW statement's query. ctas_expression.set("with_", with_.pop()) table = expression.find(exp.Table) # Convert CTAS statement to SELECT .. INTO .. 
if kind == "TABLE" and ctas_expression: if isinstance(ctas_expression, exp.UNWRAPPED_QUERIES): ctas_expression = ctas_expression.subquery() properties = expression.args.get("properties") or exp.Properties() is_temp = any(isinstance(p, exp.TemporaryProperty) for p in properties.expressions) select_into = exp.select("*").from_(exp.alias_(ctas_expression, "temp", table=True)) select_into.set("into", exp.Into(this=table, temporary=is_temp)) if like_property: select_into.limit(0, copy=False) sql = self.sql(select_into) else: sql = super().create_sql(expression) if exists: identifier = self.sql(exp.Literal.string(exp.table_name(table) if table else "")) sql_with_ctes = self.prepend_ctes(expression, sql) sql_literal = self.sql(exp.Literal.string(sql_with_ctes)) if kind == "SCHEMA": return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = {identifier}) EXEC({sql_literal})""" elif kind == "TABLE": assert table where = exp.and_( exp.column("TABLE_NAME").eq(table.name), exp.column("TABLE_SCHEMA").eq(table.db) if table.db else None, exp.column("TABLE_CATALOG").eq(table.catalog) if table.catalog else None, ) return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE {where}) EXEC({sql_literal})""" elif kind == "INDEX": index = self.sql(exp.Literal.string(expression.this.text("this"))) return f"""IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id({identifier}) AND name = {index}) EXEC({sql_literal})""" elif expression.args.get("replace"): sql = sql.replace("CREATE OR REPLACE ", "CREATE OR ALTER ", 1) return self.prepend_ctes(expression, sql) @generator.unsupported_args("unlogged", "expressions") def into_sql(self, expression: exp.Into) -> str: if expression.args.get("temporary"): # If the Into expression has a temporary property, push this down to the Identifier table = expression.find(exp.Table) if table and isinstance(table.this, exp.Identifier): table.this.set("temporary", True) return f"{self.seg('INTO')} 
{self.sql(expression, 'this')}" def count_sql(self, expression: exp.Count) -> str: func_name = "COUNT_BIG" if expression.args.get("big_int") else "COUNT" return rename_func(func_name)(self, expression) def datediff_sql(self, expression: exp.DateDiff) -> str: func_name = "DATEDIFF_BIG" if expression.args.get("big_int") else "DATEDIFF" return date_delta_sql(func_name)(self, expression) def offset_sql(self, expression: exp.Offset) -> str: return f"{super().offset_sql(expression)} ROWS" def version_sql(self, expression: exp.Version) -> str: name = "SYSTEM_TIME" if expression.name == "TIMESTAMP" else expression.name this = f"FOR {name}" expr = expression.expression kind = expression.text("kind") if kind in ("FROM", "BETWEEN"): args = expr.expressions sep = "TO" if kind == "FROM" else "AND" expr_sql = f"{self.sql(seq_get(args, 0))} {sep} {self.sql(seq_get(args, 1))}" else: expr_sql = self.sql(expr) expr_sql = f" {expr_sql}" if expr_sql else "" return f"{this} {kind}{expr_sql}" def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str: table = expression.args.get("table") table = f"{table} " if table else "" return f"RETURNS {table}{self.sql(expression, 'this')}" def returning_sql(self, expression: exp.Returning) -> str: into = self.sql(expression, "into") into = self.seg(f"INTO {into}") if into else "" return f"{self.seg('OUTPUT')} {self.expressions(expression, flat=True)}{into}" def transaction_sql(self, expression: exp.Transaction) -> str: this = self.sql(expression, "this") this = f" {this}" if this else "" mark = self.sql(expression, "mark") mark = f" WITH MARK {mark}" if mark else "" return f"BEGIN TRANSACTION{this}{mark}" def commit_sql(self, expression: exp.Commit) -> str: this = self.sql(expression, "this") this = f" {this}" if this else "" durability = expression.args.get("durability") durability = ( f" WITH (DELAYED_DURABILITY = {'ON' if durability else 'OFF'})" if durability is not None else "" ) return f"COMMIT TRANSACTION{this}{durability}" def 
rollback_sql(self, expression: exp.Rollback) -> str: this = self.sql(expression, "this") this = f" {this}" if this else "" return f"ROLLBACK TRANSACTION{this}" def identifier_sql(self, expression: exp.Identifier) -> str: identifier = super().identifier_sql(expression) if expression.args.get("global_"): identifier = f"##{identifier}" elif expression.args.get("temporary"): identifier = f"#{identifier}" return identifier def constraint_sql(self, expression: exp.Constraint) -> str: this = self.sql(expression, "this") expressions = self.expressions(expression, flat=True, sep=" ") return f"CONSTRAINT {this} {expressions}" def length_sql(self, expression: exp.Length) -> str: return self._uncast_text(expression, "LEN") def right_sql(self, expression: exp.Right) -> str: return self._uncast_text(expression, "RIGHT") def left_sql(self, expression: exp.Left) -> str: return self._uncast_text(expression, "LEFT") def _uncast_text(self, expression: exp.Expr, name: str) -> str: this = expression.this if isinstance(this, exp.Cast) and this.is_type(exp.DType.TEXT): this_sql = self.sql(this, "this") else: this_sql = self.sql(this) expression_sql = self.sql(expression, "expression") return self.func(name, this_sql, expression_sql if expression_sql else None) def partition_sql(self, expression: exp.Partition) -> str: return f"WITH (PARTITIONS({self.expressions(expression, flat=True)}))" def alter_sql(self, expression: exp.Alter) -> str: action = seq_get(expression.args.get("actions") or [], 0) if isinstance(action, exp.AlterRename): return f"EXEC sp_rename '{self.sql(expression.this)}', '{action.this.name}'" return super().alter_sql(expression) def drop_sql(self, expression: exp.Drop) -> str: if expression.args["kind"] == "VIEW": expression.this.set("catalog", None) return super().drop_sql(expression) def options_modifier(self, expression: exp.Expr) -> str: options = self.expressions(expression, key="options") return f" OPTION{self.wrap(options)}" if options else "" def dpipe_sql(self, 
def storedprocedure_sql(self, expression: exp.StoredProcedure) -> str:
    """
    Generate a stored procedure reference followed by its parameter list.

    The parameters are wrapped via `self.wrap` when the node's `wrapped` arg is truthy,
    otherwise they follow the name after a single space. If the rendered parameter
    text is blank, only the name is returned.
    """
    name = self.sql(expression, "this")
    params = self.expressions(expression)

    if expression.args.get("wrapped"):
        params = self.wrap(params)
    else:
        params = f" {params}"

    # Nothing but whitespace after the name: emit the bare name.
    if params.strip() == "":
        return name

    return f"{name}{params}"
def diff(
    source: exp.Expr,
    target: exp.Expr,
    matchings: t.List[t.Tuple[exp.Expr, exp.Expr]] | None = None,
    delta_only: bool = False,
    **kwargs: t.Any,
) -> t.List[Edit]:
    """
    Returns the list of changes between the source and the target expressions.

    Examples:
        >>> from sqlglot import parse_one
        >>> diff(parse_one("a + b"), parse_one("a + c"))  # doctest: +SKIP
        [...]

    Args:
        source: the source expression.
        target: the target expression against which the diff should be calculated.
        matchings: the list of pre-matched node pairs which is used to help the algorithm's
            heuristics produce better results for subtrees that are known by a caller to be matching.
            Note: expression references in this list must refer to the same node objects that are
            referenced in the source / target trees.
        delta_only: excludes all `Keep` nodes from the diff.
        kwargs: additional arguments to pass to the ChangeDistiller instance.

    Returns:
        the list of Insert, Remove, Move, Update and Keep objects for each node in the source and the
        target expression trees. This list represents a sequence of steps needed to transform the source
        expression tree into the target one.

    Note:
        If a node object occurs more than once within a tree, or is shared between the two trees,
        both trees are copied and the returned edits reference the copies instead of the inputs.
    """
    matchings = matchings or []

    def compute_node_mappings(
        old_nodes: tuple[exp.Expr, ...], new_nodes: tuple[exp.Expr, ...]
    ) -> t.Dict[int, exp.Expr]:
        # Pairs every original node with its copy (both walks are consumed in reverse order)
        # and caches the hash of each copy along the way.
        node_mapping = {}
        for old_node, new_node in zip(reversed(old_nodes), reversed(new_nodes)):
            new_node._hash = hash(new_node)
            node_mapping[id(old_node)] = new_node

        return node_mapping

    # if the source and target have any shared objects, that means there's an issue with the ast
    # the algorithm won't work because the parent / hierarchies will be inaccurate
    source_nodes = tuple(source.walk())
    target_nodes = tuple(target.walk())
    source_ids = {id(n) for n in source_nodes}
    target_ids = {id(n) for n in target_nodes}

    copy = (
        len(source_nodes) != len(source_ids)
        or len(target_nodes) != len(target_ids)
        or source_ids & target_ids
    )

    source_copy = source.copy() if copy else source
    target_copy = target.copy() if copy else target

    try:
        # We cache the hash of each node that takes part in the diff to speed up equality
        # comparisons. If the input trees aren't copied, these hashes will be evicted before
        # returning the edit script.
        if copy:
            # The diff runs on the copies, so the hashes must be cached on them, never on the
            # caller's nodes: hashes cached on the originals would not help the diff and would
            # not be evicted below. This also holds when there are no matchings to translate.
            source_mapping = compute_node_mappings(source_nodes, tuple(source_copy.walk()))
            target_mapping = compute_node_mappings(target_nodes, tuple(target_copy.walk()))
            matchings = [(source_mapping[id(s)], target_mapping[id(t)]) for s, t in matchings]
        else:
            for node in chain(reversed(source_nodes), reversed(target_nodes)):
                node._hash = hash(node)

        edit_script = ChangeDistiller(**kwargs).diff(
            source_copy,
            target_copy,
            matchings=matchings,
            delta_only=delta_only,
        )
    finally:
        if not copy:
            # Evict the temporary hashes from the caller's trees, even if the diff raised.
            for node in chain(source_nodes, target_nodes):
                node._hash = None

    return edit_script
""" def __init__(self, f: float = 0.6, t: float = 0.6, dialect: DialectType = None) -> None: self.f = f self.t = t self._sql_generator = Dialect.get_or_raise(dialect).generator(comments=False) def diff( self, source: exp.Expr, target: exp.Expr, matchings: t.List[t.Tuple[exp.Expr, exp.Expr]] | None = None, delta_only: bool = False, ) -> t.List[Edit]: matchings = matchings or [] pre_matched_nodes = {id(s): id(t) for s, t in matchings} self._source = source self._target = target self._source_index = { id(n): n for n in self._source.bfs() if not isinstance(n, IGNORED_LEAF_EXPRESSION_TYPES) } self._target_index = { id(n): n for n in self._target.bfs() if not isinstance(n, IGNORED_LEAF_EXPRESSION_TYPES) } self._unmatched_source_nodes = set(self._source_index) - set(pre_matched_nodes) self._unmatched_target_nodes = set(self._target_index) - set(pre_matched_nodes.values()) self._bigram_histo_cache: t.Dict[int, t.DefaultDict[str, int]] = {} matching_set = self._compute_matching_set() | set(pre_matched_nodes.items()) return self._generate_edit_script(dict(matching_set), delta_only) def _generate_edit_script(self, matchings: t.Dict[int, int], delta_only: bool) -> t.List[Edit]: edit_script: t.List[Edit] = [] for removed_node_id in self._unmatched_source_nodes: edit_script.append(Remove(self._source_index[removed_node_id])) for inserted_node_id in self._unmatched_target_nodes: edit_script.append(Insert(self._target_index[inserted_node_id])) for kept_source_node_id, kept_target_node_id in matchings.items(): source_node = self._source_index[kept_source_node_id] target_node = self._target_index[kept_target_node_id] identical_nodes = source_node == target_node if not isinstance(source_node, UPDATABLE_EXPRESSION_TYPES) or identical_nodes: if identical_nodes: source_parent = source_node.parent target_parent = target_node.parent if ( (source_parent and not target_parent) or (not source_parent and target_parent) or ( source_parent and target_parent and matchings.get(id(source_parent)) 
def _generate_move_edits(
    self, source: exp.Expr, target: exp.Expr, matchings: t.Dict[int, int]
) -> t.List[Move]:
    """
    Build Move edits for the children of a matched node pair whose relative order changed.

    Args:
        source: the matched node from the source tree.
        target: the node matched to `source` in the target tree.
        matchings: mapping of source node ids to the ids of their matched target nodes.

    Returns:
        One Move per source child that is matched but is not part of the longest common
        subsequence of the two child lists.
    """
    source_ids = [id(arg) for arg in _expression_only_args(source)]
    target_ids = [id(arg) for arg in _expression_only_args(target)]

    def is_matched_pair(left: t.Any, right: t.Any) -> bool:
        # Two children are "equal" for the LCS when the source child is matched to the target child.
        return matchings.get(t.cast(int, left)) == right

    # Children in the LCS kept their relative order and need no Move.
    in_order = set(_lcs(source_ids, target_ids, is_matched_pair))

    return [
        Move(source=self._source_index[arg_id], target=self._target_index[matchings[arg_id]])
        for arg_id in source_ids
        if not (arg_id in in_order or arg_id in self._unmatched_source_nodes)
    ]
def _compute_leaf_matching_set(self) -> t.Set[t.Tuple[int, int]]:
    """
    Greedily pair up leaves of the source and target trees.

    Every same-type leaf pair whose dice coefficient is at least `self.f` becomes a
    candidate. Candidates are then consumed best-first, and a pair is accepted only if
    both of its leaves are still unmatched; accepted leaves are removed from the
    unmatched sets.

    Returns:
        The set of accepted `(source leaf id, target leaf id)` pairs.
    """
    heap: t.List[t.Tuple[float, int, int, exp.Expr, exp.Expr]] = []
    source_leaves = list(_get_expression_leaves(self._source))
    target_leaves = list(_get_expression_leaves(self._target))

    for src in source_leaves:
        for tgt in target_leaves:
            if not _is_same_type(src, tgt):
                continue

            score = self._dice_coefficient(src, tgt)
            if score < self.f:
                continue

            # Scores are negated because heapq pops the smallest entry first. The current
            # heap size is unique per entry, so tuple comparison never reaches the
            # expression objects themselves.
            entry = (-score, -_parent_similarity_score(src, tgt), len(heap), src, tgt)
            heappush(heap, entry)

    # Pick best matchings based on the highest score
    matched = set()
    while heap:
        *_, src, tgt = heappop(heap)
        src_id, tgt_id = id(src), id(tgt)

        if (
            src_id not in self._unmatched_source_nodes
            or tgt_id not in self._unmatched_target_nodes
        ):
            continue

        matched.add((src_id, tgt_id))
        self._unmatched_source_nodes.remove(src_id)
        self._unmatched_target_nodes.remove(tgt_id)

    return matched
def _bigram_histo(self, expression: exp.Expr) -> t.DefaultDict[str, int]:
    """
    Return the histogram of character bigrams of the SQL generated for `expression`.

    Results are memoized in `self._bigram_histo_cache`, keyed by the id of the
    expression object.
    """
    cache_key = id(expression)
    cached = self._bigram_histo_cache.get(cache_key)
    if cached is not None:
        return cached

    text = self._sql_generator.generate(expression)

    histogram: t.DefaultDict[str, int] = defaultdict(int)
    # Pairing the text with itself shifted by one yields every adjacent character pair;
    # texts shorter than two characters produce no bigrams.
    for first, second in zip(text, text[1:]):
        histogram[first + second] += 1

    self._bigram_histo_cache[cache_key] = histogram
    return histogram
def _lcs(
    seq_a: Sequence[T], seq_b: Sequence[T], equal: t.Callable[[T, T], bool]
) -> Sequence[t.Optional[T]]:
    """
    Calculates the longest common subsequence.

    Args:
        seq_a: the first sequence; the returned elements are taken from it.
        seq_b: the second sequence.
        equal: predicate called as `equal(a_item, b_item)` to decide whether two items match.
            It is called exactly once for every pair of positions.

    Returns:
        A list with the elements of `seq_a` that form the longest common subsequence, in order.
        When several subsequences are equally long, ties are resolved by preferring to drop the
        last element of `seq_b` over dropping the last element of `seq_a`.
    """
    len_a = len(seq_a)
    len_b = len(seq_b)

    # lengths[i][j] is the LCS length of seq_a[:i] and seq_b[:j]; matched[i][j] remembers
    # whether seq_a[i - 1] and seq_b[j - 1] were considered equal. Storing lengths instead of
    # the subsequences themselves avoids copying a list for every cell of the table.
    lengths = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    matched = [[False] * (len_b + 1) for _ in range(len_a + 1)]

    for i in range(1, len_a + 1):
        for j in range(1, len_b + 1):
            if equal(seq_a[i - 1], seq_b[j - 1]):
                matched[i][j] = True
                lengths[i][j] = lengths[i - 1][j - 1] + 1
            else:
                lengths[i][j] = max(lengths[i - 1][j], lengths[i][j - 1])

    # Walk back from the bottom-right corner, collecting the matched elements in reverse.
    result: t.List[t.Optional[T]] = []
    i, j = len_a, len_b
    while i and j:
        if matched[i][j]:
            result.append(seq_a[i - 1])
            i -= 1
            j -= 1
        elif lengths[i - 1][j] > lengths[i][j - 1]:
            i -= 1
        else:
            j -= 1

    result.reverse()
    return result
def highlight_sql(
    sql: str,
    positions: t.List[t.Tuple[int, int]],
    context_length: int = ERROR_MESSAGE_CONTEXT_DEFAULT,
) -> t.Tuple[str, str, str, str]:
    """
    Underline the given character ranges of a SQL string using ANSI escape codes.

    Args:
        sql: The complete SQL string.
        positions: List of (start, end) tuples, both inclusive 0-based indexes. For example,
            to highlight "foo" in "SELECT foo", use (7, 9). The tuples are processed in order
            of their start index, and the part of a range that overlaps text which was already
            emitted is skipped.
        context_length: Maximum number of characters kept before the first highlight and
            after the last highlight.

    Returns:
        A tuple of (formatted_sql, start_context, highlight, end_context) where:
        - formatted_sql: the context and highlighted text with ANSI underline codes applied
        - start_context: plain text preceding the first highlight
        - highlight: plain text from the start of the first highlight to the end of the last
          one, including any non-highlighted text in between (no ANSI)
        - end_context: plain text following the last highlight

    Raises:
        ValueError: if `positions` is empty.
    """
    if not positions:
        raise ValueError("positions must contain at least one (start, end) tuple")

    ordered = sorted(positions, key=lambda pos: pos[0])
    leading = ordered[0][0]

    pieces = []
    before = ""
    highlight_begin = 0
    cursor = 0  # index of the first character of `sql` that has not been emitted yet

    if leading > 0:
        highlight_begin = cursor = leading
        before = sql[max(0, leading - context_length) : leading]
        pieces.append(before)

    for start, end in ordered:
        span_start = max(start, cursor)
        span_stop = end + 1  # `end` is inclusive

        # Ranges that are empty, inverted or already fully covered contribute nothing.
        if span_start < span_stop:
            if span_start > cursor:
                # Plain text sitting between two highlights.
                pieces.append(sql[cursor:span_start])

            pieces.append(f"{ANSI_UNDERLINE}{sql[span_start:span_stop]}{ANSI_RESET}")
            cursor = span_stop

    after = ""
    if cursor < len(sql):
        after = sql[cursor : cursor + context_length]
        pieces.append(after)

    return "".join(pieces), before, sql[highlight_begin:cursor], after
def execute(
    sql: str | Expr,
    schema: t.Optional[t.Dict | Schema] = None,
    dialect: DialectType = None,
    tables: t.Optional[t.Dict] = None,
) -> Table:
    """
    Run a sql query against data.

    Args:
        sql: a sql statement.
        schema: database schema.
            This can either be an instance of `Schema` or a mapping in one of the following forms:
            1. {table: {col: type}}
            2. {db: {table: {col: type}}}
            3. {catalog: {db: {table: {col: type}}}}
            When no schema is given, one is built from `tables`: each column's type is taken
            from the value read through `table[0]`.
        dialect: the SQL dialect to apply during parsing (eg. "spark", "hive", "presto", "mysql").
        tables: additional tables to register.

    Returns:
        Simple columnar data structure.

    Raises:
        ExecuteError: if the tables and the schema disagree on their supported table args.
    """
    tables_ = ensure_tables(tables, dialect=dialect)

    if not schema:
        inferred: t.Dict = {}

        for path in flatten_schema(tables_.mapping, depth=dict_depth(tables_.mapping)):
            source_table = nested_get(tables_.mapping, *zip(path, path))
            assert source_table is not None

            for column_name in source_table.columns:
                sample = source_table[0][column_name]
                annotated = annotate_types(exp.convert(sample), dialect=dialect).type
                # Fall back to the Python type name when no type annotation is produced.
                nested_set(inferred, [*path, column_name], annotated or type(sample).__name__)

        schema = inferred

    schema = ensure_schema(schema, dialect=dialect)

    table_args = tables_.supported_table_args
    if table_args and table_args != schema.supported_table_args:
        raise ExecuteError("Tables must support the same table args as schema")

    started = time.time()
    expression = optimize(sql, schema, leave_tables_isolated=True, dialect=dialect)
    logger.debug("Optimization finished: %f", time.time() - started)
    logger.debug("Optimized SQL: %s", expression.sql(pretty=True))

    plan = Plan(expression)
    logger.debug("Logical Plan: %s", plan)

    started = time.time()
    result = PythonExecutor(tables=tables_).execute(plan)
    logger.debug("Query finished: %f", time.time() - started)

    return result
def sort(self, key) -> None:
    """
    Sort the rows of `self.table` in place.

    Args:
        key: the compiled expressions that are evaluated (via `eval_tuple`) against each
            row to obtain its sort values.
    """

    def row_sort_key(row: t.Tuple) -> t.Tuple:
        # Point the readers at this row so that the key expressions evaluate against it.
        self.set_row(row)
        # Each value is paired with an "is None" flag: None values order after all other
        # values and are never compared against them directly.
        return tuple((value is None, value) for value in self.eval_tuple(key))

    self.table.rows.sort(key=row_sort_key)
def null_if_any(*required):
    """
    Decorator that makes a function return `None` if any of the `required` arguments are `None`.

    `required` holds parameter names of the decorated function; only the positional
    arguments at those parameters' positions are checked.

    This also supports decoration with no arguments, e.g.:

        @null_if_any
        def foo(a, b): ...

    In which case all arguments are required.
    """
    bare_target = None
    if len(required) == 1 and callable(required[0]):
        # Used as `@null_if_any` without parentheses: the only argument is the function itself.
        bare_target, required = required[0], ()

    def decorator(func):
        if required:
            # Translate the required parameter names into positional indices once, up front.
            positions = [
                index
                for index, name in enumerate(inspect.signature(func).parameters)
                if name in required
            ]

            def has_null(args):
                return any(args[index] is None for index in positions)

        else:

            def has_null(args):
                return any(value is None for value in args)

        @wraps(func)
        def _func(*args):
            return None if has_null(args) else func(*args)

        return _func

    return decorator(bare_target) if bare_target else decorator
@null_if_any
def _like(this, pattern):
    """
    Evaluate `this LIKE pattern`, returning `None` if either argument is `None`.

    `%` matches any run of characters (including none) and `_` matches exactly one
    character. Every other character of the pattern is matched literally, and the pattern
    has to cover the whole string. Matching is case-sensitive.
    """
    regex = "".join(
        ".*" if char == "%" else "." if char == "_" else re.escape(char) for char in pattern
    )
    # DOTALL lets the wildcards span newlines.
    return re.fullmatch(regex, this, flags=re.DOTALL) is not None


# Names made available to the Python code that is evaluated by the executor: the `exp` module
# plus one callable per supported aggregate / scalar function. Unless noted otherwise, the
# scalar functions return None as soon as one of their arguments is None.
ENV = {
    "exp": exp,
    # aggs
    "ARRAYAGG": list,
    "ARRAYUNIQUEAGG": filter_nulls(lambda acc: list(set(acc))),
    "AVG": filter_nulls(statistics.fmean if PYTHON_VERSION >= (3, 8) else statistics.mean),  # type: ignore
    "COUNT": filter_nulls(lambda acc: sum(1 for _ in acc), False),
    "MAX": filter_nulls(max),
    "MIN": filter_nulls(min),
    "SUM": filter_nulls(sum),
    # scalar functions
    "ABS": null_if_any(lambda this: abs(this)),
    "ADD": null_if_any(lambda e, this: e + this),
    "ARRAYANY": null_if_any(lambda arr, func: any(func(e) for e in arr)),
    "ARRAYTOSTRING": arraytostring,
    "BETWEEN": null_if_any(lambda this, low, high: low <= this and this <= high),
    "BITWISEAND": null_if_any(lambda this, e: this & e),
    "BITWISELEFTSHIFT": null_if_any(lambda this, e: this << e),
    "BITWISEOR": null_if_any(lambda this, e: this | e),
    "BITWISERIGHTSHIFT": null_if_any(lambda this, e: this >> e),
    "BITWISEXOR": null_if_any(lambda this, e: this ^ e),
    "CAST": cast,
    # Not null-propagating by definition: returns the first non-None argument.
    "COALESCE": lambda *args: next((a for a in args if a is not None), None),
    "CONCAT": null_if_any(lambda *args: "".join(args)),
    "SAFECONCAT": null_if_any(lambda *args: "".join(str(arg) for arg in args)),
    "CONCATWS": null_if_any(lambda this, *args: this.join(args)),
    "DATEDIFF": null_if_any(lambda this, expression, *_: (this - expression).days),
    "DATESTRTODATE": null_if_any(lambda arg: datetime.date.fromisoformat(arg)),
    "DIV": null_if_any(lambda e, this: e / this),
    "DOT": null_if_any(lambda e, this: e[this]),
    "EQ": null_if_any(lambda this, e: this == e),
    "EXTRACT": null_if_any(lambda this, e: getattr(e, this)),
    "GT": null_if_any(lambda this, e: this > e),
    "GTE": null_if_any(lambda this, e: this >= e),
    "IF": lambda predicate, true, false: true if predicate else false,
    "INTDIV": null_if_any(lambda e, this: e // this),
    "INTERVAL": interval,
    "JSONEXTRACT": jsonextract,
    "LEFT": null_if_any(lambda this, e: this[:e]),
    "LIKE": _like,
    "LOWER": null_if_any(lambda arg: arg.lower()),
    "LT": null_if_any(lambda this, e: this < e),
    "LTE": null_if_any(lambda this, e: this <= e),
    "MAP": null_if_any(lambda *args: dict(zip(*args))),  # type: ignore
    "MOD": null_if_any(lambda e, this: e % this),
    "MUL": null_if_any(lambda e, this: e * this),
    "NEQ": null_if_any(lambda this, e: this != e),
    "ORD": null_if_any(ord),
    "ORDERED": ordered,
    "POW": null_if_any(pow),
    # `this[-0:]` is the whole value, so a length of 0 needs an explicit empty slice.
    "RIGHT": null_if_any(lambda this, e: this[-e:] if e else this[:0]),
    "ROUND": null_if_any(lambda this, decimals=None, truncate=None: round(this, ndigits=decimals)),
    "STRPOSITION": str_position,
    "SUB": null_if_any(lambda e, this: e - this),
    "SUBSTRING": substring,
    "TIMESTRTOTIME": null_if_any(lambda arg: datetime.datetime.fromisoformat(arg)),
    "UPPER": null_if_any(lambda arg: arg.upper()),
    "YEAR": null_if_any(lambda arg: arg.year),
    "MONTH": null_if_any(lambda arg: arg.month),
    "DAY": null_if_any(lambda arg: arg.day),
    "CURRENTDATETIME": datetime.datetime.now,
    "CURRENTTIMESTAMP": datetime.datetime.now,
    "CURRENTTIME": datetime.datetime.now,
    "CURRENTDATE": datetime.date.today,
    "STRFTIME": null_if_any(lambda fmt, arg: datetime.datetime.fromisoformat(arg).strftime(fmt)),
    "STRTOTIME": null_if_any(lambda arg, format: datetime.datetime.strptime(arg, format)),
    "TRIM": null_if_any(lambda this, e=None: this.strip(e)),
    # Key / value pairs are given as alternating arguments; pairs containing None are dropped.
    "STRUCT": lambda *args: {
        args[x]: args[x + 1]
        for x in range(0, len(args), 2)
        if (args[x + 1] is not None and args[x] is not None)
    },
    "UNIXTOTIME": null_if_any(
        lambda arg: datetime.datetime.fromtimestamp(arg, datetime.timezone.utc)
    ),
}
generate(self, expression): """Convert a SQL expression into literal Python code and compile it into bytecode.""" if not expression: return None sql = self.generator.generate(expression) return compile(sql, sql, "eval", optimize=2) def generate_tuple(self, expressions): """Convert an array of SQL expressions into tuple of Python byte code.""" if not expressions: return tuple() return tuple(self.generate(expression) for expression in expressions) def context(self, tables): return Context(tables, env=self.env) def table(self, expressions): return Table( expression.alias_or_name if isinstance(expression, exp.Expr) else expression for expression in expressions ) def scan(self, step, context): source = step.source if source and isinstance(source, exp.Expr): source = source.name or source.alias if source is None: context, table_iter = self.static() elif source in context: if not step.projections and not step.condition: return self.context({step.name: context.tables[source]}) table_iter = context.table_iter(source) else: context, table_iter = self.scan_table(step) return self.context({step.name: self._project_and_filter(context, step, table_iter)}) def _project_and_filter(self, context, step, table_iter): sink = self.table(step.projections if step.projections else context.columns) condition = self.generate(step.condition) projections = self.generate_tuple(step.projections) for reader in table_iter: if len(sink) >= step.limit: break if condition and not context.eval(condition): continue if projections: sink.append(context.eval_tuple(projections)) else: sink.append(reader.row) return sink def static(self): return self.context({}), [RowReader(())] def scan_table(self, step): table = self.tables.find(step.source) context = self.context({step.source.alias_or_name: table}) return context, iter(table) def join(self, step, context): source = step.source_name source_table = context.tables[source] source_context = self.context({source: source_table}) column_ranges = {source: 
range(0, len(source_table.columns))} for name, join in step.joins.items(): table = context.tables[name] start = max(r.stop for r in column_ranges.values()) column_ranges[name] = range(start, len(table.columns) + start) join_context = self.context({name: table}) if join.get("source_key"): table = self.hash_join(join, source_context, join_context) else: table = self.nested_loop_join(join, source_context, join_context) source_context = self.context( { name: Table(table.columns, table.rows, column_range) for name, column_range in column_ranges.items() } ) condition = self.generate(join["condition"]) if condition: source_context.filter(condition) if not step.condition and not step.projections: return source_context sink = self._project_and_filter( source_context, step, (reader for reader, _ in iter(source_context)), ) if step.projections: return self.context({step.name: sink}) else: return self.context( { name: Table(table.columns, sink.rows, table.column_range) for name, table in source_context.tables.items() } ) def nested_loop_join(self, _join, source_context, join_context): table = Table(source_context.columns + join_context.columns) for reader_a, _ in source_context: for reader_b, _ in join_context: table.append(reader_a.row + reader_b.row) return table def hash_join(self, join, source_context, join_context): source_key = self.generate_tuple(join["source_key"]) join_key = self.generate_tuple(join["join_key"]) left = join.get("side") == "LEFT" right = join.get("side") == "RIGHT" results = collections.defaultdict(lambda: ([], [])) for reader, ctx in source_context: results[ctx.eval_tuple(source_key)][0].append(reader.row) for reader, ctx in join_context: results[ctx.eval_tuple(join_key)][1].append(reader.row) table = Table(source_context.columns + join_context.columns) nulls = [(None,) * len(join_context.columns if left else source_context.columns)] for a_group, b_group in results.values(): if left: b_group = b_group or nulls elif right: a_group = a_group or nulls 
for a_row, b_row in itertools.product(a_group, b_group): table.append(a_row + b_row) return table def aggregate(self, step, context): group_by = self.generate_tuple(step.group.values()) aggregations = self.generate_tuple(step.aggregations) operands = self.generate_tuple(step.operands) if operands: operand_table = Table(self.table(step.operands).columns) for reader, ctx in context: operand_table.append(ctx.eval_tuple(operands)) for i, (a, b) in enumerate(zip(context.table.rows, operand_table.rows)): context.table.rows[i] = a + b width = len(context.columns) context.add_columns(*operand_table.columns) operand_table = Table( context.columns, context.table.rows, range(width, width + len(operand_table.columns)), ) context = self.context( { None: operand_table, **context.tables, } ) context.sort(group_by) group = None start = 0 end = 1 length = len(context.table) table = self.table(list(step.group) + step.aggregations) def add_row(): table.append(group + context.eval_tuple(aggregations)) if length: for i in range(length): context.set_index(i) key = context.eval_tuple(group_by) group = key if group is None else group end += 1 if key != group: context.set_range(start, end - 2) add_row() group = key start = end - 2 if len(table.rows) >= step.limit: break if i == length - 1: context.set_range(start, end - 1) add_row() elif step.limit > 0 and not group_by: context.set_range(0, 0) table.append(context.eval_tuple(aggregations)) context = self.context({step.name: table, **{name: table for name in context.tables}}) if step.projections or step.condition: return self.scan(step, context) return context def sort(self, step, context): projections = self.generate_tuple(step.projections) projection_columns = [p.alias_or_name for p in step.projections] all_columns = list(context.columns) + projection_columns sink = self.table(all_columns) for reader, ctx in context: sink.append(reader.row + ctx.eval_tuple(projections)) sort_ctx = self.context( { None: sink, **{table: sink for table in 
def _ordered_py(self, expression):
    """Render an ORDER BY term as an `ORDERED(value, desc, nulls_first)` call."""
    this = self.sql(expression, "this")
    # The flags are emitted as Python literals because the output is Python source.
    desc = "True" if expression.args.get("desc") else "False"
    nulls_first = "True" if expression.args.get("nulls_first") else "False"
    return f"ORDERED({this}, {desc}, {nulls_first})"


def _rename(self, e):
    """Render `e` as a call named after `e.key`, passing its args positionally.

    Raises:
        Exception: wraps any failure, naming the offending expression.
    """
    try:
        values = list(e.args.values())

        if len(values) == 1:
            # A single arg is passed as-is, or spread when it is a list.
            values = values[0]
            if not isinstance(values, list):
                return self.func(e.key, values)
            return self.func(e.key, *values)

        if isinstance(e, exp.Func) and e.is_var_len_args:
            # Flatten list-valued args so every item becomes its own argument.
            args = itertools.chain.from_iterable(x if isinstance(x, list) else [x] for x in values)
            return self.func(e.key, *args)

        return self.func(e.key, *values)
    except Exception as ex:
        raise Exception(f"Could not rename {repr(e)}") from ex


def _case_sql(self, expression):
    """Render CASE as nested Python conditional expressions.

    Branches are folded from last to first so that the first WHEN ends up
    outermost; a missing default becomes `None`.
    """
    this = self.sql(expression, "this")
    chain = self.sql(expression, "default") or "None"

    for e in reversed(expression.args["ifs"]):
        true = self.sql(e, "true")
        condition = self.sql(e, "this")
        # With an operand (`CASE x WHEN ...`), each branch compares against it.
        condition = f"{this} = ({condition})" if this else condition
        chain = f"{true} if {condition} else ({chain})"

    return chain


def _lambda_sql(self, e: exp.Lambda) -> str:
    """Render a SQL lambda as a Python `lambda`.

    Identifiers that match a lambda parameter name (case-insensitively) are
    replaced by `exp.var` nodes before generation.
    """
    names = {e.name.lower() for e in e.expressions}

    e = e.transform(
        lambda n: (
            exp.var(n.name) if isinstance(n, exp.Identifier) and n.name.lower() in names else n
        )
    ).assert_is(exp.Lambda)

    return f"lambda {self.expressions(e, flat=True)}: {self.sql(e, 'this')}"


def _div_sql(self: generator.Generator, e: exp.Div) -> str:
    """Render division as a `DIV(numerator, denominator)` call."""
    denominator = self.sql(e, "expression")

    if e.args.get("safe"):
        # A falsy denominator (such as 0) is replaced by None at run time.
        denominator += " or None"

    sql = f"DIV({self.sql(e, 'this')}, {denominator})"

    if e.args.get("typed"):
        sql = f"int({sql})"

    return sql


class Python(Dialect):
    """Dialect whose generator emits Python source instead of SQL."""

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\"]

    class Generator(generator.Generator):
        # Binary operators and all known functions default to `_rename`; the
        # explicit entries that follow override those defaults because later
        # dict keys win.
        TRANSFORMS = {
            **{klass: _rename for klass in subclasses(exp.__name__, exp.Binary)},
            **{klass: _rename for klass in exp.ALL_FUNCTIONS},
            exp.Case: _case_sql,
            exp.Alias: lambda self, e: self.sql(e.this),
            exp.Array: inline_array_sql,
            exp.And: lambda self, e: self.binary(e, "and"),
            exp.Between: _rename,
            exp.Boolean: lambda self, e: "True" if e.this else "False",
            exp.Cast: lambda self, e: f"CAST({self.sql(e.this)}, exp.DType.{e.args['to']})",
            # Columns become lookups into `scope`, keyed by table (or None) and column.
            exp.Column: lambda self, e: (
                f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]"
            ),
            exp.Concat: lambda self, e: self.func(
                "SAFECONCAT" if e.args.get("safe") else "CONCAT", *e.expressions
            ),
            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
            exp.Div: _div_sql,
            exp.Extract: lambda self, e: (
                f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})"
            ),
            # IN is rendered as membership in a Python set literal.
            exp.In: lambda self, e: (
                f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}"
            ),
            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
            # A literal on the left is compared by value, anything else by identity.
            exp.Is: lambda self, e: (
                self.binary(e, "==") if isinstance(e.this, exp.Literal) else self.binary(e, "is")
            ),
            exp.JSONExtract: lambda self, e: self.func(e.key, e.this, e.expression, *e.expressions),
            # The first path element is skipped when rendering the path list.
            exp.JSONPath: lambda self, e: f"[{','.join(self.sql(p) for p in e.expressions[1:])}]",
            exp.JSONPathKey: lambda self, e: f"'{self.sql(e.this)}'",
            exp.JSONPathSubscript: lambda self, e: f"'{e.this}'",
            exp.Lambda: _lambda_sql,
            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
            exp.Null: lambda *_: "None",
            exp.Or: lambda self, e: self.binary(e, "or"),
            exp.Ordered: _ordered_py,
            exp.Star: lambda *_: "1",
        }
class Table:
    """In-memory table: a tuple of column names plus a list of row tuples.

    Args:
        columns: iterable of column names; stored as a tuple.
        rows: list of row tuples. A non-empty list is stored by reference,
            so several tables may share (and mutate) the same rows.
        column_range: optional range of column indexes that this table's
            readers expose. None exposes every column; an empty range
            exposes none.
    """

    def __init__(
        self,
        columns: t.Any = None,
        rows: t.Any = None,
        column_range: t.Optional[range] = None,
    ) -> None:
        self.columns: t.Any = tuple(columns) if columns is not None else ()
        self.column_range = column_range
        self.reader: t.Any = RowReader(self.columns, self.column_range)
        self.rows: t.Any = rows or []
        if rows:
            # Only the first row is checked; `append` checks every later row.
            assert len(rows[0]) == len(self.columns)
        self.range_reader = RangeReader(self)

    def add_columns(self, *columns: str) -> None:
        """Append column names and widen a non-empty `column_range` to match."""
        self.columns += columns
        # None or an empty range has nothing to extend.
        if self.column_range:
            self.column_range = range(
                self.column_range.start, self.column_range.stop + len(columns)
            )
        # The reader caches name -> index, so it must be rebuilt.
        self.reader = RowReader(self.columns, self.column_range)

    def append(self, row: t.Any) -> None:
        """Add a row, which must have exactly one value per column."""
        assert len(row) == len(self.columns)
        self.rows.append(row)

    def pop(self) -> None:
        """Remove the last row."""
        self.rows.pop()

    def to_pylist(self) -> t.List:
        """Return the rows as a list of `{column: value}` dicts."""
        return [dict(zip(self.columns, row)) for row in self.rows]

    @property
    def width(self) -> int:
        """Number of columns, ignoring `column_range`."""
        return len(self.columns)

    def __len__(self) -> int:
        return len(self.rows)

    def __iter__(self) -> TableIter:
        return TableIter(self)

    def __getitem__(self, index: int) -> RowReader:
        # The single shared reader is re-pointed at the requested row, so a
        # previously returned reader changes along with it.
        self.reader.row = self.rows[index]
        return self.reader

    def __repr__(self) -> str:
        # Test for None explicitly: an empty range is falsy but means
        # "no visible columns", not "no restriction".
        columns = tuple(
            column
            for i, column in enumerate(self.columns)
            if self.column_range is None or i in self.column_range
        )
        widths = {column: len(column) for column in columns}
        lines = [" ".join(columns)]

        for i, row in enumerate(self):
            # Show at most the first 11 rows.
            if i > 10:
                break

            # Values are right-aligned and truncated to the header's width.
            lines.append(
                " ".join(
                    str(row[column]).rjust(widths[column])[0 : widths[column]]
                    for column in columns
                )
            )
        return "\n".join(lines)


class TableIter:
    """Iterator over a `Table` that yields the table's shared `RowReader`."""

    def __init__(self, table: Table) -> None:
        self.table = table
        self.index = -1

    def __iter__(self) -> TableIter:
        return self

    def __next__(self) -> RowReader:
        self.index += 1
        if self.index < len(self.table):
            return self.table[self.index]
        raise StopIteration


class RangeReader:
    """Reads one column across a window of rows given by `self.range`."""

    def __init__(self, table: t.Any = None) -> None:
        self.table: t.Any = table
        # Starts out empty; users assign the row window to read.
        self.range = range(0)

    def __len__(self) -> int:
        return len(self.range)

    def __getitem__(self, column: str):
        """Return a generator of `column`'s values for the rows in `self.range`."""
        return (self.table[i][column] for i in self.range)


class RowReader:
    """Maps column names to positions within the current `row`.

    Args:
        columns: sequence of column names, or None for no columns.
        column_range: optional range restricting which column indexes are
            visible. None exposes every column; an empty range exposes none.
    """

    def __init__(self, columns=None, column_range=None):
        # Compare against None rather than relying on truthiness: an empty
        # range is falsy and used to be mistaken for "no restriction".
        self.columns = (
            {
                column: i
                for i, column in enumerate(columns)
                if column_range is None or i in column_range
            }
            if columns is not None
            else {}
        )
        self.row = None

    def __getitem__(self, column):
        return self.row[self.columns[column]]
class Tables(AbstractMappingSchema):
    """Mapping schema used by the executor to look up its source tables."""


def ensure_tables(d: t.Optional[t.Dict], dialect: DialectType = None) -> Tables:
    """Normalize a (possibly nested) mapping of table data and wrap it in `Tables`."""
    normalized = _ensure_tables(d, dialect=dialect)
    return Tables(normalized)


def _ensure_tables(d: t.Optional[t.Dict], dialect: DialectType = None) -> t.Dict:
    """Recursively normalize names and turn row dicts into `Table` objects.

    Levels above the leaves map normalized names to nested mappings. At the
    leaf level each value is either an existing `Table`, kept as-is, or an
    iterable of `{column: value}` rows, which is converted into a `Table`
    whose columns are taken from the first row.
    """
    if not d:
        return {}

    if dict_depth(d) > 1:
        nested = {}
        for key, value in d.items():
            qualified = normalize_name(key, dialect=dialect, is_table=True).name
            nested[qualified] = _ensure_tables(value, dialect=dialect)
        return nested

    result = {}
    for raw_name, data in d.items():
        table_name = normalize_name(raw_name, dialect=dialect).name

        if isinstance(data, Table):
            result[table_name] = data
            continue

        # Normalize the column names of every row first.
        records = []
        for row in data:
            record = {}
            for column_name, value in row.items():
                record[normalize_name(column_name, dialect=dialect).name] = value
            records.append(record)

        # The first row decides which columns exist and in which order.
        column_names = tuple(records[0]) if records else ()
        rows = [tuple(record[name] for name in column_names) for record in records]
        result[table_name] = Table(columns=column_names, rows=rows)

    return result
================================================ FILE: sqlglot/expressions/__init__.py ================================================ # ruff: noqa: F405 """ ## Exprs Every AST node in SQLGlot is represented by a subclass of `Expr`. This module contains the implementation of all supported `Expr` types. Additionally, it exposes a number of helper functions, which are mainly used to programmatically build SQL expressions, such as `sqlglot.expressions.select`. ---- """ import typing as t from sqlglot.expressions.core import * # noqa: F401,F403 from sqlglot.expressions.datatypes import * # noqa: F401,F403 from sqlglot.expressions.constraints import * # noqa: F401,F403 from sqlglot.expressions.properties import * # noqa: F401,F403 from sqlglot.expressions.query import * # noqa: F401,F403 from sqlglot.expressions.ddl import * # noqa: F401,F403 from sqlglot.expressions.dml import * # noqa: F401,F403 from sqlglot.expressions.math import * # noqa: F401,F403 from sqlglot.expressions.string import * # noqa: F401,F403 from sqlglot.expressions.temporal import * # noqa: F401,F403 from sqlglot.expressions.aggregate import * # noqa: F401,F403 from sqlglot.expressions.array import * # noqa: F401,F403 from sqlglot.expressions.json import * # noqa: F401,F403 from sqlglot.expressions.functions import * # noqa: F401,F403 from sqlglot.expressions.builders import * # noqa: F401,F403 # Explicitly import private helpers (not exported by star imports) from sqlglot.expressions.core import ( # noqa: F401,E402 Expression, _apply_builder, _apply_child_list_builder, _apply_list_builder, _apply_conjunction_builder, _apply_set_operation, _combine, _wrap, _is_wrong_expression, _to_s, ) from sqlglot.expressions.query import _apply_cte_builder # noqa: F401,E402 from sqlglot.expressions.dml import DML # noqa: F401,E402 from sqlglot.expressions.array import _ExplodeOuter # noqa: F401,E402 from sqlglot.helper import subclasses ALL_FUNCTIONS = subclasses(__name__, Func, {AggFunc, Anonymous, Func}) 
# Lookup from every SQL name a function answers to, to its expression class.
FUNCTION_BY_NAME = {
    name: func
    for func in ALL_FUNCTIONS
    for name in func.sql_names()
}


def _init_subclasses(cls: t.Type[Expr]) -> None:
    """Recompute `required_args` for every direct and indirect subclass of `cls`."""
    # mypyc fires __init_subclass__ before setting compiled ClassVar attributes,
    # so required_args may have been computed from the inherited arg_types rather
    # than the class-specific one. Recompute now that all modules are fully loaded.
    pending = list(cls.__subclasses__())
    while pending:
        sub = pending.pop()
        sub.required_args = {arg for arg, required in sub.arg_types.items() if required}
        pending.extend(sub.__subclasses__())


_init_subclasses(Expr)
# Conventions used by the declarations in this module:
#   * `arg_types` maps each argument name to whether it is required (True)
#     or optional (False).
#   * `_sql_names` lists the SQL function names associated with the class.
#   * `is_var_len_args = True` marks functions whose arguments include a
#     list-valued entry (presumably the last one; confirm in `Func`).
# Classes that only contain `pass` inherit all of these from their bases.


class AIAgg(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}
    _sql_names = ["AI_AGG"]


class AISummarizeAgg(Expression, AggFunc):
    _sql_names = ["AI_SUMMARIZE_AGG"]


class AnyValue(Expression, AggFunc):
    pass


class ApproximateSimilarity(Expression, AggFunc):
    _sql_names = ["APPROXIMATE_SIMILARITY", "APPROXIMATE_JACCARD_INDEX"]


class ApproxPercentileAccumulate(Expression, AggFunc):
    pass


class ApproxPercentileCombine(Expression, AggFunc):
    pass


# Declared as a plain Func, not an AggFunc.
class ApproxPercentileEstimate(Expression, Func):
    arg_types = {"this": True, "percentile": True}


class ApproxQuantiles(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


class ApproxTopK(Expression, AggFunc):
    arg_types = {"this": True, "expression": False, "counters": False}


class ApproxTopKAccumulate(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


class ApproxTopKCombine(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


# Declared as a plain Func, not an AggFunc.
class ApproxTopKEstimate(Expression, Func):
    arg_types = {"this": True, "expression": False}


class ApproxTopSum(Expression, AggFunc):
    arg_types = {"this": True, "expression": True, "count": True}


class ArgMax(Expression, AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MAX", "ARGMAX", "MAX_BY"]


class ArgMin(Expression, AggFunc):
    arg_types = {"this": True, "expression": True, "count": False}
    _sql_names = ["ARG_MIN", "ARGMIN", "MIN_BY"]


class ArrayAgg(Expression, AggFunc):
    arg_types = {"this": True, "nulls_excluded": False}


class ArrayConcatAgg(Expression, AggFunc):
    pass


class ArrayUnionAgg(Expression, AggFunc):
    pass


class ArrayUniqueAgg(Expression, AggFunc):
    pass


class Avg(Expression, AggFunc):
    pass


class Corr(Expression, AggFunc, Binary):
    # Correlation divides by variance(column). If a column has 0 variance, the denominator
    # is 0 - some dialects return NaN (DuckDB) while others return NULL (Snowflake).
    # `null_on_zero_variance` is set to True at parse time for dialects that return NULL.
    arg_types = {"this": True, "expression": True, "null_on_zero_variance": False}


class Count(Expression, AggFunc):
    arg_types = {"this": False, "expressions": False, "big_int": False}
    is_var_len_args = True


class CountIf(Expression, AggFunc):
    _sql_names = ["COUNT_IF", "COUNTIF"]


class CovarPop(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class CovarSamp(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class CumeDist(Expression, AggFunc):
    arg_types = {"expressions": False}
    is_var_len_args = True


class DenseRank(Expression, AggFunc):
    arg_types = {"expressions": False}
    is_var_len_args = True


class First(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


class FirstValue(Expression, AggFunc):
    pass


class GroupConcat(Expression, AggFunc):
    arg_types = {"this": True, "separator": False, "on_overflow": False}


class Grouping(Expression, AggFunc):
    arg_types = {"expressions": True}
    is_var_len_args = True


class GroupingId(Expression, AggFunc):
    arg_types = {"expressions": False}
    is_var_len_args = True


class Kurtosis(Expression, AggFunc):
    pass


class Lag(Expression, AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


class Last(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


class LastValue(Expression, AggFunc):
    pass


class Lead(Expression, AggFunc):
    arg_types = {"this": True, "offset": False, "default": False}


class LogicalAnd(Expression, AggFunc):
    _sql_names = ["LOGICAL_AND", "BOOL_AND", "BOOLAND_AGG"]


class LogicalOr(Expression, AggFunc):
    _sql_names = ["LOGICAL_OR", "BOOL_OR", "BOOLOR_AGG"]


class Max(Expression, AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class Median(Expression, AggFunc):
    pass


class Min(Expression, AggFunc):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class Minhash(Expression, AggFunc):
    arg_types = {"this": True, "expressions": True}
    is_var_len_args = True


class MinhashCombine(Expression, AggFunc):
    pass


class Mode(Expression, AggFunc):
    arg_types = {"this": False, "deterministic": False}


class Ntile(Expression, AggFunc):
    arg_types = {"this": False}


class NthValue(Expression, AggFunc):
    arg_types = {"this": True, "offset": True, "from_first": False}


class ObjectAgg(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class PercentileCont(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


class PercentileDisc(Expression, AggFunc):
    arg_types = {"this": True, "expression": False}


# Convenience tuple of both percentile classes (usable with isinstance).
PERCENTILES = (PercentileCont, PercentileDisc)


class PercentRank(Expression, AggFunc):
    arg_types = {"expressions": False}
    is_var_len_args = True


class Quantile(Expression, AggFunc):
    arg_types = {"this": True, "quantile": True}


# Subclasses Quantile, so it is also an instance of Quantile.
class ApproxQuantile(Quantile):
    arg_types = {
        "this": True,
        "quantile": True,
        "accuracy": False,
        "weight": False,
        "error_tolerance": False,
    }


class Rank(Expression, AggFunc):
    arg_types = {"expressions": False}
    is_var_len_args = True


class RegrAvgx(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrAvgy(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrCount(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrIntercept(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrR2(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrSlope(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrSxx(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrSxy(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrSyy(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrValx(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


class RegrValy(Expression, AggFunc):
    arg_types = {"this": True, "expression": True}


# Declared as a plain Func, not an AggFunc.
class RowNumber(Expression, Func):
    arg_types = {"this": False}


class Skewness(Expression, AggFunc):
    pass


class Stddev(Expression, AggFunc):
    _sql_names = ["STDDEV", "STDEV"]


class StddevPop(Expression, AggFunc):
    pass


class StddevSamp(Expression, AggFunc):
    pass


class Sum(Expression, AggFunc):
    pass


class Variance(Expression, AggFunc):
    _sql_names = ["VARIANCE", "VARIANCE_SAMP", "VAR_SAMP"]


class VariancePop(Expression, AggFunc):
    _sql_names = ["VARIANCE_POP", "VAR_POP"]
# Conventions used by the declarations in this module:
#   * `arg_types` maps each argument name to whether it is required (True)
#     or optional (False).
#   * `_sql_names` lists the SQL function names associated with the class.
#   * `is_var_len_args = True` marks functions whose arguments include a
#     list-valued entry (presumably the last one; confirm in `Func`).
# Classes that only contain `pass` inherit all of these from their bases.


# Array creation / construction


class Array(Expression, Func):
    arg_types = {
        "expressions": False,
        "bracket_notation": False,
        "struct_name_inheritance": False,
    }
    is_var_len_args = True


class ArrayConstructCompact(Expression, Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


class List(Expression, Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


class ToArray(Expression, Func):
    pass


# Array manipulation


class ArrayAppend(Expression, Func):
    arg_types = {"this": True, "expression": True, "null_propagation": False}


class ArrayCompact(Expression, Func):
    pass


class ArrayConcat(Expression, Func):
    _sql_names = ["ARRAY_CONCAT", "ARRAY_CAT"]
    arg_types = {"this": True, "expressions": False, "null_propagation": False}
    is_var_len_args = True


class ArrayFilter(Expression, Func):
    arg_types = {"this": True, "expression": True}
    _sql_names = ["FILTER", "ARRAY_FILTER"]


class ArrayInsert(Expression, Func):
    arg_types = {"this": True, "position": True, "expression": True, "offset": False}


class ArrayPrepend(Expression, Func):
    arg_types = {"this": True, "expression": True, "null_propagation": False}


class ArrayRemove(Expression, Func):
    arg_types = {"this": True, "expression": True, "null_propagation": False}


class ArrayRemoveAt(Expression, Func):
    arg_types = {"this": True, "position": True}


class ArrayReverse(Expression, Func):
    pass


class ArraySlice(Expression, Func):
    arg_types = {"this": True, "start": True, "end": False, "step": False, "zero_based": False}


class ArraySort(Expression, Func):
    arg_types = {"this": True, "expression": False}


class SortArray(Expression, Func):
    arg_types = {"this": True, "asc": False, "nulls_first": False}


# Array predicates / search


class ArrayAll(Expression, Func):
    arg_types = {"this": True, "expression": True}


class ArrayAny(Expression, Func):
    arg_types = {"this": True, "expression": True}


class ArrayContains(Expression, Binary, Func):
    arg_types = {"this": True, "expression": True, "ensure_variant": False, "check_null": False}
    _sql_names = ["ARRAY_CONTAINS", "ARRAY_HAS"]


class ArrayContainsAll(Expression, Binary, Func):
    _sql_names = ["ARRAY_CONTAINS_ALL", "ARRAY_HAS_ALL"]


class ArrayExcept(Expression, Func):
    arg_types = {"this": True, "expression": True, "is_multiset": False}


class ArrayIntersect(Expression, Func):
    arg_types = {"expressions": True, "is_multiset": False}
    is_var_len_args = True
    _sql_names = ["ARRAY_INTERSECT", "ARRAY_INTERSECTION"]


class ArrayOverlaps(Expression, Binary, Func):
    arg_types = {"this": True, "expression": True, "null_safe": False}


class ArrayPosition(Expression, Binary, Func):
    arg_types = {"this": True, "expression": True, "zero_based": False}


# Array properties


class ArrayDistinct(Expression, Func):
    arg_types = {"this": True, "check_null": False}


class ArrayFirst(Expression, Func):
    arg_types = {"this": True, "expression": False}


class ArrayLast(Expression, Func):
    pass


class ArrayMax(Expression, Func):
    pass


class ArrayMin(Expression, Func):
    pass


class ArraySize(Expression, Func):
    arg_types = {"this": True, "expression": False}
    _sql_names = ["ARRAY_SIZE", "ARRAY_LENGTH"]


class ArraySum(Expression, Func):
    arg_types = {"this": True, "expression": False}


# Array conversion / utility


class ArraysZip(Expression, Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


class ArrayToString(Expression, Func):
    arg_types = {
        "this": True,
        "expression": True,
        "null": False,
        "null_is_empty": False,
        "null_delim_is_null": False,
    }
    _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]


class Flatten(Expression, Func):
    pass


class StringToArray(Expression, Func):
    arg_types = {"this": True, "expression": False, "null": False}
    _sql_names = ["STRING_TO_ARRAY", "SPLIT_BY_STRING", "STRTOK_TO_ARRAY"]


# Higher-order / lambda


class Apply(Expression, Func):
    arg_types = {"this": True, "expression": True}


class Reduce(Expression, Func):
    arg_types = {"this": True, "initial": True, "merge": True, "finish": False}


class Transform(Expression, Func):
    arg_types = {"this": True, "expression": True}


# Table-valued / UDTF


class GenerateSeries(Expression, Func):
    arg_types = {"start": True, "end": True, "step": False, "is_end_exclusive": False}


class ExplodingGenerateSeries(GenerateSeries):
    pass


class Generator(Expression, Func, UDTF):
    arg_types = {"rowcount": False, "timelimit": False}


class Explode(Expression, Func, UDTF):
    arg_types = {"this": True, "expressions": False}
    is_var_len_args = True


class Inline(Expression, Func):
    pass


# Marker trait shared by the "outer" explode variants declared after it.
@trait
class ExplodeOuter(Expr):
    pass


# Private concrete class that combines Explode with the ExplodeOuter trait.
class _ExplodeOuter(Explode, ExplodeOuter):
    _sql_names = ["EXPLODE_OUTER"]


class Posexplode(Explode):
    pass


class PosexplodeOuter(Posexplode, ExplodeOuter):
    pass


class PositionalColumn(Expression):
    pass


class Unnest(Expression, Func, UDTF):
    arg_types = {
        "expressions": True,
        "alias": False,
        "offset": False,
        "explode_array": False,
    }

    @property
    def selects(self) -> t.List[Expr]:
        """Return the inherited selects plus the offset column, if one is set.

        An `offset` of exactly True contributes an identifier named "offset";
        any other truthy value is appended as given.
        """
        columns = super().selects
        offset = self.args.get("offset")
        if offset:
            # Build a new list instead of mutating the one returned by the parent.
            columns = columns + [to_identifier("offset") if offset is True else offset]
        return columns


# Map


class Map(Expression, Func):
    arg_types = {"keys": False, "values": False}

    @property
    def keys(self) -> t.List[Expr]:
        """Expressions of the `keys` argument, or an empty list when it is absent."""
        keys = self.args.get("keys")
        return keys.expressions if keys else []

    @property
    def values(self) -> t.List[Expr]:
        """Expressions of the `values` argument, or an empty list when it is absent."""
        values = self.args.get("values")
        return values.expressions if values else []


class MapCat(Expression, Func):
    arg_types = {"this": True, "expression": True}


class MapContainsKey(Expression, Func):
    arg_types = {"this": True, "key": True}


class MapDelete(Expression, Func):
    arg_types = {"this": True, "expressions": True}
    is_var_len_args = True


class MapFromEntries(Expression, Func):
    pass


class MapInsert(Expression, Func):
    arg_types = {"this": True, "key": False, "value": True, "update_flag": False}


class MapKeys(Expression, Func):
    pass


class MapPick(Expression, Func):
    arg_types = {"this": True, "expressions": True}
    is_var_len_args = True


class MapSize(Expression, Func):
    pass


class StarMap(Expression, Func):
    pass


class ToMap(Expression, Func):
    pass


class VarMap(Expression, Func):
    arg_types = {"keys": True, "values": True}
    is_var_len_args = True

    @property
    def keys(self) -> t.List[Expr]:
        """Expressions of the required `keys` argument."""
        return self.args["keys"].expressions

    @property
    def values(self) -> t.List[Expr]:
        """Expressions of the required `values` argument."""
        return self.args["values"].expressions


# Struct


class Struct(Expression, Func):
    arg_types = {"expressions": False}
    is_var_len_args = True


class StructExtract(Expression, Func):
    arg_types = {"this": True, "expression": True}


# Geospatial


class StDistance(Expression, Func):
    arg_types = {"this": True, "expression": True, "use_spheroid": False}


class StPoint(Expression, Func):
    arg_types = {"this": True, "expression": True, "null": False}
    _sql_names = ["ST_POINT", "ST_MAKEPOINT"]
False} is_var_len_args = True class StructExtract(Expression, Func): arg_types = {"this": True, "expression": True} # Geospatial class StDistance(Expression, Func): arg_types = {"this": True, "expression": True, "use_spheroid": False} class StPoint(Expression, Func): arg_types = {"this": True, "expression": True, "null": False} _sql_names = ["ST_POINT", "ST_MAKEPOINT"] ================================================ FILE: sqlglot/expressions/builders.py ================================================ """sqlglot expressions builders.""" from __future__ import annotations import re import typing as t from sqlglot._typing import E from sqlglot.helper import seq_get, ensure_collection, split_num_words from sqlglot.errors import ParseError, TokenError from sqlglot.expressions.core import ( Alias, Anonymous, Boolean, Column, Condition, EQ, Expr, Identifier, Literal, Null, Placeholder, TABLE_PARTS, Var, logger, SAFE_IDENTIFIER_RE, maybe_parse, maybe_copy, to_identifier, convert, alias_, column, ) from sqlglot.expressions.datatypes import DataType, DType, Interval from sqlglot.expressions.query import ( CTE, From, Schema, Select, Table, TableAlias, Tuple, Values, Where, With, ) from sqlglot.expressions.ddl import Alter, AlterRename, RenameColumn from sqlglot.expressions.dml import Delete, Insert, Merge, Update, When, Whens from sqlglot.expressions.functions import Case, Cast from sqlglot.expressions.array import Array if t.TYPE_CHECKING: from collections.abc import Sequence, Iterable from sqlglot.dialects.dialect import DialectType from sqlglot.expressions.core import ExpOrStr, Func from sqlglot.expressions.datatypes import DATA_TYPE from sqlglot.expressions.query import Query def select(*expressions: ExpOrStr, dialect: DialectType = None, **opts) -> Select: """ Initializes a syntax tree from one or multiple SELECT expressions. 
def update(
    table: str | Table,
    properties: t.Optional[dict] = None,
    where: t.Optional[ExpOrStr] = None,
    from_: t.Optional[ExpOrStr] = None,
    with_: t.Optional[t.Dict[str, ExpOrStr]] = None,
    dialect: DialectType = None,
    **opts,
) -> Update:
    """
    Builds an UPDATE statement.

    Example:
        >>> update("my_table", {"x": 1, "y": "2", "z": None}, from_="baz_cte", where="baz_cte.id > 1 and my_table.id = baz_cte.id", with_={"baz_cte": "SELECT id FROM foo"}).sql()
        "WITH baz_cte AS (SELECT id FROM foo) UPDATE my_table SET x = 1, y = '2', z = NULL FROM baz_cte WHERE baz_cte.id > 1 AND my_table.id = baz_cte.id"

    Args:
        table: the table to update. It is parsed into a `Table` using `dialect` only
            (`opts` are not forwarded for this argument).
        properties: dictionary of properties to SET which are auto converted to sql objects eg None -> NULL
        where: sql conditional parsed into a WHERE statement
        from_: sql statement parsed into a FROM statement
        with_: dictionary of CTE aliases / select statements to include in a WITH clause.
        dialect: the dialect used to parse the input expressions.
        **opts: other options to use to parse the input expressions.

    Returns:
        Update: the syntax tree for the UPDATE statement.
    """
    # Options shared by every parse call below, except the one for `table`.
    parse_opts = {"dialect": dialect, **opts}

    statement = Update(this=maybe_parse(table, into=Table, dialect=dialect))

    if properties:
        assignments = [
            EQ(this=maybe_parse(target, **parse_opts), expression=convert(value))
            for target, value in properties.items()
        ]
        statement.set("expressions", assignments)

    if from_:
        from_clause = maybe_parse(from_, into=From, prefix="FROM", **parse_opts)
        statement.set("from_", from_clause)

    # A ready-made condition only needs to be wrapped, not parsed from text.
    where_input = Where(this=where) if isinstance(where, Condition) else where
    if where_input:
        where_clause = maybe_parse(where_input, into=Where, prefix="WHERE", **parse_opts)
        statement.set("where", where_clause)

    if with_:
        ctes = [
            alias_(CTE(this=maybe_parse(query, **parse_opts)), cte_name, table=True)
            for cte_name, query in with_.items()
        ]
        statement.set("with_", With(expressions=ctes))

    return statement
def insert(
    expression: ExpOrStr,
    into: ExpOrStr,
    columns: t.Optional[Sequence[str | Identifier]] = None,
    overwrite: t.Optional[bool] = None,
    returning: t.Optional[ExpOrStr] = None,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Insert:
    """
    Builds an INSERT statement.

    Example:
        >>> insert("VALUES (1, 2, 3)", "tbl").sql()
        'INSERT INTO tbl VALUES (1, 2, 3)'

    Args:
        expression: the sql string or expression of the INSERT statement
        into: the tbl to insert data to.
        columns: optionally the table's column names.
        overwrite: whether to INSERT OVERWRITE or not.
        returning: sql conditional parsed into a RETURNING statement
        dialect: the dialect used to parse the input expressions.
        copy: whether to copy the expression.
        **opts: other options to use to parse the input expressions.

    Returns:
        Insert: the syntax tree for the INSERT statement.
    """
    source = maybe_parse(expression, dialect=dialect, copy=copy, **opts)
    target: Table | Schema = maybe_parse(into, into=Table, dialect=dialect, copy=copy, **opts)

    if columns:
        # An explicit column list turns the target into `table (col, ...)`.
        column_ids = [to_identifier(name, copy=copy) for name in columns]
        target = Schema(this=target, expressions=column_ids)

    insert_expr = Insert(this=target, expression=source, overwrite=overwrite)
    if not returning:
        return insert_expr

    # The node was created just above, so it can be modified without copying.
    return insert_expr.returning(returning, dialect=dialect, copy=False, **opts)
"WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)", ... into="my_table", ... using="source_table", ... on="my_table.id = source_table.id").sql() 'MERGE INTO my_table USING source_table ON my_table.id = source_table.id WHEN MATCHED THEN UPDATE SET col1 = source_table.col1 WHEN NOT MATCHED THEN INSERT (col1) VALUES (source_table.col1)' Args: *when_exprs: The WHEN clauses specifying actions for matched and unmatched rows. into: The target table to merge data into. using: The source table to merge data from. on: The join condition for the merge. returning: The columns to return from the merge. dialect: The dialect used to parse the input expressions. copy: Whether to copy the expression. **opts: Other options to use to parse the input expressions. Returns: Merge: The syntax tree for the MERGE statement. """ expressions: t.List[Expr] = [] for when_expr in when_exprs: expression = maybe_parse(when_expr, dialect=dialect, copy=copy, into=Whens, **opts) expressions.extend([expression] if isinstance(expression, When) else expression.expressions) merge = Merge( this=maybe_parse(into, dialect=dialect, copy=copy, **opts), using=maybe_parse(using, dialect=dialect, copy=copy, **opts), on=maybe_parse(on, dialect=dialect, copy=copy, **opts), whens=Whens(expressions=expressions), ) if returning: merge = merge.returning(returning, dialect=dialect, copy=False, **opts) if isinstance(using_clause := merge.args.get("using"), Alias): using_clause.replace(alias_(using_clause.this, using_clause.args["alias"], table=True)) return merge def parse_identifier(name: str | Identifier, dialect: DialectType = None) -> Identifier: """ Parses a given string into an identifier. Args: name: The name to parse into an identifier. dialect: The dialect to parse against. Returns: The identifier ast node. 
""" try: expression = maybe_parse(name, dialect=dialect, into=Identifier) except (ParseError, TokenError): expression = to_identifier(name) return expression INTERVAL_STRING_RE = re.compile(r"\s*(-?[0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)\s*") INTERVAL_DAY_TIME_RE = re.compile( r"\s*-?\s*\d+(?:\.\d+)?\s+(?:-?(?:\d+:)?\d+:\d+(?:\.\d+)?|-?(?:\d+:){1,2}|:)\s*" ) def to_interval(interval: str | Expr) -> Interval: """Builds an interval expression from a string like '1 day' or '5 months'.""" if isinstance(interval, Literal): if not interval.is_string: raise ValueError("Invalid interval string.") interval = interval.this interval = maybe_parse(f"INTERVAL {interval}") assert isinstance(interval, Interval) return interval def to_table( sql_path: str | Table, dialect: DialectType = None, copy: bool = True, **kwargs ) -> Table: """ Create a table expression from a `[catalog].[schema].[table]` sql path. Catalog and schema are optional. If a table is passed in then that table is returned. Args: sql_path: a `[catalog].[schema].[table]` string. dialect: the source dialect according to which the table name will be parsed. copy: Whether to copy a table if it is passed in. kwargs: the kwargs to instantiate the resulting `Table` expression with. Returns: A table expression. """ if isinstance(sql_path, Table): return maybe_copy(sql_path, copy=copy) try: table = maybe_parse(sql_path, into=Table, dialect=dialect) except ParseError: catalog, db, this = split_num_words(sql_path, ".", 3) if not this: raise table = table_(this, db=db, catalog=catalog) for k, v in kwargs.items(): table.set(k, v) return table def to_column( sql_path: str | Column, quoted: t.Optional[bool] = None, dialect: DialectType = None, copy: bool = True, **kwargs, ) -> Column: """ Create a column from a `[table].[column]` sql path. Table is optional. If a column is passed in then that column is returned. Args: sql_path: a `[table].[column]` string. quoted: Whether or not to force quote identifiers. 
def cast(
    expression: ExpOrStr, to: DATA_TYPE, copy: bool = True, dialect: DialectType = None, **opts
) -> Cast:
    """Cast an expression to a data type.

    Example:
        >>> cast('x + 1', 'int').sql()
        'CAST(x + 1 AS INT)'

    Args:
        expression: The expression to cast.
        to: The datatype to cast to.
        copy: Whether to copy the supplied expressions.
        dialect: The target dialect. It is used to parse `expression` and to build `to`, and also
            to prevent a re-cast in the following scenario:
            - The expression to be cast is already a exp.Cast expression
            - The existing cast is to a type that is logically equivalent to new type

            For example, if :expression='CAST(x as DATETIME)' and :to=Type.TIMESTAMP,
            but in the target dialect DATETIME is mapped to TIMESTAMP, then we will NOT return
            `CAST(x (as DATETIME) as TIMESTAMP)` and instead just return the original expression
            `CAST(x as DATETIME)`.

            This is to prevent it being output as a double cast `CAST(x (as TIMESTAMP) as TIMESTAMP)`
            once the DATETIME -> TIMESTAMP mapping is applied in the target dialect generator.
        **opts: Other options forwarded to both the expression parser and `DataType.build`.

    Returns:
        The parsed expression itself when it is already a Cast to the requested (or an
        equivalent) type, otherwise a new Cast instance wrapping it.
    """
    expr = maybe_parse(expression, copy=copy, dialect=dialect, **opts)
    data_type = DataType.build(to, copy=copy, dialect=dialect, **opts)

    # Don't re-cast if the expression is already a cast to the correct type
    if isinstance(expr, Cast):
        # Imported lazily inside the function rather than at module import time.
        from sqlglot.dialects.dialect import Dialect

        target_dialect = Dialect.get_or_raise(dialect)
        type_mapping = target_dialect.generator_class.TYPE_MAPPING

        existing_cast_type: DType = expr.to.this
        new_cast_type: DType = data_type.this
        # Compare what each type maps to in the target generator; a type without a
        # mapping entry falls back to its own `.value`.
        types_are_equivalent = type_mapping.get(
            existing_cast_type, existing_cast_type.value
        ) == type_mapping.get(new_cast_type, new_cast_type.value)

        if expr.is_type(data_type) or types_are_equivalent:
            return expr

    expr = Cast(this=expr, to=data_type)
    # Record the target type on the new node.
    expr.type = data_type

    return expr
""" return Table( this=to_identifier(table, quoted=quoted) if table else None, db=to_identifier(db, quoted=quoted) if db else None, catalog=to_identifier(catalog, quoted=quoted) if catalog else None, alias=TableAlias(this=to_identifier(alias)) if alias else None, ) def values( values: Iterable[tuple[object, ...] | Tuple], alias: t.Optional[str] = None, columns: t.Optional[Iterable[str] | dict[str, DataType]] = None, ) -> Values: """Build VALUES statement. Example: >>> values([(1, '2')]).sql() "VALUES (1, '2')" Args: values: values statements that will be converted to SQL alias: optional alias columns: Optional list of ordered column names or ordered dictionary of column names to types. If either are provided then an alias is also required. Returns: Values: the Values expression object """ if columns and not alias: raise ValueError("Alias is required when providing columns") return Values( expressions=[convert(tup) for tup in values], alias=( TableAlias(this=to_identifier(alias), columns=[to_identifier(x) for x in columns]) if columns else (TableAlias(this=to_identifier(alias)) if alias else None) ), ) def var(name: t.Optional[ExpOrStr]) -> Var: """Build a SQL variable. Example: >>> repr(var('x')) 'Var(this=x)' >>> repr(var(column('x', table='y'))) 'Var(this=x)' Args: name: The name of the var or an expression who's name will become the var. Returns: The new variable node. """ if not name: raise ValueError("Cannot convert empty name into var.") if isinstance(name, Expr): name = name.name return Var(this=name) def rename_table( old_name: str | Table, new_name: str | Table, dialect: DialectType = None, ) -> Alter: """Build ALTER TABLE... RENAME... expression Args: old_name: The old name of the table new_name: The new name of the table dialect: The dialect to parse the table. 
def rename_column(
    table_name: str | Table,
    old_column_name: str | Column,
    new_column_name: str | Column,
    exists: t.Optional[bool] = None,
    dialect: DialectType = None,
) -> Alter:
    """Build ALTER TABLE... RENAME COLUMN... expression

    Args:
        table_name: Name of the table
        old_column_name: The old name of the column
        new_column_name: The new name of the column
        exists: Whether to add the `IF EXISTS` clause
        dialect: The dialect to parse the table/column.

    Returns:
        Alter table expression
    """
    # Resolve the table first, then the two columns.
    target = to_table(table_name, dialect=dialect)
    source_col = to_column(old_column_name, dialect=dialect)
    renamed_col = to_column(new_column_name, dialect=dialect)

    rename_action = RenameColumn(this=source_col, to=renamed_col, exists=exists)
    return Alter(this=target, kind="TABLE", actions=[rename_action])
""" stack = list(expression.dfs(prune=prune)) while stack: node = stack.pop() new_node = fun(node) if new_node is not node: node.replace(new_node) if isinstance(new_node, Expr): stack.append(new_node) return new_node def find_tables(expression: Expr) -> t.Set[Table]: """ Find all tables referenced in a query. Args: expressions: The query to find the tables in. Returns: A set of all the tables. """ from sqlglot.optimizer.scope import traverse_scope return { table for scope in traverse_scope(expression) for table in scope.tables if isinstance(table, Table) and table.name and table.name not in scope.cte_sources } def column_table_names(expression: Expr, exclude: str = "") -> t.Set[str]: """ Return all table names referenced through columns in an expression. Example: >>> import sqlglot >>> sorted(column_table_names(sqlglot.parse_one("a.b AND c.d AND c.e"))) ['a', 'c'] Args: expression: expression to find table names. exclude: a table name to exclude Returns: A list of unique names. """ return { table for table in (column.table for column in expression.find_all(Column)) if table and table != exclude } def table_name(table: Table | str, dialect: DialectType = None, identify: bool = False) -> str: """Get the full name of a table as a string. Args: table: Table expression node or string. dialect: The dialect to generate the table name for. identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote. Examples: >>> from sqlglot import exp, parse_one >>> table_name(parse_one("select * from a.b.c").find(exp.Table)) 'a.b.c' Returns: The table name. 
""" expr = maybe_parse(table, into=Table, dialect=dialect) if not expr: raise ValueError(f"Cannot parse {table}") return ".".join( ( part.sql(dialect=dialect, identify=True, copy=False, comments=False) if identify or not SAFE_IDENTIFIER_RE.match(part.name) else part.name ) for part in expr.parts ) def normalize_table_name(table: str | Table, dialect: DialectType = None, copy: bool = True) -> str: """Returns a case normalized table name without quotes. Args: table: the table to normalize dialect: the dialect to use for normalization rules copy: whether to copy the expression. Examples: >>> normalize_table_name("`A-B`.c", dialect="bigquery") 'A-B.c' """ from sqlglot.optimizer.normalize_identifiers import normalize_identifiers return ".".join( p.name for p in normalize_identifiers( to_table(table, dialect=dialect, copy=copy), dialect=dialect ).parts ) def replace_tables( expression: E, mapping: t.Dict[str, str], dialect: DialectType = None, copy: bool = True ) -> E: """Replace all tables in expression according to the mapping. Args: expression: expression node to be transformed and replaced. mapping: mapping of table names. dialect: the dialect of the mapping table copy: whether to copy the expression. Examples: >>> from sqlglot import exp, parse_one >>> replace_tables(parse_one("select * from a.b"), {"a.b": "c"}).sql() 'SELECT * FROM c /* a.b */' Returns: The mapped expression. 
""" mapping = {normalize_table_name(k, dialect=dialect): v for k, v in mapping.items()} def _replace_tables(node: Expr) -> Expr: if isinstance(node, Table) and node.meta.get("replace") is not False: original = normalize_table_name(node, dialect=dialect) new_name = mapping.get(original) if new_name: table = to_table( new_name, **{k: v for k, v in node.args.items() if k not in TABLE_PARTS}, dialect=dialect, ) table.add_comments([original]) return table return node return expression.transform(_replace_tables, copy=copy) # type: ignore def replace_placeholders(expression: Expr, *args, **kwargs) -> Expr: """Replace placeholders in an expression. Args: expression: expression node to be transformed and replaced. args: positional names that will substitute unnamed placeholders in the given order. kwargs: keyword arguments that will substitute named placeholders. Examples: >>> from sqlglot import exp, parse_one >>> replace_placeholders( ... parse_one("select * from :tbl where ? = ?"), ... exp.to_identifier("str_col"), "b", tbl=exp.to_identifier("foo") ... ).sql() "SELECT * FROM foo WHERE str_col = 'b'" Returns: The mapped expression. """ def _replace_placeholders(node: Expr, args, **kwargs) -> Expr: if isinstance(node, Placeholder): if node.this: new_name = kwargs.get(node.this) if new_name is not None: return convert(new_name) else: try: return convert(next(args)) except StopIteration: pass return node return expression.transform(_replace_placeholders, iter(args), **kwargs) def expand( expression: Expr, sources: t.Dict[str, Query | t.Callable[[], Query]], dialect: DialectType = None, copy: bool = True, ) -> Expr: """Transforms an expression by expanding all referenced sources into subqueries. 
def func(name: str, *args, copy: bool = True, dialect: DialectType = None, **kwargs) -> Func:
    """
    Returns a Func expression.

    Examples:
        >>> func("abs", 5).sql()
        'ABS(5)'

        >>> func("cast", this=5, to=DataType.build("DOUBLE")).sql()
        'CAST(5 AS DOUBLE)'

    Args:
        name: the name of the function to build.
        args: the args used to instantiate the function of interest.
        copy: whether to copy the argument expressions.
        dialect: the source dialect.
        kwargs: the kwargs used to instantiate the function of interest.

    Note:
        The arguments `args` and `kwargs` are mutually exclusive.

    Returns:
        An instance of the function of interest, or an anonymous function, if `name` doesn't
        correspond to an existing `sqlglot.expressions.Func` class.

    Raises:
        ValueError: if both `args` and `kwargs` are given, if `name` is known to the dialect's
            parser but cannot be built from keyword arguments, or if the built function
            reports a validation error message.
    """
    if args and kwargs:
        raise ValueError("Can't use both args and kwargs to instantiate a function.")

    # Imported lazily inside the function rather than at module import time.
    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise(dialect)

    # Every argument value is run through `maybe_parse` with the resolved dialect.
    converted: t.List[Expr] = [maybe_parse(arg, dialect=dialect, copy=copy) for arg in args]
    kwargs = {key: maybe_parse(value, dialect=dialect, copy=copy) for key, value in kwargs.items()}

    # Look the name up among the functions known to the dialect's parser.
    constructor = dialect.parser_class.FUNCTIONS.get(name.upper())
    if constructor:
        if converted:
            # Positional form: try the builder with the argument list alone and retry
            # with the dialect on TypeError.
            # NOTE(review): presumably some builders require a `dialect` argument - confirm.
            try:
                function = constructor(converted)
            except TypeError:
                function = constructor(converted, dialect=dialect)
        elif constructor.__name__ == "from_arg_list":
            # The builder is a bound `from_arg_list`; its `__self__` is called directly
            # with the keyword arguments.
            function = constructor.__self__(**kwargs)  # type: ignore
        else:
            # Custom builder that can't take kwargs: fall back to the by-name registry.
            from sqlglot.expressions import FUNCTION_BY_NAME as _FUNCTION_BY_NAME

            constructor = _FUNCTION_BY_NAME.get(name.upper())
            if constructor:
                function = constructor(**kwargs)
            else:
                raise ValueError(
                    f"Unable to convert '{name}' into a Func. Either manually construct "
                    "the Func expression of interest or parse the function call."
                )
    else:
        # Unknown name: build an Anonymous call, using the positional arguments as
        # its `expressions` unless keyword arguments were supplied.
        kwargs = kwargs or {"expressions": converted}
        function = Anonymous(this=name, **kwargs)

    # Raise on the first validation error message, if there is one.
    for error_message in function.error_messages(converted):
        raise ValueError(error_message)

    return function
def tuple_(
    *expressions: ExpOrStr, copy: bool = True, dialect: DialectType = None, **kwargs
) -> Tuple:
    """
    Returns a tuple.

    Examples:
        >>> tuple_(1, 'x').sql()
        '(1, x)'

    Args:
        expressions: the expressions to add to the tuple.
        copy: whether to copy the argument expressions.
        dialect: the source dialect.
        kwargs: other options forwarded to the parser for each expression.

    Returns:
        A tuple expression.
    """
    # The same parse options apply to every member.
    parse_opts = {"copy": copy, "dialect": dialect, **kwargs}
    members = [maybe_parse(member, **parse_opts) for member in expressions]
    return Tuple(expressions=members)
""" return Null() def apply_index_offset( this: Expr, expressions: t.List[E], offset: int, dialect: DialectType = None, ) -> t.List[E]: if not offset or len(expressions) != 1: return expressions expression = expressions[0] from sqlglot.optimizer.annotate_types import annotate_types from sqlglot.optimizer.simplify import simplify if not this.type: annotate_types(this, dialect=dialect) if t.cast(DataType, this.type).this not in ( DType.UNKNOWN, DType.ARRAY, ): return expressions if not expression.type: annotate_types(expression, dialect=dialect) if t.cast(DataType, expression.type).this in DataType.INTEGER_TYPES: logger.info("Applying array index offset (%s)", offset) expression = simplify(expression + offset) return [expression] return expressions NONNULL_CONSTANTS = ( Literal, Boolean, ) CONSTANTS = ( Literal, Boolean, Null, ) ================================================ FILE: sqlglot/expressions/constraints.py ================================================ """sqlglot expressions constraints.""" from __future__ import annotations from sqlglot.expressions.core import Expression, ColumnConstraintKind class IndexConstraintOption(Expression): arg_types = { "key_block_size": False, "using": False, "parser": False, "comment": False, "visible": False, "engine_attr": False, "secondary_engine_attr": False, } class Reference(Expression): arg_types = {"this": True, "expressions": False, "options": False} class ColumnConstraint(Expression): arg_types = {"this": False, "kind": True} @property def kind(self) -> ColumnConstraintKind | Reference: return self.args["kind"] class AutoIncrementColumnConstraint(Expression, ColumnConstraintKind): pass class ZeroFillColumnConstraint(ColumnConstraint): arg_types = {} class PeriodForSystemTimeConstraint(Expression, ColumnConstraintKind): arg_types = {"this": True, "expression": True} class CaseSpecificColumnConstraint(Expression, ColumnConstraintKind): arg_types = {"not_": True} class CharacterSetColumnConstraint(Expression, 
class GeneratedAsIdentityColumnConstraint(Expression, ColumnConstraintKind):
    """Column constraint kind for a generated identity column.

    NOTE(review): presumably models SQL's `GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY`
    clause and its sequence options - confirm against the parser/generator.
    """

    # this: True -> ALWAYS, this: False -> BY DEFAULT
    # Every argument below is declared with False, i.e. none of them is required.
    arg_types = {
        "this": False,
        "expression": False,
        "on_null": False,
        "start": False,
        "increment": False,
        "minvalue": False,
        "maxvalue": False,
        "cycle": False,
        "order": False,
    }
NotNullColumnConstraint(Expression, ColumnConstraintKind): arg_types = {"allow_null": False} class OnUpdateColumnConstraint(Expression, ColumnConstraintKind): pass class PrimaryKeyColumnConstraint(Expression, ColumnConstraintKind): arg_types = {"desc": False, "options": False} class TitleColumnConstraint(Expression, ColumnConstraintKind): pass class UniqueColumnConstraint(Expression, ColumnConstraintKind): arg_types = { "this": False, "index_type": False, "on_conflict": False, "nulls": False, "options": False, } class UppercaseColumnConstraint(Expression, ColumnConstraintKind): arg_types = {} class WatermarkColumnConstraint(Expression): arg_types = {"this": True, "expression": True} class PathColumnConstraint(Expression, ColumnConstraintKind): pass class ProjectionPolicyColumnConstraint(Expression, ColumnConstraintKind): pass class ComputedColumnConstraint(Expression, ColumnConstraintKind): arg_types = {"this": True, "persisted": False, "not_null": False, "data_type": False} class InOutColumnConstraint(Expression, ColumnConstraintKind): arg_types = {"input_": False, "output": False, "variadic": False} class Constraint(Expression): arg_types = {"this": True, "expressions": True} class ForeignKey(Expression): arg_types = { "expressions": False, "reference": False, "delete": False, "update": False, "options": False, } class ColumnPrefix(Expression): arg_types = {"this": True, "expression": True} class PrimaryKey(Expression): arg_types = {"this": False, "expressions": True, "options": False, "include": False} class IndexParameters(Expression): arg_types = { "using": False, "include": False, "columns": False, "with_storage": False, "partition_by": False, "tablespace": False, "where": False, "on": False, } class AddConstraint(Expression): arg_types = {"expressions": True} ================================================ FILE: sqlglot/expressions/core.py ================================================ """sqlglot expressions core - base classes, traits, operators, and 
helpers.""" from __future__ import annotations import datetime import logging import math import numbers import re import sys import textwrap import typing as t from collections import deque from copy import deepcopy from decimal import Decimal from functools import reduce from collections.abc import Iterator, Sequence, Collection from sqlglot._typing import E from sqlglot.errors import ParseError from sqlglot.helper import ( camel_to_snake_case, ensure_list, seq_get, to_bool, trait, ) from sqlglot.tokenizer_core import Token from builtins import type as Type if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType from sqlglot.expressions.datatypes import DATA_TYPE, DataType, DType, Interval from sqlglot.expressions.query import Select logger = logging.getLogger("sqlglot") SQLGLOT_META: str = "sqlglot.meta" SQLGLOT_ANONYMOUS = "sqlglot.anonymous" TABLE_PARTS = ("this", "db", "catalog") COLUMN_PARTS = ("this", "table", "db", "catalog") POSITION_META_KEYS: t.Tuple[str, ...] = ("line", "col", "start", "end") UNITTEST: bool = "unittest" in sys.modules or "pytest" in sys.modules @trait class Expr: """ The base class for all expressions in a syntax tree. Each Expr encapsulates any necessary context, such as its child expressions, their names (arg keys), and whether a given child expression is optional or not. Attributes: key: a unique key for each class in the Expr hierarchy. This is useful for hashing and representing expressions as strings. arg_types: determines the arguments (child nodes) supported by an expression. It maps arg keys to booleans that indicate whether the corresponding args are optional. parent: a reference to the parent expression (or None, in case of root expressions). arg_key: the arg key an expression is associated with, i.e. the name its parent expression uses to refer to it. index: the index of an expression if it is inside of a list argument in its parent. comments: a list of comments that are associated with a given expression. 
This is used in order to preserve comments when transpiling SQL code. type: the `sqlglot.expressions.DataType` type of an expression. This is inferred by the optimizer, in order to enable some transformations that require type information. meta: a dictionary that can be used to store useful metadata for a given expression. Example: >>> class Foo(Expr): ... arg_types = {"this": True, "expression": False} The above definition informs us that Foo is an Expr that requires an argument called "this" and may also optionally receive an argument called "expression". Args: args: a mapping used for retrieving the arguments of an expression, given their arg keys. """ key: t.ClassVar[str] = "expression" arg_types: t.ClassVar[t.Dict[str, bool]] = {"this": True} required_args: t.ClassVar[t.Set[str]] = {"this"} is_var_len_args: t.ClassVar[bool] = False _hash_raw_args: t.ClassVar[bool] = False is_subquery: t.ClassVar[bool] = False is_cast: t.ClassVar[bool] = False args: t.Dict[str, t.Any] parent: t.Optional[Expr] arg_key: t.Optional[str] index: t.Optional[int] comments: t.Optional[t.List[str]] _type: t.Optional[DataType] _meta: t.Optional[t.Dict[str, t.Any]] _hash: t.Optional[int] @classmethod def __init_subclass__(cls, **kwargs: t.Any) -> None: super().__init_subclass__(**kwargs) # When an Expr class is created, its key is automatically set # to be the lowercase version of the class' name. 
def __init__(self, **args: object) -> None:
    """
    Initializes a node from its keyword arguments.

    Args:
        args: the child values of this node, keyed by arg key. The mapping is stored
            as-is on `self.args`.
    """
    # Bookkeeping that is only filled in later (caches and annotations).
    self._hash: t.Optional[int] = None
    self._meta: t.Optional[t.Dict[str, t.Any]] = None
    self._type: t.Optional[DataType] = None
    self.comments: t.Optional[t.List[str]] = None

    # Position of this node inside its parent; a freshly built node is a root.
    self.index: t.Optional[int] = None
    self.arg_key: t.Optional[str] = None
    self.parent: t.Optional[Expr] = None

    self.args: t.Dict[str, t.Any] = args

    # Primitive nodes skip the parent wiring of their args entirely.
    if self.is_primitive:
        return

    for key, child in args.items():
        self._set_parent(key, child)
__deepcopy__(self, memo: t.Any) -> Expr: raise NotImplementedError def copy(self: E) -> E: raise NotImplementedError def add_comments(self, comments: t.Optional[t.List[str]] = None, prepend: bool = False) -> None: raise NotImplementedError def pop_comments(self) -> t.List[str]: raise NotImplementedError def append(self, arg_key: str, value: t.Any) -> None: raise NotImplementedError def set( self, arg_key: str, value: object, index: t.Optional[int] = None, overwrite: bool = True, ) -> None: raise NotImplementedError def _set_parent(self, arg_key: str, value: object, index: t.Optional[int] = None) -> None: raise NotImplementedError @property def depth(self) -> int: raise NotImplementedError def iter_expressions(self: E, reverse: bool = False) -> Iterator[E]: raise NotImplementedError def find(self, *expression_types: Type[E], bfs: bool = True) -> t.Optional[E]: raise NotImplementedError def find_all(self, *expression_types: Type[E], bfs: bool = True) -> Iterator[E]: raise NotImplementedError def find_ancestor(self, *expression_types: Type[E]) -> t.Optional[E]: raise NotImplementedError @property def parent_select(self) -> t.Optional[Select]: raise NotImplementedError @property def same_parent(self) -> bool: raise NotImplementedError def root(self) -> Expr: raise NotImplementedError def walk( self, bfs: bool = True, prune: t.Optional[t.Callable[[Expr], bool]] = None ) -> Iterator[Expr]: raise NotImplementedError def dfs(self, prune: t.Optional[t.Callable[[Expr], bool]] = None) -> Iterator[Expr]: raise NotImplementedError def bfs(self, prune: t.Optional[t.Callable[[Expr], bool]] = None) -> Iterator[Expr]: raise NotImplementedError def unnest(self) -> Expr: raise NotImplementedError def unalias(self) -> Expr: raise NotImplementedError def unnest_operands(self) -> t.Tuple[Expr, ...]: raise NotImplementedError def flatten(self, unnest: bool = True) -> Iterator[Expr]: raise NotImplementedError def to_s(self) -> str: raise NotImplementedError def sql(self, dialect: 
DialectType = None, **opts: t.Any) -> str: raise NotImplementedError def transform( self, fun: t.Callable, *args: object, copy: bool = True, **kwargs: object ) -> t.Any: raise NotImplementedError def replace(self, expression: t.Any) -> t.Any: raise NotImplementedError def pop(self: E) -> E: raise NotImplementedError def assert_is(self, type_: Type[E]) -> E: raise NotImplementedError def error_messages(self, args: t.Optional[Sequence[object]] = None) -> list[str]: raise NotImplementedError def dump(self) -> t.Any: """ Dump this Expr to a JSON-serializable dict. """ from sqlglot.serde import dump return dump(self) @classmethod def load(cls, obj: t.Any) -> Expr: """ Load a dict (as returned by `Expr.dump`) into an Expr instance. """ from sqlglot.serde import load result = load(obj) assert isinstance(result, Expr) return result def and_( self, *expressions: t.Optional[ExpOrStr], dialect: DialectType = None, copy: bool = True, wrap: bool = True, **opts: t.Any, ) -> Condition: raise NotImplementedError def or_( self, *expressions: t.Optional[ExpOrStr], dialect: DialectType = None, copy: bool = True, wrap: bool = True, **opts: t.Any, ) -> Condition: raise NotImplementedError def not_(self, copy: bool = True) -> Not: raise NotImplementedError def update_positions( self: E, other: t.Optional[Token | Expr] = None, line: t.Optional[int] = None, col: t.Optional[int] = None, start: t.Optional[int] = None, end: t.Optional[int] = None, ) -> E: raise NotImplementedError def as_( self, alias: str | Identifier, quoted: t.Optional[bool] = None, dialect: DialectType = None, copy: bool = True, **opts: t.Any, ) -> Expr: raise NotImplementedError def _binop(self, klass: Type[E], other: t.Any, reverse: bool = False) -> E: raise NotImplementedError def __getitem__(self, other: ExpOrStr | t.Tuple[ExpOrStr, ...]) -> Bracket: raise NotImplementedError def __iter__(self) -> Iterator: raise NotImplementedError def isin( self, *expressions: t.Any, query: t.Optional[ExpOrStr] = None, unnest: 
t.Optional[ExpOrStr] | Collection[ExpOrStr] = None, copy: bool = True, **opts, ) -> In: raise NotImplementedError def between( self, low: t.Any, high: t.Any, copy: bool = True, symmetric: t.Optional[bool] = None, **opts, ) -> Between: raise NotImplementedError def is_(self, other: ExpOrStr) -> Is: raise NotImplementedError def like(self, other: ExpOrStr) -> Like: raise NotImplementedError def ilike(self, other: ExpOrStr) -> ILike: raise NotImplementedError def eq(self, other: t.Any) -> EQ: raise NotImplementedError def neq(self, other: t.Any) -> NEQ: raise NotImplementedError def rlike(self, other: ExpOrStr) -> RegexpLike: raise NotImplementedError def div(self, other: ExpOrStr, typed: bool = False, safe: bool = False) -> Div: raise NotImplementedError def asc(self, nulls_first: bool = True) -> Ordered: raise NotImplementedError def desc(self, nulls_first: bool = False) -> Ordered: raise NotImplementedError def __lt__(self, other: t.Any) -> LT: raise NotImplementedError def __le__(self, other: t.Any) -> LTE: raise NotImplementedError def __gt__(self, other: t.Any) -> GT: raise NotImplementedError def __ge__(self, other: t.Any) -> GTE: raise NotImplementedError def __add__(self, other: t.Any) -> Add: raise NotImplementedError def __radd__(self, other: t.Any) -> Add: raise NotImplementedError def __sub__(self, other: t.Any) -> Sub: raise NotImplementedError def __rsub__(self, other: t.Any) -> Sub: raise NotImplementedError def __mul__(self, other: t.Any) -> Mul: raise NotImplementedError def __rmul__(self, other: t.Any) -> Mul: raise NotImplementedError def __truediv__(self, other: t.Any) -> Div: raise NotImplementedError def __rtruediv__(self, other: t.Any) -> Div: raise NotImplementedError def __floordiv__(self, other: t.Any) -> IntDiv: raise NotImplementedError def __rfloordiv__(self, other: t.Any) -> IntDiv: raise NotImplementedError def __mod__(self, other: t.Any) -> Mod: raise NotImplementedError def __rmod__(self, other: t.Any) -> Mod: raise 
def __hash__(self) -> int:
    """
    Returns a structural hash of this node, computing and caching it on first use.

    The hash is derived from the node's `key` and its args, visited in sorted key order.
    Unless `_hash_raw_args` is set, string values are lowercased before hashing and
    `None` / `False` values contribute nothing (inside a list, such an entry still
    contributes its arg key). The result is cached in `_hash` for this node and for
    every descendant that did not have a cached hash yet.
    """
    if self._hash is None:
        # Collect this node and all descendants without a cached hash, breadth-first,
        # so the computation below is iterative rather than recursive.
        nodes: t.List[Expr] = []
        queue: t.Deque[Expr] = deque()
        queue.append(self)

        while queue:
            node = queue.popleft()
            nodes.append(node)

            for child in node.iter_expressions():
                if child._hash is None:
                    queue.append(child)

        # Walk the collected nodes in reverse, so children are hashed (and cached)
        # before the parents whose hash tuples include them.
        for node in reversed(nodes):
            hash_ = hash(node.key)

            if node._hash_raw_args:
                for k, v in sorted(node.args.items()):
                    if v:
                        hash_ = hash((hash_, k, v))
            else:
                for k, v in sorted(node.args.items()):
                    vt = type(v)

                    if vt is list:
                        for x in v:
                            if x is not None and x is not False:
                                hash_ = hash((hash_, k, x.lower() if type(x) is str else x))
                            else:
                                hash_ = hash((hash_, k))
                    elif v is not None and v is not False:
                        hash_ = hash((hash_, k, v.lower() if vt is str else v))

            node._hash = hash_

    # Compare against None explicitly: 0 is a valid hash value, and a truthiness
    # check would reject it.
    assert self._hash is not None
    return self._hash
def text(self, key: str) -> str:
    """
    Returns the text of the argument stored under `key`.

    Plain strings are returned unchanged. Identifier, Literal and Var nodes yield
    their `this` value, while Star and Null nodes yield their `name`. Any other
    value, including a missing argument, results in an empty string.
    """
    value = self.args.get(key)

    if isinstance(value, str):
        result = value
    elif isinstance(value, (Identifier, Literal, Var)):
        result = value.this
    elif isinstance(value, (Star, Null)):
        result = value.name
    else:
        result = ""

    return result
def __deepcopy__(self, memo: t.Any) -> Expr:
    """
    Builds a deep copy of this tree iteratively, using an explicit stack.

    Args:
        memo: the memo dictionary supplied by `copy.deepcopy`. It is not used here.

    Returns:
        A new root node of the same class, with every descendant expression copied.
    """
    # Each node is instantiated without arguments and populated afterwards.
    root = self.__class__()
    # Pairs of (original node, its still-empty copy) awaiting population.
    stack: t.List[t.Tuple[Expr, Expr]] = [(self, root)]

    while stack:
        node, copy = stack.pop()

        # Per-node metadata: comments, type and meta are deep-copied, the cached
        # hash is carried over as-is.
        if node.comments is not None:
            copy.comments = deepcopy(node.comments)
        if node._type is not None:
            copy._type = deepcopy(node._type)
        if node._meta is not None:
            copy._meta = deepcopy(node._meta)
        if node._hash is not None:
            copy._hash = node._hash

        for k, vs in node.args.items():
            if isinstance(vs, Expr):
                # Push an empty copy of the child and attach that same object
                # (stack[-1][-1]) to the parent copy.
                # NOTE(review): `set` clears `_hash` on the receiver and its ancestors,
                # so a hash carried over above does not survive for nodes that hold
                # an Expr-valued arg.
                stack.append((vs, vs.__class__()))
                copy.set(k, stack[-1][-1])
            elif type(vs) is list:
                # Start from an empty list so that an empty list arg is preserved.
                copy.args[k] = []

                for v in vs:
                    if isinstance(v, Expr):
                        stack.append((v, v.__class__()))
                        copy.append(k, stack[-1][-1])
                    else:
                        # Non-expression list items are shared, not copied.
                        copy.append(k, v)
            else:
                # Any other value is assigned by reference.
                copy.args[k] = vs

    return root
def iter_expressions(self: E, reverse: bool = False) -> Iterator[E]:
    """
    Yields the direct child expressions of this node, exploding list args.

    Args:
        reverse: whether to visit the args, and the items of each list arg,
            in reverse order.
    """
    arg_values = self.args.values()
    if reverse:
        arg_values = reversed(arg_values)

    for arg in arg_values:
        if isinstance(arg, list):
            items = reversed(arg) if reverse else arg
            for item in items:
                if isinstance(item, Expr):
                    yield t.cast(E, item)
        elif isinstance(arg, Expr):
            yield t.cast(E, arg)
def find_ancestor(self, *expression_types: Type[E]) -> t.Optional[E]:
    """
    Walks up the parent chain and returns the closest ancestor matching a given type.

    Args:
        expression_types: the expression type(s) to match.

    Returns:
        The nearest matching ancestor, or the falsy end of the parent chain
        (None for a root) if no ancestor matches.
    """
    candidate = self.parent

    while candidate:
        if isinstance(candidate, expression_types):
            break
        candidate = candidate.parent

    return candidate  # type: ignore[return-value]
def dfs(self, prune: t.Optional[t.Callable[[Expr], bool]] = None) -> Iterator[Expr]:
    """
    Returns a generator object which visits all nodes in this tree in
    the DFS (Depth-first) order.

    Args:
        prune: optional callable, invoked on each node after it has been yielded.
            If it returns True, the children of that node are not visited.

    Returns:
        The generator object.
    """
    pending = [self]

    while pending:
        current = pending.pop()
        yield current

        if not (prune and prune(current)):
            # Children are pushed in reverse so that the first child is popped first.
            pending.extend(current.iter_expressions(reverse=True))
def transform(
    self, fun: t.Callable, *args: object, copy: bool = True, **kwargs: object
) -> t.Any:
    """
    Visits all tree nodes (excluding already transformed ones)
    and applies the given transformation function to each node.

    Args:
        fun: a function which takes a node as an argument and returns a
            new transformed node or the same node without modifications. If the function
            returns None, then the corresponding node will be removed from the syntax tree.
        *args: extra positional arguments forwarded to `fun` after the node.
        copy: if set to True a new tree instance is constructed, otherwise the tree is
            modified in place.
        **kwargs: extra keyword arguments forwarded to `fun`.

    Returns:
        The transformed tree.

    Raises:
        AssertionError: if no truthy root was produced, e.g. because `fun` returned
            None for the root node.
    """
    root: t.Any = None
    new_node: t.Any = None

    # `dfs` calls `prune` only after the loop body has run for the yielded node, so by
    # then `new_node` holds the result of `fun` for that node. If `fun` returned a
    # different object, the branch is pruned: the children of the node that was
    # replaced are not visited.
    for node in (self.copy() if copy else self).dfs(prune=lambda n: n is not new_node):
        # Read the node's position before calling `fun`
        # (presumably because `fun` may detach or move the node -- TODO confirm).
        parent, arg_key, index = node.parent, node.arg_key, node.index
        new_node = fun(node, *args, **kwargs)

        if not root:
            # The first visited node is the root, so its result becomes the new root.
            root = new_node
        elif parent and arg_key and new_node is not node:
            # Splice the replacement into the slot the original node occupied.
            # Passing None through `set` removes the arg or list entry.
            parent.set(arg_key, new_node, index)

    assert root
    return root
def error_messages(self, args: t.Optional[Sequence[object]] = None) -> list[str]:
    """
    Validates this expression and reports every problem found.

    Args:
        args: a sequence of values that were used to instantiate a Func expression.
            It is only used to check that the number of provided arguments does not
            exceed the number of arguments the function supports.

    Returns:
        A list with one message per problem; empty if the expression is valid.

    Raises:
        TypeError: when `UNITTEST` is set and an arg key is not declared in `arg_types`.
    """
    if UNITTEST:
        declared = self.arg_types
        for k in self.args:
            if k in declared:
                continue
            raise TypeError(f"Unexpected keyword: '{k}' for {self.__class__}")

    # The list is only allocated once the first problem is found.
    errors: t.Optional[list[str]] = None

    for k in self.required_args:
        v = self.args.get(k)
        if v is not None and not (isinstance(v, list) and not v):
            continue

        message = f"Required keyword: '{k}' missing for {self.__class__}"
        if errors is None:
            errors = [message]
        else:
            errors.append(message)

    # The argument-count check only applies to fixed-arity Func nodes.
    if not args or self.is_var_len_args or not isinstance(self, Func):
        return errors or []
    if len(args) <= len(self.arg_types):
        return errors or []

    message = (
        f"The number of provided arguments ({len(args)}) is greater than "
        f"the maximum number of supported arguments ({len(self.arg_types)})"
    )
    if errors is None:
        errors = [message]
    else:
        errors.append(message)

    return errors
def update_positions(
    self: E,
    other: t.Optional[Token | Expr] = None,
    line: t.Optional[int] = None,
    col: t.Optional[int] = None,
    start: t.Optional[int] = None,
    end: t.Optional[int] = None,
) -> E:
    """
    Records source positions in this expression's meta.

    Args:
        other: a token or expression to take the positions from. For an expression,
            only the position keys present in its meta are copied.
        line: the line number to use if other is None
        col: the column number to use if other is None
        start: the start char index to use if other is None
        end: the end char index to use if other is None

    Returns:
        This expression, after the update.
    """
    if isinstance(other, Token):
        self.meta.update(line=other.line, col=other.col, start=other.start, end=other.end)
    elif other is None:
        self.meta.update(line=line, col=col, start=start, end=end)
    else:
        source_meta = other._meta
        if source_meta:
            self.meta.update(
                (k, source_meta[k]) for k in POSITION_META_KEYS if k in source_meta
            )

    return self
def isin(
    self,
    *expressions: t.Any,
    query: t.Optional[ExpOrStr] = None,
    unnest: t.Optional[ExpOrStr] | Collection[ExpOrStr] = None,
    copy: bool = True,
    **opts,
) -> In:
    """
    Builds an `In` node that tests this expression against values, a query or an unnest.

    Args:
        *expressions: the candidate values; each one is passed through `convert`.
        query: an optional query, parsed with `maybe_parse`. If the parsed result is
            not already a subquery, it is turned into one.
        unnest: optional expression(s) to unnest, each parsed with `maybe_parse`.
        copy: forwarded to the helpers that copy or convert the involved values.
        **opts: extra options forwarded to `maybe_parse`.

    Returns:
        The new In node.
    """
    subquery = None
    if query:
        subquery = maybe_parse(query, copy=copy, **opts)
        if subquery and not subquery.is_subquery:
            subquery = subquery.subquery(copy=False)

    target = maybe_copy(self, copy)
    candidates = [convert(e, copy=copy) for e in expressions]

    unnest_node = None
    if unnest:
        unnest_items = [
            maybe_parse(t.cast(ExpOrStr, item), copy=copy, **opts)
            for item in ensure_list(unnest)
        ]
        unnest_node = _lazy_unnest(expressions=unnest_items)

    return In(this=target, expressions=candidates, query=subquery, unnest=unnest_node)
def __neg__(self) -> Neg:
    """Return the arithmetic negation of a copy of this node.

    The copy is passed through `_wrap`, so a Binary operand ends up inside a Paren.
    """
    operand = _wrap(self.copy(), Binary)
    return Neg(this=operand)
@trait
class Func(Condition):
    """
    The base class for all function expressions.

    Attributes:
        is_var_len_args (bool): if set to True the last argument defined in arg_types will be
            treated as a variable length argument and the argument's value will be stored as a list.
        _sql_names (list): the SQL name (1st item in the list) and aliases (subsequent items) for this
            function expression. These values are used to map this node to a name during parsing as
            well as to provide the function's name during SQL string generation. By default the SQL
            name is set to the expression's class name transformed to snake case.
    """

    is_var_len_args: t.ClassVar[bool] = False
    _sql_names: t.ClassVar[t.List[str]] = []

    @classmethod
    def from_arg_list(cls, args):
        """Instantiate the function from a positional argument list.

        Arguments are matched to the keys of `arg_types` in declaration order. Surplus
        keys or surplus arguments are ignored by `zip`, except for variable-length
        functions, where every argument past the fixed ones is collected into a list
        stored under the last key.
        """
        arg_keys = list(cls.arg_types)

        if cls.is_var_len_args:
            # The last declared argument absorbs all remaining positional values.
            fixed_keys, var_len_key = arg_keys[:-1], arg_keys[-1]
            args_dict = dict(zip(fixed_keys, args))
            args_dict[var_len_key] = args[len(fixed_keys) :]
        else:
            args_dict = dict(zip(arg_keys, args))

        return cls(**args_dict)

    @classmethod
    def sql_names(cls):
        """Return the SQL name followed by its aliases.

        Falls back to the snake-cased class name when `_sql_names` is empty.

        Raises:
            NotImplementedError: when called on `Func` itself rather than a subclass.
        """
        if cls is Func:
            raise NotImplementedError(
                "SQL name is only supported by concrete function implementations"
            )
        if not cls._sql_names:
            return [camel_to_snake_case(cls.__name__)]
        return cls._sql_names

    @classmethod
    def sql_name(cls):
        """Return the primary SQL name, i.e. the first entry of `sql_names()`."""
        sql_names = cls.sql_names()
        assert sql_names, f"Expected non-empty 'sql_names' for Func: {cls.__name__}."
        return sql_names[0]

    @classmethod
    def default_parser_mappings(cls):
        """Map every SQL name of this function to its `from_arg_list` constructor."""
        return {name: cls.from_arg_list for name in cls.sql_names()}
def to_py(self) -> int | str | Decimal:
    """Return the python value of this literal.

    Non-numeric literals yield their raw `this` value. Numeric literals are parsed
    as `int` first and as `Decimal` when `int` rejects the text.
    """
    if not self.is_number:
        return self.this

    raw = self.this
    try:
        return int(raw)
    except ValueError:
        return Decimal(raw)
class Dot(Expression, Binary):
    @property
    def is_star(self) -> bool:
        # A dotted expression is a star when its right-hand side is one.
        return self.expression.is_star

    @property
    def name(self) -> str:
        # The name is taken from the right-hand side operand.
        return self.expression.name

    @property
    def output_name(self) -> str:
        return self.name

    @classmethod
    def build(cls, expressions: Sequence[Expr]) -> Dot:
        """Build a Dot object with a sequence of expressions.

        The expressions are folded left to right, so `[a, b, c]` becomes
        `Dot(Dot(a, b), c)`.

        Raises:
            ValueError: if fewer than two expressions are given.
        """
        if len(expressions) < 2:
            raise ValueError("Dot requires >= 2 expressions.")

        # `Dot` (not `cls`) is instantiated on purpose: this keeps the node type
        # produced by the original implementation.
        return t.cast(Dot, reduce(lambda x, y: Dot(this=x, expression=y), expressions))

    @property
    def parts(self) -> t.List[Expr]:
        """Return the parts of a table / column in order catalog, db, table."""
        this, *parts = self.flatten()

        # Work on the reversed list so the head's own parts can be appended in
        # COLUMN_PARTS order and the whole list flipped back at the end.
        # NOTE(review): COLUMN_PARTS is defined outside this block; presumably it
        # lists the arg names from the innermost name outwards - confirm.
        parts.reverse()

        for arg in COLUMN_PARTS:
            part = this.args.get(arg)

            if isinstance(part, Expr):
                parts.append(part)

        parts.reverse()
        return parts
@trait
class TimeUnit(Expr):
    """Mixin that normalizes the `unit` arg into an upper-cased `Var` on construction."""

    # Abbreviated unit spellings and the full unit names they stand for.
    UNABBREVIATED_UNIT_NAME: t.ClassVar[t.Dict[str, str]] = {
        "D": "DAY",
        "H": "HOUR",
        "M": "MINUTE",
        "MS": "MILLISECOND",
        "NS": "NANOSECOND",
        "Q": "QUARTER",
        "S": "SECOND",
        "US": "MICROSECOND",
        "W": "WEEK",
        "Y": "YEAR",
    }

    # Exact node types whose name may be reinterpreted as a unit.
    VAR_LIKE: t.ClassVar[t.Tuple[Type[Expr], ...]] = (Column, Literal, Var)

    def __init__(self, **args: object) -> None:
        super().__init__(**args)

        raw_unit = self.args.get("unit")

        # A column only qualifies when it consists of a single part.
        is_var_like = (
            raw_unit
            and type(raw_unit) in TimeUnit.VAR_LIKE
            and (not isinstance(raw_unit, Column) or len(raw_unit.parts) == 1)
        )

        if is_var_like:
            full_name = self.UNABBREVIATED_UNIT_NAME.get(raw_unit.name) or raw_unit.name
            normalized = Var(this=full_name.upper())
            self.args["unit"] = normalized
            self._set_parent("unit", normalized)
            return

        # Matched by class name; the Week class itself is not referenced here.
        if type(raw_unit).__name__ == "Week":
            raw_unit.set("this", Var(this=raw_unit.this.name.upper()))  # type: ignore[union-attr]

    @property
    def unit(self) -> t.Optional[Expr]:
        """The `unit` arg, or None when it is not set."""
        return self.args.get("unit")
class Neg(Unary):
    """Unary negation."""

    def to_py(self) -> int | Decimal:
        """Return the negated python value of a numeric operand.

        Non-numeric nodes defer to the parent implementation.
        """
        if not self.is_number:
            return super().to_py()

        return self.this.to_py() * -1
def convert(value: t.Any, copy: bool = False) -> Expr:
    """Convert a python value into an expression object.

    The checks below run in a fixed order and the first match wins, so the
    order of the branches is part of the behavior.

    Args:
        value: A python object.
        copy: Whether to copy `value` (only applies to Exprs and collections).

    Returns:
        The equivalent expression object.

    Raises:
        ValueError: if no branch knows how to convert `value`.
    """
    if isinstance(value, Expr):
        return maybe_copy(value, copy)
    if isinstance(value, str):
        return Literal.string(value)
    # bool is tested before numbers.Number because python's bool is a subclass of int.
    if isinstance(value, bool):
        return Boolean(this=value)
    # Both None and a float NaN map to NULL.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return Null()
    if isinstance(value, numbers.Number):
        return Literal.number(value)
    if isinstance(value, bytes):
        from sqlglot.expressions.query import HexString as _HexString

        return _HexString(this=value.hex())
    # datetime.datetime is tested before datetime.date because it is a subclass of date.
    if isinstance(value, datetime.datetime):
        datetime_literal = Literal.string(value.isoformat(sep=" "))

        tz = None
        if value.tzinfo:
            # str(tzinfo) works for zoneinfo.ZoneInfo, pytz.timezone and datetime.datetime.utc and returns
            # IANA timezone names like "America/Los_Angeles" instead of abbreviations like "PDT".
            # This is for consistency with other timezone handling functions in SQLGlot.
            tz = Literal.string(str(value.tzinfo))

        from sqlglot.expressions.temporal import TimeStrToTime as _TimeStrToTime

        return _TimeStrToTime(this=datetime_literal, zone=tz)
    if isinstance(value, datetime.date):
        date_literal = Literal.string(value.strftime("%Y-%m-%d"))
        from sqlglot.expressions.temporal import DateStrToDate as _DateStrToDate

        return _DateStrToDate(this=date_literal)
    if isinstance(value, datetime.time):
        time_literal = Literal.string(value.isoformat())
        from sqlglot.expressions.temporal import TsOrDsToTime as _TsOrDsToTime

        return _TsOrDsToTime(this=time_literal)
    if isinstance(value, tuple):
        # Tuples exposing `_fields` (as namedtuples do) become structs keyed by field name.
        if hasattr(value, "_fields"):
            from sqlglot.expressions.array import Struct as _Struct

            return _Struct(
                expressions=[
                    PropertyEQ(
                        this=to_identifier(k), expression=convert(getattr(value, k), copy=copy)
                    )
                    for k in value._fields
                ]
            )
        from sqlglot.expressions.query import Tuple as _Tuple

        return _Tuple(expressions=[convert(v, copy=copy) for v in value])
    if isinstance(value, list):
        from sqlglot.expressions.array import Array as _Array

        return _Array(expressions=[convert(v, copy=copy) for v in value])
    if isinstance(value, dict):
        from sqlglot.expressions.array import Array as _Array, Map as _Map

        # Keys and values are converted into two parallel arrays.
        return _Map(
            keys=_Array(expressions=[convert(k, copy=copy) for k in value]),
            values=_Array(expressions=[convert(v, copy=copy) for v in value.values()]),
        )
    # Last resort: any object with a __dict__ becomes a struct of its instance attributes.
    if hasattr(value, "__dict__"):
        from sqlglot.expressions.array import Struct as _Struct

        return _Struct(
            expressions=[
                PropertyEQ(this=to_identifier(k), expression=convert(v, copy=copy))
                for k, v in value.__dict__.items()
            ]
        )
    raise ValueError(f"Cannot convert {value}")
@t.overload
def maybe_copy(instance: None, copy: bool = True) -> None: ...


@t.overload
def maybe_copy(instance: E, copy: bool = True) -> E: ...


def maybe_copy(instance, copy=True):
    """Return `instance.copy()` when `copy` is true and `instance` is truthy.

    In every other case (copying disabled, `None`, or any other falsy instance)
    the very same object is handed back.
    """
    if copy and instance:
        return instance.copy()
    return instance
def _apply_list_builder(
    *expressions,
    instance,
    arg,
    append=True,
    copy=True,
    prefix=None,
    into=None,
    dialect=None,
    **opts,
):
    """Parse `expressions` and store them as the list-valued arg `arg` of `instance`.

    `None` entries are skipped. When `append` is true and the arg already holds a
    non-empty list, the new items are added after the existing ones; otherwise
    they replace it. The (possibly copied) instance is returned.
    """
    target = maybe_copy(instance, copy)

    new_items = []
    for candidate in expressions:
        if candidate is None:
            continue
        new_items.append(
            maybe_parse(
                sql_or_expression=candidate,
                into=into,
                prefix=prefix,
                dialect=dialect,
                **opts,
            )
        )

    current = target.args.get(arg)
    combined = current + new_items if append and current else new_items

    target.set(arg, combined)
    return target
into=None, append=True, copy=True, dialect=None, **opts, ): filtered = [exp for exp in expressions if exp is not None and exp != ""] if not filtered: return instance inst = maybe_copy(instance, copy) existing = inst.args.get(arg) if append and existing is not None: filtered = [existing.this if into else existing] + filtered node = and_(*filtered, dialect=dialect, copy=copy, **opts) inst.set(arg, into(this=node) if into else node) return inst def _combine( expressions: Sequence[t.Optional[ExpOrStr]], operator: t.Any, dialect: DialectType = None, copy: bool = True, wrap: bool = True, **opts, ) -> Expr: conditions = [ condition(expression, dialect=dialect, copy=copy, **opts) for expression in expressions if expression is not None ] this, *rest = conditions if rest and wrap: this = _wrap(this, Connector) for expression in rest: this = operator(this=this, expression=_wrap(expression, Connector) if wrap else expression) return this @t.overload def _wrap(expression: None, kind: Type[Expr]) -> None: ... @t.overload def _wrap(expression: E, kind: Type[Expr]) -> E | Paren: ... def _wrap(expression: t.Optional[E], kind: Type[Expr]) -> t.Optional[E] | Paren: return Paren(this=expression) if isinstance(expression, kind) else expression def _apply_set_operation( *expressions: ExpOrStr, set_operation: Type, distinct: bool = True, dialect: DialectType = None, copy: bool = True, **opts, ) -> t.Any: return reduce( lambda x, y: set_operation(this=x, expression=y, distinct=distinct, **opts), (maybe_parse(e, dialect=dialect, copy=copy, **opts) for e in expressions), ) SAFE_IDENTIFIER_RE: t.Pattern[str] = re.compile(r"^[_a-zA-Z][\w]*$") @t.overload def to_identifier(name: None, quoted: t.Optional[bool] = None, copy: bool = True) -> None: ... @t.overload def to_identifier( name: int | str | Identifier, quoted: t.Optional[bool] = None, copy: bool = True ) -> Identifier: ... def to_identifier(name, quoted=None, copy=True): """Builds an identifier. 
def condition(expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts) -> Expr:
    """
    Initialize a logical condition expression.

    Example:
        >>> condition("x=1").sql()
        'x = 1'

        This is helpful for composing larger logical syntax trees:
        >>> where = condition("x=1")
        >>> where = where.and_("y=1")
        >>> where.sql()
        'x = 1 AND y = 1'

    Args:
        expression: the SQL code string to parse into a Condition.
            If an Expr instance is passed, it is not parsed again.
        dialect: the dialect used to parse the input expression (only relevant
            when the input is a SQL string).
        copy: Whether to copy `expression` (only applies to expressions).
        **opts: other options to use to parse the input expression (only relevant
            when the input is a SQL string).

    Returns:
        The new Condition instance
    """
    parsed = maybe_parse(expression, into=Condition, dialect=dialect, copy=copy, **opts)
    return parsed
def or_(
    *expressions: t.Optional[ExpOrStr],
    dialect: DialectType = None,
    copy: bool = True,
    wrap: bool = True,
    **opts,
) -> Condition:
    """
    Combine multiple conditions with an OR logical operator.

    Example:
        >>> or_("x=1", or_("y=1", "z=1")).sql()
        'x = 1 OR (y = 1 OR z = 1)'

    Args:
        *expressions: SQL code strings or Expr instances to combine;
            strings are parsed, Expr instances are not.
        dialect: the dialect used to parse string inputs.
        copy: whether to copy `expressions` (only applies to Exprs).
        wrap: whether to wrap the operands in `Paren`s. Enabled by default to
            avoid precedence issues; it can be disabled when the resulting AST
            gets too deep and triggers recursion-related problems.
        **opts: other options to use to parse the input expressions.

    Returns:
        The new condition
    """
    combined = _combine(expressions, Or, dialect, copy=copy, wrap=wrap, **opts)
    return t.cast(Condition, combined)
def paren(expression: ExpOrStr, copy: bool = True) -> Paren:
    """
    Wrap an expression in parentheses.

    Example:
        >>> paren("5 + 3").sql()
        '(5 + 3)'

    Args:
        expression: the SQL code string to parse, or an Expr instance,
            which is not parsed again.
        copy: whether to copy the expression or not.

    Returns:
        The wrapped expression.
    """
    inner = maybe_parse(expression, copy=copy)
    return Paren(this=inner)
@t.overload
def column(
    col: str | Identifier,
    table: t.Optional[str | Identifier] = None,
    db: t.Optional[str | Identifier] = None,
    catalog: t.Optional[str | Identifier] = None,
    *,
    fields: Collection[t.Union[str, Identifier]],
    quoted: t.Optional[bool] = None,
    copy: bool = True,
) -> Dot:
    pass


@t.overload
def column(
    col: str | Identifier | Star,
    table: t.Optional[str | Identifier] = None,
    db: t.Optional[str | Identifier] = None,
    catalog: t.Optional[str | Identifier] = None,
    *,
    fields: t.Literal[None] = None,
    quoted: t.Optional[bool] = None,
    copy: bool = True,
) -> Column:
    pass


def column(
    col,
    table=None,
    db=None,
    catalog=None,
    *,
    fields=None,
    quoted=None,
    copy=True,
):
    """
    Construct a Column, optionally extended with dotted field accesses.

    Args:
        col: Column name. A `Star` is used as-is instead of being turned into an identifier.
        table: Table name.
        db: Database name.
        catalog: Catalog name.
        fields: Additional fields using dots. When non-empty, the result is built
            with `Dot.build` starting from the column.
        quoted: Whether to force quotes on the column's identifiers.
        copy: Whether to copy identifiers if passed in.

    Returns:
        The new Column instance, or the `Dot.build` result when `fields` is given.
    """

    def _ident(part):
        # Every name part is converted with the same quoting / copy settings.
        return to_identifier(part, quoted=quoted, copy=copy)

    name = col if isinstance(col, Star) else _ident(col)
    result = Column(
        this=name,
        table=_ident(table),
        db=_ident(db),
        catalog=_ident(catalog),
    )

    if not fields:
        return result

    return Dot.build((result, *map(_ident, fields)))
class DataType(Expression):
    # Expression node describing a SQL data type. `this` holds the type tag
    # (a DType member in every construction visible in this class).
    arg_types = {
        "this": True,
        "expressions": False,
        "nested": False,
        "values": False,
        "kind": False,
        "nullable": False,
    }

    # Alias that exposes the DType enum as `DataType.Type`.
    Type: t.ClassVar[t.Type[DType]] = DType

    STRUCT_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.FILE,
        DType.NESTED,
        DType.OBJECT,
        DType.STRUCT,
        DType.UNION,
    }

    ARRAY_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.ARRAY,
        DType.LIST,
    }

    # Members of STRUCT_TYPES and ARRAY_TYPES, plus MAP (spelled out literally).
    NESTED_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.FILE,
        DType.NESTED,
        DType.OBJECT,
        DType.STRUCT,
        DType.UNION,
        DType.ARRAY,
        DType.LIST,
        DType.MAP,
    }

    TEXT_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.CHAR,
        DType.NCHAR,
        DType.NVARCHAR,
        DType.TEXT,
        DType.VARCHAR,
        DType.NAME,
    }

    SIGNED_INTEGER_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.BIGINT,
        DType.INT,
        DType.INT128,
        DType.INT256,
        DType.MEDIUMINT,
        DType.SMALLINT,
        DType.TINYINT,
    }

    UNSIGNED_INTEGER_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.UBIGINT,
        DType.UINT,
        DType.UINT128,
        DType.UINT256,
        DType.UMEDIUMINT,
        DType.USMALLINT,
        DType.UTINYINT,
    }

    # Members of SIGNED_INTEGER_TYPES and UNSIGNED_INTEGER_TYPES, plus BIT.
    INTEGER_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.BIGINT,
        DType.INT,
        DType.INT128,
        DType.INT256,
        DType.MEDIUMINT,
        DType.SMALLINT,
        DType.TINYINT,
        DType.UBIGINT,
        DType.UINT,
        DType.UINT128,
        DType.UINT256,
        DType.UMEDIUMINT,
        DType.USMALLINT,
        DType.UTINYINT,
        DType.BIT,
    }

    FLOAT_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.DOUBLE,
        DType.FLOAT,
    }

    # Members of FLOAT_TYPES plus the decimal and money types.
    REAL_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.DOUBLE,
        DType.FLOAT,
        DType.BIGDECIMAL,
        DType.DECIMAL,
        DType.DECIMAL32,
        DType.DECIMAL64,
        DType.DECIMAL128,
        DType.DECIMAL256,
        DType.DECFLOAT,
        DType.MONEY,
        DType.SMALLMONEY,
        DType.UDECIMAL,
        DType.UDOUBLE,
    }

    # Members of INTEGER_TYPES followed by the members of REAL_TYPES.
    NUMERIC_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.BIGINT,
        DType.INT,
        DType.INT128,
        DType.INT256,
        DType.MEDIUMINT,
        DType.SMALLINT,
        DType.TINYINT,
        DType.UBIGINT,
        DType.UINT,
        DType.UINT128,
        DType.UINT256,
        DType.UMEDIUMINT,
        DType.USMALLINT,
        DType.UTINYINT,
        DType.BIT,
        DType.DOUBLE,
        DType.FLOAT,
        DType.BIGDECIMAL,
        DType.DECIMAL,
        DType.DECIMAL32,
        DType.DECIMAL64,
        DType.DECIMAL128,
        DType.DECIMAL256,
        DType.DECFLOAT,
        DType.MONEY,
        DType.SMALLMONEY,
        DType.UDECIMAL,
        DType.UDOUBLE,
    }

    TEMPORAL_TYPES: t.ClassVar[t.Set[DType]] = {
        DType.DATE,
        DType.DATE32,
        DType.DATETIME,
        DType.DATETIME2,
        DType.DATETIME64,
        DType.SMALLDATETIME,
        DType.TIME,
        DType.TIMESTAMP,
        DType.TIMESTAMPNTZ,
        DType.TIMESTAMPLTZ,
        DType.TIMESTAMPTZ,
        DType.TIMESTAMP_MS,
        DType.TIMESTAMP_NS,
        DType.TIMESTAMP_S,
        DType.TIMETZ,
    }

    @classmethod
    def build(
        cls,
        dtype: DATA_TYPE,
        dialect: DialectType = None,
        udt: bool = False,
        copy: bool = True,
        **kwargs,
    ) -> DataType:
        """
        Constructs a DataType object.

        Args:
            dtype: the data type of interest: a string, an Identifier / Dot (only
                accepted together with `udt=True`), a DType member or a DataType.
            dialect: the dialect to use for parsing `dtype`, in case it's a string.
            udt: when set to True, `dtype` will be used as-is if it can't be parsed into a
                DataType, thus creating a user-defined type.
            copy: whether to copy the data type. Only consulted when `dtype` is
                already a DataType.
            kwargs: additional arguments to pass in the constructor of DataType.
                They are not applied when `dtype` is already a DataType, because
                that branch returns before they are set.

        Returns:
            The constructed DataType object.

        Raises:
            ParseError: if a string `dtype` cannot be parsed and `udt` is False.
            ValueError: if `dtype` matches none of the supported input types.
        """
        # Imported here rather than at module level (presumably to avoid an import cycle).
        from sqlglot import parse_one

        if isinstance(dtype, str):
            # "UNKNOWN" (any casing) is mapped straight to the sentinel type, without parsing.
            if dtype.upper() == "UNKNOWN":
                return DataType(this=DType.UNKNOWN, **kwargs)

            try:
                data_type_exp = parse_one(
                    dtype, read=dialect, into=DataType, error_level=ErrorLevel.IGNORE
                )
            except ParseError:
                if udt:
                    # Keep the unparseable text verbatim as the user-defined type's kind.
                    return DataType(this=DType.USERDEFINED, kind=dtype, **kwargs)
                raise
        elif isinstance(dtype, (Identifier, Dot)) and udt:
            return DataType(this=DType.USERDEFINED, kind=dtype, **kwargs)
        elif isinstance(dtype, DType):
            data_type_exp = DataType(this=dtype)
        elif isinstance(dtype, DataType):
            return maybe_copy(dtype, copy)
        else:
            raise ValueError(f"Invalid data type: {type(dtype)}. Expected str or DType")

        # Only the parsed-string and DType branches reach this point.
        if kwargs:
            for k, v in kwargs.items():
                data_type_exp.set(k, v)
        return data_type_exp

    def is_type(self, *dtypes: DATA_TYPE, check_nullable: bool = False) -> bool:
        """
        Checks whether this DataType matches one of the provided data types.

        A candidate that carries type parameters (nested types, precision, ...) is
        compared using "structural equivalence" semantics, i.e. full expression
        equality, so e.g. ARRAY<INT> != ARRAY<FLOAT>. Otherwise only the type tag
        (`this`) is compared.

        Args:
            dtypes: the data types to compare this DataType to.
            check_nullable: whether to take the `nullable` arg into account for the
                comparison. If false, a nullable INT is treated as equivalent to INT,
                provided the candidate carries no type parameters and neither side
                is user-defined.

        Returns:
            True, if and only if there is a type in `dtypes` which is equal to this DataType.
        """
        self_is_nullable = self.args.get("nullable")
        for dtype in dtypes:
            # udt=True lets Identifier / Dot inputs and unparseable strings be
            # compared as user-defined types.
            other_type = DataType.build(dtype, copy=False, udt=True)
            other_is_nullable = other_type.args.get("nullable")
            if (
                other_type.expressions
                or (check_nullable and (self_is_nullable or other_is_nullable))
                or self.this == DType.USERDEFINED
                or other_type.this == DType.USERDEFINED
            ):
                # Full comparison of the two expressions.
                matches = self == other_type
            else:
                # Compare the type tags only.
                matches = self.this == other_type.this

            if matches:
                return True
        return False
class Create(Expression, DDL):
    """Expression node for a CREATE statement."""

    arg_types = {
        "with_": False,
        "this": True,
        "kind": True,
        "expression": False,
        "exists": False,
        "properties": False,
        "replace": False,
        "refresh": False,
        "unique": False,
        "indexes": False,
        "no_schema_binding": False,
        "begin": False,
        "clone": False,
        "concurrently": False,
        "clustered": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """The `kind` arg upper-cased; a missing or empty value is returned unchanged."""
        raw = self.args.get("kind")
        return raw.upper() if raw else raw
class SetItem(Expression):
    # Declares the argument slots of a SetItem node; every slot is flagged False.
    # NOTE(review): presumably False marks an optional arg — confirm against the
    # Expression base class.
    arg_types = {
        "this": False,
        "expressions": False,
        "kind": False,
        "collate": False,  # MySQL SET NAMES statement
        "global_": False,
    }
class Alter(Expression):
    """Expression node for an ALTER statement."""

    arg_types = {
        "this": False,
        "kind": True,
        "actions": True,
        "exists": False,
        "only": False,
        "options": False,
        "cluster": False,
        "not_valid": False,
        "check": False,
        "cascade": False,
    }

    @property
    def kind(self) -> t.Optional[str]:
        """The `kind` arg upper-cased; a missing or empty value is returned unchanged."""
        raw = self.args.get("kind")
        return raw.upper() if raw else raw

    @property
    def actions(self) -> t.List[Expr]:
        """The `actions` arg, or a new empty list when it is missing or empty."""
        found = self.args.get("actions")
        return found if found else []
class Execute(Expression):
    """Expression node for an EXECUTE statement."""

    arg_types = {"this": True, "expressions": False}

    @property
    def name(self) -> str:
        """The `name` of the node stored under `this`."""
        target = self.this
        return target.name
class Delete(Expression, DML):
    """Expression node for a DELETE statement, with builder helpers."""

    arg_types = {
        "with_": False,
        "this": False,
        "using": False,
        "where": False,
        "returning": False,
        "order": False,
        "limit": False,
        "tables": False,  # Multiple-Table Syntax (MySQL)
        "cluster": False,  # Clickhouse
    }

    def delete(
        self,
        table: ExpOrStr,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Set (or replace) the table that rows are deleted from.

        Example:
            >>> Delete().delete("tbl").sql()
            'DELETE FROM tbl'

        Args:
            table: the table from which to delete.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        # The table is stored under the "this" arg and parsed into a Table.
        return _apply_builder(
            instance=self,
            expression=table,
            arg="this",
            into=Table,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Delete:
        """
        Add to, or overwrite, the WHERE clause.

        Example:
            >>> Delete().delete("tbl").where("x = 'a' OR x < 'b'").sql()
            "DELETE FROM tbl WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse. `Expr` instances are used
                as-is. Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Delete: the modified expression.
        """
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            into=Where,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
class Update(Expression, DML):
    """Expression node for an UPDATE statement, with builder helpers."""

    arg_types = {
        "with_": False,
        "this": False,
        "expressions": False,
        "from_": False,
        "where": False,
        "returning": False,
        "order": False,
        "limit": False,
        "options": False,
    }

    def table(
        self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Update:
        """
        Choose the table that is updated.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            expression: the SQL code string to parse. A `Table` instance is used
                as-is; any other `Expr` instance is wrapped in a `Table`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        # The target table is stored under the "this" arg.
        return _apply_builder(
            instance=self,
            expression=expression,
            arg="this",
            into=Table,
            prefix=None,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def set_(
        self,
        *expressions: ExpOrStr,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Add to, or overwrite, the SET assignments.

        Example:
            >>> Update().table("my_table").set_("x = 1").sql()
            'UPDATE my_table SET x = 1'

        Args:
            *expressions: the SQL code strings to parse. `Expr` instances are used
                as-is. Multiple expressions are combined with a comma.
            append: if `True`, add the new expressions to any existing SET expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        # The assignments are kept as a list under the "expressions" arg.
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="expressions",
            into=Expr,
            prefix=None,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def where(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Add to, or overwrite, the WHERE clause.

        Example:
            >>> Update().table("tbl").set_("x = 1").where("x = 'a' OR x < 'b'").sql()
            "UPDATE tbl SET x = 1 WHERE x = 'a' OR x < 'b'"

        Args:
            *expressions: the SQL code strings to parse. `Expr` instances are used
                as-is. Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Update: the modified expression.
        """
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="where",
            into=Where,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def from_(
        self,
        expression: t.Optional[ExpOrStr] = None,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Choose the FROM clause.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").sql()
            'UPDATE my_table SET x = 1 FROM baz'

        Args:
            expression: the SQL code string to parse. A `From` instance is used
                as-is; any other `Expr` instance is wrapped in a `From`. When
                nothing (or a falsy value) is passed, no FROM is applied.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Update expression.
        """
        if expression:
            return _apply_builder(
                instance=self,
                expression=expression,
                arg="from_",
                into=From,
                prefix="FROM",
                dialect=dialect,
                copy=copy,
                **opts,
            )

        # Nothing to apply: still honour `copy` so the return contract is uniform.
        return maybe_copy(self, copy)

    def with_(
        self,
        alias: ExpOrStr,
        as_: ExpOrStr,
        recursive: t.Optional[bool] = None,
        materialized: t.Optional[bool] = None,
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Update:
        """
        Add to, or overwrite, the common table expressions.

        Example:
            >>> Update().table("my_table").set_("x = 1").from_("baz").with_("baz", "SELECT id FROM foo").sql()
            'WITH baz AS (SELECT id FROM foo) UPDATE my_table SET x = 1 FROM baz'

        Args:
            alias: the SQL code string to parse as the table name.
                An `Expr` instance is used as-is.
            as_: the SQL code string to parse as the table expression.
                An `Expr` instance is used as-is.
            recursive: set the RECURSIVE part of the expression. Defaults to `None`.
            materialized: set the MATERIALIZED part of the expression. Defaults to `None`.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified expression.
        """
        return _apply_cte_builder(
            self,
            alias,
            as_,
            recursive=recursive,
            materialized=materialized,
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )
class Case(Expression, Func):
    """Expression node for a CASE expression, with builder helpers."""

    arg_types = {"this": False, "ifs": True, "default": False}

    def when(self, condition: ExpOrStr, then: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Append a WHEN ... THEN ... branch.

        Args:
            condition: the branch condition; SQL string to parse, or an expression.
            then: the branch result; SQL string to parse, or an expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The Case expression with the new `If` appended to its "ifs" arg.
        """
        target = maybe_copy(self, copy)
        predicate = maybe_parse(condition, copy=copy, **opts)
        outcome = maybe_parse(then, copy=copy, **opts)
        target.append("ifs", If(this=predicate, true=outcome))
        return target

    def else_(self, condition: ExpOrStr, copy: bool = True, **opts) -> Case:
        """
        Set the ELSE (default) result.

        Args:
            condition: the default result; SQL string to parse, or an expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The Case expression with its "default" arg set.
        """
        target = maybe_copy(self, copy)
        fallback = maybe_parse(condition, copy=copy, **opts)
        target.set("default", fallback)
        return target
class Exists(Expression, Func, SubqueryPredicate):
    # Function node that also mixes in SubqueryPredicate.
    # NOTE(review): presumably True marks `this` as required and False marks
    # `expression` as optional — confirm against the Expression base class.
    arg_types = {"this": True, "expression": False}
class ReadCSV(Expression, Func):
    # Explicit SQL name for this function node.
    # NOTE(review): presumably overrides a name that would otherwise be derived
    # from the class name -- confirm against `Func` in core.py.
    _sql_names = ["READ_CSV"]
    # NOTE(review): presumably lets trailing call arguments be collected into
    # `expressions` -- confirm against `Func` in core.py.
    is_var_len_args = True
    arg_types = {"this": True, "expressions": False}
class Uuid(Expression, Func):
    # Four SQL spellings are listed for this one node.
    # NOTE(review): which dialect uses which spelling is not visible here -- confirm
    # in the dialect modules before relying on it.
    _sql_names = ["UUID", "GEN_RANDOM_UUID", "GENERATE_UUID", "UUID_STRING"]

    # Every key is flagged False, including `this`.
    arg_types = {"this": False, "name": False, "is_string": False}
class JSONExtract(Expression, Binary, Func):
    """JSON extraction node: a value in `this`, a path in `expression`, optional extra `expressions`."""

    arg_types = {
        "this": True,
        "expression": True,
        "only_json_types": False,
        "expressions": False,
        "variant_extract": False,
        "json_query": False,
        "option": False,
        "quote": False,
        "on_condition": False,
        "requires_json": False,
        "emits": False,
    }
    _sql_names = ["JSON_EXTRACT"]
    is_var_len_args = True

    @property
    def output_name(self) -> str:
        """The `output_name` of `expression`, or `""` when extra `expressions` are present."""
        if self.expressions:
            return ""
        return self.expression.output_name
class ParseJSON(Expression, Func):
    """Function node for parsing a JSON value; both `PARSE_JSON` and `JSON_PARSE` are listed as its SQL names."""

    # BigQuery and Snowflake have PARSE_JSON, Presto has JSON_PARSE.
    # Snowflake also has TRY_PARSE_JSON, which is represented using `safe`.
    _sql_names = ["PARSE_JSON", "JSON_PARSE"]
    arg_types = {"this": True, "expression": False, "safe": False}
class Ceil(Expression, Func):
    # Same argument keys and flags as `Floor` (`this`, `decimals`, `to`).
    arg_types = {"this": True, "decimals": False, "to": False}
    # Both spellings are listed as SQL names for this node.
    _sql_names = ["CEIL", "CEILING"]
class Getbit(Expression, Func):
    # Both spellings are listed as SQL names for this node.
    _sql_names = ["GETBIT", "GET_BIT"]

    # `zero_is_msb` means the most significant bit is indexed 0.
    # NOTE(review): presumably `this` is the value and `expression` the bit
    # position -- confirm against the parser/generator.
    arg_types = {"this": True, "expression": True, "zero_is_msb": False}
class DistributedByProperty(Property):
    # Replaces the `this`/`value` layout declared on `Property`.
    # `kind` is the only key flagged True; the other three are flagged False.
    arg_types = {"expressions": False, "kind": True, "buckets": False, "order": False}
# NOTE(review): the class name is spelled "Enviroment" (missing an "n"). It is a
# public name, so renaming it would break importers; a correctly spelled alias
# would be the backward-compatible way to fix this.
class EnviromentProperty(Property):
    # Replaces the `this`/`value` layout declared on `Property` with a single
    # `expressions` key.
    arg_types = {"expressions": True}
class PartitionBoundSpec(Expression):
    # Holds the pieces of a partition bound; every key is flagged False.
    # NOTE(review): the keyword mapping below looks like PostgreSQL's
    # `partition_bound_spec` grammar -- confirm against the parser.
    #
    # this -> IN / MODULUS
    # expression -> REMAINDER
    # from_expressions -> FROM (...)
    # to_expressions -> TO (...)
    arg_types = {
        "this": False,
        "expression": False,
        "from_expressions": False,
        "to_expressions": False,
    }
class RowFormatDelimitedProperty(Property):
    # Reference: https://cwiki.apache.org/confluence/display/hive/languagemanual+dml
    #
    # Replaces the `this`/`value` layout declared on `Property`; every key here
    # is flagged False.
    # NOTE(review): the keys presumably correspond to the clauses of Hive's
    # ROW FORMAT DELIMITED syntax -- confirm against the linked manual.
    arg_types = {
        "fields": False,
        "escaped": False,
        "collection_items": False,
        "map_keys": False,
        "lines": False,
        "null": False,
        "serde": False,
    }
class Properties(Expression):
    """Container node holding a list of property expressions."""

    arg_types = {"expressions": True}

    # Upper-case property name -> dedicated Property subclass, used by `from_dict`.
    NAME_TO_PROPERTY: t.ClassVar[t.Dict[str, t.Type[Property]]] = {
        "ALGORITHM": AlgorithmProperty,
        "AUTO_INCREMENT": AutoIncrementProperty,
        "CHARACTER SET": CharacterSetProperty,
        "CLUSTERED_BY": ClusteredByProperty,
        "COLLATE": CollateProperty,
        "COMMENT": SchemaCommentProperty,
        "CREDENTIALS": CredentialsProperty,
        "DEFINER": DefinerProperty,
        "DISTKEY": DistKeyProperty,
        "DISTRIBUTED_BY": DistributedByProperty,
        "DISTSTYLE": DistStyleProperty,
        "ENGINE": EngineProperty,
        "EXECUTE AS": ExecuteAsProperty,
        "FORMAT": FileFormatProperty,
        "LANGUAGE": LanguageProperty,
        "LOCATION": LocationProperty,
        "LOCK": LockProperty,
        "PARTITIONED_BY": PartitionedByProperty,
        "RETURNS": ReturnsProperty,
        "ROW_FORMAT": RowFormatProperty,
        "SORTKEY": SortKeyProperty,
        "ENCODE": EncodeProperty,
        "INCLUDE": IncludeProperty,
    }

    # Starts empty in the class body; the reverse mapping is assigned by the
    # module-level statement that follows the class.
    PROPERTY_TO_NAME: t.ClassVar[t.Dict[t.Type[Property], str]] = {}

    # CREATE property locations
    #
    # Form: schema specified
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     (b int) [POST_SCHEMA]
    #     with ([POST_WITH])
    #     index (b) [POST_INDEX]
    #
    # Form: alias selection
    #   create [POST_CREATE]
    #     table a [POST_NAME]
    #     as [POST_ALIAS] (select * from b) [POST_EXPRESSION]
    #     index (c) [POST_INDEX]
    Location: t.ClassVar[t.Type[PropertiesLocation]] = PropertiesLocation

    @classmethod
    def from_dict(cls, properties_dict: t.Dict) -> Properties:
        """
        Build a `Properties` node from a plain mapping.

        A key whose upper-cased form is in `NAME_TO_PROPERTY` becomes an instance of the
        mapped class with the converted value as `this`. Any other key becomes a generic
        `Property` whose `this` is the key as a string literal and whose `value` is the
        converted value.

        Args:
            properties_dict: mapping of property name to raw value; keys must support `.upper()`.

        Returns:
            A new instance of `cls` whose `expressions` follow the mapping's iteration order.
        """
        built = []
        for name, raw_value in properties_dict.items():
            known_cls = cls.NAME_TO_PROPERTY.get(name.upper())
            if not known_cls:
                built.append(Property(this=Literal.string(name), value=convert(raw_value)))
                continue
            built.append(known_cls(this=convert(raw_value)))

        return cls(expressions=built)
def _apply_cte_builder(
    instance: E,
    alias: ExpOrStr,
    as_: ExpOrStr,
    recursive: t.Optional[bool] = None,
    materialized: t.Optional[bool] = None,
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    scalar: t.Optional[bool] = None,
    **opts,
) -> E:
    """
    Build a `CTE` from `alias` / `as_` and attach it to `instance` under the `with_` arg.

    Args:
        instance: the expression that receives the CTE.
        alias: the CTE name; parsed into a `TableAlias`.
        as_: the CTE body; parsed with `maybe_parse`.
        recursive: when truthy, passed on as the `recursive` property for the `With` node.
        materialized: stored on the `CTE` as `materialized`.
        append: forwarded to `_apply_child_list_builder`.
        dialect: the dialect used to parse `alias` and `as_`.
        copy: forwarded to `maybe_parse` for `as_` and to `_apply_child_list_builder`.
        scalar: stored on the `CTE`; when truthy, a body that is not already a `Subquery`
            is wrapped in one.
        opts: other options forwarded to `maybe_parse`.

    Returns:
        Whatever `_apply_child_list_builder` returns for `instance`.
    """
    parsed_alias = maybe_parse(alias, dialect=dialect, into=TableAlias, **opts)
    body = maybe_parse(as_, dialect=dialect, copy=copy, **opts)

    if scalar:
        if not isinstance(body, Subquery):
            # scalar CTE must be wrapped in a subquery
            body = Subquery(this=body)

    new_cte = CTE(this=body, alias=parsed_alias, materialized=materialized, scalar=scalar)

    # Only a truthy `recursive` is passed on as a property.
    if recursive:
        with_properties = {"recursive": recursive}
    else:
        with_properties = {}

    return _apply_child_list_builder(
        new_cte,
        instance=instance,
        arg="with_",
        append=append,
        copy=copy,
        into=With,
        properties=with_properties,
    )
def subquery(self, alias: t.Optional[ExpOrStr] = None, copy: bool = True) -> Subquery:
    """
    Wrap this query in a `Subquery`.

    Example:
        >>> subquery = Select().select("x").from_("tbl").subquery()
        >>> Select().select("x").from_(subquery).sql()
        'SELECT x FROM (SELECT x FROM tbl)'

    Args:
        alias: an optional alias for the subquery. An `Expr` is used as-is; any other
            truthy value is turned into a `TableAlias` via `to_identifier`.
        copy: if `False`, modify this expression instance in-place.

    Returns:
        The new `Subquery` wrapping this query.
    """
    wrapped = maybe_copy(self, copy)

    if isinstance(alias, Expr):
        table_alias = alias
    elif alias:
        table_alias = TableAlias(this=to_identifier(alias))
    else:
        table_alias = None

    return Subquery(this=wrapped, alias=table_alias)
def order_by(
    self: Q,
    *expressions: t.Optional[ExpOrStr],
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    **opts,
) -> Q:
    """
    Set the ORDER BY expression.

    Example:
        >>> Select().from_("tbl").select("x").order_by("x DESC").sql()
        'SELECT x FROM tbl ORDER BY x DESC'

    Args:
        *expressions: the SQL code strings to parse.
            If an `Order` instance is passed, this is used as-is.
            If another `Expr` instance is passed, it will be wrapped in an `Order`.
        append: if `True`, add to any existing expressions.
            Otherwise, this flattens all the `Order` expressions into a single expression.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified query expression.
    """
    return _apply_child_list_builder(
        *expressions,
        instance=self,
        arg="order",
        append=append,
        copy=copy,
        prefix="ORDER BY",
        into=Order,
        dialect=dialect,
        **opts,
    )
def with_(
    self: Q,
    alias: ExpOrStr,
    as_: ExpOrStr,
    recursive: t.Optional[bool] = None,
    materialized: t.Optional[bool] = None,
    append: bool = True,
    dialect: DialectType = None,
    copy: bool = True,
    scalar: t.Optional[bool] = None,
    **opts,
) -> Q:
    """
    Append to or set the common table expressions.

    Example:
        >>> Select().with_("tbl2", as_="SELECT * FROM tbl").select("x").from_("tbl2").sql()
        'WITH tbl2 AS (SELECT * FROM tbl) SELECT x FROM tbl2'

    Args:
        alias: the SQL code string to parse as the table name.
            If an `Expr` instance is passed, this is used as-is.
        as_: the SQL code string to parse as the table expression.
            If an `Expr` instance is passed, it will be used as-is.
        recursive: set the RECURSIVE part of the expression. Defaults to `None`.
        materialized: set the MATERIALIZED part of the expression. Defaults to `None`.
        append: if `True`, add to any existing expressions.
            Otherwise, this resets the expressions.
        dialect: the dialect used to parse the input expression.
        copy: if `False`, modify this expression instance in-place.
        scalar: if `True`, this is a scalar common table expression. Defaults to `None`.
        opts: other options to use to parse the input expressions.

    Returns:
        The modified expression.
    """
    # All of the work happens in the module-level `_apply_cte_builder`.
    return _apply_cte_builder(
        self,
        alias,
        as_,
        recursive=recursive,
        materialized=materialized,
        append=append,
        dialect=dialect,
        copy=copy,
        scalar=scalar,
        **opts,
    )
class ColumnDef(Expression):
    """Column definition node; see `arg_types` for the accepted arguments."""

    arg_types = {
        "this": True,
        "kind": False,
        "constraints": False,
        "exists": False,
        "position": False,
        "default": False,
        "output": False,
    }

    @property
    def constraints(self) -> t.List[ColumnConstraint]:
        """Return the "constraints" arg, or an empty list when it is unset or falsy."""
        found = self.args.get("constraints")
        return found if found else []

    @property
    def kind(self) -> t.Optional[DataType]:
        """Return the "kind" arg, or `None` when it is not set."""
        args = self.args
        return args.get("kind")
class From(Expression):
    """FROM clause node; name lookups are delegated to the wrapped `this` expression."""

    @property
    def name(self) -> str:
        """Return the `name` of the wrapped `this` expression."""
        source = self.this
        return source.name

    @property
    def alias_or_name(self) -> str:
        """Return the `alias_or_name` of the wrapped `this` expression."""
        source = self.this
        return source.alias_or_name
class Join(Expression):
    """JOIN node with builder helpers for its ON and USING criteria."""

    arg_types = {
        "this": True,
        "on": False,
        "side": False,
        "kind": False,
        "using": False,
        "method": False,
        "global_": False,
        "hint": False,
        "match_condition": False,  # Snowflake
        "directed": False,  # Snowflake
        "expressions": False,
        "pivots": False,
    }

    @property
    def method(self) -> str:
        """Upper-cased text of the "method" arg."""
        raw = self.text("method")
        return raw.upper()

    @property
    def kind(self) -> str:
        """Upper-cased text of the "kind" arg."""
        raw = self.text("kind")
        return raw.upper()

    @property
    def side(self) -> str:
        """Upper-cased text of the "side" arg."""
        raw = self.text("side")
        return raw.upper()

    @property
    def hint(self) -> str:
        """Upper-cased text of the "hint" arg."""
        raw = self.text("hint")
        return raw.upper()

    @property
    def alias_or_name(self) -> str:
        """Return the `alias_or_name` of the joined `this` expression."""
        joined = self.this
        return joined.alias_or_name

    @property
    def is_semi_or_anti_join(self) -> bool:
        """Whether `kind` is "SEMI" or "ANTI"."""
        join_kind = self.kind
        return join_kind in ("SEMI", "ANTI")

    def on(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Append to or set the ON expressions.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).on("y = 1").sql()
            'JOIN x ON y = 1'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expr` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Join expression.
        """
        updated = _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="on",
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

        # Once a join condition is attached, a CROSS kind is cleared.
        if updated.kind == "CROSS":
            updated.set("kind", None)

        return updated

    def using(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Join:
        """
        Append to or set the USING expressions.

        Example:
            >>> import sqlglot
            >>> sqlglot.parse_one("JOIN x", into=Join).using("foo", "bla").sql()
            'JOIN x USING (foo, bla)'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expr` instance is passed, it will be used as-is.
            append: if `True`, concatenate the new expressions to the existing "using" list.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Join expression.
        """
        updated = _apply_list_builder(
            *expressions,
            instance=self,
            arg="using",
            append=append,
            dialect=dialect,
            copy=copy,
            **opts,
        )

        # Same CROSS handling as in `on`.
        if updated.kind == "CROSS":
            updated.set("kind", None)

        return updated
class Table(Expression, Selectable):
    """Table reference node (optionally qualified by db and catalog)."""

    arg_types = {
        "this": False,
        "alias": False,
        "db": False,
        "catalog": False,
        "laterals": False,
        "joins": False,
        "pivots": False,
        "hints": False,
        "system_time": False,
        "version": False,
        "format": False,
        "pattern": False,
        "ordinality": False,
        "when": False,
        "only": False,
        "partition": False,
        "changes": False,
        "rows_from": False,
        "sample": False,
        "indexed": False,
    }

    @property
    def name(self) -> str:
        """Name of `this`, or "" when `this` is missing/falsy or is a `Func`."""
        if self.this and not isinstance(self.this, Func):
            return self.this.name
        return ""

    @property
    def db(self) -> str:
        """Text of the "db" arg."""
        return self.text("db")

    @property
    def catalog(self) -> str:
        """Text of the "catalog" arg."""
        return self.text("catalog")

    @property
    def selects(self) -> t.List[Expr]:
        """Always an empty list for a table."""
        return []

    @property
    def named_selects(self) -> t.List[str]:
        """Always an empty list for a table."""
        return []

    @property
    def parts(self) -> t.List[Expr]:
        """Return the parts of a table in order catalog, db, table."""
        collected: t.List[Expr] = []

        for key in ("catalog", "db", "this"):
            node = self.args.get(key)

            # A `Dot` contributes each of its flattened operands.
            if isinstance(node, Dot):
                collected.extend(node.flatten())
                continue

            if isinstance(node, Expr):
                collected.append(node)

        return collected

    def to_column(self, copy: bool = True) -> Expr:
        """
        Convert this table reference into a column-like expression.

        Args:
            copy: forwarded to `column` and `alias_`.

        Returns:
            A column built from the table parts when the last part is an `Identifier`,
            otherwise the last part itself; wrapped in an alias if this table has one.
        """
        segments = self.parts
        tail = segments[-1]

        col: Expr
        if not isinstance(tail, Identifier):
            # This branch will be reached if a function or array is wrapped in a `Table`
            col = tail
        else:
            col = column(*reversed(segments[0:4]), fields=segments[4:], copy=copy)  # type: ignore

        table_alias = self.args.get("alias")
        if not table_alias:
            return col

        return alias_(col, table_alias.this, copy=copy)
class Select(Expression, Query):
    """SELECT query node with fluent builder methods for its clauses."""

    arg_types = {
        "with_": False,
        "kind": False,
        "expressions": False,
        "hint": False,
        "distinct": False,
        "into": False,
        "from_": False,
        "operation_modifiers": False,
        "exclude": False,
        **QUERY_MODIFIERS,
    }

    def from_(
        self, expression: ExpOrStr, dialect: DialectType = None, copy: bool = True, **opts
    ) -> Select:
        """
        Set the FROM expression.

        Example:
            >>> Select().from_("tbl").select("x").sql()
            'SELECT x FROM tbl'

        Args:
            expression : the SQL code strings to parse.
                If a `From` instance is passed, this is used as-is.
                If another `Expr` instance is passed, it will be wrapped in a `From`.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_builder(
            expression=expression,
            instance=self,
            arg="from_",
            into=From,
            prefix="FROM",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def group_by(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Set the GROUP BY expression.

        Example:
            >>> Select().from_("tbl").select("x", "COUNT(1)").group_by("x").sql()
            'SELECT x, COUNT(1) FROM tbl GROUP BY x'

        Args:
            *expressions: the SQL code strings to parse.
                If a `Group` instance is passed, this is used as-is.
                If another `Expr` instance is passed, it will be wrapped in a `Group`.
                If nothing is passed in then a group by is not applied to the expression
            append: if `True`, add to any existing expressions.
                Otherwise, this flattens all the `Group` expression into a single expression.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        # Nothing to group by: hand back this query (or a copy of it) untouched.
        if not expressions:
            return self if not copy else self.copy()

        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="group",
            append=append,
            copy=copy,
            prefix="GROUP BY",
            into=Group,
            dialect=dialect,
            **opts,
        )

    def sort_by(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Set the SORT BY expression.

        Example:
            >>> Select().from_("tbl").select("x").sort_by("x DESC").sql(dialect="hive")
            'SELECT x FROM tbl SORT BY x DESC'

        Args:
            *expressions: the SQL code strings to parse.
                If a `Sort` instance is passed, this is used as-is.
                If another `Expr` instance is passed, it will be wrapped in a `Sort`.
            append: if `True`, add to any existing expressions.
                Otherwise, this flattens all the `Order` expression into a single expression.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="sort",
            append=append,
            copy=copy,
            prefix="SORT BY",
            into=Sort,
            dialect=dialect,
            **opts,
        )

    def cluster_by(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Set the CLUSTER BY expression.

        Example:
            >>> Select().from_("tbl").select("x").cluster_by("x DESC").sql(dialect="hive")
            'SELECT x FROM tbl CLUSTER BY x DESC'

        Args:
            *expressions: the SQL code strings to parse.
                If a `Cluster` instance is passed, this is used as-is.
                If another `Expr` instance is passed, it will be wrapped in a `Cluster`.
            append: if `True`, add to any existing expressions.
                Otherwise, this flattens all the `Order` expression into a single expression.
            dialect: the dialect used to parse the input expression.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_child_list_builder(
            *expressions,
            instance=self,
            arg="cluster",
            append=append,
            copy=copy,
            prefix="CLUSTER BY",
            into=Cluster,
            dialect=dialect,
            **opts,
        )

    def select(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the projections stored under the "expressions" arg.

        Args:
            *expressions: the SQL code strings (or expression instances) to add.
            append: forwarded to the list builder together with the new expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="expressions",
            append=append,
            dialect=dialect,
            into=Expr,
            copy=copy,
            **opts,
        )

    def lateral(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the LATERAL expressions.

        Example:
            >>> Select().select("x").lateral("OUTER explode(y) tbl2 AS z").from_("tbl").sql()
            'SELECT x FROM tbl LATERAL VIEW OUTER EXPLODE(y) tbl2 AS z'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expr` instance is passed, it will be used as-is.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="laterals",
            append=append,
            into=Lateral,
            prefix="LATERAL VIEW",
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def join(
        self,
        expression: ExpOrStr,
        on: t.Optional[ExpOrStr | list[ExpOrStr]] = None,
        using: t.Optional[ExpOrStr | Collection[ExpOrStr]] = None,
        append: bool = True,
        join_type: t.Optional[str] = None,
        join_alias: t.Optional[Identifier | str] = None,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the JOIN expressions.

        Example:
            >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y").sql()
            'SELECT * FROM tbl JOIN tbl2 ON tbl1.y = tbl2.y'

            >>> Select().select("1").from_("a").join("b", using=["x", "y", "z"]).sql()
            'SELECT 1 FROM a JOIN b USING (x, y, z)'

            Use `join_type` to change the type of join:

            >>> Select().select("*").from_("tbl").join("tbl2", on="tbl1.y = tbl2.y", join_type="left outer").sql()
            'SELECT * FROM tbl LEFT OUTER JOIN tbl2 ON tbl1.y = tbl2.y'

        Args:
            expression: the SQL code string to parse.
                If an `Expr` instance is passed, it will be used as-is.
            on: optionally specify the join "on" criteria as a SQL string.
                If an `Expr` instance is passed, it will be used as-is.
            using: optionally specify the join "using" criteria as a SQL string.
                If an `Expr` instance is passed, it will be used as-is.
            append: if `True`, add to any existing expressions.
                Otherwise, this resets the expressions.
            join_type: if set, alter the parsed join type.
            join_alias: an optional alias for the joined source.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            Select: the modified expression.
        """
        parse_args: t.Dict[str, t.Any] = {"dialect": dialect, **opts}

        # First try to read the input as a full JOIN clause; if that fails,
        # retry as either a Join or a plain expression to be wrapped below.
        try:
            expression = maybe_parse(expression, into=Join, prefix="JOIN", **parse_args)
        except ParseError:
            expression = maybe_parse(expression, into=(Join, Expr), **parse_args)

        join = expression if isinstance(expression, Join) else Join(this=expression)

        # A bare SELECT as join target is turned into a subquery.
        if isinstance(join.this, Select):
            join.this.replace(join.this.subquery())

        if join_type:
            # Parse a dummy join carrying `join_type` and copy over whichever of
            # method / side / kind it produced.
            new_join = maybe_parse(f"FROM _ {join_type} JOIN _", **parse_args).find(Join)
            method = new_join.method
            side = new_join.side
            kind = new_join.kind

            if method:
                join.set("method", method)
            if side:
                join.set("side", side)
            if kind:
                join.set("kind", kind)

        if on:
            on = and_(
                *t.cast(t.List[ExpOrStr], ensure_list(on)), dialect=dialect, copy=copy, **opts
            )
            join.set("on", on)

        if using:
            # Parse USING identifiers with the caller's dialect, like every
            # other piece of this join (and like `Join.using`).
            join = _apply_list_builder(
                *ensure_list(using),
                instance=join,
                arg="using",
                append=append,
                copy=copy,
                into=Identifier,
                dialect=dialect,
                **opts,
            )

        if join_alias:
            join.set("this", alias_(join.this, join_alias, table=True))

        return _apply_list_builder(
            join,
            instance=self,
            arg="joins",
            append=append,
            copy=copy,
            **opts,
        )

    def having(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the HAVING expressions.

        Example:
            >>> Select().select("x", "COUNT(y)").from_("tbl").group_by("x").having("COUNT(y) > 3").sql()
            'SELECT x, COUNT(y) FROM tbl GROUP BY x HAVING COUNT(y) > 3'

        Args:
            *expressions: the SQL code strings to parse.
                If an `Expr` instance is passed, it will be used as-is.
                Multiple expressions are combined with an AND operator.
            append: if `True`, AND the new expressions to any existing expression.
                Otherwise, this resets the expression.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="having",
            append=append,
            into=Having,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def window(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the expressions stored under the "windows" arg.

        Args:
            *expressions: the SQL code strings (or expression instances), parsed into `Window`.
            append: forwarded to the list builder together with the new expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_list_builder(
            *expressions,
            instance=self,
            arg="windows",
            append=append,
            into=Window,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def qualify(
        self,
        *expressions: t.Optional[ExpOrStr],
        append: bool = True,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Select:
        """
        Append to or set the condition stored under the "qualify" arg.

        Args:
            *expressions: the SQL code strings (or expression instances), wrapped in `Qualify`.
            append: forwarded to the conjunction builder together with the new expressions.
            dialect: the dialect used to parse the input expressions.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input expressions.

        Returns:
            The modified Select expression.
        """
        return _apply_conjunction_builder(
            *expressions,
            instance=self,
            arg="qualify",
            append=append,
            into=Qualify,
            dialect=dialect,
            copy=copy,
            **opts,
        )

    def distinct(
        self, *ons: t.Optional[ExpOrStr], distinct: bool = True, copy: bool = True
    ) -> Select:
        """
        Set the DISTINCT expression.

        Example:
            >>> Select().from_("tbl").select("x").distinct().sql()
            'SELECT DISTINCT x FROM tbl'

        Args:
            ons: the expressions to distinct on
            distinct: whether the Select should be distinct
            copy: if `False`, modify this expression instance in-place.

        Returns:
            Select: the modified expression.
        """
        instance = maybe_copy(self, copy)
        # Falsy entries in `ons` are skipped; with no `ons` at all there is no ON tuple.
        on = Tuple(expressions=[maybe_parse(on, copy=copy) for on in ons if on]) if ons else None
        instance.set("distinct", Distinct(on=on) if distinct else None)
        return instance

    def ctas(
        self,
        table: ExpOrStr,
        properties: t.Optional[t.Dict] = None,
        dialect: DialectType = None,
        copy: bool = True,
        **opts,
    ) -> Create:
        """
        Convert this expression to a CREATE TABLE AS statement.

        Example:
            >>> Select().select("*").from_("tbl").ctas("x").sql()
            'CREATE TABLE x AS SELECT * FROM tbl'

        Args:
            table: the SQL code string to parse as the table name.
                If another `Expr` instance is passed, it will be used as-is.
            properties: an optional mapping of table properties
            dialect: the dialect used to parse the input table.
            copy: if `False`, modify this expression instance in-place.
            opts: other options to use to parse the input table.

        Returns:
            The new Create expression.
        """
        instance = maybe_copy(self, copy)
        table_expression = maybe_parse(table, into=Table, dialect=dialect, **opts)

        properties_expression = None
        if properties:
            from sqlglot.expressions.properties import Properties as _Properties

            properties_expression = _Properties.from_dict(properties)

        from sqlglot.expressions.ddl import Create as _Create

        return _Create(
            this=table_expression,
            kind="TABLE",
            expression=instance,
            properties=properties_expression,
        )

    def lock(self, update: bool = True, copy: bool = True) -> Select:
        """
        Set the locking read mode for this expression.

        Examples:
            >>> Select().select("x").from_("tbl").where("x = 'a'").lock().sql("mysql")
            "SELECT x FROM tbl WHERE x = 'a' FOR UPDATE"

            >>> Select().select("x").from_("tbl").where("x = 'a'").lock(update=False).sql("mysql")
            "SELECT x FROM tbl WHERE x = 'a' FOR SHARE"

        Args:
            update: if `True`, the locking type will be `FOR UPDATE`, else it will be `FOR SHARE`.
            copy: if `False`, modify this expression instance in-place.

        Returns:
            The modified expression.
        """
        inst = maybe_copy(self, copy)
        # Replaces any previous "locks" value with a single Lock node.
        inst.set("locks", [Lock(update=update)])

        return inst

    def hint(self, *hints: ExpOrStr, dialect: DialectType = None, copy: bool = True) -> Select:
        """
        Set hints for this expression.

        Examples:
            >>> Select().select("x").from_("tbl").hint("BROADCAST(y)").sql(dialect="spark")
            'SELECT /*+ BROADCAST(y) */ x FROM tbl'

        Args:
            hints: The SQL code strings to parse as the hints.
                If an `Expr` instance is passed, it will be used as-is.
            dialect: The dialect used to parse the hints.
            copy: If `False`, modify this expression instance in-place.

        Returns:
            The modified expression.
        """
        inst = maybe_copy(self, copy)
        inst.set(
            "hint", Hint(expressions=[maybe_parse(h, copy=copy, dialect=dialect) for h in hints])
        )

        return inst

    @property
    def named_selects(self) -> t.List[str]:
        """Output names of the projections; `Aliases` nodes contribute each alias name."""
        selects = []

        for e in self.expressions:
            if e.alias_or_name:
                selects.append(e.output_name)
            elif isinstance(e, Aliases):
                selects.extend([a.name for a in e.aliases])
        return selects

    @property
    def is_star(self) -> bool:
        """Whether any projection reports `is_star`."""
        return any(expression.is_star for expression in self.expressions)

    @property
    def selects(self) -> t.List[Expr]:
        """The projection list (`self.expressions`)."""
        return self.expressions
class Pivot(Expression):
    """PIVOT / UNPIVOT node; see `arg_types` for the accepted arguments."""

    arg_types = {
        "this": False,
        "alias": False,
        "expressions": False,
        "fields": False,
        "unpivot": False,
        "using": False,
        "group": False,
        "columns": False,
        "include_nulls": False,
        "default_on_null": False,
        "into": False,
        "with_": False,
    }

    @property
    def unpivot(self) -> bool:
        """Whether the "unpivot" arg is set to a truthy value."""
        return bool(self.args.get("unpivot"))

    @property
    def fields(self) -> t.List[Expr]:
        """Return the "fields" arg, or an empty list when it is missing or `None`."""
        # `dict.get(key, default)` only falls back when the key is absent, so a
        # stored `None` would leak through; `or []` keeps the declared list
        # return type, matching the other list-valued properties in this file.
        return self.args.get("fields") or []
class JSONPath(Expression):
    """JSON path node made of a list of path-part expressions."""

    arg_types = {"expressions": True, "escape": False}

    @property
    def output_name(self) -> str:
        """Return `this` of the last path part when it is a string, otherwise ""."""
        tail = self.expressions[-1].this
        if isinstance(tail, str):
            return tail
        return ""
arg_types = {"this": True, "expression": True} class JSONColumnDef(Expression): arg_types = { "this": False, "kind": False, "path": False, "nested_schema": False, "ordinality": False, } class JSONSchema(Expression): arg_types = {"expressions": True} class JSONValue(Expression): arg_types = { "this": True, "path": True, "returning": False, "on_condition": False, } class JSONValueArray(Expression, Func): arg_types = {"this": True, "expression": False} class OpenJSONColumnDef(Expression): arg_types = {"this": True, "kind": True, "path": False, "as_json": False} class JSONExtractQuote(Expression): arg_types = { "option": True, "scalar": False, } class ScopeResolution(Expression): arg_types = {"this": False, "expression": True} class Stream(Expression): pass class ModelAttribute(Expression): arg_types = {"this": True, "expression": True} class XMLNamespace(Expression): pass class XMLKeyValueOption(Expression): arg_types = {"this": True, "expression": False} class Semicolon(Expression): arg_types = {} class TableColumn(Expression): pass class Variadic(Expression): pass class StoredProcedure(Expression): arg_types = {"this": True, "expressions": False, "wrapped": False} class Block(Expression): arg_types = {"expressions": True} class IfBlock(Expression): arg_types = {"this": True, "true": True, "false": False} class WhileBlock(Expression): arg_types = {"this": True, "body": True} class EndStatement(Expression): arg_types = {} UNWRAPPED_QUERIES = (Select, SetOperation) def union( *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, copy: bool = True, **opts, ) -> Union: """ Initializes a syntax tree for the `UNION` operation. Example: >>> union("SELECT * FROM foo", "SELECT * FROM bla").sql() 'SELECT * FROM foo UNION SELECT * FROM bla' Args: expressions: the SQL code strings, corresponding to the `UNION`'s operands. If `Expr` instances are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. 
dialect: the dialect used to parse the input expression. copy: whether to copy the expression. opts: other options to use to parse the input expressions. Returns: The new Union instance. """ assert len(expressions) >= 2, "At least two expressions are required by `union`." return _apply_set_operation( *expressions, set_operation=Union, distinct=distinct, dialect=dialect, copy=copy, **opts ) def intersect( *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, copy: bool = True, **opts, ) -> Intersect: """ Initializes a syntax tree for the `INTERSECT` operation. Example: >>> intersect("SELECT * FROM foo", "SELECT * FROM bla").sql() 'SELECT * FROM foo INTERSECT SELECT * FROM bla' Args: expressions: the SQL code strings, corresponding to the `INTERSECT`'s operands. If `Expr` instances are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. dialect: the dialect used to parse the input expression. copy: whether to copy the expression. opts: other options to use to parse the input expressions. Returns: The new Intersect instance. """ assert len(expressions) >= 2, "At least two expressions are required by `intersect`." return _apply_set_operation( *expressions, set_operation=Intersect, distinct=distinct, dialect=dialect, copy=copy, **opts ) def except_( *expressions: ExpOrStr, distinct: bool = True, dialect: DialectType = None, copy: bool = True, **opts, ) -> Except: """ Initializes a syntax tree for the `EXCEPT` operation. Example: >>> except_("SELECT * FROM foo", "SELECT * FROM bla").sql() 'SELECT * FROM foo EXCEPT SELECT * FROM bla' Args: expressions: the SQL code strings, corresponding to the `EXCEPT`'s operands. If `Expr` instances are passed, they will be used as-is. distinct: set the DISTINCT flag if and only if this is true. dialect: the dialect used to parse the input expression. copy: whether to copy the expression. opts: other options to use to parse the input expressions. 
Returns: The new Except instance. """ assert len(expressions) >= 2, "At least two expressions are required by `except_`." return _apply_set_operation( *expressions, set_operation=Except, distinct=distinct, dialect=dialect, copy=copy, **opts ) ================================================ FILE: sqlglot/expressions/string.py ================================================ """sqlglot expressions - string, encoding, hashing, and regex functions.""" from __future__ import annotations from sqlglot.expressions.core import Expression, Func, Binary # String basics class Ascii(Expression, Func): pass class BitLength(Expression, Func): pass class ByteLength(Expression, Func): pass class Chr(Expression, Func): arg_types = {"expressions": True, "charset": False} is_var_len_args = True _sql_names = ["CHR", "CHAR"] class Concat(Expression, Func): arg_types = {"expressions": True, "safe": False, "coalesce": False} is_var_len_args = True class ConcatWs(Concat): _sql_names = ["CONCAT_WS"] class Contains(Expression, Func): arg_types = {"this": True, "expression": True, "json_scope": False} class Elt(Expression, Func): arg_types = {"this": True, "expressions": True} is_var_len_args = True class EndsWith(Expression, Func): _sql_names = ["ENDS_WITH", "ENDSWITH"] arg_types = {"this": True, "expression": True} class Format(Expression, Func): arg_types = {"this": True, "expressions": False} is_var_len_args = True class Initcap(Expression, Func): arg_types = {"this": True, "expression": False} class IsAscii(Expression, Func): pass class Left(Expression, Func): arg_types = {"this": True, "expression": True} class Length(Expression, Func): arg_types = {"this": True, "binary": False, "encoding": False} _sql_names = ["LENGTH", "LEN", "CHAR_LENGTH", "CHARACTER_LENGTH"] class Levenshtein(Expression, Func): arg_types = { "this": True, "expression": False, "ins_cost": False, "del_cost": False, "sub_cost": False, "max_dist": False, } class Lower(Expression, Func): _sql_names = ["LOWER", "LCASE"] 
class MatchAgainst(Expression, Func): arg_types = {"this": True, "expressions": True, "modifier": False} class Normalize(Expression, Func): arg_types = {"this": True, "form": False, "is_casefold": False} class NumberToStr(Expression, Func): arg_types = {"this": True, "format": True, "culture": False} class Overlay(Expression, Func): arg_types = {"this": True, "expression": True, "from_": True, "for_": False} class Pad(Expression, Func): arg_types = {"this": True, "expression": True, "fill_pattern": False, "is_left": True} class Repeat(Expression, Func): arg_types = {"this": True, "times": True} class Replace(Expression, Func): arg_types = {"this": True, "expression": True, "replacement": False} class Reverse(Expression, Func): pass class Right(Expression, Func): arg_types = {"this": True, "expression": True} class RtrimmedLength(Expression, Func): pass class Search(Expression, Func): arg_types = { "this": True, # data_to_search / search_data "expression": True, # search_query / search_string "json_scope": False, # BigQuery: JSON_VALUES | JSON_KEYS | JSON_KEYS_AND_VALUES "analyzer": False, # Both: analyzer / ANALYZER "analyzer_options": False, # BigQuery: analyzer_options_values "search_mode": False, # Snowflake: OR | AND } class SearchIp(Expression, Func): arg_types = {"this": True, "expression": True} class Soundex(Expression, Func): pass class SoundexP123(Expression, Func): pass class Space(Expression, Func): """ SPACE(n) → string consisting of n blank characters """ pass class Split(Expression, Func): arg_types = { "this": True, "expression": True, "limit": False, "null_returns_null": False, "empty_delimiter_returns_whole": False, } class SplitPart(Expression, Func): arg_types = { "this": True, "delimiter": False, "part_index": False, "part_index_zero_as_one": False, # usually part_index is 1-based, however Snowflake allows 0 and treats it as 1 "empty_delimiter_returns_whole": False, # whether the whole input string should be returned if the delimiter string is 
empty (i.e. Snowflake) } class Strtok(Expression, Func): arg_types = { "this": True, "delimiter": False, "part_index": False, } class StartsWith(Expression, Func): _sql_names = ["STARTS_WITH", "STARTSWITH"] arg_types = {"this": True, "expression": True} class StrPosition(Expression, Func): arg_types = { "this": True, "substr": True, "position": False, "occurrence": False, "clamp_position": False, } class StrToMap(Expression, Func): arg_types = { "this": True, "pair_delim": False, "key_value_delim": False, "duplicate_resolution_callback": False, } class String(Expression, Func): arg_types = {"this": True, "zone": False} class Stuff(Expression, Func): _sql_names = ["STUFF", "INSERT"] arg_types = {"this": True, "start": True, "length": True, "expression": True} class Substring(Expression, Func): _sql_names = ["SUBSTRING", "SUBSTR"] arg_types = {"this": True, "start": False, "length": False} class SubstringIndex(Expression, Func): """ SUBSTRING_INDEX(str, delim, count) *count* > 0 → left slice before the *count*-th delimiter *count* < 0 → right slice after the |count|-th delimiter """ arg_types = {"this": True, "delimiter": True, "count": True} class Translate(Expression, Func): arg_types = {"this": True, "from_": True, "to": True} class Trim(Expression, Func): arg_types = { "this": True, "expression": False, "position": False, "collation": False, } class Unicode(Expression, Func): pass class Upper(Expression, Func): _sql_names = ["UPPER", "UCASE"] # Encoding / base conversion class Base64DecodeBinary(Expression, Func): arg_types = {"this": True, "alphabet": False} class Base64DecodeString(Expression, Func): arg_types = {"this": True, "alphabet": False} class Base64Encode(Expression, Func): arg_types = {"this": True, "max_line_length": False, "alphabet": False} class CodePointsToBytes(Expression, Func): pass class CodePointsToString(Expression, Func): pass class ConvertToCharset(Expression, Func): arg_types = {"this": True, "dest": True, "source": False} class 
Decode(Expression, Func): arg_types = {"this": True, "charset": True, "replace": False} class Encode(Expression, Func): arg_types = {"this": True, "charset": True} class FromBase(Expression, Func): arg_types = {"this": True, "expression": True} class FromBase32(Expression, Func): pass class FromBase64(Expression, Func): pass class Hex(Expression, Func): pass class HexDecodeString(Expression, Func): pass class HexEncode(Expression, Func): arg_types = {"this": True, "case": False} class LowerHex(Hex): pass class SafeConvertBytesToString(Expression, Func): pass class ToBase32(Expression, Func): pass class ToBase64(Expression, Func): pass class ToBinary(Expression, Func): arg_types = {"this": True, "format": False, "safe": False} class ToChar(Expression, Func): arg_types = { "this": True, "format": False, "nlsparam": False, "is_numeric": False, } class ToCodePoints(Expression, Func): pass class ToDecfloat(Expression, Func): arg_types = { "this": True, "format": False, } class ToDouble(Expression, Func): arg_types = { "this": True, "format": False, "safe": False, } class ToFile(Expression, Func): arg_types = { "this": True, "path": False, "safe": False, } class ToNumber(Expression, Func): arg_types = { "this": True, "format": False, "nlsparam": False, "precision": False, "scale": False, "safe": False, "safe_name": False, } class TryBase64DecodeBinary(Expression, Func): arg_types = {"this": True, "alphabet": False} class TryBase64DecodeString(Expression, Func): arg_types = {"this": True, "alphabet": False} class TryHexDecodeBinary(Expression, Func): pass class TryHexDecodeString(Expression, Func): pass class TryToDecfloat(Expression, Func): arg_types = { "this": True, "format": False, } class Unhex(Expression, Func): arg_types = {"this": True, "expression": False} # Regex class RegexpCount(Expression, Func): arg_types = { "this": True, "expression": True, "position": False, "parameters": False, } class RegexpExtract(Expression, Func): arg_types = { "this": True, 
"expression": True, "position": False, "occurrence": False, "parameters": False, "group": False, "null_if_pos_overflow": False, # for transpilation target behavior } class RegexpExtractAll(Expression, Func): arg_types = { "this": True, "expression": True, "group": False, "parameters": False, "position": False, "occurrence": False, } class RegexpFullMatch(Expression, Binary, Func): arg_types = {"this": True, "expression": True, "options": False} class RegexpILike(Expression, Binary, Func): arg_types = {"this": True, "expression": True, "flag": False} class RegexpInstr(Expression, Func): arg_types = { "this": True, "expression": True, "position": False, "occurrence": False, "option": False, "parameters": False, "group": False, } class RegexpReplace(Expression, Func): arg_types = { "this": True, "expression": True, "replacement": False, "position": False, "occurrence": False, "modifiers": False, "single_replace": False, } class RegexpSplit(Expression, Func): arg_types = {"this": True, "expression": True, "limit": False} # Hashing / cryptographic class Compress(Expression, Func): arg_types = {"this": True, "method": False} class Decrypt(Expression, Func): arg_types = { "this": True, "passphrase": True, "aad": False, "encryption_method": False, "safe": False, } class DecryptRaw(Expression, Func): arg_types = { "this": True, "key": True, "iv": True, "aad": False, "encryption_method": False, "aead": False, "safe": False, } class DecompressBinary(Expression, Func): arg_types = {"this": True, "method": True} class DecompressString(Expression, Func): arg_types = {"this": True, "method": True} class Encrypt(Expression, Func): arg_types = {"this": True, "passphrase": True, "aad": False, "encryption_method": False} class EncryptRaw(Expression, Func): arg_types = {"this": True, "key": True, "iv": True, "aad": False, "encryption_method": False} class CityHash64(Expression, Func): arg_types = {"expressions": False} is_var_len_args = True class FarmFingerprint(Expression, Func): 
arg_types = {"expressions": True} is_var_len_args = True _sql_names = ["FARM_FINGERPRINT", "FARMFINGERPRINT64"] class MD5(Expression, Func): _sql_names = ["MD5"] class MD5Digest(Expression, Func): arg_types = {"this": True, "expressions": False} is_var_len_args = True _sql_names = ["MD5_DIGEST"] class MD5NumberLower64(Expression, Func): pass class MD5NumberUpper64(Expression, Func): pass class SHA(Expression, Func): _sql_names = ["SHA", "SHA1"] class SHA1Digest(Expression, Func): pass class SHA2(Expression, Func): _sql_names = ["SHA2"] arg_types = {"this": True, "length": False} class SHA2Digest(Expression, Func): arg_types = {"this": True, "length": False} class StandardHash(Expression, Func): arg_types = {"this": True, "expression": False} # Parse class ParseBignumeric(Expression, Func): pass class ParseNumeric(Expression, Func): pass class ParseUrl(Expression, Func): arg_types = {"this": True, "part_to_extract": False, "key": False, "permissive": False} ================================================ FILE: sqlglot/expressions/temporal.py ================================================ """sqlglot expressions - date, time, and timestamp functions.""" from __future__ import annotations import typing as t from sqlglot.expressions.core import ( Expression, Func, TimeUnit, IntervalOp, Literal, Column, TIMESTAMP_PARTS, ) from sqlglot.expressions.datatypes import DataType, DType if t.TYPE_CHECKING: from sqlglot.expressions.core import Expr, Neg # Current date/time class CurrentDate(Expression, Func): arg_types = {"this": False} class CurrentDatetime(Expression, Func): arg_types = {"this": False} class CurrentTime(Expression, Func): arg_types = {"this": False} class CurrentTimestamp(Expression, Func): arg_types = {"this": False, "sysdate": False} class CurrentTimestampLTZ(Expression, Func): arg_types = {} class CurrentTimezone(Expression, Func): arg_types = {} class Localtime(Expression, Func): arg_types = {"this": False} class Localtimestamp(Expression, Func): 
arg_types = {"this": False} class Systimestamp(Expression, Func): arg_types = {"this": False} class UtcDate(Expression, Func): arg_types = {} class UtcTime(Expression, Func): arg_types = {"this": False} class UtcTimestamp(Expression, Func): arg_types = {"this": False} # Date arithmetic class AddMonths(Expression, Func): arg_types = {"this": True, "expression": True, "preserve_end_of_month": False} class DateAdd(Expression, Func, IntervalOp): arg_types = {"this": True, "expression": True, "unit": False} class DateBin(Expression, Func, IntervalOp): arg_types = {"this": True, "expression": True, "unit": False, "zone": False, "origin": False} class DateDiff(Expression, Func, TimeUnit): _sql_names = ["DATEDIFF", "DATE_DIFF"] arg_types = { "this": True, "expression": True, "unit": False, "zone": False, "big_int": False, "date_part_boundary": False, } class DateSub(Expression, Func, IntervalOp): arg_types = {"this": True, "expression": True, "unit": False} class DatetimeAdd(Expression, Func, IntervalOp): arg_types = {"this": True, "expression": True, "unit": False} class DatetimeDiff(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class DatetimeSub(Expression, Func, IntervalOp): arg_types = {"this": True, "expression": True, "unit": False} class MonthsBetween(Expression, Func): arg_types = {"this": True, "expression": True, "roundoff": False} class TimeAdd(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class TimeDiff(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class TimeSub(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class TimestampAdd(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class TimestampDiff(Expression, Func, TimeUnit): _sql_names = ["TIMESTAMPDIFF", "TIMESTAMP_DIFF"] arg_types = {"this": True, "expression": True, "unit": False} class 
TimestampSub(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} class TsOrDsAdd(Expression, Func, TimeUnit): # return_type is used to correctly cast the arguments of this expression when transpiling it arg_types = {"this": True, "expression": True, "unit": False, "return_type": False} @property def return_type(self) -> DataType: return DataType.build(self.args.get("return_type") or DType.DATE) class TsOrDsDiff(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": False} # Truncation class DatetimeTrunc(Expression, Func, TimeUnit): arg_types = {"this": True, "unit": True, "zone": False} class DateTrunc(Expression, Func): arg_types = {"unit": True, "this": True, "zone": False, "input_type_preserved": False} def __init__(self, **args): # Across most dialects it's safe to unabbreviate the unit (e.g. 'Q' -> 'QUARTER') except Oracle # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ROUND-and-TRUNC-Date-Functions.html unabbreviate = args.pop("unabbreviate", True) unit = args.get("unit") if isinstance(unit, TimeUnit.VAR_LIKE) and not ( isinstance(unit, Column) and len(unit.parts) != 1 ): unit_name = unit.name.upper() if unabbreviate and unit_name in TimeUnit.UNABBREVIATED_UNIT_NAME: unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name] args["unit"] = Literal.string(unit_name) super().__init__(**args) @property def unit(self) -> Expr: return self.args["unit"] class TimestampTrunc(Expression, Func, TimeUnit): arg_types = {"this": True, "unit": True, "zone": False, "input_type_preserved": False} class TimeSlice(Expression, Func, TimeUnit): arg_types = {"this": True, "expression": True, "unit": True, "kind": False} class TimeTrunc(Expression, Func, TimeUnit): arg_types = {"this": True, "unit": True, "zone": False} # Date/time extraction class Day(Expression, Func): pass class DayOfMonth(Expression, Func): _sql_names = ["DAY_OF_MONTH", "DAYOFMONTH"] class DayOfWeek(Expression, Func): 
_sql_names = ["DAY_OF_WEEK", "DAYOFWEEK"] class DayOfWeekIso(Expression, Func): _sql_names = ["DAYOFWEEK_ISO", "ISODOW"] class DayOfYear(Expression, Func): _sql_names = ["DAY_OF_YEAR", "DAYOFYEAR"] class Dayname(Expression, Func): arg_types = {"this": True, "abbreviated": False} class Extract(Expression, Func): arg_types = {"this": True, "expression": True} class GetExtract(Expression, Func): arg_types = {"this": True, "expression": True} class Hour(Expression, Func): pass class Minute(Expression, Func): pass class Month(Expression, Func): pass class Monthname(Expression, Func): arg_types = {"this": True, "abbreviated": False} class Quarter(Expression, Func): pass class Second(Expression, Func): pass class ToDays(Expression, Func): pass class Week(Expression, Func): arg_types = {"this": True, "mode": False} class WeekOfYear(Expression, Func): _sql_names = ["WEEK_OF_YEAR", "WEEKOFYEAR"] class Year(Expression, Func): pass class YearOfWeek(Expression, Func): _sql_names = ["YEAR_OF_WEEK", "YEAROFWEEK"] class YearOfWeekIso(Expression, Func): _sql_names = ["YEAR_OF_WEEK_ISO", "YEAROFWEEKISO"] # Date/time construction class Date(Expression, Func): arg_types = {"this": False, "zone": False, "expressions": False} is_var_len_args = True class DateFromParts(Expression, Func): _sql_names = ["DATE_FROM_PARTS", "DATEFROMPARTS"] arg_types = {"year": True, "month": False, "day": False, "allow_overflow": False} class DateFromUnixDate(Expression, Func): pass class Datetime(Expression, Func): arg_types = {"this": True, "expression": False} class GapFill(Expression, Func): arg_types = { "this": True, "ts_column": True, "bucket_width": True, "partitioning_columns": False, "value_columns": False, "origin": False, "ignore_nulls": False, } class GenerateDateArray(Expression, Func): arg_types = {"start": True, "end": True, "step": False} class GenerateTimestampArray(Expression, Func): arg_types = {"start": True, "end": True, "step": True} class JustifyDays(Expression, Func): pass class 
JustifyHours(Expression, Func): pass class JustifyInterval(Expression, Func): pass class LastDay(Expression, Func, TimeUnit): _sql_names = ["LAST_DAY", "LAST_DAY_OF_MONTH"] arg_types = {"this": True, "unit": False} class MakeInterval(Expression, Func): arg_types = { "year": False, "month": False, "week": False, "day": False, "hour": False, "minute": False, "second": False, } class NextDay(Expression, Func): arg_types = {"this": True, "expression": True} class PreviousDay(Expression, Func): arg_types = {"this": True, "expression": True} class Time(Expression, Func): arg_types = {"this": False, "zone": False} class TimeFromParts(Expression, Func): _sql_names = ["TIME_FROM_PARTS", "TIMEFROMPARTS"] arg_types = { "hour": True, "min": True, "sec": True, "nano": False, "fractions": False, "precision": False, "overflow": False, } class Timestamp(Expression, Func): arg_types = {"this": False, "zone": False, "with_tz": False} class TimestampFromParts(Expression, Func): _sql_names = ["TIMESTAMP_FROM_PARTS", "TIMESTAMPFROMPARTS"] arg_types = { **TIMESTAMP_PARTS, "zone": False, "milli": False, "this": False, "expression": False, } class TimestampLtzFromParts(Expression, Func): _sql_names = ["TIMESTAMP_LTZ_FROM_PARTS", "TIMESTAMPLTZFROMPARTS"] arg_types = TIMESTAMP_PARTS.copy() class TimestampTzFromParts(Expression, Func): _sql_names = ["TIMESTAMP_TZ_FROM_PARTS", "TIMESTAMPTZFROMPARTS"] arg_types = { **TIMESTAMP_PARTS, "zone": False, } # Date/time conversion class ConvertTimezone(Expression, Func): arg_types = { "source_tz": False, "target_tz": True, "timestamp": True, "options": False, } class DateStrToDate(Expression, Func): pass class DateToDateStr(Expression, Func): pass class DateToDi(Expression, Func): pass class DiToDate(Expression, Func): pass class FromISO8601Timestamp(Expression, Func): _sql_names = ["FROM_ISO8601_TIMESTAMP"] class ParseDatetime(Expression, Func): arg_types = {"this": True, "format": False, "zone": False} class ParseTime(Expression, Func): arg_types = 
{"this": True, "format": True} class StrToDate(Expression, Func): arg_types = {"this": True, "format": False, "safe": False} class StrToTime(Expression, Func): arg_types = {"this": True, "format": True, "zone": False, "safe": False, "target_type": False} class StrToUnix(Expression, Func): arg_types = {"this": False, "format": False} class TimeStrToDate(Expression, Func): pass class TimeStrToTime(Expression, Func): arg_types = {"this": True, "zone": False} class TimeStrToUnix(Expression, Func): pass class TimeToStr(Expression, Func): arg_types = {"this": True, "format": True, "culture": False, "zone": False} class TimeToTimeStr(Expression, Func): pass class TimeToUnix(Expression, Func): pass class TsOrDiToDi(Expression, Func): pass class TsOrDsToDate(Expression, Func): arg_types = {"this": True, "format": False, "safe": False} class TsOrDsToDateStr(Expression, Func): pass class TsOrDsToDatetime(Expression, Func): pass class TsOrDsToTime(Expression, Func): arg_types = {"this": True, "format": False, "safe": False} class TsOrDsToTimestamp(Expression, Func): pass class UnixDate(Expression, Func): pass class UnixMicros(Expression, Func): pass class UnixMillis(Expression, Func): pass class UnixSeconds(Expression, Func): pass class UnixToStr(Expression, Func): arg_types = {"this": True, "format": False} class UnixToTime(Expression, Func): arg_types = { "this": True, "scale": False, "zone": False, "hours": False, "minutes": False, "format": False, "target_type": False, } SECONDS: t.ClassVar[Literal | Neg] = Literal.number(0) DECIS: t.ClassVar[Literal | Neg] = Literal.number(1) CENTIS: t.ClassVar[Literal | Neg] = Literal.number(2) MILLIS: t.ClassVar[Literal | Neg] = Literal.number(3) DECIMILLIS: t.ClassVar[Literal | Neg] = Literal.number(4) CENTIMILLIS: t.ClassVar[Literal | Neg] = Literal.number(5) MICROS: t.ClassVar[Literal | Neg] = Literal.number(6) DECIMICROS: t.ClassVar[Literal | Neg] = Literal.number(7) CENTIMICROS: t.ClassVar[Literal | Neg] = Literal.number(8) NANOS: 
t.ClassVar[Literal | Neg] = Literal.number(9) class UnixToTimeStr(Expression, Func): pass ================================================ FILE: sqlglot/generator.py ================================================ from __future__ import annotations import logging import re import typing as t from collections import defaultdict from functools import reduce, wraps from sqlglot import exp from sqlglot.errors import ErrorLevel, UnsupportedError, concat_messages from sqlglot.expressions import apply_index_offset from sqlglot.helper import csv, name_sequence, seq_get from sqlglot.jsonpath import ALL_JSON_PATH_PARTS, JSON_PATH_PART_TRANSFORMS from sqlglot.time import format_time from sqlglot.tokens import TokenType if t.TYPE_CHECKING: from sqlglot._typing import E from sqlglot.dialects.dialect import DialectType G = t.TypeVar("G", bound="Generator") GeneratorMethod = t.Callable[[G, E], str] logger = logging.getLogger("sqlglot") ESCAPED_UNICODE_RE = re.compile(r"\\(\d+)") UNSUPPORTED_TEMPLATE = "Argument '{}' is not supported for expression '{}' when targeting {}." def unsupported_args( *args: t.Union[str, t.Tuple[str, str]], ) -> t.Callable[[GeneratorMethod], GeneratorMethod]: """ Decorator that can be used to mark certain args of an `Expr` subclass as unsupported. It expects a sequence of argument names or pairs of the form (argument_name, diagnostic_msg). 
""" diagnostic_by_arg: t.Dict[str, t.Optional[str]] = {} for arg in args: if isinstance(arg, str): diagnostic_by_arg[arg] = None else: diagnostic_by_arg[arg[0]] = arg[1] def decorator(func: GeneratorMethod) -> GeneratorMethod: @wraps(func) def _func(generator: G, expression: E) -> str: expression_name = expression.__class__.__name__ dialect_name = generator.dialect.__class__.__name__ for arg_name, diagnostic in diagnostic_by_arg.items(): if expression.args.get(arg_name): diagnostic = diagnostic or UNSUPPORTED_TEMPLATE.format( arg_name, expression_name, dialect_name ) generator.unsupported(diagnostic) return func(generator, expression) return _func return decorator class _Generator(type): def __new__(cls, clsname, bases, attrs): klass = super().__new__(cls, clsname, bases, attrs) # Remove transforms that correspond to unsupported JSONPathPart expressions for part in ALL_JSON_PATH_PARTS - klass.SUPPORTED_JSON_PATH_PARTS: klass.TRANSFORMS.pop(part, None) return klass class Generator(metaclass=_Generator): """ Generator converts a given syntax tree to the corresponding SQL string. Args: pretty: Whether to format the produced SQL string. Default: False. identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for specials cases. 'safe': Only quote identifiers that are case insensitive. normalize: Whether to normalize identifiers to lowercase. Default: False. pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2. indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a `WHERE` clause. Default: 2. normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. 
False: Disables function name normalization. unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN. max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3 leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80 comments: Whether to preserve comments in the output SQL code. Default: True """ TRANSFORMS: t.Dict[t.Type[exp.Expr], t.Callable[..., str]] = { **JSON_PATH_PART_TRANSFORMS, exp.Adjacent: lambda self, e: self.binary(e, "-|-"), exp.AllowedValuesProperty: lambda self, e: ( f"ALLOWED_VALUES {self.expressions(e, flat=True)}" ), exp.AnalyzeColumns: lambda self, e: self.sql(e, "this"), exp.AnalyzeWith: lambda self, e: self.expressions(e, prefix="WITH ", sep=" "), exp.ArrayContainsAll: lambda self, e: self.binary(e, "@>"), exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"), exp.AssumeColumnConstraint: lambda self, e: f"ASSUME ({self.sql(e, 'this')})", exp.AutoRefreshProperty: lambda self, e: f"AUTO REFRESH {self.sql(e, 'this')}", exp.BackupProperty: lambda self, e: f"BACKUP {self.sql(e, 'this')}", exp.CaseSpecificColumnConstraint: lambda _, e: ( f"{'NOT ' if e.args.get('not_') else ''}CASESPECIFIC" ), exp.Ceil: lambda self, e: self.ceil_floor(e), exp.CharacterSetColumnConstraint: lambda self, e: f"CHARACTER SET {self.sql(e, 'this')}", exp.CharacterSetProperty: lambda self, e: ( f"{'DEFAULT ' if e.args.get('default') else ''}CHARACTER SET={self.sql(e, 'this')}" ), exp.ClusteredColumnConstraint: lambda self, e: ( f"CLUSTERED ({self.expressions(e, 'this', indent=False)})" ), 
exp.CollateColumnConstraint: lambda self, e: f"COLLATE {self.sql(e, 'this')}", exp.CommentColumnConstraint: lambda self, e: f"COMMENT {self.sql(e, 'this')}", exp.ConnectByRoot: lambda self, e: f"CONNECT_BY_ROOT {self.sql(e, 'this')}", exp.ConvertToCharset: lambda self, e: self.func( "CONVERT", e.this, e.args["dest"], e.args.get("source") ), exp.CopyGrantsProperty: lambda *_: "COPY GRANTS", exp.CredentialsProperty: lambda self, e: ( f"CREDENTIALS=({self.expressions(e, 'expressions', sep=' ')})" ), exp.CurrentCatalog: lambda *_: "CURRENT_CATALOG", exp.SessionUser: lambda *_: "SESSION_USER", exp.DateFormatColumnConstraint: lambda self, e: f"FORMAT {self.sql(e, 'this')}", exp.DefaultColumnConstraint: lambda self, e: f"DEFAULT {self.sql(e, 'this')}", exp.ApiProperty: lambda *_: "API", exp.ApplicationProperty: lambda *_: "APPLICATION", exp.CatalogProperty: lambda *_: "CATALOG", exp.ComputeProperty: lambda *_: "COMPUTE", exp.DatabaseProperty: lambda *_: "DATABASE", exp.DynamicProperty: lambda *_: "DYNAMIC", exp.EmptyProperty: lambda *_: "EMPTY", exp.EncodeColumnConstraint: lambda self, e: f"ENCODE {self.sql(e, 'this')}", exp.EndStatement: lambda *_: "END", exp.EnviromentProperty: lambda self, e: f"ENVIRONMENT ({self.expressions(e, flat=True)})", exp.HandlerProperty: lambda self, e: f"HANDLER {self.sql(e, 'this')}", exp.ParameterStyleProperty: lambda self, e: f"PARAMETER STYLE {self.sql(e, 'this')}", exp.EphemeralColumnConstraint: lambda self, e: ( f"EPHEMERAL{(' ' + self.sql(e, 'this')) if e.this else ''}" ), exp.ExcludeColumnConstraint: lambda self, e: f"EXCLUDE {self.sql(e, 'this').lstrip()}", exp.ExecuteAsProperty: lambda self, e: self.naked_property(e), exp.Except: lambda self, e: self.set_operations(e), exp.ExternalProperty: lambda *_: "EXTERNAL", exp.Floor: lambda self, e: self.ceil_floor(e), exp.Get: lambda self, e: self.get_put_sql(e), exp.GlobalProperty: lambda *_: "GLOBAL", exp.HeapProperty: lambda *_: "HEAP", exp.HybridProperty: lambda *_: "HYBRID", 
exp.IcebergProperty: lambda *_: "ICEBERG", exp.InheritsProperty: lambda self, e: f"INHERITS ({self.expressions(e, flat=True)})", exp.InlineLengthColumnConstraint: lambda self, e: f"INLINE LENGTH {self.sql(e, 'this')}", exp.InputModelProperty: lambda self, e: f"INPUT{self.sql(e, 'this')}", exp.Intersect: lambda self, e: self.set_operations(e), exp.IntervalSpan: lambda self, e: f"{self.sql(e, 'this')} TO {self.sql(e, 'expression')}", exp.Int64: lambda self, e: self.sql(exp.cast(e.this, exp.DType.BIGINT)), exp.JSONBContainsAnyTopKeys: lambda self, e: self.binary(e, "?|"), exp.JSONBContainsAllTopKeys: lambda self, e: self.binary(e, "?&"), exp.JSONBDeleteAtPath: lambda self, e: self.binary(e, "#-"), exp.JSONObject: lambda self, e: self._jsonobject_sql(e), exp.JSONObjectAgg: lambda self, e: self._jsonobject_sql(e), exp.LanguageProperty: lambda self, e: self.naked_property(e), exp.LocationProperty: lambda self, e: self.naked_property(e), exp.LogProperty: lambda _, e: f"{'NO ' if e.args.get('no') else ''}LOG", exp.MaskingProperty: lambda *_: "MASKING", exp.MaterializedProperty: lambda *_: "MATERIALIZED", exp.NetFunc: lambda self, e: f"NET.{self.sql(e, 'this')}", exp.NetworkProperty: lambda *_: "NETWORK", exp.NonClusteredColumnConstraint: lambda self, e: ( f"NONCLUSTERED ({self.expressions(e, 'this', indent=False)})" ), exp.NoPrimaryIndexProperty: lambda *_: "NO PRIMARY INDEX", exp.NotForReplicationColumnConstraint: lambda *_: "NOT FOR REPLICATION", exp.OnCommitProperty: lambda _, e: ( f"ON COMMIT {'DELETE' if e.args.get('delete') else 'PRESERVE'} ROWS" ), exp.OnProperty: lambda self, e: f"ON {self.sql(e, 'this')}", exp.OnUpdateColumnConstraint: lambda self, e: f"ON UPDATE {self.sql(e, 'this')}", exp.Operator: lambda self, e: self.binary(e, ""), # The operator is produced in `binary` exp.OutputModelProperty: lambda self, e: f"OUTPUT{self.sql(e, 'this')}", exp.ExtendsLeft: lambda self, e: self.binary(e, "&<"), exp.ExtendsRight: lambda self, e: self.binary(e, "&>"), 
exp.PathColumnConstraint: lambda self, e: f"PATH {self.sql(e, 'this')}", exp.PartitionedByBucket: lambda self, e: self.func("BUCKET", e.this, e.expression), exp.PartitionByTruncate: lambda self, e: self.func("TRUNCATE", e.this, e.expression), exp.PivotAny: lambda self, e: f"ANY{self.sql(e, 'this')}", exp.PositionalColumn: lambda self, e: f"#{self.sql(e, 'this')}", exp.ProjectionPolicyColumnConstraint: lambda self, e: ( f"PROJECTION POLICY {self.sql(e, 'this')}" ), exp.ZeroFillColumnConstraint: lambda self, e: "ZEROFILL", exp.Put: lambda self, e: self.get_put_sql(e), exp.RemoteWithConnectionModelProperty: lambda self, e: ( f"REMOTE WITH CONNECTION {self.sql(e, 'this')}" ), exp.ReturnsProperty: lambda self, e: ( "RETURNS NULL ON NULL INPUT" if e.args.get("null") else self.naked_property(e) ), exp.RowAccessProperty: lambda *_: "ROW ACCESS", exp.SafeFunc: lambda self, e: f"SAFE.{self.sql(e, 'this')}", exp.SampleProperty: lambda self, e: f"SAMPLE BY {self.sql(e, 'this')}", exp.SecureProperty: lambda *_: "SECURE", exp.SecurityIntegrationProperty: lambda *_: "SECURITY", exp.SetConfigProperty: lambda self, e: self.sql(e, "this"), exp.SetProperty: lambda _, e: f"{'MULTI' if e.args.get('multi') else ''}SET", exp.SettingsProperty: lambda self, e: f"SETTINGS{self.seg('')}{(self.expressions(e))}", exp.SharingProperty: lambda self, e: f"SHARING={self.sql(e, 'this')}", exp.SqlReadWriteProperty: lambda _, e: e.name, exp.SqlSecurityProperty: lambda self, e: f"SQL SECURITY {self.sql(e, 'this')}", exp.StabilityProperty: lambda _, e: e.name, exp.Stream: lambda self, e: f"STREAM {self.sql(e, 'this')}", exp.StreamingTableProperty: lambda *_: "STREAMING", exp.StrictProperty: lambda *_: "STRICT", exp.SwapTable: lambda self, e: f"SWAP WITH {self.sql(e, 'this')}", exp.TableColumn: lambda self, e: self.sql(e.this), exp.Tags: lambda self, e: f"TAG ({self.expressions(e, flat=True)})", exp.TemporaryProperty: lambda *_: "TEMPORARY", exp.TitleColumnConstraint: lambda self, e: f"TITLE {self.sql(e, 
'this')}", exp.ToMap: lambda self, e: f"MAP {self.sql(e, 'this')}", exp.ToTableProperty: lambda self, e: f"TO {self.sql(e.this)}", exp.TransformModelProperty: lambda self, e: self.func("TRANSFORM", *e.expressions), exp.TransientProperty: lambda *_: "TRANSIENT", exp.TriggerExecute: lambda self, e: f"EXECUTE FUNCTION {self.sql(e, 'this')}", exp.Union: lambda self, e: self.set_operations(e), exp.UnloggedProperty: lambda *_: "UNLOGGED", exp.UsingTemplateProperty: lambda self, e: f"USING TEMPLATE {self.sql(e, 'this')}", exp.UsingData: lambda self, e: f"USING DATA {self.sql(e, 'this')}", exp.UppercaseColumnConstraint: lambda *_: "UPPERCASE", exp.UtcDate: lambda self, e: self.sql(exp.CurrentDate(this=exp.Literal.string("UTC"))), exp.UtcTime: lambda self, e: self.sql(exp.CurrentTime(this=exp.Literal.string("UTC"))), exp.UtcTimestamp: lambda self, e: self.sql( exp.CurrentTimestamp(this=exp.Literal.string("UTC")) ), exp.Variadic: lambda self, e: f"VARIADIC {self.sql(e, 'this')}", exp.VarMap: lambda self, e: self.func("MAP", e.args["keys"], e.args["values"]), exp.ViewAttributeProperty: lambda self, e: f"WITH {self.sql(e, 'this')}", exp.VolatileProperty: lambda *_: "VOLATILE", exp.WithJournalTableProperty: lambda self, e: f"WITH JOURNAL TABLE={self.sql(e, 'this')}", exp.WithProcedureOptions: lambda self, e: f"WITH {self.expressions(e, flat=True)}", exp.WithSchemaBindingProperty: lambda self, e: f"WITH SCHEMA {self.sql(e, 'this')}", exp.WithOperator: lambda self, e: f"{self.sql(e, 'this')} WITH {self.sql(e, 'op')}", exp.ForceProperty: lambda *_: "FORCE", } # Whether null ordering is supported in order by # True: Full Support, None: No support, False: No support for certain cases # such as window specifications, aggregate functions etc NULL_ORDERING_SUPPORTED: t.Optional[bool] = True # Window functions that support NULLS FIRST/LAST WINDOW_FUNCS_WITH_NULL_ORDERING: t.ClassVar[t.Tuple[t.Type[exp.Expression], ...]] = () # Whether ignore nulls is inside the agg or outside. 
# FIRST(x IGNORE NULLS) OVER vs FIRST (x) IGNORE NULLS OVER IGNORE_NULLS_IN_FUNC = False # Whether IGNORE NULLS is placed before ORDER BY in the agg. # FIRST(x IGNORE NULLS ORDER BY y) vs FIRST(x ORDER BY y IGNORE NULLS) IGNORE_NULLS_BEFORE_ORDER = True # Whether locking reads (i.e. SELECT ... FOR UPDATE/SHARE) are supported LOCKING_READS_SUPPORTED = False # Whether the EXCEPT and INTERSECT operations can return duplicates EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = True # Wrap derived values in parens, usually standard but spark doesn't support it WRAP_DERIVED_VALUES = True # Whether create function uses an AS before the RETURN CREATE_FUNCTION_RETURN_AS = True # Whether MERGE ... WHEN MATCHED BY SOURCE is allowed MATCHED_BY_SOURCE = True # Whether the INTERVAL expression works only with values like '1 day' SINGLE_STRING_INTERVAL = False # Whether the plural form of date parts like day (i.e. "days") is supported in INTERVALs INTERVAL_ALLOWS_PLURAL_FORM = True # Whether limit and fetch are supported (possible values: "ALL", "LIMIT", "FETCH") LIMIT_FETCH = "ALL" # Whether limit and fetch allow expressions or just literals LIMIT_ONLY_LITERALS = False # Whether a table is allowed to be renamed with a db RENAME_TABLE_WITH_DB = True # The separator for grouping sets and rollups GROUPINGS_SEP = "," # The string used for creating an index on a table INDEX_ON = "ON" # Separator for IN/OUT parameter mode (Oracle uses " " for "IN OUT", PostgreSQL uses "" for "INOUT") INOUT_SEPARATOR = " " # Whether join hints should be generated JOIN_HINTS = True # Whether directed joins are supported DIRECTED_JOINS = False # Whether table hints should be generated TABLE_HINTS = True # Whether query hints should be generated QUERY_HINTS = True # What kind of separator to use for query hints QUERY_HINT_SEP = ", " # Whether comparing against booleans (e.g. x IS TRUE) is supported IS_BOOL_ALLOWED = True # Whether to include the "SET" keyword in the "INSERT ...
ON DUPLICATE KEY UPDATE" statement DUPLICATE_KEY_UPDATE_WITH_SET = True # Whether to generate the limit as TOP instead of LIMIT LIMIT_IS_TOP = False # Whether to generate INSERT INTO ... RETURNING or INSERT INTO RETURNING ... RETURNING_END = True # Whether to generate an unquoted value for EXTRACT's date part argument EXTRACT_ALLOWS_QUOTES = True # Whether TIMETZ / TIMESTAMPTZ will be generated using the "WITH TIME ZONE" syntax TZ_TO_WITH_TIME_ZONE = False # Whether the NVL2 function is supported NVL2_SUPPORTED = True # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax SELECT_KINDS: t.Tuple[str, ...] = ("STRUCT", "VALUE") # Whether VALUES statements can be used as derived tables. # MySQL 5 and Redshift do not allow this, so when False, it will convert # SELECT * VALUES into SELECT UNION VALUES_AS_TABLE = True # Whether the word COLUMN is included when adding a column with ALTER TABLE ALTER_TABLE_INCLUDE_COLUMN_KEYWORD = True # UNNEST WITH ORDINALITY (presto) instead of UNNEST WITH OFFSET (bigquery) UNNEST_WITH_ORDINALITY = True # Whether FILTER (WHERE cond) can be used for conditional aggregation AGGREGATE_FILTER_SUPPORTED = True # Whether JOIN sides (LEFT, RIGHT) are supported in conjunction with SEMI/ANTI join kinds SEMI_ANTI_JOIN_WITH_SIDE = True # Whether to include the type of a computed column in the CREATE DDL COMPUTED_COLUMN_WITH_TYPE = True # Whether CREATE TABLE .. COPY .. is supported. 
False means we'll generate CLONE instead of COPY SUPPORTS_TABLE_COPY = True # Whether parentheses are required around the table sample's expression TABLESAMPLE_REQUIRES_PARENS = True # Whether a table sample clause's size needs to be followed by the ROWS keyword TABLESAMPLE_SIZE_IS_ROWS = True # The keyword(s) to use when generating a sample clause TABLESAMPLE_KEYWORDS = "TABLESAMPLE" # Whether the TABLESAMPLE clause supports a method name, like BERNOULLI TABLESAMPLE_WITH_METHOD = True # The keyword to use when specifying the seed of a sample clause TABLESAMPLE_SEED_KEYWORD = "SEED" # Whether COLLATE is a function instead of a binary operator COLLATE_IS_FUNC = False # Whether data types support additional specifiers like e.g. CHAR or BYTE (oracle) DATA_TYPE_SPECIFIERS_ALLOWED = False # Whether conditions require booleans WHERE x = 0 vs WHERE x ENSURE_BOOLS = False # Whether the "RECURSIVE" keyword is required when defining recursive CTEs CTE_RECURSIVE_KEYWORD_REQUIRED = True # Whether CONCAT requires >1 arguments SUPPORTS_SINGLE_ARG_CONCAT = True # Whether LAST_DAY function supports a date part argument LAST_DAY_SUPPORTS_DATE_PART = True # Whether named columns are allowed in table aliases SUPPORTS_TABLE_ALIAS_COLUMNS = True # Whether UNPIVOT aliases are Identifiers (False means they're Literals) UNPIVOT_ALIASES_ARE_IDENTIFIERS = True # What delimiter to use for separating JSON key/value pairs JSON_KEY_VALUE_PAIR_SEP = ":" # INSERT OVERWRITE TABLE x override INSERT_OVERWRITE = " OVERWRITE TABLE" # Whether the SELECT .. INTO syntax is used instead of CTAS SUPPORTS_SELECT_INTO = False # Whether UNLOGGED tables can be created SUPPORTS_UNLOGGED_TABLES = False # Whether the CREATE TABLE LIKE statement is supported SUPPORTS_CREATE_TABLE_LIKE = True # Whether the LikeProperty needs to be specified inside of the schema clause LIKE_PROPERTY_INSIDE_SCHEMA = False # Whether DISTINCT can be followed by multiple args in an AggFunc. 
If not, it will be # transpiled into a series of CASE-WHEN-ELSE, ultimately using a tuple consisting of the args MULTI_ARG_DISTINCT = True # Whether the JSON extraction operators expect a value of type JSON JSON_TYPE_REQUIRED_FOR_EXTRACTION = False # Whether bracketed keys like ["foo"] are supported in JSON paths JSON_PATH_BRACKETED_KEY_SUPPORTED = True # Whether to escape keys using single quotes in JSON paths JSON_PATH_SINGLE_QUOTE_ESCAPE = False # The JSONPathPart expressions supported by this dialect SUPPORTED_JSON_PATH_PARTS = ALL_JSON_PATH_PARTS.copy() # Whether any(f(x) for x in array) can be implemented by this dialect CAN_IMPLEMENT_ARRAY_ANY = False # Whether the function TO_NUMBER is supported SUPPORTS_TO_NUMBER = True # Whether EXCLUDE in window specification is supported SUPPORTS_WINDOW_EXCLUDE = False # Whether or not set op modifiers apply to the outer set op or select. # SELECT * FROM x UNION SELECT * FROM y LIMIT 1 # True means limit 1 happens after the set op, False means it happens on y.
SET_OP_MODIFIERS = True # Whether parameters from COPY statement are wrapped in parentheses COPY_PARAMS_ARE_WRAPPED = True # Whether values of params are set with "=" token or empty space COPY_PARAMS_EQ_REQUIRED = False # Whether COPY statement has INTO keyword COPY_HAS_INTO_KEYWORD = True # Whether the conditional TRY(expression) function is supported TRY_SUPPORTED = True # Whether the UESCAPE syntax in unicode strings is supported SUPPORTS_UESCAPE = True # Function used to replace escaped unicode codes in unicode strings UNICODE_SUBSTITUTE: t.Optional[t.Callable[[re.Match[str]], str]] = None # The keyword to use when generating a star projection with excluded columns STAR_EXCEPT = "EXCEPT" # The HEX function name HEX_FUNC = "HEX" # The keywords to use when prefixing & separating WITH based properties WITH_PROPERTIES_PREFIX = "WITH" # Whether to quote the generated expression of exp.JsonPath QUOTE_JSON_PATH = True # Whether the text pattern/fill (3rd) parameter of RPAD()/LPAD() is optional (defaults to space) PAD_FILL_PATTERN_IS_REQUIRED = False # Whether a projection can explode into multiple rows, e.g. by unnesting an array. SUPPORTS_EXPLODING_PROJECTIONS = True # Whether ARRAY_CONCAT can be generated with varlen args or if it should be reduced to 2-arg version ARRAY_CONCAT_IS_VAR_LEN = True # Whether CONVERT_TIMEZONE() is supported; if not, it will be generated as exp.AtTimeZone SUPPORTS_CONVERT_TIMEZONE = False # Whether MEDIAN(expr) is supported; if not, it will be generated as PERCENTILE_CONT(expr, 0.5) SUPPORTS_MEDIAN = True # Whether UNIX_SECONDS(timestamp) is supported SUPPORTS_UNIX_SECONDS = False # Whether to wrap in `AlterSet`, e.g., ALTER ... SET () ALTER_SET_WRAPPED = False # Whether to normalize the date parts in EXTRACT( FROM ) into a common representation # For instance, to extract the day of week in ISO semantics, one can use ISODOW, DAYOFWEEKISO etc depending on the dialect. 
# TODO: The normalization should be done by default once we've tested it across all dialects. NORMALIZE_EXTRACT_DATE_PARTS = False # The name to generate for the JSONPath expression. If `None`, only `this` will be generated PARSE_JSON_NAME: t.Optional[str] = "PARSE_JSON" # The function name of the exp.ArraySize expression ARRAY_SIZE_NAME: str = "ARRAY_LENGTH" # The syntax to use when altering the type of a column ALTER_SET_TYPE = "SET DATA TYPE" # Whether exp.ArraySize should generate the dimension arg too (valid for Postgres & DuckDB) # None -> Doesn't support it at all # False (DuckDB) -> Has backwards-compatible support, but preferably generated without # True (Postgres) -> Explicitly requires it ARRAY_SIZE_DIM_REQUIRED: t.Optional[bool] = None # Whether a multi-argument DECODE(...) function is supported. If not, a CASE expression is generated SUPPORTS_DECODE_CASE = True # Whether SYMMETRIC and ASYMMETRIC flags are supported with BETWEEN expression SUPPORTS_BETWEEN_FLAGS = False # Whether LIKE and ILIKE support quantifiers such as LIKE ANY/ALL/SOME SUPPORTS_LIKE_QUANTIFIERS = True # Prefix which is appended to exp.Table expressions in MATCH AGAINST MATCH_AGAINST_TABLE_PREFIX: t.Optional[str] = None # Whether to include the VARIABLE keyword for SET assignments SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = False # The keyword to use for default value assignment in DECLARE statements DECLARE_DEFAULT_ASSIGNMENT = "=" # Whether FROM is supported in UPDATE statements or if joins must be generated instead, e.g: # Supported (Postgres, Doris etc): UPDATE t1 SET t1.a = t2.b FROM t2 # Unsupported (MySQL, SingleStore): UPDATE t1 JOIN t2 ON TRUE SET t1.a = t2.b UPDATE_STATEMENT_SUPPORTS_FROM = True # Whether SELECT *, ... EXCLUDE requires wrapping in a subquery for transpilation. 
STAR_EXCLUDE_REQUIRES_DERIVED_TABLE = True TYPE_MAPPING = { exp.DType.DATETIME2: "TIMESTAMP", exp.DType.NCHAR: "CHAR", exp.DType.NVARCHAR: "VARCHAR", exp.DType.MEDIUMTEXT: "TEXT", exp.DType.LONGTEXT: "TEXT", exp.DType.TINYTEXT: "TEXT", exp.DType.BLOB: "VARBINARY", exp.DType.MEDIUMBLOB: "BLOB", exp.DType.LONGBLOB: "BLOB", exp.DType.TINYBLOB: "BLOB", exp.DType.INET: "INET", exp.DType.ROWVERSION: "VARBINARY", exp.DType.SMALLDATETIME: "TIMESTAMP", } UNSUPPORTED_TYPES: set[exp.DType] = set() TIME_PART_SINGULARS = { "MICROSECONDS": "MICROSECOND", "SECONDS": "SECOND", "MINUTES": "MINUTE", "HOURS": "HOUR", "DAYS": "DAY", "WEEKS": "WEEK", "MONTHS": "MONTH", "QUARTERS": "QUARTER", "YEARS": "YEAR", } AFTER_HAVING_MODIFIER_TRANSFORMS = { "cluster": lambda self, e: self.sql(e, "cluster"), "distribute": lambda self, e: self.sql(e, "distribute"), "sort": lambda self, e: self.sql(e, "sort"), "windows": lambda self, e: ( self.seg("WINDOW ") + self.expressions(e, key="windows", flat=True) if e.args.get("windows") else "" ), "qualify": lambda self, e: self.sql(e, "qualify"), } TOKEN_MAPPING: t.Dict[TokenType, str] = {} STRUCT_DELIMITER = ("<", ">") PARAMETER_TOKEN = "@" NAMED_PLACEHOLDER_TOKEN = ":" EXPRESSION_PRECEDES_PROPERTIES_CREATABLES: t.Set[str] = set() PROPERTIES_LOCATION = { exp.AllowedValuesProperty: exp.Properties.Location.POST_SCHEMA, exp.AlgorithmProperty: exp.Properties.Location.POST_CREATE, exp.ApiProperty: exp.Properties.Location.POST_CREATE, exp.ApplicationProperty: exp.Properties.Location.POST_CREATE, exp.AutoIncrementProperty: exp.Properties.Location.POST_SCHEMA, exp.AutoRefreshProperty: exp.Properties.Location.POST_SCHEMA, exp.BackupProperty: exp.Properties.Location.POST_SCHEMA, exp.BlockCompressionProperty: exp.Properties.Location.POST_NAME, exp.CatalogProperty: exp.Properties.Location.POST_CREATE, exp.CharacterSetProperty: exp.Properties.Location.POST_SCHEMA, exp.ChecksumProperty: exp.Properties.Location.POST_NAME, exp.CollateProperty: 
exp.Properties.Location.POST_SCHEMA, exp.ComputeProperty: exp.Properties.Location.POST_CREATE, exp.CopyGrantsProperty: exp.Properties.Location.POST_SCHEMA, exp.Cluster: exp.Properties.Location.POST_SCHEMA, exp.ClusteredByProperty: exp.Properties.Location.POST_SCHEMA, exp.DistributedByProperty: exp.Properties.Location.POST_SCHEMA, exp.DuplicateKeyProperty: exp.Properties.Location.POST_SCHEMA, exp.DataBlocksizeProperty: exp.Properties.Location.POST_NAME, exp.DatabaseProperty: exp.Properties.Location.POST_CREATE, exp.DataDeletionProperty: exp.Properties.Location.POST_SCHEMA, exp.DefinerProperty: exp.Properties.Location.POST_CREATE, exp.DictRange: exp.Properties.Location.POST_SCHEMA, exp.DictProperty: exp.Properties.Location.POST_SCHEMA, exp.DynamicProperty: exp.Properties.Location.POST_CREATE, exp.DistKeyProperty: exp.Properties.Location.POST_SCHEMA, exp.DistStyleProperty: exp.Properties.Location.POST_SCHEMA, exp.EmptyProperty: exp.Properties.Location.POST_SCHEMA, exp.EncodeProperty: exp.Properties.Location.POST_EXPRESSION, exp.EngineProperty: exp.Properties.Location.POST_SCHEMA, exp.EnviromentProperty: exp.Properties.Location.POST_SCHEMA, exp.HandlerProperty: exp.Properties.Location.POST_SCHEMA, exp.ParameterStyleProperty: exp.Properties.Location.POST_SCHEMA, exp.ExecuteAsProperty: exp.Properties.Location.POST_SCHEMA, exp.ExternalProperty: exp.Properties.Location.POST_CREATE, exp.FallbackProperty: exp.Properties.Location.POST_NAME, exp.FileFormatProperty: exp.Properties.Location.POST_WITH, exp.FreespaceProperty: exp.Properties.Location.POST_NAME, exp.GlobalProperty: exp.Properties.Location.POST_CREATE, exp.HeapProperty: exp.Properties.Location.POST_WITH, exp.HybridProperty: exp.Properties.Location.POST_CREATE, exp.InheritsProperty: exp.Properties.Location.POST_SCHEMA, exp.IcebergProperty: exp.Properties.Location.POST_CREATE, exp.IncludeProperty: exp.Properties.Location.POST_SCHEMA, exp.InputModelProperty: exp.Properties.Location.POST_SCHEMA, 
exp.IsolatedLoadingProperty: exp.Properties.Location.POST_NAME, exp.JournalProperty: exp.Properties.Location.POST_NAME, exp.LanguageProperty: exp.Properties.Location.POST_SCHEMA, exp.LikeProperty: exp.Properties.Location.POST_SCHEMA, exp.LocationProperty: exp.Properties.Location.POST_SCHEMA, exp.LockProperty: exp.Properties.Location.POST_SCHEMA, exp.LockingProperty: exp.Properties.Location.POST_ALIAS, exp.LogProperty: exp.Properties.Location.POST_NAME, exp.MaskingProperty: exp.Properties.Location.POST_CREATE, exp.MaterializedProperty: exp.Properties.Location.POST_CREATE, exp.MergeBlockRatioProperty: exp.Properties.Location.POST_NAME, exp.NetworkProperty: exp.Properties.Location.POST_CREATE, exp.NoPrimaryIndexProperty: exp.Properties.Location.POST_EXPRESSION, exp.OnProperty: exp.Properties.Location.POST_SCHEMA, exp.OnCommitProperty: exp.Properties.Location.POST_EXPRESSION, exp.Order: exp.Properties.Location.POST_SCHEMA, exp.OutputModelProperty: exp.Properties.Location.POST_SCHEMA, exp.PartitionedByProperty: exp.Properties.Location.POST_WITH, exp.PartitionedOfProperty: exp.Properties.Location.POST_SCHEMA, exp.PrimaryKey: exp.Properties.Location.POST_SCHEMA, exp.Property: exp.Properties.Location.POST_WITH, exp.RefreshTriggerProperty: exp.Properties.Location.POST_SCHEMA, exp.RemoteWithConnectionModelProperty: exp.Properties.Location.POST_SCHEMA, exp.ReturnsProperty: exp.Properties.Location.POST_SCHEMA, exp.RollupProperty: exp.Properties.Location.UNSUPPORTED, exp.RowAccessProperty: exp.Properties.Location.POST_CREATE, exp.RowFormatProperty: exp.Properties.Location.POST_SCHEMA, exp.RowFormatDelimitedProperty: exp.Properties.Location.POST_SCHEMA, exp.RowFormatSerdeProperty: exp.Properties.Location.POST_SCHEMA, exp.SampleProperty: exp.Properties.Location.POST_SCHEMA, exp.SchemaCommentProperty: exp.Properties.Location.POST_SCHEMA, exp.SecureProperty: exp.Properties.Location.POST_CREATE, exp.SecurityIntegrationProperty: exp.Properties.Location.POST_CREATE, 
exp.SerdeProperties: exp.Properties.Location.POST_SCHEMA, exp.Set: exp.Properties.Location.POST_SCHEMA, exp.SettingsProperty: exp.Properties.Location.POST_SCHEMA, exp.SetProperty: exp.Properties.Location.POST_CREATE, exp.SetConfigProperty: exp.Properties.Location.POST_SCHEMA, exp.SharingProperty: exp.Properties.Location.POST_EXPRESSION, exp.SequenceProperties: exp.Properties.Location.POST_EXPRESSION, exp.TriggerProperties: exp.Properties.Location.POST_EXPRESSION, exp.SortKeyProperty: exp.Properties.Location.POST_SCHEMA, exp.SqlReadWriteProperty: exp.Properties.Location.POST_SCHEMA, exp.SqlSecurityProperty: exp.Properties.Location.POST_SCHEMA, exp.StabilityProperty: exp.Properties.Location.POST_SCHEMA, exp.StorageHandlerProperty: exp.Properties.Location.POST_SCHEMA, exp.StreamingTableProperty: exp.Properties.Location.POST_CREATE, exp.StrictProperty: exp.Properties.Location.POST_SCHEMA, exp.Tags: exp.Properties.Location.POST_WITH, exp.TemporaryProperty: exp.Properties.Location.POST_CREATE, exp.ToTableProperty: exp.Properties.Location.POST_SCHEMA, exp.TransientProperty: exp.Properties.Location.POST_CREATE, exp.TransformModelProperty: exp.Properties.Location.POST_SCHEMA, exp.MergeTreeTTL: exp.Properties.Location.POST_SCHEMA, exp.UnloggedProperty: exp.Properties.Location.POST_CREATE, exp.UsingTemplateProperty: exp.Properties.Location.POST_SCHEMA, exp.ViewAttributeProperty: exp.Properties.Location.POST_SCHEMA, exp.VolatileProperty: exp.Properties.Location.POST_CREATE, exp.WithDataProperty: exp.Properties.Location.POST_EXPRESSION, exp.WithJournalTableProperty: exp.Properties.Location.POST_NAME, exp.WithProcedureOptions: exp.Properties.Location.POST_SCHEMA, exp.WithSchemaBindingProperty: exp.Properties.Location.POST_SCHEMA, exp.WithSystemVersioningProperty: exp.Properties.Location.POST_SCHEMA, exp.ForceProperty: exp.Properties.Location.POST_CREATE, } # Keywords that can't be used as unquoted identifier names RESERVED_KEYWORDS: t.Set[str] = set() # Exprs whose comments are 
separated from them for better formatting WITH_SEPARATED_COMMENTS: t.Tuple[t.Type[exp.Expr], ...] = ( exp.Command, exp.Create, exp.Describe, exp.Delete, exp.Drop, exp.From, exp.Insert, exp.Join, exp.MultitableInserts, exp.Order, exp.Group, exp.Having, exp.Select, exp.SetOperation, exp.Update, exp.Where, exp.With, ) # Exprs that should not have their comments generated in maybe_comment EXCLUDE_COMMENTS: t.Tuple[t.Type[exp.Expr], ...] = ( exp.Binary, exp.SetOperation, ) # Exprs that can remain unwrapped when appearing in the context of an INTERVAL UNWRAPPED_INTERVAL_VALUES: t.Tuple[t.Type[exp.Expr], ...] = ( exp.Column, exp.Literal, exp.Neg, exp.Paren, ) PARAMETERIZABLE_TEXT_TYPES = { exp.DType.NVARCHAR, exp.DType.VARCHAR, exp.DType.CHAR, exp.DType.NCHAR, } # Exprs that need to have all CTEs under them bubbled up to them EXPRESSIONS_WITHOUT_NESTED_CTES: t.Set[t.Type[exp.Expr]] = set() RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS: t.Tuple[t.Type[exp.Expr], ...] = () SAFE_JSON_PATH_KEY_RE = exp.SAFE_IDENTIFIER_RE SENTINEL_LINE_BREAK = "__SQLGLOT__LB__" __slots__ = ( "pretty", "identify", "normalize", "pad", "_indent", "normalize_functions", "unsupported_level", "max_unsupported", "leading_comma", "max_text_width", "comments", "dialect", "unsupported_messages", "_escaped_quote_end", "_escaped_byte_quote_end", "_escaped_identifier_end", "_next_name", "_identifier_start", "_identifier_end", "_quote_json_path_key_using_brackets", ) def __init__( self, pretty: t.Optional[bool] = None, identify: str | bool = False, normalize: bool = False, pad: int = 2, indent: int = 2, normalize_functions: t.Optional[str | bool] = None, unsupported_level: ErrorLevel = ErrorLevel.WARN, max_unsupported: int = 3, leading_comma: bool = False, max_text_width: int = 80, comments: bool = True, dialect: DialectType = None, ): import sqlglot from sqlglot.dialects import Dialect self.pretty = pretty if pretty is not None else sqlglot.pretty self.identify = identify self.normalize = normalize self.pad 
def generate(self, expression: exp.Expr, copy: bool = True) -> str:
    """
    Render a syntax tree as a SQL string.

    Args:
        expression: Root of the syntax tree to render.
        copy: When true, a copy of `expression` is preprocessed and rendered instead
            of the object that was passed in.

    Returns:
        The rendered SQL with surrounding whitespace stripped. In pretty mode the
        internal line-break sentinel is replaced with real newlines.

    Raises:
        UnsupportedError: If `unsupported_level` is `ErrorLevel.RAISE` and at least
            one unsupported message was recorded while rendering.
    """
    tree = self.preprocess(expression.copy() if copy else expression)

    # Start from a clean slate so messages from an earlier call are not reported again.
    self.unsupported_messages = []

    rendered = self.sql(tree).strip()
    if self.pretty:
        rendered = rendered.replace(self.SENTINEL_LINE_BREAK, "\n")

    level = self.unsupported_level
    if level == ErrorLevel.WARN:
        for message in self.unsupported_messages:
            logger.warning(message)
    elif level == ErrorLevel.RAISE and self.unsupported_messages:
        raise UnsupportedError(concat_messages(self.unsupported_messages, self.max_unsupported))

    # Any other level (including ErrorLevel.IGNORE) returns the SQL without reporting.
    return rendered
def no_identify(self, func: t.Callable[..., str], *args, **kwargs) -> str:
    """
    Call `func(*args, **kwargs)` with `self.identify` temporarily set to False.

    Args:
        func: The callable to invoke while `self.identify` is False.
        *args: Positional arguments forwarded to `func`.
        **kwargs: Keyword arguments forwarded to `func`.

    Returns:
        Whatever `func` returns.

    Raises:
        Any exception raised by `func`; the previous `self.identify` value is
        restored before it propagates.
    """
    original = self.identify
    self.identify = False
    try:
        return func(*args, **kwargs)
    finally:
        # Restore even on failure so an error inside `func` cannot leave the
        # generator permanently switched to identify=False.
        self.identify = original
def cache_sql(self, expression: exp.Cache) -> str:
    """
    Render a `CACHE [LAZY] TABLE <table> [OPTIONS(k = v)] [AS <query>]` statement.

    The optional parts are emitted only when the matching arg ("lazy", "options",
    "expression") is set; "options" is read as a pair whose first two items are the
    key and the value. The finished statement is passed through `prepend_ctes`.
    """
    args = expression.args
    pieces = ["CACHE"]

    if args.get("lazy"):
        pieces.append(" LAZY")

    pieces.append(f" TABLE {self.sql(expression, 'this')}")

    option_pair = args.get("options")
    if option_pair:
        option_key = self.sql(option_pair[0])
        option_value = self.sql(option_pair[1])
        pieces.append(f" OPTIONS({option_key} = {option_value})")

    query = self.sql(expression, "expression")
    if query:
        pieces.append(f" AS{self.sep()}{query}")

    return self.prepend_ctes(expression, "".join(pieces))
def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
    """Render a computed-column constraint as ``AS <expr>[ PERSISTED[ NOT NULL]]``.

    ``not_null`` takes precedence over ``persisted``: when it is set the
    suffix is always ``PERSISTED NOT NULL``.
    """
    body = self.sql(expression, "this")
    flags = expression.args

    suffix = ""
    if flags.get("not_null"):
        suffix = " PERSISTED NOT NULL"
    elif flags.get("persisted"):
        suffix = " PERSISTED"

    return "AS " + body + suffix
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render a ``GENERATED ... AS IDENTITY`` / ``GENERATED ... AS (<expr>)`` constraint.

    ``expression.this`` selects the generation mode: ``True`` gives ``ALWAYS``,
    ``False`` gives ``BY DEFAULT`` (plus ``ON NULL`` when ``on_null`` is set),
    and ``None`` emits neither.

    The sequence options (``start``, ``increment``, ``minvalue``, ``maxvalue``,
    ``cycle``) are emitted space-separated inside parentheses whenever at least
    one of them is present.

    Returns:
        The constraint SQL.
    """
    this = ""
    if expression.this is not None:
        on_null = " ON NULL" if expression.args.get("on_null") else ""
        this = " ALWAYS" if expression.this else f" BY DEFAULT{on_null}"

    # Collect each option as its own token so the spacing is right no matter
    # which subset is present. Previously MINVALUE/MAXVALUE were dropped unless
    # START/INCREMENT/CYCLE was also given, and CYCLE could be glued onto them.
    options = []
    for keyword, key in (
        ("START WITH", "start"),
        ("INCREMENT BY", "increment"),
        ("MINVALUE", "minvalue"),
        ("MAXVALUE", "maxvalue"),
    ):
        value = expression.args.get(key)
        if value:
            options.append(f"{keyword} {value}")

    cycle = expression.args.get("cycle")
    if cycle is not None:
        # ``cycle`` is tri-state: None means unspecified, False means NO CYCLE.
        options.append("CYCLE" if cycle else "NO CYCLE")

    sequence_opts = f" ({' '.join(options)})" if options else ""

    expr = self.sql(expression, "expression")
    expr = f"({expr})" if expr else "IDENTITY"

    return f"GENERATED{this} AS {expr}{sequence_opts}"
def create_sql(self, expression: exp.Create) -> str:
    """Generate the SQL text for a ``CREATE`` statement.

    Properties are grouped by location via ``locate_properties`` and each group
    is rendered at its own position in the final statement.

    Args:
        expression: The ``exp.Create`` node to render.

    Returns:
        The ``CREATE ...`` statement, passed through ``prepend_ctes``.
    """
    kind = self.sql(expression, "kind")
    # Translate the kind through the dialect's inverse mapping, keeping the
    # rendered kind when there is no entry for it.
    kind = self.dialect.INVERSE_CREATABLE_KIND_MAPPING.get(kind) or kind

    properties = expression.args.get("properties")

    # A trigger whose first property is a TriggerProperties with ``constraint``
    # set is emitted as CONSTRAINT TRIGGER.
    if (
        kind == "TRIGGER"
        and properties
        and properties.expressions
        and isinstance(properties.expressions[0], exp.TriggerProperties)
        and properties.expressions[0].args.get("constraint")
    ):
        kind = f"CONSTRAINT {kind}"

    # With no properties this is an empty mapping; it is only read through
    # ``.get`` below unless a location is known to be populated.
    properties_locs = self.locate_properties(properties) if properties else defaultdict()

    this = self.createable_sql(expression, properties_locs)

    # POST_SCHEMA and POST_WITH properties are rendered together through a
    # temporary Properties node parented to this CREATE expression.
    properties_sql = ""
    if properties_locs.get(exp.Properties.Location.POST_SCHEMA) or properties_locs.get(
        exp.Properties.Location.POST_WITH
    ):
        props_ast = exp.Properties(
            expressions=[
                *properties_locs[exp.Properties.Location.POST_SCHEMA],
                *properties_locs[exp.Properties.Location.POST_WITH],
            ]
        )
        props_ast.parent = expression
        properties_sql = self.sql(props_ast)

        if properties_locs.get(exp.Properties.Location.POST_SCHEMA):
            properties_sql = self.sep() + properties_sql
        elif not self.pretty:
            # Standalone POST_WITH properties need a leading whitespace in non-pretty mode
            properties_sql = f" {properties_sql}"

    begin = " BEGIN" if expression.args.get("begin") else ""

    expression_sql = self.sql(expression, "expression")
    if expression_sql:
        expression_sql = f"{begin}{self.sep()}{expression_sql}"

        # The AS keyword (and any POST_ALIAS properties) is skipped only when the
        # body is an exp.Return and CREATE_FUNCTION_RETURN_AS is falsy.
        if self.CREATE_FUNCTION_RETURN_AS or not isinstance(expression.expression, exp.Return):
            postalias_props_sql = ""
            if properties_locs.get(exp.Properties.Location.POST_ALIAS):
                postalias_props_sql = self.properties(
                    exp.Properties(
                        expressions=properties_locs[exp.Properties.Location.POST_ALIAS]
                    ),
                    wrapped=False,
                )
            postalias_props_sql = f" {postalias_props_sql}" if postalias_props_sql else ""
            expression_sql = f" AS{postalias_props_sql}{expression_sql}"

    postindex_props_sql = ""
    if properties_locs.get(exp.Properties.Location.POST_INDEX):
        postindex_props_sql = self.properties(
            exp.Properties(expressions=properties_locs[exp.Properties.Location.POST_INDEX]),
            wrapped=False,
            prefix=" ",
        )

    indexes = self.expressions(expression, key="indexes", indent=False, sep=" ")
    indexes = f" {indexes}" if indexes else ""
    index_sql = indexes + postindex_props_sql

    replace = " OR REPLACE" if expression.args.get("replace") else ""
    refresh = " OR REFRESH" if expression.args.get("refresh") else ""
    unique = " UNIQUE" if expression.args.get("unique") else ""

    # ``clustered`` is tri-state: None emits nothing, truthy emits CLUSTERED
    # COLUMNSTORE and any other value emits NONCLUSTERED COLUMNSTORE.
    clustered = expression.args.get("clustered")
    if clustered is None:
        clustered_sql = ""
    elif clustered:
        clustered_sql = " CLUSTERED COLUMNSTORE"
    else:
        clustered_sql = " NONCLUSTERED COLUMNSTORE"

    postcreate_props_sql = ""
    if properties_locs.get(exp.Properties.Location.POST_CREATE):
        postcreate_props_sql = self.properties(
            exp.Properties(expressions=properties_locs[exp.Properties.Location.POST_CREATE]),
            sep=" ",
            prefix=" ",
            wrapped=False,
        )

    # Everything that sits between CREATE and the object kind.
    modifiers = "".join((clustered_sql, replace, refresh, unique, postcreate_props_sql))

    postexpression_props_sql = ""
    if properties_locs.get(exp.Properties.Location.POST_EXPRESSION):
        postexpression_props_sql = self.properties(
            exp.Properties(
                expressions=properties_locs[exp.Properties.Location.POST_EXPRESSION]
            ),
            sep=" ",
            prefix=" ",
            wrapped=False,
        )

    concurrently = " CONCURRENTLY" if expression.args.get("concurrently") else ""
    exists_sql = " IF NOT EXISTS" if expression.args.get("exists") else ""
    no_schema_binding = (
        " WITH NO SCHEMA BINDING" if expression.args.get("no_schema_binding") else ""
    )

    clone = self.sql(expression, "clone")
    clone = f" {clone}" if clone else ""

    # For kinds listed in EXPRESSION_PRECEDES_PROPERTIES_CREATABLES the body
    # comes before the properties; otherwise the properties come first.
    if kind in self.EXPRESSION_PRECEDES_PROPERTIES_CREATABLES:
        properties_expression = f"{expression_sql}{properties_sql}"
    else:
        properties_expression = f"{properties_sql}{expression_sql}"

    expression_sql = f"CREATE{modifiers} {kind}{concurrently}{exists_sql} {this}{properties_expression}{postexpression_props_sql}{index_sql}{no_schema_binding}{clone}"
    return self.prepend_ctes(expression, expression_sql)
def triggerproperties_sql(self, expression: exp.TriggerProperties) -> str:
    """Render the body of a trigger definition.

    The pieces are emitted in this order, joined with ``self.sep()``:
    ``<timing> <events>``, ``ON <table>``, ``FROM <referenced_table>``,
    ``<deferrable>``, ``INITIALLY <initially>``, the referencing clause,
    ``FOR EACH <for_each>``, ``WHEN (<when>)`` and finally the ``execute`` arg.
    Events are joined with ``OR``. Optional pieces are skipped when their arg
    is missing or falsy.

    Returns:
        The rendered trigger properties.
    """
    # ``args.get("timing", "")`` would still return None when the key is stored
    # with a None value, which then rendered as the literal text "None".
    timing = expression.args.get("timing") or ""
    events = " OR ".join(self.sql(event) for event in expression.args.get("events") or [])
    timing_events = f"{timing} {events}".strip()

    parts = [timing_events, "ON", self.sql(expression, "table")]

    if referenced_table := expression.args.get("referenced_table"):
        parts.extend(["FROM", self.sql(referenced_table)])

    if deferrable := expression.args.get("deferrable"):
        parts.append(deferrable)

    if initially := expression.args.get("initially"):
        parts.append(f"INITIALLY {initially}")

    if referencing := expression.args.get("referencing"):
        parts.append(self.sql(referencing))

    if for_each := expression.args.get("for_each"):
        parts.append(f"FOR EACH {for_each}")

    if when := expression.args.get("when"):
        parts.append(f"WHEN ({self.sql(when)})")

    parts.append(self.sql(expression, "execute"))

    return self.sep().join(parts)
def describe_sql(self, expression: exp.Describe) -> str:
    """Render ``DESCRIBE[ <style>][ <format>] <this>[ <partition>][ AS JSON]``.

    ``style`` is interpolated as stored on the node; ``partition``, ``format``
    and ``this`` are rendered through ``self.sql``.
    """
    args = expression.args

    partition_sql = self.sql(expression, "partition")
    format_sql = self.sql(expression, "format")
    target = self.sql(expression, "this")

    pieces = ["DESCRIBE"]
    if args.get("style"):
        pieces.append(f" {args.get('style')}")
    if format_sql:
        pieces.append(f" {format_sql}")
    pieces.append(f" {target}")
    if partition_sql:
        pieces.append(f" {partition_sql}")
    if args.get("as_json"):
        pieces.append(" AS JSON")

    return "".join(pieces)
def bitstring_sql(self, expression: exp.BitString) -> str:
    """Render a bit-string literal.

    When the dialect defines ``BIT_START`` the bits are wrapped in the
    dialect's ``BIT_START``/``BIT_END`` delimiters; otherwise the bits are
    parsed as base 2 and emitted as a decimal integer.
    """
    bits = self.sql(expression, "this")
    dialect = self.dialect

    if not dialect.BIT_START:
        return str(int(bits, 2))

    return "".join((f"{dialect.BIT_START}", bits, f"{dialect.BIT_END}"))
def unicodestring_sql(self, expression: exp.UnicodeString) -> str:
    """Render a Unicode string literal, rewriting escape sequences when needed.

    When the dialect defines ``UNICODE_START`` the literal keeps the dialect's
    Unicode delimiters; otherwise it is emitted with the regular quote
    delimiters. Escape sequences are rewritten (using ``UNICODE_SUBSTITUTE`` or
    a backslash-based default) when the dialect has no ``UNICODE_START``, or
    when a custom escape character is present but ``SUPPORTS_UESCAPE`` is falsy.

    Returns:
        The literal, followed by ``UESCAPE <escape>`` when a custom escape is
        given and the generator supports it.
    """
    this = self.sql(expression, "this")
    escape = expression.args.get("escape")

    if self.dialect.UNICODE_START:
        escape_substitute = r"\\\1"
        left_quote, right_quote = self.dialect.UNICODE_START, self.dialect.UNICODE_END
    else:
        escape_substitute = r"\\u\1"
        left_quote, right_quote = self.dialect.QUOTE_START, self.dialect.QUOTE_END

    if escape:
        # The escape character is user data, so it must be matched literally.
        # Unescaped, characters such as "*", "." or "$" are read as regex syntax
        # and either raise re.error or match the wrong text.
        escape_pattern = re.compile(rf"{re.escape(escape.name)}(\d+)")
        escape_sql = f" UESCAPE {self.sql(escape)}" if self.SUPPORTS_UESCAPE else ""
    else:
        escape_pattern = ESCAPED_UNICODE_RE
        escape_sql = ""

    if not self.dialect.UNICODE_START or (escape and not self.SUPPORTS_UESCAPE):
        this = escape_pattern.sub(self.UNICODE_SUBSTITUTE or escape_substitute, this)

    return f"{left_quote}{this}{right_quote}{escape_sql}"
def directory_sql(self, expression: exp.Directory) -> str:
    """Render ``[LOCAL ]DIRECTORY <this>[ <row_format>]``."""
    row_format = self.sql(expression, "row_format")
    target = self.sql(expression, "this")

    pieces = []
    if expression.args.get("local"):
        pieces.append("LOCAL ")
    pieces.append(f"DIRECTORY {target}")
    if row_format:
        pieces.append(f" {row_format}")

    return "".join(pieces)
def drop_sql(self, expression: exp.Drop) -> str:
    """Render a ``DROP`` statement.

    Layout: ``DROP[ TEMPORARY][ MATERIALIZED] <kind>[ CONCURRENTLY][ IF EXISTS]
    <this>[ <cluster>][ (<expressions>)][ CASCADE][ CONSTRAINTS][ PURGE][ SYNC]``.
    The kind is looked up in the dialect's ``INVERSE_CREATABLE_KIND_MAPPING``
    and used as-is when there is no entry.
    """
    args = expression.args

    def flag(key: str, keyword: str) -> str:
        # Emit the keyword only when the corresponding arg is truthy.
        return keyword if args.get(key) else ""

    target = self.sql(expression, "this")

    column_list = self.expressions(expression, flat=True)
    column_list = f" ({column_list})" if column_list else ""

    raw_kind = args["kind"]
    kind = self.dialect.INVERSE_CREATABLE_KIND_MAPPING.get(raw_kind) or raw_kind

    cluster = self.sql(expression, "cluster")
    cluster = f" {cluster}" if cluster else ""

    # A single space always separates the kind from the target, so the
    # "not exists" form is " " rather than "".
    exists = " IF EXISTS " if args.get("exists") else " "

    head = f"DROP{flag('temporary', ' TEMPORARY')}{flag('materialized', ' MATERIALIZED')} {kind}"
    middle = f"{flag('concurrently', ' CONCURRENTLY')}{exists}{target}{cluster}{column_list}"
    tail = (
        f"{flag('cascade', ' CASCADE')}{flag('constraints', ' CONSTRAINTS')}"
        f"{flag('purge', ' PURGE')}{flag('sync', ' SYNC')}"
    )
    return f"{head}{middle}{tail}"
def limitoptions_sql(self, expression: exp.LimitOptions) -> str:
    """Render the trailing options of a limit/fetch clause.

    Emits `` PERCENT``, `` ROWS`` and `` WITH TIES`` for the corresponding
    truthy args. When ``rows`` is set without ``with_ties`` the clause is
    closed with `` ONLY`` instead.
    """
    args = expression.args
    has_rows = bool(args.get("rows"))

    tokens = []
    if args.get("percent"):
        tokens.append(" PERCENT")
    if has_rows:
        tokens.append(" ROWS")
    if args.get("with_ties"):
        tokens.append(" WITH TIES")
    elif has_rows:
        tokens.append(" ONLY")

    return "".join(tokens)
def hint_sql(self, expression: exp.Hint) -> str:
    """Render a query hint as `` /*+ ... */``.

    When ``QUERY_HINTS`` is falsy the hint is reported through
    ``self.unsupported`` and an empty string is returned.
    """
    if self.QUERY_HINTS:
        hints = self.expressions(expression, sep=self.QUERY_HINT_SEP)
        return " /*+ " + hints.strip() + " */"

    self.unsupported("Hints are not supported")
    return ""
def inputoutputformat_sql(self, expression: exp.InputOutputFormat) -> str:
    """Render ``INPUTFORMAT <fmt>`` and/or ``OUTPUTFORMAT <fmt>``.

    The two clauses are joined with ``self.sep()``. A clause whose arg renders
    to an empty string is omitted entirely.

    Returns:
        The rendered clause(s), or an empty string when neither is present.
    """
    clauses = []

    input_format = self.sql(expression, "input_format")
    if input_format:
        clauses.append(f"INPUTFORMAT {input_format}")

    output_format = self.sql(expression, "output_format")
    if output_format:
        clauses.append(f"OUTPUTFORMAT {output_format}")

    # Join only the clauses that exist. Joining a fixed pair left a dangling
    # separator whenever one (or both) of them was empty.
    return self.sep().join(clauses)
def properties(
    self,
    properties: exp.Properties,
    prefix: str = "",
    sep: str = ", ",
    suffix: str = "",
    wrapped: bool = True,
) -> str:
    """Render a list of properties with optional prefix, suffix and wrapping.

    Args:
        properties: Node whose ``expressions`` are rendered.
        prefix: Text placed before the properties; a single space is added
            after it when it contains non-whitespace characters.
        sep: Separator used between the rendered properties.
        suffix: Text placed after the properties.
        wrapped: Whether to pass the rendered list through ``self.wrap``.

    Returns:
        The rendered properties, or an empty string when there is nothing to
        render.
    """
    if not properties.expressions:
        return ""

    rendered = self.expressions(properties, sep=sep, indent=False)
    if not rendered:
        return ""

    if wrapped:
        rendered = self.wrap(rendered)

    spacer = " " if prefix.strip() else ""
    return f"{prefix}{spacer}{rendered}{suffix}"
def checksumproperty_sql(self, expression: exp.ChecksumProperty) -> str:
    """Render ``CHECKSUM=DEFAULT|ON|OFF``.

    ``default`` wins over ``on``; when neither arg is truthy the value is
    ``OFF``.
    """
    args = expression.args
    state = "DEFAULT" if args.get("default") else "ON" if args.get("on") else "OFF"
    return "CHECKSUM=" + state
def blockcompressionproperty_sql(self, expression: exp.BlockCompressionProperty) -> str:
    """Render ``BLOCKCOMPRESSION=<option>``.

    The option is ``AUTOTEMP(<expressions>)`` when ``autotemp`` is not
    ``None``; otherwise it is the first truthy flag among ``always``,
    ``default``, ``manual`` and ``never``.

    Returns:
        The rendered property, or an empty string (after reporting through
        ``self.unsupported``) when no option is set on the node.
    """
    args = expression.args

    autotemp = args.get("autotemp")
    if autotemp is not None:
        return f"BLOCKCOMPRESSION=AUTOTEMP({self.expressions(autotemp)})"

    for key, keyword in (
        ("always", "ALWAYS"),
        ("default", "DEFAULT"),
        ("manual", "MANUAL"),
        ("never", "NEVER"),
    ):
        if args.get(key):
            return f"BLOCKCOMPRESSION={keyword}"

    # Without this guard the option variable was never bound and the method
    # failed with UnboundLocalError on a node that carries no option.
    self.unsupported(
        "BLOCKCOMPRESSION requires one of AUTOTEMP, ALWAYS, DEFAULT, MANUAL or NEVER"
    )
    return ""
def withdataproperty_sql(self, expression: exp.WithDataProperty) -> str:
    """Render ``WITH [NO] DATA`` with an optional ``AND [NO] STATISTICS`` suffix.

    The statistics suffix is emitted only when the ``statistics`` arg is not
    ``None``; a falsy non-``None`` value produces ``NO STATISTICS``.
    """
    args = expression.args
    clauses = ["WITH NO DATA" if args.get("no") else "WITH DATA"]

    statistics = args.get("statistics")
    if statistics is not None:
        clauses.append("STATISTICS" if statistics else "NO STATISTICS")

    return " AND ".join(clauses)
def insert_sql(self, expression: exp.Insert) -> str:
    """Generate an ``INSERT`` statement.

    The statement is assembled from independently rendered fragments, each
    carrying its own leading separator. The position of the ``returning``
    clause depends on ``self.RETURNING_END``: after the row source when it is
    truthy, before it otherwise. ``DEFAULT VALUES`` is emitted in both layouts;
    previously it was dropped whenever ``RETURNING_END`` was false.
    """
    args = expression.args

    hint = self.sql(expression, "hint")
    overwrite = args.get("overwrite")

    # Directory targets always use the plain OVERWRITE keyword; other targets
    # use the generator's INSERT_OVERWRITE spelling.
    if isinstance(expression.this, exp.Directory):
        this = " OVERWRITE" if overwrite else " INTO"
    else:
        this = self.INSERT_OVERWRITE if overwrite else " INTO"

    stored = self.sql(expression, "stored")
    stored = f" {stored}" if stored else ""
    alternative = args.get("alternative")
    alternative = f" OR {alternative}" if alternative else ""
    ignore = " IGNORE" if args.get("ignore") else ""
    if args.get("is_function"):
        this = f"{this} FUNCTION"
    this = f"{this} {self.sql(expression, 'this')}"

    exists = " IF EXISTS" if args.get("exists") else ""
    where = self.sql(expression, "where")
    where = f"{self.sep()}REPLACE WHERE {where}" if where else ""
    expression_sql = f"{self.sep()}{self.sql(expression, 'expression')}"
    on_conflict = self.sql(expression, "conflict")
    on_conflict = f" {on_conflict}" if on_conflict else ""
    by_name = " BY NAME" if args.get("by_name") else ""
    default_values = "DEFAULT VALUES" if args.get("default") else ""
    returning = self.sql(expression, "returning")

    if self.RETURNING_END:
        expression_sql = f"{expression_sql}{on_conflict}{default_values}{returning}"
    else:
        # The returning clause goes first in this layout, but DEFAULT VALUES is
        # still the row source and has to be kept after it.
        expression_sql = f"{returning}{expression_sql}{on_conflict}{default_values}"

    partition_by = self.sql(expression, "partition")
    partition_by = f" {partition_by}" if partition_by else ""
    settings = self.sql(expression, "settings")
    settings = f" {settings}" if settings else ""

    source = self.sql(expression, "source")
    source = f"TABLE {source}" if source else ""

    sql = f"INSERT{hint}{alternative}{ignore}{this}{stored}{by_name}{exists}{partition_by}{settings}{where}{expression_sql}{source}"
    return self.prepend_ctes(expression, sql)
def kill_sql(self, expression: exp.Kill) -> str:
    """Render ``KILL`` followed by the optional ``kind`` and ``this`` parts.

    Each part is included, space-separated, only when it renders to a
    non-empty string.
    """
    words = ["KILL"]
    for key in ("kind", "this"):
        rendered = self.sql(expression, key)
        if rendered:
            words.append(rendered)
    return " ".join(words)
def table_parts(self, expression: exp.Table) -> str:
    """Return the dot-joined ``catalog``, ``db`` and ``this`` parts of a table.

    Parts whose arg is ``None`` (or absent) are skipped; the remaining ones are
    rendered with ``self.sql`` in that fixed order.
    """
    args = expression.args
    rendered = []
    for key in ("catalog", "db", "this"):
        part = args.get(key)
        if part is not None:
            rendered.append(self.sql(part))
    return ".".join(rendered)
def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
    """Render ``TABLE(...)`` with its alias, pivots, sample and joins.

    Output order is: the ``TABLE`` function call, `` AS <alias>`` (when an
    alias renders), pivots, sample, then the indented joins.
    """
    table_call = self.func("TABLE", expression.this)

    alias_sql = self.sql(expression, "alias")
    alias_clause = f" AS {alias_sql}" if alias_sql else ""

    sample_sql = self.sql(expression, "sample")
    pivots_sql = self.expressions(expression, key="pivots", sep="", flat=True)
    joins_sql = self.expressions(expression, key="joins", sep="", flat=True)
    joins_sql = self.indent(joins_sql, skip_first=True)

    return f"{table_call}{alias_clause}{pivots_sql}{sample_sql}{joins_sql}"
def version_sql(self, expression: exp.Version) -> str:
    """Render a version clause as ``FOR <name> <kind> <expression>``."""
    name = expression.name
    kind = expression.text("kind")
    target = self.sql(expression, "expression")
    return f"FOR {name} {kind} {target}"
def update_sql(self, expression: exp.Update) -> str:
    """Generate an ``UPDATE`` statement.

    ``self._update_from_joins_sql`` supplies the text placed before ``SET``
    (joins) and the text placed after the assignments (from). The ``returning``
    clause follows the from/where part when ``self.RETURNING_END`` is truthy
    and precedes it otherwise.
    """
    target = self.sql(expression, "this")
    join_sql, from_sql = self._update_from_joins_sql(expression)
    assignments = self.expressions(expression, flat=True)
    where_sql = self.sql(expression, "where")
    returning = self.sql(expression, "returning")
    order = self.sql(expression, "order")
    limit = self.sql(expression, "limit")

    tail = (
        f"{from_sql}{where_sql}{returning}"
        if self.RETURNING_END
        else f"{returning}{from_sql}{where_sql}"
    )

    options = self.expressions(expression, key="options")
    if options:
        options = f" OPTION({options})"
    else:
        options = ""

    statement = f"UPDATE {target}{join_sql} SET {assignments}{tail}{order}{limit}{options}"
    return self.prepend_ctes(expression, statement)
def rollupindex_sql(self, expression: exp.RollupIndex) -> str:
    """Render a rollup index as ``<name>(<columns>)`` plus optional suffixes.

    `` FROM <from_index>`` is appended when that arg renders to something, and
    a ``PROPERTIES``-prefixed property list when ``properties`` is truthy.
    """
    name = self.sql(expression, "this")
    columns = self.expressions(expression, flat=True)

    base_index = self.sql(expression, "from_index")
    from_clause = f" FROM {base_index}" if base_index else ""

    properties = expression.args.get("properties")
    if properties:
        properties_clause = f" {self.properties(properties, prefix='PROPERTIES')}"
    else:
        properties_clause = ""

    return f"{name}({columns}){from_clause}{properties_clause}"
def connect_sql(self, expression: exp.Connect) -> str:
    """Render an optional ``START WITH`` segment followed by ``CONNECT BY``.

    ``NOCYCLE`` is inserted after ``CONNECT BY`` when the ``nocycle`` arg is
    truthy. The ``CONNECT BY`` segment is always emitted.
    """
    start_sql = self.sql(expression, "start")
    start_clause = self.seg(f"START WITH {start_sql}") if start_sql else ""

    keyword = "CONNECT BY NOCYCLE" if expression.args.get("nocycle") else "CONNECT BY"
    condition = self.sql(expression, "connect")
    connect_clause = self.seg(f"{keyword} {condition}")

    return start_clause + connect_clause
def lateral_op(self, expression: exp.Lateral) -> str:
    """Return the ``LATERAL`` keyword, prefixed by a join kind for APPLY forms.

    ``cross_apply`` is compared by identity: exactly ``True`` yields
    ``INNER JOIN LATERAL``, exactly ``False`` yields ``LEFT JOIN LATERAL``, and
    anything else yields plain ``LATERAL``.
    """
    cross_apply = expression.args.get("cross_apply")

    # https://www.mssqltips.com/sqlservertip/1958/sql-server-cross-apply-and-outer-apply/
    if cross_apply is True:
        return "INNER JOIN LATERAL"
    if cross_apply is False:
        return "LEFT JOIN LATERAL"
    return "LATERAL"
def set_sql(self, expression: exp.Set) -> str:
    """Render a ``SET``/``UNSET`` statement with an optional ``TAG`` keyword.

    The rendered items are always preceded by a single space, even when the
    item list is empty.
    """
    items = self.expressions(expression, flat=True)
    args = expression.args
    tag = " TAG" if args.get("tag") else ""
    keyword = "UNSET" if args.get("unset") else "SET"
    return f"{keyword}{tag} {items}"
def lock_sql(self, expression: exp.Lock) -> str:
    """Render a locking-read clause (``FOR UPDATE`` / ``FOR SHARE`` variants).

    When ``self.LOCKING_READS_SUPPORTED`` is falsy the clause is reported as
    unsupported and an empty string is returned. The ``update`` arg is
    required (a missing key raises ``KeyError``); ``key`` selects the
    ``NO KEY`` / ``KEY`` variants. ``wait`` adds `` WAIT <literal>`` for a
    literal, `` NOWAIT`` for another truthy value and `` SKIP LOCKED`` for a
    falsy non-``None`` value.
    """
    if not self.LOCKING_READS_SUPPORTED:
        self.unsupported("Locking reads using 'FOR UPDATE/SHARE' are not supported")
        return ""

    args = expression.args
    is_update = args["update"]
    has_key = args.get("key")

    if is_update:
        lock_type = "FOR NO KEY UPDATE" if has_key else "FOR UPDATE"
    else:
        lock_type = "FOR KEY SHARE" if has_key else "FOR SHARE"

    targets = self.expressions(expression, flat=True)
    of_clause = f" OF {targets}" if targets else ""

    wait = args.get("wait")
    if wait is None:
        wait_clause = ""
    elif isinstance(wait, exp.Literal):
        wait_clause = f" WAIT {self.sql(wait)}"
    elif wait:
        wait_clause = " NOWAIT"
    else:
        wait_clause = " SKIP LOCKED"

    return f"{lock_type}{of_clause}{wait_clause}"
def order_sql(self, expression: exp.Order, flat: bool = False) -> str:
    """Render ``ORDER [SIBLINGS] BY`` through ``self.op_expressions``.

    When the ``this`` arg renders to something it is placed before ``ORDER``
    (followed by a space), and that same non-empty prefix is passed as the
    ``flat`` argument in place of the caller's value.
    """
    prefix = self.sql(expression, "this")
    if prefix:
        prefix = f"{prefix} "

    keyword = "ORDER SIBLINGS BY" if expression.args.get("siblings") else "ORDER BY"
    return self.op_expressions(f"{prefix}{keyword}", expression, flat=prefix or flat)  # type: ignore
def distribute_sql(self, expression: exp.Distribute) -> str:
    """Render a ``DISTRIBUTE BY`` clause by delegating to ``self.op_expressions``."""
    keyword = "DISTRIBUTE BY"
    return self.op_expressions(keyword, expression)
def matchrecognizemeasure_sql(self, expression: exp.MatchRecognizeMeasure) -> str:
    """Render a measure, prefixed by its window frame when one is present."""
    frame = self.sql(expression, "window_frame")
    measure = self.sql(expression, "this")
    if frame:
        return f"{frame} {measure}"
    return measure
def query_modifiers(self, expression: exp.Expr, *sqls: str) -> str:
    """Concatenate ``sqls`` with the rendered modifiers of ``expression``.

    The ``limit`` arg is first converted between ``exp.Limit`` and
    ``exp.Fetch`` when it does not match the form named by
    ``self.LIMIT_FETCH``. The fragments are then joined with ``csv`` using an
    empty separator, in this order: the given ``sqls``, joins, match, laterals,
    prewhere, where, connect, group, having, the
    ``AFTER_HAVING_MODIFIER_TRANSFORMS`` outputs, order, offset/limit, the
    after-limit modifiers, options and ``FOR`` modifiers.
    """
    limit = expression.args.get("limit")
    if self.LIMIT_FETCH == "LIMIT" and isinstance(limit, exp.Fetch):
        limit = exp.Limit(expression=exp.maybe_copy(limit.args.get("count")))
    elif self.LIMIT_FETCH == "FETCH" and isinstance(limit, exp.Limit):
        limit = exp.Fetch(direction="FIRST", count=exp.maybe_copy(limit.expression))

    # Fragments are rendered strictly in output order.
    parts: t.List[str] = list(sqls)
    parts.extend(self.sql(join) for join in expression.args.get("joins") or [])
    parts.append(self.sql(expression, "match"))
    parts.extend(self.sql(lateral) for lateral in expression.args.get("laterals") or [])
    for key in ("prewhere", "where", "connect", "group", "having"):
        parts.append(self.sql(expression, key))
    parts.extend(gen(self, expression) for gen in self.AFTER_HAVING_MODIFIER_TRANSFORMS.values())
    parts.append(self.sql(expression, "order"))
    parts.extend(self.offset_limit_modifiers(expression, isinstance(limit, exp.Fetch), limit))
    parts.extend(self.after_limit_modifiers(expression))
    parts.append(self.options_modifier(expression))
    parts.append(self.for_modifiers(expression))

    return csv(*parts, sep="")
def offset_limit_modifiers(
    self, expression: exp.Expr, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
) -> t.List[str]:
    """Return the offset and limit SQL in the order required by the limit style.

    With `fetch` set the offset comes first, otherwise the limit comes first.
    """
    if fetch:
        return [self.sql(expression, "offset"), self.sql(limit)]
    return [self.sql(limit), self.sql(expression, "offset")]
def schema_sql(self, expression: exp.Schema) -> str:
    """Render a schema as `<this> <columns>`, dropping whichever side is empty."""
    target = self.sql(expression, "this")
    columns = self.schema_columns_sql(expression)

    if target and columns:
        return f"{target} {columns}"
    # At most one side is non-empty here.
    return target or columns
def sessionparameter_sql(self, expression: exp.SessionParameter) -> str:
    """Render a session parameter as `@@[<kind>.]<name>`."""
    name = self.sql(expression, "this")
    scope = expression.text("kind")
    # The kind, when present, qualifies the name with a trailing dot.
    qualifier = f"{scope}." if scope else scope
    return f"@@{qualifier}{name}"
def window_sql(self, expression: exp.Window) -> str:
    """Generate a window expression.

    The result is `<this> <over> (...)`, where `<over>` is the node's `over` arg
    (default "OVER"), or "AS" when the node is stored under its parent's
    `windows` arg. If only an alias is present, the parentheses are omitted.
    """
    target = self.sql(expression, "this")
    partition = self.partition_by_sql(expression)

    order_node = expression.args.get("order")
    order = self.order_sql(order_node, flat=True) if order_node else ""

    spec = self.sql(expression, "spec")
    alias = self.sql(expression, "alias")
    over = self.sql(expression, "over") or "OVER"

    keyword = "AS" if expression.arg_key == "windows" else over
    head = f"{target} {keyword}"

    # `first` is tri-state: missing -> nothing, truthy -> FIRST, falsy -> LAST.
    first_flag = expression.args.get("first")
    if first_flag is None:
        first = ""
    elif first_flag:
        first = "FIRST"
    else:
        first = "LAST"

    # A bare reference to an alias needs no parentheses.
    if alias and not (partition or order or spec):
        return f"{head} {alias}"

    body = self.format_args(*filter(None, (alias, first, partition, order, spec)), sep=" ")
    return f"{head} ({body})"
def between_sql(self, expression: exp.Between) -> str:
    """Generate a BETWEEN predicate, emitting or emulating the SYMMETRIC flag.

    Without `SUPPORTS_BETWEEN_FLAGS`, a symmetric BETWEEN is expanded into two
    BETWEEN tests with the bounds swapped, OR-ed together in parentheses.
    """
    this = self.sql(expression, "this")
    lower = self.sql(expression, "low")
    upper = self.sql(expression, "high")

    symmetric = expression.args.get("symmetric")
    flags_supported = self.SUPPORTS_BETWEEN_FLAGS

    if symmetric and not flags_supported:
        return f"({this} BETWEEN {lower} AND {upper} OR {this} BETWEEN {upper} AND {lower})"

    if symmetric:
        flag = " SYMMETRIC"
    elif symmetric is False and flags_supported:
        flag = " ASYMMETRIC"
    else:
        # An explicit ASYMMETRIC is silently dropped here: semantics are identical.
        flag = ""

    return f"{this} BETWEEN{flag} {lower} AND {upper}"
def trim_sql(self, expression: exp.Trim) -> str:
    """Generate LTRIM, RTRIM or TRIM depending on the generated `position` arg."""
    position = self.sql(expression, "position")
    # Any position other than LEADING/TRAILING (including none) maps to plain TRIM.
    func_name = {"LEADING": "LTRIM", "TRAILING": "RTRIM"}.get(position, "TRIM")
    return self.func(func_name, expression.this, expression.expression)
def foreignkey_sql(self, expression: exp.ForeignKey) -> str:
    """Generate a FOREIGN KEY constraint.

    Optional parts are appended in this order: column list, reference,
    ON DELETE action, ON UPDATE action, space-separated options.
    """
    clauses = ["FOREIGN KEY"]

    columns = self.expressions(expression, flat=True)
    if columns:
        clauses.append(f" ({columns})")

    reference = self.sql(expression, "reference")
    if reference:
        clauses.append(f" {reference}")

    for arg, keyword in (("delete", " ON DELETE "), ("update", " ON UPDATE ")):
        action = self.sql(expression, arg)
        if action:
            clauses.append(f"{keyword}{action}")

    options = self.expressions(expression, key="options", flat=True, sep=" ")
    if options:
        clauses.append(f" {options}")

    return "".join(clauses)
def json_path_part(self, expression: int | str | exp.JSONPathPart) -> str:
    """Render a single component of a JSON path.

    Args:
        expression: A `JSONPathPart` node, an integer index, or a string key.

    Returns:
        - For a `JSONPathPart`: the output of the callable registered for its class
          in `self.TRANSFORMS`; if none is registered the part is reported through
          `self.unsupported` and an empty string is returned.
        - For an int: its decimal representation.
        - For a str: the key wrapped in quotes. When bracket quoting is active and
          `JSON_PATH_SINGLE_QUOTE_ESCAPE` is set, the key is wrapped in
          backslash-escaped single quotes and embedded single quotes are
          backslash-escaped; otherwise it is wrapped in double quotes and embedded
          double quotes are backslash-escaped.
    """
    if isinstance(expression, exp.JSONPathPart):
        transform = self.TRANSFORMS.get(expression.__class__)
        if not callable(transform):
            self.unsupported(f"Unsupported JSONPathPart type {expression.__class__.__name__}")
            return ""

        return transform(self, expression)

    if isinstance(expression, int):
        return str(expression)

    if self._quote_json_path_key_using_brackets and self.JSON_PATH_SINGLE_QUOTE_ESCAPE:
        # Interpolate the escaped key. The previous code interpolated the raw
        # `expression`, discarding the replace() result and leaving embedded
        # single quotes unescaped.
        escaped = expression.replace("'", "\\'")
        return f"\\'{escaped}\\'"

    escaped = expression.replace('"', '\\"')
    return f'"{escaped}"'
def jsonarray_sql(self, expression: exp.JSONArray) -> str:
    """Generate JSON_ARRAY(...) with optional null handling, RETURNING and STRICT."""
    tail = []

    null_handling = expression.args.get("null_handling")
    if null_handling:
        tail.append(f" {null_handling}")

    returning = self.sql(expression, "return_type")
    if returning:
        tail.append(f" RETURNING {returning}")

    if expression.args.get("strict"):
        tail.append(" STRICT")

    # The trailing clauses live inside the call, so the suffix carries the closing paren.
    tail.append(")")
    return self.func("JSON_ARRAY", *expression.expressions, suffix="".join(tail))
def jsoncolumndef_sql(self, expression: exp.JSONColumnDef) -> str:
    """Generate a JSON column definition.

    With a nested schema the result is `NESTED[ PATH <path>] <schema>`; otherwise
    it is `<name>[ <kind>][ PATH <path>][ FOR ORDINALITY]`.
    """
    path = self.sql(expression, "path")
    path_clause = f" PATH {path}" if path else ""

    nested = self.sql(expression, "nested_schema")
    if nested:
        return f"NESTED{path_clause} {nested}"

    name = self.sql(expression, "this")
    kind = self.sql(expression, "kind")
    kind_clause = f" {kind}" if kind else ""
    ordinality = " FOR ORDINALITY" if expression.args.get("ordinality") else ""

    return "".join((name, kind_clause, path_clause, ordinality))
def interval_sql(self, expression: exp.Interval) -> str:
    """Generate an INTERVAL expression.

    The unit is singularised through `TIME_PART_SINGULARS` unless
    `INTERVAL_ALLOWS_PLURAL_FORM` is set. With `SINGLE_STRING_INTERVAL` the value
    and unit share one quoted string, except for an `IntervalSpan` unit, which
    stays outside the quotes. Otherwise the value is emitted as-is when it is an
    instance of `UNWRAPPED_INTERVAL_VALUES` and parenthesised when it is not.
    """
    unit_node = expression.args.get("unit")
    unit_sql = self.sql(unit_node) if unit_node else ""
    if not self.INTERVAL_ALLOWS_PLURAL_FORM:
        unit_sql = self.TIME_PART_SINGULARS.get(unit_sql, unit_sql)
    unit_suffix = f" {unit_sql}" if unit_sql else ""

    if not self.SINGLE_STRING_INTERVAL:
        value_sql = self.sql(expression, "this")
        if value_sql:
            if isinstance(expression.this, self.UNWRAPPED_INTERVAL_VALUES):
                value_sql = f" {value_sql}"
            else:
                value_sql = f" ({value_sql})"
        return f"INTERVAL{value_sql}{unit_suffix}"

    value_node = expression.this
    literal = value_node.name if value_node else ""
    if not literal:
        return f"INTERVAL{unit_suffix}"

    if unit_node and isinstance(unit_node, exp.IntervalSpan):
        return f"INTERVAL '{literal}'{unit_suffix}"
    return f"INTERVAL '{literal}{unit_suffix}'"
def neg_sql(self, expression: exp.Neg) -> str:
    """Generate a unary minus applied to the operand stored in `this`.

    A space is inserted when the operand's SQL already starts with "-", so that
    "- - 5" is never collapsed into "--5", which would start a comment.

    Returns:
        "-" followed by the operand's SQL. An operand that renders to an empty
        string yields a bare "-" instead of raising.
    """
    operand_sql = self.sql(expression, "this")
    # startswith() rather than operand_sql[0]: indexing raises IndexError when the
    # operand generates an empty string.
    sep = " " if operand_sql.startswith("-") else ""
    return f"-{sep}{operand_sql}"
def connector_sql(
    self,
    expression: exp.Connector,
    op: str,
    stack: t.Optional[t.List[str | exp.Expr]] = None,
) -> str:
    """Generate SQL for a connector chain using an explicit stack.

    The method runs in two modes:

    - `stack` is None (entry point): the whole chain rooted at `expression` is
      generated and returned as a single string.
    - `stack` is given (called back from the entry-point loop): the operands of
      `expression` are pushed onto `stack` and the operator string is returned.

    Args:
        expression: The connector node to generate.
        op: The operator keyword placed between operands.
        stack: The shared work stack of the entry-point loop, or None.

    Returns:
        The full SQL in entry-point mode, otherwise the operator string, including
        any comments appended to it.
    """
    if stack is not None:
        if expression.expressions:
            # The operands are available as a flat list: join them in one go.
            stack.append(self.expressions(expression, sep=f" {op} "))
        else:
            # Push right, operator, left so they pop as left, operator, right.
            stack.append(expression.right)
            if expression.comments and self.comments:
                # Attach the connector's comments to its operator keyword.
                for comment in expression.comments:
                    if comment:
                        op += f" /*{self.sanitize_comment(comment)}*/"
            stack.extend((op, expression.left))
        return op

    stack = [expression]
    sqls: t.List[str] = []
    # Operator strings returned by nested connectors, used to recognise them in `sqls`.
    ops = set()

    while stack:
        node = stack.pop()
        if isinstance(node, exp.Connector):
            # Dispatch to the node's own `<key>_sql` method, which pushes onto `stack`.
            ops.add(getattr(self, f"{node.key}_sql")(node, stack))
        else:
            # NOTE(review): strings pushed above also reach self.sql here; this
            # presumably returns them unchanged - confirm against self.sql.
            sql = self.sql(node)
            if sqls and sqls[-1] in ops:
                # The previous piece is a bare operator: keep its operand with it.
                sqls[-1] += f" {sql}"
            else:
                sqls.append(sql)

    # In pretty mode, chains that are too wide get one piece per line.
    sep = "\n" if self.pretty and self.too_wide(sqls) else " "
    return sep.join(sqls)
def mergetreettlaction_sql(self, expression: exp.MergeTreeTTLAction) -> str:
    """Generate a TTL action: `<this>[ DELETE][ RECOMPRESS x][ TO DISK x][ TO VOLUME x]`."""
    pieces = [self.sql(expression, "this")]

    if expression.args.get("delete"):
        pieces.append(" DELETE")

    # Each remaining clause is emitted only when its arg generates non-empty SQL.
    for arg, keyword in (
        ("recompress", "RECOMPRESS"),
        ("to_disk", "TO DISK"),
        ("to_volume", "TO VOLUME"),
    ):
        value = self.sql(expression, arg)
        if value:
            pieces.append(f" {keyword} {value}")

    return "".join(pieces)
def altersortkey_sql(self, expression: exp.AlterSortKey) -> str:
    """Generate `ALTER [COMPOUND] SORTKEY <target>`.

    The target is the generated `this` arg when non-empty, otherwise the
    parenthesised expression list (or nothing when that is empty too).
    """
    prefix = "ALTER COMPOUND" if expression.args.get("compound") else "ALTER"
    style = self.sql(expression, "this")
    columns = self.expressions(expression, flat=True)
    column_list = f"({columns})" if columns else ""
    return f"{prefix} SORTKEY {style or column_list}"
def altersession_sql(self, expression: exp.AlterSession) -> str:
    """Generate `SET <items>`, or `UNSET <items>` when the `unset` arg is truthy."""
    items = self.expressions(expression, flat=True)
    if expression.args.get("unset"):
        return f"UNSET {items}"
    return f"SET {items}"
def addpartition_sql(self, expression: exp.AddPartition) -> str:
    """Render ``ADD [IF NOT EXISTS] <partition>[ <location>]``.

    The partition is ``expression.this``; the optional trailing clause is
    the rendered ``location`` arg.
    """
    location_sql = self.sql(expression, "location")

    pieces = ["ADD "]
    if expression.args.get("exists"):
        pieces.append("IF NOT EXISTS ")
    pieces.append(self.sql(expression.this))
    if location_sql:
        pieces.append(f" {location_sql}")

    return "".join(pieces)
def _like_sql(self, expression: exp.Like | exp.ILike) -> str:
    """Render a LIKE / ILIKE predicate, expanding quantifiers if required.

    When the right-hand side is an ``ANY``/``ALL`` quantifier and the
    generator does not set ``SUPPORTS_LIKE_QUANTIFIERS``, the predicate is
    rewritten as a chain of single-pattern comparisons joined with ``OR``
    (for ``ANY``) or ``AND`` (for ``ALL``). Otherwise it is emitted as a
    plain binary operator.
    """
    is_like = isinstance(expression, exp.Like)
    node_cls: t.Type[exp.Like | exp.ILike] = exp.Like if is_like else exp.ILike
    keyword = "LIKE" if is_like else "ILIKE"

    lhs = expression.this
    pattern = expression.expression

    # Nothing to expand: emit the operator as-is.
    if not isinstance(pattern, (exp.All, exp.Any)) or self.SUPPORTS_LIKE_QUANTIFIERS:
        return self.binary(expression, keyword)

    operands = pattern.this.unnest()
    operands = operands.expressions if isinstance(operands, exp.Tuple) else [operands]
    combine = exp.or_ if isinstance(pattern, exp.Any) else exp.and_

    expanded: exp.Expr = node_cls(this=lhs, expression=operands[0])
    for operand in operands[1:]:
        expanded = combine(expanded, node_cls(this=lhs, expression=operand))

    # Parenthesize when nested under a condition of a different type than the
    # expanded expression itself.
    enclosing = expression.parent
    if isinstance(enclosing, exp.Condition) and not isinstance(enclosing, type(expanded)):
        expanded = exp.paren(expanded, copy=False)

    return self.sql(expanded)
def binary(self, expression: exp.Binary, op: str) -> str:
    """Render a (possibly deeply nested) binary expression without recursion.

    Nodes whose exact type matches the root are flattened iteratively with
    an explicit stack; any other node is rendered through ``self.sql``.

    Args:
        expression: The root binary node.
        op: The operator text placed between operands.

    Returns:
        The concatenated SQL of all operands and operators.

    A node that carries an ``operator`` arg is rendered with
    ``OPERATOR(<operator>)`` instead of ``op``. That override is scoped to
    the node itself, so it no longer leaks to sibling or child nodes that
    have no ``operator`` of their own.
    """
    sqls: t.List[str] = []
    stack: t.List[None | str | exp.Expr] = [expression]
    binary_type = type(expression)

    while stack:
        node = stack.pop()

        if type(node) is binary_type:
            op_func = node.args.get("operator")
            # Keep the caller-provided `op` intact for nodes without an override.
            node_op = f"OPERATOR({self.sql(op_func)})" if op_func else op

            # Pushed in reverse so pops yield: left operand, operator, right operand.
            stack.append(node.args.get("expression"))
            stack.append(f" {self.maybe_comment(node_op, comments=node.comments)} ")
            stack.append(node.args.get("this"))
        else:
            sqls.append(self.sql(node))

    return "".join(sqls)
def format_args(self, *args: t.Optional[str | exp.Expr], sep: str = ", ") -> str:
    """Render an argument list, one per line in pretty mode if it is too wide.

    ``None`` and boolean arguments are dropped. In pretty mode, when
    ``self.too_wide`` reports the rendered arguments as too wide, each
    argument goes on its own line and the result is passed through
    ``self.indent``; otherwise the arguments are joined with ``sep``.
    """
    rendered = []
    for arg in args:
        if arg is None or isinstance(arg, bool):
            continue
        rendered.append(self.sql(arg))
    arg_sqls = tuple(rendered)

    if not (self.pretty and self.too_wide(arg_sqls)):
        return sep.join(arg_sqls)

    line_sep = f"{sep.strip()}\n"
    block = "\n" + line_sep.join(arg_sqls) + "\n"
    return self.indent(block, skip_first=True, skip_last=True)
def tag_sql(self, expression: exp.Tag) -> str:
    """Render a tag as ``<prefix><this><postfix>``.

    A missing ``prefix`` or ``postfix`` arg is rendered as an empty string.
    Previously a missing value was interpolated directly, which put the
    literal text ``None`` into the generated SQL.
    """
    prefix = expression.args.get("prefix") or ""
    postfix = expression.args.get("postfix") or ""
    return f"{prefix}{self.sql(expression.this)}{postfix}"
def when_sql(self, expression: exp.When) -> str:
    """Render one ``WHEN [NOT] MATCHED ... THEN ...`` clause.

    ``BY SOURCE`` is emitted only if the generator sets ``MATCHED_BY_SOURCE``
    and the ``source`` arg is truthy. An optional ``condition`` is appended
    as ``AND <condition>``. The ``then`` action gets dedicated rendering for
    INSERT and UPDATE nodes; anything else is rendered generically.
    """
    match_kw = "MATCHED" if expression.args["matched"] else "NOT MATCHED"

    by_source = ""
    if self.MATCHED_BY_SOURCE and expression.args.get("source"):
        by_source = " BY SOURCE"

    predicate = self.sql(expression, "condition")
    predicate = f" AND {predicate}" if predicate else ""

    action = expression.args.get("then")
    if isinstance(action, exp.Insert):
        target_sql = self.sql(action, "this")
        insert_head = f"INSERT {target_sql}" if target_sql else "INSERT"
        values_sql = self.sql(action, "expression")
        action_sql = f"{insert_head} VALUES {values_sql}" if values_sql else insert_head
    elif isinstance(action, exp.Update):
        if isinstance(action.args.get("expressions"), exp.Star):
            action_sql = f"UPDATE {self.sql(action, 'expressions')}"
        else:
            assignments = self.expressions(action)
            action_sql = f"UPDATE SET{self.sep()}{assignments}" if assignments else "UPDATE"
    else:
        action_sql = self.sql(action)

    return f"WHEN {match_kw}{by_source}{predicate} THEN {action_sql}"
def dictrange_sql(self, expression: exp.DictRange) -> str:
    """Render a dictionary range as ``<this>(MIN <min> MAX <max>)``.

    The local names avoid shadowing the ``min``/``max`` builtins; the
    generated SQL is unchanged.
    """
    this = self.sql(expression, "this")
    max_sql = self.sql(expression, "max")
    min_sql = self.sql(expression, "min")
    return f"{this}(MIN {min_sql} MAX {max_sql})"
def anyvalue_sql(self, expression: exp.AnyValue) -> str:
    """Render ``ANY_VALUE(<this>[ HAVING MAX|MIN <having>])``.

    The HAVING suffix is added only when the ``having`` arg renders to a
    non-empty string; ``MAX`` is used when the ``max`` arg is truthy,
    ``MIN`` otherwise.
    """
    value_sql = self.sql(expression, "this")
    having_sql = self.sql(expression, "having")

    if not having_sql:
        return self.func("ANY_VALUE", value_sql)

    extremum = "MAX" if expression.args.get("max") else "MIN"
    return self.func("ANY_VALUE", f"{value_sql} HAVING {extremum} {having_sql}")
def indexconstraintoption_sql(self, expression: exp.IndexConstraintOption) -> str:
    """Render the first index option that is set on ``expression``.

    Options are probed in a fixed priority order: ``key_block_size``,
    ``using``, ``parser``, ``comment``, ``visible``, ``engine_attr``,
    ``secondary_engine_attr``. If none is set, the option is reported as
    unsupported and an empty string is returned.
    """
    leading_options = (
        ("key_block_size", "KEY_BLOCK_SIZE = "),
        ("using", "USING "),
        ("parser", "WITH PARSER "),
        ("comment", "COMMENT "),
    )
    trailing_options = (
        ("engine_attr", "ENGINE_ATTRIBUTE = "),
        ("secondary_engine_attr", "SECONDARY_ENGINE_ATTRIBUTE = "),
    )

    for arg_name, label in leading_options:
        rendered = self.sql(expression, arg_name)
        if rendered:
            return label + rendered

    # `visible` is a flag, so an explicit False is meaningful here.
    visibility = expression.args.get("visible")
    if visibility is not None:
        return "VISIBLE" if visibility else "INVISIBLE"

    for arg_name, label in trailing_options:
        rendered = self.sql(expression, arg_name)
        if rendered:
            return label + rendered

    self.unsupported("Unsupported index constraint option.")
    return ""
def comprehension_sql(self, expression: exp.Comprehension) -> str:
    """Render ``<this> FOR <expr>[, <position>] IN <iterator>[ IF <condition>]``."""
    result_sql, loop_var, index_var, source_sql, predicate = (
        self.sql(expression, arg)
        for arg in ("this", "expression", "position", "iterator", "condition")
    )

    rendered = f"{result_sql} FOR {loop_var}"
    if index_var:
        rendered += f", {index_var}"
    rendered += f" IN {source_sql}"
    if predicate:
        rendered += f" IF {predicate}"
    return rendered
def forin_sql(self, expression: exp.ForIn) -> str:
    """Render a loop statement as ``FOR <this> DO <expression>``."""
    header_sql = self.sql(expression, "this")
    body_sql = self.sql(expression, "expression")
    return " ".join(("FOR", header_sql, "DO", body_sql))
def lastday_sql(self, expression: exp.LastDay) -> str:
    """Render LAST_DAY, dropping the date part where it is not supported.

    If the generator sets ``LAST_DAY_SUPPORTS_DATE_PART``, the function is
    rendered with all of its arguments. Otherwise only ``this`` is passed,
    and a unit other than ``MONTH`` is reported as unsupported.
    """
    if self.LAST_DAY_SUPPORTS_DATE_PART:
        rendered = self.function_fallback_sql(expression)
    else:
        date_part = expression.text("unit")
        if date_part and date_part != "MONTH":
            self.unsupported("Date parts are not supported in LAST_DAY.")
        rendered = self.func("LAST_DAY", expression.this)
    return rendered
def truncatetable_sql(self, expression: exp.TruncateTable) -> str:
    """Render a ``TRUNCATE TABLE|DATABASE`` statement.

    Optional parts are appended in this order: ``IF EXISTS``, the target
    list, the ``cluster`` clause, ``<identity> IDENTITY``, ``option`` and
    ``partition``.
    """
    pieces = ["TRUNCATE ", "DATABASE" if expression.args.get("is_database") else "TABLE"]

    if expression.args.get("exists"):
        pieces.append(" IF EXISTS")

    pieces.append(f" {self.expressions(expression)}")

    cluster_sql = self.sql(expression, "cluster")
    if cluster_sql:
        pieces.append(f" {cluster_sql}")

    identity_sql = self.sql(expression, "identity")
    if identity_sql:
        pieces.append(f" {identity_sql} IDENTITY")

    option_sql = self.sql(expression, "option")
    if option_sql:
        pieces.append(f" {option_sql}")

    partition_sql = self.sql(expression, "partition")
    if partition_sql:
        pieces.append(f" {partition_sql}")

    return "".join(pieces)
def convert_sql(self, expression: exp.Convert) -> str:
    """Transpile T-SQL's ``CONVERT(type, value[, style])``.

    Args:
        expression: The CONVERT node. ``this`` is the target type and
            ``expression`` the value; the optional ``style``, ``safe`` and
            ``strict`` args tune the conversion.

    Returns:
        The generated SQL, or an empty string when the target type or the
        value is missing.

    If an integer ``style`` maps to a known format, date/time and text
    targets become format-aware conversions. An unknown style is reported
    as unsupported and handled as a plain cast; previously it was still
    turned into a format literal, which put the string ``'None'`` into the
    output.
    """
    to = expression.this
    value = expression.expression
    style = expression.args.get("style")
    safe = expression.args.get("safe")
    strict = expression.args.get("strict")

    if not to or not value:
        return ""

    # Retrieve length of datatype and override to default if not specified
    if not seq_get(to.expressions, 0) and to.this in self.PARAMETERIZABLE_TEXT_TYPES:
        to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

    transformed: t.Optional[exp.Expr] = None
    cast = exp.Cast if strict else exp.TryCast

    # Check whether a conversion with format (T-SQL calls this 'style') is applicable
    if isinstance(style, exp.Literal) and style.is_int:
        from sqlglot.dialects.tsql import TSQL

        style_value = style.name
        converted_style = TSQL.CONVERT_FORMAT_MAPPING.get(style_value)

        if converted_style:
            fmt = exp.Literal.string(converted_style)

            if to.this == exp.DType.DATE:
                transformed = exp.StrToDate(this=value, format=fmt)
            elif to.this in (exp.DType.DATETIME, exp.DType.DATETIME2):
                transformed = exp.StrToTime(this=value, format=fmt)
            elif to.this in self.PARAMETERIZABLE_TEXT_TYPES:
                transformed = cast(this=exp.TimeToStr(this=value, format=fmt), to=to, safe=safe)
            elif to.this == exp.DType.TEXT:
                transformed = exp.TimeToStr(this=value, format=fmt)
        else:
            # No usable format: report it and fall through to the plain cast below.
            self.unsupported(f"Unsupported T-SQL 'style' value: {style_value}")

    if not transformed:
        transformed = cast(this=value, to=to, safe=safe)

    return self.sql(transformed)
def _embed_ignore_nulls(self, expression: exp.IgnoreNulls | exp.RespectNulls, text: str) -> str:
    """Render an IGNORE NULLS / RESPECT NULLS wrapper around its operand.

    Args:
        expression: The IgnoreNulls or RespectNulls node to render.
        text: The modifier text to emit; callers in this file pass
            "IGNORE NULLS" or "RESPECT NULLS".

    Returns:
        The operand's SQL with ``text`` either embedded inside the function
        call or appended after it, depending on the generator flags
        ``IGNORE_NULLS_IN_FUNC`` and ``IGNORE_NULLS_BEFORE_ORDER``. For
        operand types listed in ``RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS``
        the modifier is dropped and reported as unsupported.
    """
    this = expression.this
    if isinstance(this, self.RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS):
        self.unsupported(
            f"RESPECT/IGNORE NULLS is not supported for {type(this).key} in {self.dialect.__class__.__name__}"
        )
        # Render the operand alone, without the modifier.
        return self.sql(this)

    # The "inline" meta flag is set below on wrappers created by this method;
    # such a wrapper skips this branch and uses the trailing form at the bottom.
    if self.IGNORE_NULLS_IN_FUNC and not expression.meta.get("inline"):
        if self.IGNORE_NULLS_BEFORE_ORDER:
            # The first modifier here will be the one closest to the AggFunc's arg
            # (sort key: HavingMax first, then Order, then Limit)
            mods = sorted(
                expression.find_all(exp.HavingMax, exp.Order, exp.Limit),
                key=lambda x: (
                    0 if isinstance(x, exp.HavingMax) else (1 if isinstance(x, exp.Order) else 2)
                ),
            )

            if mods:
                mod = mods[0]
                # Wrap a copy of the modifier's operand in a new node of the same
                # class, mark it inline and splice it into the tree in place of
                # the operand. NOTE: this mutates the expression being generated.
                this = expression.__class__(this=mod.this.copy())
                this.meta["inline"] = True
                mod.this.replace(this)
                # Render the original operand, which now holds the inline wrapper.
                return self.sql(expression.this)

        agg_func = expression.find(exp.AggFunc)

        if agg_func:
            # Drop the last character of the rendered function (presumably its
            # closing parenthesis), then append the modifier and close the call again.
            agg_func_sql = self.sql(agg_func, comment=False)[:-1] + f" {text})"
            # Comments were suppressed above; re-attach them to the final SQL.
            return self.maybe_comment(agg_func_sql, comments=agg_func.comments)

    # Default: modifier follows the operand.
    return f"{self.sql(expression, 'this')} {text}"
def credentials_sql(self, expression: exp.Credentials) -> str:
    """Render credential-related options.

    The output concatenates, in order and without extra separators: the
    CREDENTIALS clause, ``STORAGE_INTEGRATION``, ``ENCRYPTION``,
    ``IAM_ROLE`` and ``REGION``. Each part is emitted only when set.
    """
    raw_credentials = expression.args.get("credentials")

    if isinstance(raw_credentials, exp.Literal):
        # Redshift case: CREDENTIALS followed by a literal
        literal_sql = self.sql(expression, "credentials")
        credentials_part = f"CREDENTIALS {literal_sql}" if literal_sql else ""
    else:
        # Snowflake case: CREDENTIALS = (...)
        options_sql = self.expressions(expression, key="credentials", flat=True, sep=" ")
        credentials_part = "" if raw_credentials is None else f"CREDENTIALS = ({options_sql})"

    parts = [credentials_part]

    storage_sql = self.sql(expression, "storage")
    if storage_sql:
        parts.append(f"STORAGE_INTEGRATION = {storage_sql}")

    encryption_sql = self.expressions(expression, key="encryption", flat=True, sep=" ")
    if encryption_sql:
        parts.append(f" ENCRYPTION = ({encryption_sql})")

    iam_role_sql = self.sql(expression, "iam_role")
    if iam_role_sql:
        parts.append(f"IAM_ROLE {iam_role_sql}")

    region_sql = self.sql(expression, "region")
    if region_sql:
        parts.append(f" REGION {region_sql}")

    return "".join(parts)
def datadeletionproperty_sql(self, expression: exp.DataDeletionProperty) -> str:
    """
    Render a `DATA_DELETION=...` property.

    Without a filter column or retention period the value is `ON` or `OFF`, chosen by
    the `on` arg. When either is present, the value is an `ON(...)` call that carries
    them as `FILTER_COLUMN=...` / `RETENTION_PERIOD=...` arguments.
    """
    filter_sql = self.sql(expression, "filter_column")
    filter_arg = f"FILTER_COLUMN={filter_sql}" if filter_sql else None

    retention_sql = self.sql(expression, "retention_period")
    retention_arg = f"RETENTION_PERIOD={retention_sql}" if retention_sql else None

    if filter_arg is None and retention_arg is None:
        state = "ON" if expression.args.get("on") else "OFF"
    else:
        # Any extra option forces the ON(...) form, whatever the `on` flag says
        state = self.func("ON", filter_arg, retention_arg)

    return "DATA_DELETION=" + state
def pad_sql(self, expression: exp.Pad) -> str:
    """
    Render a padding call as `LPAD(...)` or `RPAD(...)`.

    The `is_left` arg selects the function name. When no fill pattern is set and
    `PAD_FILL_PATTERN_IS_REQUIRED` is truthy, a single-space string literal is used.
    """
    side = "L" if expression.args.get("is_left") else "R"

    fill_sql = self.sql(expression, "fill_pattern")
    if fill_sql:
        pattern = fill_sql
    elif self.PAD_FILL_PATTERN_IS_REQUIRED:
        pattern = "' '"
    else:
        pattern = None

    return self.func(side + "PAD", expression.this, expression.expression, pattern)
def json_sql(self, expression: exp.JSON) -> str:
    """
    Render `JSON[ <this>][ WITH| WITHOUT][ UNIQUE KEYS]`.

    The `with_` arg is tri-state: `None` emits nothing, any other falsy value emits
    `WITHOUT`, and a truthy value emits `WITH`.
    """
    parts = ["JSON"]

    this_sql = self.sql(expression, "this")
    if this_sql:
        parts.append(f" {this_sql}")

    with_flag = expression.args.get("with_")
    if with_flag is not None:
        parts.append(" WITH" if with_flag else " WITHOUT")

    if expression.args.get("unique"):
        parts.append(" UNIQUE KEYS")

    return "".join(parts)
def jsonexists_sql(self, expression: exp.JSONExists) -> str:
    """
    Render a `JSON_EXISTS(<this>, <path>[ PASSING ...][ <on_condition>])` call.

    The optional `passing` list and `on_condition` clause are appended to the path
    argument, so the call always receives exactly two arguments.
    """
    target_sql = self.sql(expression, "this")
    path_arg = self.sql(expression, "path")

    passing_sql = self.expressions(expression, "passing")
    if passing_sql:
        path_arg += f" PASSING {passing_sql}"

    on_condition_sql = self.sql(expression, "on_condition")
    if on_condition_sql:
        path_arg += f" {on_condition_sql}"

    return self.func("JSON_EXISTS", target_sql, path_arg)
def slice_sql(self, expression: exp.Slice) -> str:
    """
    Render a slice as `<begin>:<end>[:<step>]`.

    Missing parts render as empty strings, so an open-ended slice comes out as
    `<begin>:`.
    """
    step_sql = self.sql(expression, "step")
    end_sql = self.sql(expression.expression)
    begin_sql = self.sql(expression.this)

    tail = end_sql
    if step_sql:
        tail = f"{end_sql}:{step_sql}"

    return f"{begin_sql}:{tail}"
def overlay_sql(self, expression: exp.Overlay) -> str:
    """Render `OVERLAY(<this> PLACING <expression> FROM <from_>[ FOR <for_>])`."""
    target_sql, placing_sql, start_sql, length_sql = (
        self.sql(expression, key) for key in ("this", "expression", "from_", "for_")
    )

    # The FOR clause is optional and only emitted when a length was given
    for_clause = f" FOR {length_sql}" if length_sql else ""

    return "OVERLAY(" + target_sql + " PLACING " + placing_sql + " FROM " + start_sql + for_clause + ")"
def arraysize_sql(self, expression: exp.ArraySize) -> str:
    """
    Render an array-size call named by `ARRAY_SIZE_NAME`, reconciling the optional
    dimension argument with `ARRAY_SIZE_DIM_REQUIRED`.

    `ARRAY_SIZE_DIM_REQUIRED` is treated as tri-state: `None` drops the dimension,
    a truthy value supplies `1` when it is missing, and any other value passes the
    dimension through unchanged.
    """
    dimension = expression.expression

    # When the dimension arg is not supported, its default value (the 1st dimension)
    # can be dropped safely; anything else cannot be transpiled
    if dimension and self.ARRAY_SIZE_DIM_REQUIRED is None:
        is_default_dimension = dimension.is_int and dimension.name == "1"
        if not is_default_dimension:
            self.unsupported("Cannot transpile dimension argument for ARRAY_LENGTH")
        dimension = None

    # When the dimension is required but was not specified, default-initialize it
    if self.ARRAY_SIZE_DIM_REQUIRED and not dimension:
        dimension = exp.Literal.number(1)

    return self.func(self.ARRAY_SIZE_NAME, expression.this, dimension)
def includeproperty_sql(self, expression: exp.IncludeProperty) -> str:
    """Render `INCLUDE <this>[ <column_def>][ AS <alias>]`."""
    pieces = ["INCLUDE " + self.sql(expression, "this")]

    column_def_sql = self.sql(expression, "column_def")
    if column_def_sql:
        pieces.append(column_def_sql)

    alias_sql = self.sql(expression, "alias")
    if alias_sql:
        pieces.append("AS " + alias_sql)

    return " ".join(pieces)
def analyzestatistics_sql(self, expression: exp.AnalyzeStatistics) -> str:
    """Render `<kind>[ <option>] STATISTICS[ <this>][ <columns>]`."""

    def spaced(sql: str) -> str:
        # Optional fragments carry their own leading space so they can be concatenated
        return f" {sql}" if sql else ""

    kind_sql = self.sql(expression, "kind")
    option_part = spaced(self.sql(expression, "option"))
    this_part = spaced(self.sql(expression, "this"))
    columns_part = spaced(self.expressions(expression))

    return "".join((kind_sql, option_part, " STATISTICS", this_part, columns_part))
def analyze_sql(self, expression: exp.Analyze) -> str:
    """
    Render an `ANALYZE` statement.

    Every part is optional. Present parts are emitted space-separated in the order:
    options, kind, this, partition, mode, inner expression, properties.
    """

    def spaced(sql: str) -> str:
        # Optional fragments carry their own leading space so they can be concatenated
        return f" {sql}" if sql else ""

    options_part = spaced(self.expressions(expression, key="options", sep=" "))
    kind_part = spaced(self.sql(expression, "kind"))
    this_part = spaced(self.sql(expression, "this"))
    mode_part = spaced(self.sql(expression, "mode"))
    properties_part = spaced(self.sql(expression, "properties"))
    partition_part = spaced(self.sql(expression, "partition"))
    inner_part = spaced(self.sql(expression, "expression"))

    return "".join(
        (
            "ANALYZE",
            options_part,
            kind_part,
            this_part,
            partition_part,
            mode_part,
            inner_part,
            properties_part,
        )
    )
def recursivewithsearch_sql(self, expression: exp.RecursiveWithSearch) -> str:
    """
    Render the search/cycle clause stored on a `RecursiveWithSearch` node.

    A `kind` of `CYCLE` is emitted verbatim; any other kind is wrapped as
    `SEARCH <kind> FIRST BY`. The result has the shape
    `<prefix> <this> SET <expression>[ USING <using>]`.
    """
    kind = self.sql(expression, "kind")
    this = self.sql(expression, "this")
    # Named `set_sql` so the `set` builtin is not shadowed inside this method
    set_sql = self.sql(expression, "expression")
    using = self.sql(expression, "using")
    using = f" USING {using}" if using else ""

    kind_sql = kind if kind == "CYCLE" else f"SEARCH {kind} FIRST BY"

    return f"{kind_sql} {this} SET {set_sql}{using}"
def translatecharacters_sql(self, expression: exp.TranslateCharacters) -> str:
    """Render `TRANSLATE(<this> USING <expression>[ WITH ERROR])`."""
    source_sql = self.sql(expression, "this")
    charset_sql = self.sql(expression, "expression")

    call = f"TRANSLATE({source_sql} USING {charset_sql}"
    if expression.args.get("with_error"):
        call += " WITH ERROR"

    return call + ")"
def refreshtriggerproperty_sql(self, expression: exp.RefreshTriggerProperty) -> str:
    """
    Render `REFRESH <method>[ ON <kind>[ EVERY <every> <unit>][ STARTS <starts>]]`.

    Without a `kind`, only `REFRESH <method>` is produced. The `kind` arg is
    interpolated as stored rather than being rendered through `self.sql`.
    """
    refresh = f"REFRESH {self.sql(expression, 'method')}"

    trigger_kind = expression.args.get("kind")
    if not trigger_kind:
        return refresh

    every_sql = self.sql(expression, "every")
    unit_sql = self.sql(expression, "unit")
    every_clause = f" EVERY {every_sql} {unit_sql}" if every_sql else ""

    starts_sql = self.sql(expression, "starts")
    starts_clause = f" STARTS {starts_sql}" if starts_sql else ""

    return f"{refresh} ON {trigger_kind}{every_clause}{starts_clause}"
def initcap_sql(self, expression: exp.Initcap) -> str:
    """
    Render an `INITCAP` call, dropping the delimiters argument when it is redundant
    or cannot be expressed.

    The argument is dropped when it is a string equal to the dialect's
    `INITCAP_DEFAULT_DELIMITER_CHARS`. It is also dropped, with an unsupported
    warning, when the dialect has `INITCAP_SUPPORTS_CUSTOM_DELIMITERS` unset.
    """
    delimiters = expression.expression
    if not delimiters:
        return self.func("INITCAP", expression.this, delimiters)

    dialect = self.dialect
    if delimiters.is_string and delimiters.this == dialect.INITCAP_DEFAULT_DELIMITER_CHARS:
        # Round-tripping the default delimiters: no need to spell them out
        delimiters = None
    elif not dialect.INITCAP_SUPPORTS_CUSTOM_DELIMITERS:
        self.unsupported("INITCAP does not support custom delimiters")
        delimiters = None

    return self.func("INITCAP", expression.this, delimiters)
def seq_get(seq: Sequence[T], index: int) -> t.Optional[T]:
    """Returns the item of `seq` at position `index`, or `None` when `index` is out of bounds."""
    try:
        item = seq[index]
    except IndexError:
        return None

    return item
@t.overload
def ensure_collection(value: Collection[T]) -> Collection[T]: ...


@t.overload
def ensure_collection(value: T) -> Collection[T]: ...


def ensure_collection(value):
    """
    Returns `value` as a collection, wrapping it into a list when it isn't one.

    `str` and `bytes` are treated as scalars even though they are collections.

    Args:
        value: The value of interest.

    Returns:
        An empty list for `None`, `value` itself if it is a collection other than
        `str`/`bytes`, or else a single-element list containing `value`.
    """
    if value is None:
        return []

    if isinstance(value, (str, bytes)) or not isinstance(value, Collection):
        return [value]

    return value
def tsort(dag: t.Dict[T, t.Set[T]]) -> t.List[T]:
    """
    Sorts a given directed acyclic graph in topological order.

    The graph passed in is not modified: the sort runs on a private copy of the
    mapping and of its dependency sets.

    Args:
        dag: The graph to be sorted, mapping each node to the set of nodes it depends on.
            Dependencies that don't appear as keys are treated as nodes without dependencies.

    Returns:
        A list that contains all of the graph's nodes in topological order, i.e. every
        node comes after the nodes it depends on. Nodes that become available in the
        same round are emitted in sorted order.

    Raises:
        ValueError: If the graph contains a cycle.
    """
    # Copy the mapping and each dependency set, because the algorithm below consumes both
    graph: t.Dict[T, t.Set[T]] = {node: set(deps) for node, deps in dag.items()}

    # Register dependencies that are only ever referenced, so they get sorted as well
    for deps in tuple(graph.values()):
        for dep in deps:
            graph.setdefault(dep, set())

    result: t.List[T] = []

    while graph:
        ready = {node for node, deps in graph.items() if not deps}

        if not ready:
            # Every remaining node still waits on another one
            raise ValueError("Cycle error")

        for node in ready:
            del graph[node]

        for deps in graph.values():
            deps -= ready

        result.extend(sorted(ready))  # type: ignore

    return result
def split_num_words(
    value: str, sep: str, min_num_words: int, fill_from_start: bool = True
) -> t.List[t.Optional[str]]:
    """
    Splits `value` on `sep` and pads the result with `None` up to `min_num_words` entries.

    Args:
        value: The value to be split.
        sep: The separator to split on.
        min_num_words: The minimum number of entries in the result. Longer splits are
            returned as they are.
        fill_from_start: Whether the `None` padding goes before the words (default) or after them.

    Examples:
        >>> split_num_words("db.table", ".", 3)
        [None, 'db', 'table']
        >>> split_num_words("db.table", ".", 3, fill_from_start=False)
        ['db', 'table', None]
        >>> split_num_words("db.table", ".", 1)
        ['db', 'table']

    Returns:
        The words produced by `str.split`, padded with as many `None` values as are missing.
    """
    words = value.split(sep)

    # A non-positive count yields an empty list, i.e. no padding at all
    padding = [None] * (min_num_words - len(words))

    return padding + words if fill_from_start else words + padding
def to_bool(value: t.Optional[str | bool]) -> t.Optional[str | bool]:
    """
    Coerces the textual spellings of a boolean into an actual `bool`.

    Args:
        value: A boolean, a string, or `None`.

    Returns:
        `value` unchanged if it is `None` or already a boolean; `True` for "true"/"1" and
        `False` for "false"/"0" (case-insensitive); any other string is returned as is.
    """
    if value is None or isinstance(value, bool):
        return value

    # Only these spellings are coerced, everything else falls through untouched
    known_spellings = {"true": True, "1": True, "false": False, "0": False}

    return known_spellings.get(value.lower(), value)
def is_iso_date(text: str) -> bool:
    """Returns whether `text` is accepted by `datetime.date.fromisoformat`."""
    try:
        datetime.date.fromisoformat(text)
    except ValueError:
        return False
    else:
        return True
def parse(path: str, dialect: DialectType = None) -> exp.JSONPath:
    """
    Takes in a JSON path string and parses it into a JSONPath expression.

    Args:
        path: The JSON path text. A leading `$` is optional, the resulting expression
            always starts with a root part.
        dialect: The dialect whose JSON path tokenizer is used to tokenize `path`.

    Returns:
        The parsed JSONPath expression.

    Raises:
        ParseError: If the path is malformed, e.g. it has an empty or unterminated
            bracket segment, or a dot that isn't followed by a key or `*`.
    """
    from sqlglot.dialects import Dialect

    jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer()
    tokens = jsonpath_tokenizer.tokenize(path)
    size = len(tokens)

    # Index of the token that is currently being looked at
    i = 0

    def _curr() -> t.Optional[TokenType]:
        return tokens[i].token_type if i < size else None

    def _prev() -> Token:
        return tokens[i - 1]

    def _advance() -> Token:
        nonlocal i
        i += 1
        return _prev()

    def _error(msg: str) -> str:
        return f"{msg} at index {i}: {path}"

    @t.overload
    def _match(token_type: TokenType, raise_unmatched: t.Literal[True] = True) -> Token:
        pass

    @t.overload
    def _match(
        token_type: TokenType, raise_unmatched: t.Literal[False] = False
    ) -> t.Optional[Token]:
        pass

    def _match(token_type, raise_unmatched=False):
        if _curr() == token_type:
            return _advance()
        if raise_unmatched:
            raise ParseError(_error(f"Expected {token_type}"))
        return None

    def _match_set(types: Collection[TokenType]) -> t.Optional[Token]:
        return _advance() if _curr() in types else None

    def _parse_literal() -> t.Any:
        # Returns the parsed literal, or `False` when there is none at the current
        # position. `False` is the sentinel because `0` and `""` are valid literals.
        token = _match(TokenType.STRING) or _match(TokenType.IDENTIFIER)
        if token:
            return token.text
        if _match(TokenType.STAR):
            return exp.JSONPathWildcard()
        if _match(TokenType.PLACEHOLDER) or _match(TokenType.L_PAREN):
            script = _prev().text == "("
            start = i

            # The filter / script body isn't parsed, its raw text is kept instead,
            # so all we need is to find the bracket that closes the segment
            while True:
                if _match(TokenType.L_BRACKET):
                    _parse_bracket()  # nested call which we can throw away
                if _curr() in (TokenType.R_BRACKET, None):
                    break
                _advance()

            if i >= size:
                # The path ended before the segment was closed. Reading `tokens[i]`
                # below would otherwise surface as an IndexError
                raise ParseError(_error(f"Expected {TokenType.R_BRACKET}"))

            expr_type = exp.JSONPathScript if script else exp.JSONPathFilter
            return expr_type(this=path[tokens[start].start : tokens[i].end])

        number = "-" if _match(TokenType.DASH) else ""

        token = _match(TokenType.NUMBER)
        if token:
            number += token.text

        if number:
            return int(number)

        return False

    def _parse_slice() -> t.Any:
        start = _parse_literal()
        end = _parse_literal() if _match(TokenType.COLON) else None
        step = _parse_literal() if _match(TokenType.COLON) else None

        if end is None and step is None:
            return start

        return exp.JSONPathSlice(start=start, end=end, step=step)

    def _parse_bracket() -> exp.JSONPathPart:
        literal = _parse_slice()

        if isinstance(literal, str) or literal is not False:
            indexes = [literal]
            while _match(TokenType.COMMA):
                literal = _parse_slice()

                # Only the "no literal" sentinel is skipped. A truthiness check here
                # would also drop the index 0 and the empty-string key
                if literal is not False:
                    indexes.append(literal)

            if len(indexes) == 1:
                if isinstance(literal, str):
                    node: exp.JSONPathPart = exp.JSONPathKey(this=indexes[0])
                elif isinstance(literal, exp.JSONPathPart) and isinstance(
                    literal, (exp.JSONPathScript, exp.JSONPathFilter)
                ):
                    node = exp.JSONPathSelector(this=indexes[0])
                else:
                    node = exp.JSONPathSubscript(this=indexes[0])
            else:
                node = exp.JSONPathUnion(expressions=indexes)
        else:
            raise ParseError(_error("Cannot have empty segment"))

        _match(TokenType.R_BRACKET, raise_unmatched=True)

        return node

    def _parse_var_text() -> str:
        """
        Consumes & returns the text for a var. In BigQuery it's valid to have a key with spaces
        in it, e.g JSON_QUERY(..., '$. a b c ') should produce a single JSONPathKey(' a b c ').
        This is done by merging "consecutive" vars until a key separator is found (dot, colon etc)
        or the path string is exhausted.
        """
        prev_index = i - 2

        while _match_set(jsonpath_tokenizer.VAR_TOKENS):
            pass

        start = 0 if prev_index < 0 else tokens[prev_index].end + 1

        if i >= len(tokens):
            # This key is the last token for the path, so its text is the remaining path
            text = path[start:]
        else:
            text = path[start : tokens[i].start]

        return text

    # We canonicalize the JSON path AST so that it always starts with a
    # "root" element, so paths like "field" will be generated as "$.field"
    _match(TokenType.DOLLAR)
    expressions: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]

    while _curr():
        if _match(TokenType.DOT) or _match(TokenType.COLON):
            recursive = _prev().text == ".."

            if _match_set(jsonpath_tokenizer.VAR_TOKENS):
                value: t.Optional[str | exp.JSONPathWildcard] = _parse_var_text()
            elif _match(TokenType.IDENTIFIER):
                value = _prev().text
            elif _match(TokenType.STAR):
                value = exp.JSONPathWildcard()
            else:
                value = None

            if recursive:
                expressions.append(exp.JSONPathRecursive(this=value))
            elif value:
                expressions.append(exp.JSONPathKey(this=value))
            else:
                raise ParseError(_error("Expected key name or * after DOT"))
        elif _match(TokenType.L_BRACKET):
            expressions.append(_parse_bracket())
        elif _match_set(jsonpath_tokenizer.VAR_TOKENS):
            expressions.append(exp.JSONPathKey(this=_parse_var_text()))
        elif _match(TokenType.IDENTIFIER):
            expressions.append(exp.JSONPathKey(this=_prev().text))
        elif _match(TokenType.STAR):
            expressions.append(exp.JSONPathWildcard())
        else:
            raise ParseError(_error(f"Unexpected {tokens[i].token_type}"))

    return exp.JSONPath(expressions=expressions)
import Schema, exp, maybe_parse from sqlglot.errors import SqlglotError from sqlglot.optimizer import Scope, build_scope, find_all_in_scope, normalize_identifiers, qualify from sqlglot.optimizer.scope import ScopeType if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType from collections.abc import Iterator, Mapping, Sequence logger = logging.getLogger("sqlglot") @dataclass(frozen=True) class Node: name: str expression: exp.Expr source: exp.Expr downstream: list[Node] = field(default_factory=list) source_name: str = "" reference_node_name: str = "" def walk(self) -> Iterator[Node]: visited: set[int] = set() queue = [self] while queue: node = queue.pop() node_id = id(node) if node_id in visited: continue visited.add(node_id) yield node queue.extend(reversed(node.downstream)) def to_html(self, dialect: DialectType = None, **opts) -> GraphHTML: nodes = {} edges = [] for node in self.walk(): if isinstance(node.expression, exp.Table): label = f"FROM {node.expression.this}" title = f"
SELECT {node.name} FROM {node.expression.this}
" group = 1 else: label = node.expression.sql(pretty=True, dialect=dialect) source = node.source.transform( lambda n: ( exp.Tag(this=n, prefix="", postfix="") if n is node.expression else n ), copy=False, ).sql(pretty=True, dialect=dialect) title = f"
{source}
def lineage(
    column: str | exp.Column,
    sql: str | exp.Expr,
    schema: t.Optional[dict | Schema] = None,
    sources: t.Optional[Mapping[str, str | exp.Query]] = None,
    dialect: DialectType = None,
    scope: t.Optional[Scope] = None,
    trim_selects: bool = True,
    copy: bool = True,
    **kwargs,
) -> Node:
    """Build the lineage graph for a column of a SQL query.

    Args:
        column: The column to build the lineage for.
        sql: The SQL string or expression.
        schema: The schema of tables.
        sources: A mapping of queries which will be used to continue building lineage.
        dialect: The dialect of input SQL.
        scope: A pre-created scope to use instead. When given, the query is not qualified again.
        trim_selects: Whether to clean up selects by trimming to only relevant columns.
        copy: Whether to copy the Expr arguments.
        **kwargs: Qualification optimizer kwargs.

    Returns:
        A lineage node.

    Raises:
        SqlglotError: If no scope can be built for the query, or if `column` is not one
            of its projections.
    """
    expression = maybe_parse(sql, copy=copy, dialect=dialect)
    column = normalize_identifiers.normalize_identifiers(column, dialect=dialect).name

    if sources:
        parsed_sources = {
            name: t.cast(exp.Query, maybe_parse(query, copy=copy, dialect=dialect))
            for name, query in sources.items()
        }
        expression = exp.expand(expression, parsed_sources, dialect=dialect, copy=copy)

    if not scope:
        # Caller-provided kwargs take precedence over the lineage defaults
        qualify_options = {"validate_qualify_columns": False, "identify": False, **kwargs}
        expression = qualify.qualify(
            expression,
            dialect=dialect,
            schema=schema,
            **qualify_options,  # type: ignore
        )
        scope = build_scope(expression)

    if not scope:
        raise SqlglotError("Cannot build lineage, sql must be SELECT")

    selectable = scope.expression
    is_projected = isinstance(selectable, exp.Selectable) and any(
        select.alias_or_name == column for select in selectable.selects
    )
    if not is_projected:
        raise SqlglotError(f"Cannot find column '{column}' in query.")

    return to_node(column, scope, dialect, trim_selects=trim_selects, _cache={})
selectable = t.cast(exp.Selectable, scope.expression) if isinstance(column, int): if column >= len(selectable.selects): raise SqlglotError( f"Cannot find column's source with index {column} in query: {selectable.sql(dialect=dialect)}" ) select = selectable.selects[column] else: select = next( (select for select in selectable.selects if select.alias_or_name == column), exp.Star() if selectable.is_star else scope.expression, ) if isinstance(scope.expression, exp.Subquery): for inner_scope in scope.subquery_scopes: result = to_node( column, scope=inner_scope, dialect=dialect, upstream=upstream, source_name=source_name, reference_node_name=reference_node_name, trim_selects=trim_selects, _cache=_cache, ) if _cache is not None: _cache[cache_key] = result return result if isinstance(scope.expression, exp.SetOperation): name = type(scope.expression).__name__.upper() upstream = upstream or Node(name=name, source=scope.expression, expression=select) index = ( column if isinstance(column, int) else next( ( i for i, select in enumerate(selectable.selects) if select.alias_or_name == column or select.is_star ), -1, # mypy will not allow a None here, but a negative index should never be returned ) ) if index == -1: raise ValueError(f"Could not find {column} in {scope.expression}") for s in scope.union_scopes: to_node( index, scope=s, dialect=dialect, upstream=upstream, source_name=source_name, reference_node_name=reference_node_name, trim_selects=trim_selects, _cache=_cache, ) if _cache is not None: _cache[cache_key] = upstream return upstream if trim_selects and isinstance(scope.expression, exp.Select): # For better ergonomics in our node labels, replace the full select with # a version that has only the column we care about. # "x", SELECT x, y FROM foo # => "x", SELECT x FROM foo source: exp.Expr = scope.expression.select(select, append=False) else: source = scope.expression # Create the node for this step in the lineage chain, and attach it to the previous one. 
node = Node( name=f"{scope_name}.{column}" if scope_name else str(column), source=source, expression=select, source_name=source_name or "", reference_node_name=reference_node_name or "", ) if upstream: upstream.downstream.append(node) subquery_scopes = { id(subquery_scope.expression): subquery_scope for subquery_scope in scope.subquery_scopes } for subquery in find_all_in_scope(select, *exp.UNWRAPPED_QUERIES): subquery_scope: t.Optional[Scope] = subquery_scopes.get(id(subquery)) if not subquery_scope: logger.warning(f"Unknown subquery scope: {subquery.sql(dialect=dialect)}") continue for name in subquery.named_selects: to_node( name, scope=subquery_scope, dialect=dialect, upstream=node, trim_selects=trim_selects, _cache=_cache, ) # if the select is a star add all scope sources as downstreams if isinstance(select, exp.Star): for src in scope.sources.values(): src_expr = src.expression if isinstance(src, Scope) else src node.downstream.append( Node(name=select.sql(comments=False), source=src_expr, expression=src_expr) ) # Find all columns that went into creating this one to list their lineage nodes. source_columns = set(find_all_in_scope(select, exp.Column)) # If the source is a UDTF find columns used in the UDTF to generate the table if isinstance(source, exp.UDTF): source_columns |= set(source.find_all(exp.Column)) derived_tables: Sequence[exp.Expr] = [ src.expression.parent for src in scope.sources.values() if isinstance(src, Scope) and src.is_derived_table and src.expression.parent ] else: derived_tables = scope.derived_tables source_names = { dt.alias: dt.comments[0].split()[1] for dt in derived_tables if dt.comments and dt.comments[0].startswith("source: ") } pivots = scope.pivots pivot = pivots[0] if len(pivots) == 1 and not pivots[0].unpivot else None if pivot: # For each aggregation function, the pivot creates a new column for each field in category # combined with the aggfunc. So the columns parsed have this order: cat_a_value_sum, cat_a, # b_value_sum, b. 
Because of this step wise manner the aggfunc 'sum(value) as value_sum' # belongs to the column indices 0, 2, and the aggfunc 'max(price)' without an alias belongs # to the column indices 1, 3. Here, only the columns used in the aggregations are of interest # in the lineage, so lookup the pivot column name by index and map that with the columns used # in the aggregation. # # Example: PIVOT (SUM(value) AS value_sum, MAX(price)) FOR category IN ('a' AS cat_a, 'b') pivot_columns = pivot.args["columns"] pivot_aggs_count = len(pivot.expressions) pivot_column_mapping = {} for i, agg in enumerate(pivot.expressions): agg_cols = list(agg.find_all(exp.Column)) for col_index in range(i, len(pivot_columns), pivot_aggs_count): pivot_column_mapping[pivot_columns[col_index].name] = agg_cols for c in source_columns: table = c.table col_source: t.Optional[exp.Table | Scope] = scope.sources.get(table) if isinstance(col_source, Scope): reference_node_name = None if col_source.scope_type == ScopeType.DERIVED_TABLE and table not in source_names: reference_node_name = table elif col_source.scope_type == ScopeType.CTE: selected_node, _ = scope.selected_sources.get(table, (None, None)) reference_node_name = selected_node.name if selected_node else None # The table itself came from a more specific scope. Recurse into that one using the unaliased column name. to_node( c.name, scope=col_source, dialect=dialect, scope_name=table, upstream=node, source_name=source_names.get(table) or source_name, reference_node_name=reference_node_name, trim_selects=trim_selects, _cache=_cache, ) elif pivot and pivot.alias_or_name == c.table: downstream_columns = [] column_name = c.name if any(column_name == pivot_column.name for pivot_column in pivot_columns): downstream_columns.extend(pivot_column_mapping[column_name]) else: # The column is not in the pivot, so it must be an implicit column of the # pivoted source -- adapt column to be from the implicit pivoted source. 
pivot_parent = pivot.parent downstream_columns.append( exp.column(c.this, table=pivot_parent.alias_or_name if pivot_parent else "") ) for downstream_column in downstream_columns: table = downstream_column.table col_source = scope.sources.get(table) if isinstance(col_source, Scope): to_node( downstream_column.name, scope=col_source, scope_name=table, dialect=dialect, upstream=node, source_name=source_names.get(table) or source_name, reference_node_name=reference_node_name, trim_selects=trim_selects, _cache=_cache, ) else: col_expr = col_source or exp.Placeholder() node.downstream.append( Node( name=downstream_column.sql(comments=False), source=col_expr, expression=col_expr, ) ) else: # The source is not a scope and the column is not in any pivot - we've reached the end # of the line. At this point, if a source is not found it means this column's lineage # is unknown. This can happen if the definition of a source used in a query is not # passed into the `sources` map. col_expr = col_source or exp.Placeholder() node.downstream.append( Node(name=c.sql(comments=False), source=col_expr, expression=col_expr) ) if _cache is not None: _cache[cache_key] = node return node class GraphHTML: """Node to HTML generator using vis.js. 
https://visjs.github.io/vis-network/docs/network/ """ def __init__( self, nodes: t.Dict, edges: t.List, imports: bool = True, options: t.Optional[t.Dict] = None ): self.imports = imports self.options = { "height": "500px", "width": "100%", "layout": { "hierarchical": { "enabled": True, "nodeSpacing": 200, "sortMethod": "directed", }, }, "interaction": { "dragNodes": False, "selectable": False, }, "physics": { "enabled": False, }, "edges": { "arrows": "to", }, "nodes": { "font": "20px monaco", "shape": "box", "widthConstraint": { "maximum": 300, }, }, **(options or {}), } self.nodes = nodes self.edges = edges def __str__(self): nodes = json.dumps(list(self.nodes.values())) edges = json.dumps(self.edges) options = json.dumps(self.options) imports = ( """ """ if self.imports else "" ) return f"""
{imports}
""" def _repr_html_(self) -> str: return self.__str__() ================================================ FILE: sqlglot/optimizer/__init__.py ================================================ # ruff: noqa: F401 from sqlglot.optimizer.optimizer import RULES as RULES, optimize as optimize from sqlglot.optimizer.scope import ( Scope as Scope, build_scope as build_scope, find_all_in_scope as find_all_in_scope, find_in_scope as find_in_scope, traverse_scope as traverse_scope, walk_in_scope as walk_in_scope, ) ================================================ FILE: sqlglot/optimizer/annotate_types.py ================================================ from __future__ import annotations import functools import logging import typing as t from sqlglot import exp from sqlglot.dialects.dialect import Dialect from sqlglot.helper import ( ensure_list, is_date_unit, is_iso_date, is_iso_datetime, seq_get, ) from sqlglot.optimizer.scope import Scope, traverse_scope from sqlglot.schema import MappingSchema, Schema, ensure_schema if t.TYPE_CHECKING: from sqlglot._typing import B, E BinaryCoercionFunc = t.Callable[[exp.Expr, exp.Expr], exp.DType] BinaryCoercions = t.Dict[ t.Tuple[exp.DType, exp.DType], BinaryCoercionFunc, ] from sqlglot.dialects.dialect import DialectType from sqlglot.typing import ExprMetadataType logger = logging.getLogger("sqlglot") # EXTRACT/DATE_PART specifiers that return BIGINT instead of INT BIGINT_EXTRACT_DATE_PARTS = { "EPOCH_SECOND", "EPOCH_MILLISECOND", "EPOCH_MICROSECOND", "EPOCH_NANOSECOND", "NANOSECOND", } def annotate_types( expression: E, schema: t.Optional[t.Dict | Schema] = None, expression_metadata: t.Optional[ExprMetadataType] = None, coerces_to: t.Optional[t.Dict[exp.DType, t.Set[exp.DType]]] = None, dialect: DialectType = None, overwrite_types: bool = True, ) -> E: """ Infers the types of an expression, annotating its AST accordingly. 
def swap_args(func: BinaryCoercionFunc) -> BinaryCoercionFunc:
    """
    Wraps a binary coercion function so that it receives its two operands in reverse order.

    Args:
        func: The function to wrap.

    Returns:
        A function that calls `func(r, l)` when invoked as `(l, r)` and carries
        `func`'s metadata.
    """

    def _swapped(l: exp.Expr, r: exp.Expr) -> exp.DType:
        return func(r, l)

    return functools.wraps(func)(_swapped)
exp.DType.NVARCHAR, exp.DType.VARCHAR, exp.DType.NCHAR, exp.DType.CHAR, ) numeric_precedence = ( exp.DType.DECFLOAT, exp.DType.DOUBLE, exp.DType.FLOAT, exp.DType.BIGDECIMAL, exp.DType.DECIMAL, exp.DType.BIGINT, exp.DType.INT, exp.DType.SMALLINT, exp.DType.TINYINT, ) timelike_precedence = ( exp.DType.TIMESTAMPLTZ, exp.DType.TIMESTAMPTZ, exp.DType.TIMESTAMP, exp.DType.DATETIME, exp.DType.DATE, ) for type_precedence in (text_precedence, numeric_precedence, timelike_precedence): coerces_to = set() for data_type in type_precedence: klass.COERCES_TO[data_type] = coerces_to.copy() coerces_to |= {data_type} return klass class TypeAnnotator(metaclass=_TypeAnnotator): NESTED_TYPES = { exp.DType.ARRAY, } # Specifies what types a given type can be coerced into (autofilled) COERCES_TO: t.Dict[exp.DType, t.Set[exp.DType]] = {} # Coercion functions for binary operations. # Map of type pairs to a callable that takes both sides of the binary operation and returns the resulting type. BINARY_COERCIONS: BinaryCoercions = { **swap_all( { (t, exp.DType.INTERVAL): lambda l, r: _coerce_date_literal(l, r.args.get("unit")) for t in exp.DataType.TEXT_TYPES } ), **swap_all( { # text + numeric will yield the numeric type to match most dialects' semantics (text, numeric): lambda l, r: t.cast( exp.DType, l.type if l.type in exp.DataType.NUMERIC_TYPES else r.type ) for text in exp.DataType.TEXT_TYPES for numeric in exp.DataType.NUMERIC_TYPES } ), **swap_all( { (exp.DType.DATE, exp.DType.INTERVAL): lambda l, r: _coerce_date( l, r.args.get("unit") ), } ), } def __init__( self, schema: Schema, expression_metadata: t.Optional[ExprMetadataType] = None, coerces_to: t.Optional[t.Dict[exp.DType, t.Set[exp.DType]]] = None, binary_coercions: t.Optional[BinaryCoercions] = None, overwrite_types: bool = True, ) -> None: self.schema = schema dialect = schema.dialect or Dialect() self.dialect = dialect self.expression_metadata = expression_metadata or dialect.EXPRESSION_METADATA self.coerces_to = coerces_to or 
def _set_type(self, expression: E, target_type: t.Optional[exp.DataType | exp.DType]) -> E:
    """
    Assigns a type to `expression` and updates the annotator's bookkeeping.

    Args:
        expression: The node to annotate.
        target_type: The type to assign. A falsy value results in UNKNOWN.

    Returns:
        The same `expression` object, with its type set.
    """
    node_id = id(expression)
    old_type = expression.type

    expression.type = target_type or exp.DType.UNKNOWN  # type: ignore
    self._visited.add(node_id)

    null_type = exp.DType.NULL

    # When the dialect has no NULL type, remember NULL-typed nodes in `_null_expressions`
    if not self._supports_null_type and t.cast(exp.DataType, expression.type).this == null_type:
        self._null_expressions[node_id] = expression
        return expression

    # If the node previously had the NULL type, make sure it isn't tracked anymore
    if old_type and t.cast(exp.DataType, old_type).this == null_type:
        self._null_expressions.pop(node_id, None)

    return expression
def annotate_scope(self, scope: Scope) -> None:
    """
    Annotates the expressions that belong to a single scope.

    The work is done in three steps:
        1. If the schema is a `MappingSchema`, every entry of `scope.table_columns` is typed
           as a STRUCT, built either from the schema entry of the table it refers to or
           from the "query_type" previously stored in the meta of the source query.
        2. The scope's expression is annotated through `_annotate_expression`.
        3. If the dialect sets `QUERY_RESULTS_ARE_STRUCTS` and the scope's expression is a
           query, a STRUCT type describing its projections is stored in the query's meta
           under "query_type", unless one of the projection types is UNKNOWN.

    Args:
        scope: The scope to annotate.
    """
    if isinstance(self.schema, MappingSchema):
        for table_column in scope.table_columns:
            source = scope.sources.get(table_column.name)

            if isinstance(source, exp.Table):
                schema = self.schema.find(
                    source, raise_on_missing=False, ensure_data_types=True
                )
                # Only a dict result can be turned into STRUCT fields below
                if not isinstance(schema, dict):
                    continue

                # One STRUCT field per (column name, type) pair found in the schema
                struct_type = exp.DataType(
                    this=exp.DType.STRUCT,
                    expressions=[
                        exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
                        for c, kind in schema.items()
                    ],
                    nested=True,
                )
                self._set_type(table_column, struct_type)
            elif (
                isinstance(source, Scope)
                and isinstance(source.expression, exp.Query)
                and (
                    source.expression.meta.get("query_type") or exp.DataType.build("UNKNOWN")
                ).is_type(exp.DType.STRUCT)
            ):
                # Reuse the STRUCT type recorded on the source query (see the end of this method)
                self._set_type(table_column, source.expression.meta["query_type"])

    # Iterate through all the expressions of the current scope in post-order, and annotate
    self._annotate_expression(scope.expression, scope)

    if self.dialect.QUERY_RESULTS_ARE_STRUCTS and isinstance(scope.expression, exp.Query):
        # The projection types are copied, so the STRUCT doesn't share nodes with the query
        struct_type = exp.DataType(
            this=exp.DType.STRUCT,
            expressions=[
                exp.ColumnDef(
                    this=exp.to_identifier(select.output_name),
                    kind=select.type.copy() if select.type else None,
                )
                for select in scope.expression.selects
            ],
            nested=True,
        )

        # Fields without a kind are ignored by this check; any UNKNOWN kind discards the STRUCT
        if not any(
            cd.kind.is_type(exp.DType.UNKNOWN)
            for cd in struct_type.expressions
            if cd.kind
        ):
            # We don't use `_set_type` on purpose here. If we annotated the query directly, then
            # using it in other contexts (e.g., ARRAY()) could result in incorrect type
            # annotations, i.e., it shouldn't be interpreted as a STRUCT value.
            scope.expression.meta["query_type"] = struct_type
def _maybe_coerce(
    self,
    type1: exp.DataType | exp.DType,
    type2: exp.DataType | exp.DType,
) -> exp.DataType | exp.DType:
    """
    Picks the resulting type when `type1` is combined with `type2`.

    Returns `type2` if `type1` can be coerced into it, otherwise `type1`.

    Parameterized types (e.g. DECIMAL(18, 2) contains two parameters) are never coerced:
    if `type1` is parameterized it is returned as is; otherwise, if `type2` is parameterized,
    `type2` is returned as is. UNKNOWN is propagated whenever either side is UNKNOWN, and a
    NULL side yields the other side.
    """
    unwrapped = []
    for candidate in (type1, type2):
        if isinstance(candidate, exp.DataType):
            # A parameterized type wins immediately, `type1` being checked first
            if candidate.expressions:
                return candidate
            unwrapped.append(candidate.this)
        else:
            unwrapped.append(candidate)

    source_value, target_value = unwrapped

    # We propagate the UNKNOWN type upwards if found
    if exp.DType.UNKNOWN in (source_value, target_value):
        return exp.DType.UNKNOWN

    if source_value == exp.DType.NULL:
        return target_value
    if target_value == exp.DType.NULL:
        return source_value

    if target_value in self.coerces_to.get(source_value, {}):
        return target_value
    return source_value
def _annotate_binary(self, expression: B) -> B:
    """
    Annotates a binary expression based on its two operands.

    Connectors and predicates are typed as BOOLEAN. Otherwise, a matching entry of
    `self.binary_coercions` decides the type and, failing that, the type is derived from
    the operands via `_annotate_by_args`. The expression is flagged as "nonnull" when it
    is an `exp.Is` or when both operands are flagged as "nonnull".

    Args:
        expression: The binary expression to annotate.

    Returns:
        The same `expression` object.
    """
    lhs, rhs = expression.left, expression.right

    if not (lhs and rhs):
        expression_sql = expression.sql(self.dialect)
        logger.warning(f"Failed to annotate badly formed binary expression: {expression_sql}")
        self._set_type(expression, None)
        return expression

    coercion_key = (lhs.type.this, rhs.type.this)  # type: ignore

    if isinstance(expression, (exp.Connector, exp.Predicate)):
        self._set_type(expression, exp.DType.BOOLEAN)
    elif coercion_key in self.binary_coercions:
        coerce = self.binary_coercions[coercion_key]
        self._set_type(expression, coerce(lhs, rhs))
    else:
        self._annotate_by_args(expression, lhs, rhs)

    is_nonnull = isinstance(expression, exp.Is) or all(
        operand.meta.get("nonnull") is True for operand in (lhs, rhs)
    )
    if is_nonnull:
        expression.meta["nonnull"] = True

    return expression
def _annotate_timeunit(
    self, expression: exp.TimeUnit | exp.DateTrunc
) -> exp.TimeUnit | exp.DateTrunc:
    """
    Annotates an expression that carries a time unit, based on the type of its operand.

    A text operand is resolved through `_coerce_date_literal`, a temporal operand through
    `_coerce_date`, and any other operand type results in UNKNOWN.

    Args:
        expression: The expression to annotate.

    Returns:
        The same `expression` object.
    """
    operand = expression.this
    operand_kind = operand.type.this

    if operand_kind in exp.DataType.TEXT_TYPES:
        coerce = _coerce_date_literal
    elif operand_kind in exp.DataType.TEMPORAL_TYPES:
        coerce = _coerce_date
    else:
        self._set_type(expression, exp.DType.UNKNOWN)
        return expression

    self._set_type(expression, coerce(operand, expression.unit))
    return expression
def _annotate_dot(self, expression: exp.Dot) -> exp.Dot:
    """
    Annotates a dot access.

    The expression starts out as UNKNOWN. If its right-hand side is an anonymous function
    call, that call's type is used. Otherwise, if the left-hand side is a STRUCT, the type
    of the first field whose name matches the right-hand side is used.

    Args:
        expression: The dot expression to annotate.

    Returns:
        The same `expression` object.
    """
    member = expression.expression
    self._set_type(expression, None)

    # Propagate type from qualified UDF calls (e.g., db.my_udf(...))
    if isinstance(member, exp.Anonymous):
        self._set_type(expression, member.type)
        return expression

    owner_type = expression.this.type
    if not owner_type or not owner_type.is_type(exp.DType.STRUCT):
        return expression

    matching_field = next(
        (field for field in owner_type.expressions if field.name == member.name),
        None,
    )
    if matching_field is not None:
        self._set_type(expression, matching_field.kind)

    return expression
def _annotate_struct(self, expression: exp.Struct) -> exp.Struct:
    """
    Annotates a STRUCT constructor from the types of its members.

    Each member is resolved with `_annotate_struct_value`. As soon as one of them can't
    be resolved (i.e. `None` is returned), the whole struct is typed as UNKNOWN.

    Args:
        expression: The struct expression to annotate.

    Returns:
        The same `expression` object.
    """
    fields = []

    for member in expression.expressions:
        field = self._annotate_struct_value(member)
        if field is None:
            break
        fields.append(field)
    else:
        # Every member was resolved, so the nested STRUCT type can be built
        struct_type = exp.DataType(this=exp.DType.STRUCT, expressions=fields, nested=True)
        self._set_type(expression, struct_type)
        return expression

    self._set_type(expression, None)
    return expression
def _annotate_extract(self, expression: exp.Extract) -> exp.Extract:
    """
    Annotates an EXTRACT expression based on the name of the extracted part.

    "TIME" and "DATE" map to the homonymous types, the parts listed in
    `BIGINT_EXTRACT_DATE_PARTS` map to BIGINT and everything else maps to INT.

    Args:
        expression: The extract expression to annotate.

    Returns:
        The same `expression` object.
    """
    part = expression.name
    same_named_types = {"TIME": exp.DType.TIME, "DATE": exp.DType.DATE}

    if part in same_named_types:
        extracted_type = same_named_types[part]
    elif part in BIGINT_EXTRACT_DATE_PARTS:
        extracted_type = exp.DType.BIGINT
    else:
        extracted_type = exp.DType.INT

    self._set_type(expression, extracted_type)
    return expression
def canonicalize(expression: exp.Expr, dialect: DialectType = None) -> exp.Expr:
    """Converts a sql expression into a standard form.

    This method relies on annotate_types because many of the
    conversions rely on type inference.

    Args:
        expression: The expression to canonicalize.
        dialect: The dialect to resolve through `Dialect.get_or_raise`. It is handed to
            the date function rewrite and decides whether datetime types get promoted.

    Returns:
        The result of applying the rewrites to the expression tree via `exp.replace_tree`.
    """
    resolved_dialect = Dialect.get_or_raise(dialect)

    def _rewrite(node: exp.Expr) -> exp.Expr:
        # Only the node types that some rewrite can act on go through the pipeline
        if isinstance(node, _CANONICALIZE_TYPES):
            node = add_text_to_concat(node)
            node = replace_date_funcs(node, dialect=resolved_dialect)
            node = coerce_type(node, resolved_dialect.PROMOTE_TO_INFERRED_DATETIME_TYPE)
            node = remove_redundant_casts(node)
            node = ensure_bools(node, _replace_int_predicate)
            node = remove_ascending_order(node)

        return node

    return exp.replace_tree(expression, _rewrite)
def remove_redundant_casts(expression: exp.Expr) -> exp.Expr:
    """
    Unwraps conversions that don't change the type of their operand.

    Two cases are handled:
        - a cast whose target type equals the (known) type of its operand
        - a `Date` / `TsOrDsToDate` call whose operand is already a non-parameterized DATE

    Args:
        expression: The expression to inspect.

    Returns:
        The operand if the conversion is redundant, otherwise `expression` itself.
    """
    if isinstance(expression, exp.Cast):
        operand = expression.this
        operand_type = operand.type
        if operand_type and expression.to == operand_type:
            return operand

    if isinstance(expression, (exp.Date, exp.TsOrDsToDate)):
        operand = expression.this
        operand_type = operand.type
        already_plain_date = (
            operand_type
            and operand_type.this == exp.DType.DATE
            and not operand_type.expressions
        )
        if already_plain_date:
            return operand

    return expression
def _coerce_date(
    a: exp.Expr,
    b: exp.Expr,
    promote_to_inferred_datetime_type: bool,
) -> None:
    """
    Casts the text operand of a (temporal, text) pair so that both operands agree on a type.

    Both orderings of the two operands are tried, so either one may be the temporal side.
    The operands are modified in place, by replacing them with casts.

    Args:
        a: One operand.
        b: The other operand.
        promote_to_inferred_datetime_type: If set, the target type is inferred from the
            text operand and used only when the temporal operand's type coerces to it
            according to `TypeAnnotator.COERCES_TO`. Otherwise, the temporal operand's
            type is the target.
    """
    # The loop rebinds `a` and `b`, so each operand gets to play both roles
    for a, b in itertools.permutations([a, b]):
        if isinstance(b, exp.Interval):
            a = _coerce_timeunit_arg(a, b.unit)

        a_type = a.type
        # Only proceed when `a` is temporal-typed and `b` is text-typed
        if (
            not a_type
            or a_type.this not in exp.DataType.TEMPORAL_TYPES
            or not b.type
            or b.type.this not in exp.DataType.TEXT_TYPES
        ):
            continue

        if promote_to_inferred_datetime_type:
            if b.is_string:
                date_text = b.name
                if is_iso_date(date_text):
                    b_type = exp.DType.DATE
                elif is_iso_datetime(date_text):
                    b_type = exp.DType.DATETIME
                else:
                    # Neither an ISO date nor an ISO datetime: fall back to the type of `a`
                    b_type = a_type.this
            else:
                # If b is not a datetime string, we conservatively promote it to a DATETIME,
                # in order to ensure there are no surprising truncations due to downcasting
                b_type = exp.DType.DATETIME

            target_type = (
                b_type if b_type in TypeAnnotator.COERCES_TO.get(a_type.this, {}) else a_type
            )
        else:
            target_type = a_type

        # `a` is only cast when the chosen target differs from its current type
        if target_type != a_type:
            _replace_cast(a, target_type)

        _replace_cast(b, target_type)
def eliminate_ctes(expression: E) -> E:
    """
    Remove unused CTEs from an expression.

    Example:
        >>> import sqlglot
        >>> sql = "WITH y AS (SELECT a FROM x) SELECT a FROM z"
        >>> expression = sqlglot.parse_one(sql)
        >>> eliminate_ctes(expression).sql()
        'SELECT a FROM z'

    Args:
        expression (sqlglot.Expr): expression to optimize

    Returns:
        sqlglot.Expr: the same expression object, with the unreferenced CTEs popped
    """
    root = build_scope(expression)
    if not root:
        return expression

    usages = root.ref_count()

    # Going through the scopes back to front lets chains of unused CTEs be removed
    for scope in reversed(list(root.traverse())):
        if not scope.is_cte or usages[id(scope)] > 0:
            continue

        cte = scope.expression.parent
        if not cte:
            continue

        with_clause = cte.parent
        cte.pop()

        # The WITH clause itself goes away once its last CTE has been removed
        if with_clause and not with_clause.expressions:
            with_clause.pop()

        # The sources this CTE selected from now have one reference less
        for _, source in scope.selected_sources.values():
            if isinstance(source, Scope):
                usages[id(source)] -= 1

    return expression
Example: >>> import sqlglot >>> sql = "SELECT x.a FROM x LEFT JOIN (SELECT DISTINCT y.b FROM y) AS y ON x.b = y.b" >>> expression = sqlglot.parse_one(sql) >>> eliminate_joins(expression).sql() 'SELECT x.a FROM x' Args: expression: expression to optimize Returns: The optimized expression """ for scope in traverse_scope(expression): joins = scope.expression.args.get("joins", []) if not joins: continue # If any columns in this scope aren't qualified, it's hard to determine if a join isn't used. # It's probably possible to infer this from the outputs of derived tables. # But for now, let's just skip this rule. if scope.unqualified_columns: continue # Reverse the joins so we can remove chains of unused joins for join in reversed(joins): if join.is_semi_or_anti_join: continue alias = join.alias_or_name if _should_eliminate_join(scope, join, alias): join.pop() scope.remove_source(alias) return expression def _should_eliminate_join(scope, join, alias): inner_source = scope.sources.get(alias) return ( isinstance(inner_source, Scope) and not _join_is_used(scope, join, alias) and ( (join.side == "LEFT" and _is_joined_on_all_unique_outputs(inner_source, join)) or (not join.args.get("on") and _has_single_output_row(inner_source)) ) ) def _join_is_used(scope, join, alias): # We need to find all columns that reference this join. # But columns in the ON clause shouldn't count. 
on = join.args.get("on") if on: on_clause_columns = {id(column) for column in on.find_all(exp.Column)} else: on_clause_columns = set() return any( column for column in scope.source_columns(alias) if id(column) not in on_clause_columns ) def _is_joined_on_all_unique_outputs(scope, join): unique_outputs = _unique_outputs(scope) if not unique_outputs: return False _, join_keys, _ = join_condition(join) remaining_unique_outputs = unique_outputs - {c.name for c in join_keys} return not remaining_unique_outputs def _unique_outputs(scope): """Determine output columns of `scope` that must have a unique combination per row""" if scope.expression.args.get("distinct"): return set(scope.expression.named_selects) group = scope.expression.args.get("group") if group: grouped_expressions = set(group.expressions) grouped_outputs = set() unique_outputs = set() for select in scope.expression.selects: output = select.unalias() if output in grouped_expressions: grouped_outputs.add(output) unique_outputs.add(select.alias_or_name) # All the grouped expressions must be in the output if not grouped_expressions.difference(grouped_outputs): return unique_outputs else: return set() if _has_single_output_row(scope): return set(scope.expression.named_selects) return set() def _has_single_output_row(scope): return isinstance(scope.expression, exp.Select) and ( all(isinstance(e.unalias(), exp.AggFunc) for e in scope.expression.selects) or _is_limit_1(scope) or not scope.expression.args.get("from_") ) def _is_limit_1(scope): limit = scope.expression.args.get("limit") return limit and limit.expression.this == "1" def join_condition(join): """ Extract the join condition from a join expression. 
def eliminate_subqueries(expression: exp.Expr) -> exp.Expr:
    """
    Rewrite derived tables as CTEs, deduplicating if possible.

    New CTEs are collected in an order that keeps dependencies first: CTEs extracted
    from within an existing CTE are placed before that CTE, and CTEs extracted from
    the rest of the query are appended afterwards. The collected list is then stored
    as the `with_` arg of the query, which is modified in place.

    Example:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT * FROM x) AS y")
        >>> eliminate_subqueries(expression).sql()
        'WITH y AS (SELECT * FROM x) SELECT a FROM y AS y'

    This also deduplicates common subqueries:
        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT * FROM x) AS y CROSS JOIN (SELECT * FROM x) AS z")
        >>> eliminate_subqueries(expression).sql()
        'WITH y AS (SELECT * FROM x) SELECT a FROM y AS y CROSS JOIN y AS z'

    Args:
        expression (sqlglot.Expr): expression to rewrite. A root-level `exp.Subquery`
            is handled by recursing into the query it wraps.

    Returns:
        sqlglot.Expr: the same `expression` object that was passed in.
    """
    if isinstance(expression, exp.Subquery):
        # It's possible to have subqueries at the root, e.g. (SELECT * FROM x) LIMIT 1
        eliminate_subqueries(expression.this)
        return expression

    root = build_scope(expression)

    # Nothing to rewrite if no scope could be built for this expression
    if not root:
        return expression

    # Map of alias->Scope|Table
    # These are all aliases that are already used in the expression.
    # We don't want to create new CTEs that conflict with these names.
    taken: TakenNameMapping = {}

    # All CTE aliases in the root scope are taken
    for scope in root.cte_scopes:
        parent = scope.expression.parent
        if parent:
            taken[parent.alias] = scope

    # All table names are taken
    for scope in root.traverse():
        taken.update(
            {
                source.name: source
                for _, source in scope.sources.items()
                if isinstance(source, exp.Table)
            }
        )

    # Map of Expr->alias
    # Existing CTES in the root expression. We'll use this for deduplication.
    existing_ctes: ExistingCTEsMapping = {}

    with_ = root.expression.args.get("with_")
    # Carried over to the rebuilt WITH clause below
    recursive = False
    if with_:
        recursive = with_.args.get("recursive")
        for cte in with_.expressions:
            existing_ctes[cte.this] = cte.alias
    new_ctes = []

    # We're adding more CTEs, but we want to maintain the DAG order.
    # Derived tables within an existing CTE need to come before the existing CTE.
    for cte_scope in root.cte_scopes:
        # Append all the new CTEs from this existing CTE
        for scope in cte_scope.traverse():
            if scope is cte_scope:
                # Don't try to eliminate this CTE itself
                continue
            new_cte = _eliminate(scope, existing_ctes, taken)
            if new_cte:
                new_ctes.append(new_cte)

        # Append the existing CTE itself
        cte_parent = cte_scope.expression.parent
        if cte_parent:
            new_ctes.append(cte_parent)

    # Now append the rest
    for scope in itertools.chain(root.union_scopes, root.subquery_scopes, root.table_scopes):
        for child_scope in scope.traverse():
            new_cte = _eliminate(child_scope, existing_ctes, taken)
            if new_cte:
                new_ctes.append(new_cte)

    if new_ctes:
        # For DDL statements the WITH clause is attached to the wrapped query
        # (`expression.expression`), not to the DDL node itself.
        query = expression.expression if isinstance(expression, exp.DDL) else expression
        query.set("with_", exp.With(expressions=new_ctes, recursive=recursive))

    return expression
def _new_cte(
    scope: Scope, existing_ctes: ExistingCTEsMapping, taken: TakenNameMapping
) -> t.Tuple[str, t.Optional[exp.Expr]]:
    """
    Pick a root-level name for the query of `scope` and build its CTE if needed.

    Both mappings are updated in place: `taken` always records the chosen name, and
    `existing_ctes` records the query whenever a new CTE is created.

    Returns:
        tuple of (name, cte)
        where `name` is the name to use for this CTE in the root scope
        and `cte` is a new CTE instance, or None when the query duplicates a CTE
        that is already known (in which case `name` is that CTE's alias).
    """
    known_alias = existing_ctes.get(scope.expression)
    wrapper = scope.expression.parent

    # Fall back to a generated name when the wrapping node carries no alias
    name = (wrapper.alias if wrapper else "") or find_new_name(taken=taken, base="cte")

    if known_alias:
        # Same query as an existing CTE: reuse its alias and create nothing new
        taken[known_alias] = scope
        return known_alias, None

    if taken.get(name):
        name = find_new_name(taken=taken, base=name)

    taken[name] = scope
    existing_ctes[scope.expression] = name

    table_alias = exp.TableAlias(this=exp.to_identifier(name))
    return name, exp.CTE(this=scope.expression, alias=table_alias)
def merge_subqueries(expression: E, leave_tables_isolated: bool = False) -> E:
    """
    Merge derived tables, and CTEs that are selected from only once, into the
    query that selects from them.

    CTEs are merged first, derived tables second.

    Example:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT x.a FROM x) CROSS JOIN y")
        >>> merge_subqueries(expression).sql()
        'SELECT x.a FROM x CROSS JOIN y'

    With `leave_tables_isolated` set, an inner query is left alone whenever merging it
    would put multiple table selects into a single query:

        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT x.a FROM x) CROSS JOIN y")
        >>> merge_subqueries(expression, leave_tables_isolated=True).sql()
        'SELECT a FROM (SELECT x.a FROM x) CROSS JOIN y'

    Inspired by https://dev.mysql.com/doc/refman/8.0/en/derived-table-optimization.html

    Args:
        expression (sqlglot.Expr): expression to optimize
        leave_tables_isolated (bool): forwarded to both merge passes

    Returns:
        sqlglot.Expr: optimized expression
    """
    without_single_use_ctes = merge_ctes(expression, leave_tables_isolated)
    return merge_derived_tables(without_single_use_ctes, leave_tables_isolated)
def merge_ctes(expression: E, leave_tables_isolated: bool = False) -> E:
    """
    Merge every CTE that is selected from exactly once into the query selecting from it.

    Args:
        expression: expression to optimize; it is rewritten in place.
        leave_tables_isolated: forwarded to the mergeability check.

    Returns:
        The `expression` that was passed in.
    """
    all_scopes = traverse_scope(expression)

    # Every place a CTE is selected from, grouped by the identity of the CTE scope
    # so that CTEs referenced more than once can be told apart.
    usages_by_cte = {}
    for consumer in all_scopes:
        for table, source in consumer.selected_sources.values():
            if isinstance(source, Scope) and source.is_cte:
                usages_by_cte.setdefault(id(source), []).append((consumer, source, table))

    single_usages = [usages[0] for usages in usages_by_cte.values() if len(usages) == 1]

    for outer_scope, inner_scope, table in single_usages:
        from_or_join = table.find_ancestor(exp.From, exp.Join)
        if not isinstance(from_or_join, (exp.From, exp.Join)):
            continue
        if not _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
            continue

        alias = table.alias_or_name
        _rename_inner_sources(outer_scope, inner_scope, alias)
        _merge_from(
            outer_scope, inner_scope, t.cast(t.Union[exp.Subquery, exp.Table], table), alias
        )
        _merge_expressions(outer_scope, inner_scope, alias)
        _merge_order(outer_scope, inner_scope)
        _merge_joins(outer_scope, inner_scope, from_or_join)
        _merge_where(outer_scope, inner_scope, from_or_join)
        _merge_hints(outer_scope, inner_scope)
        # The CTE definition is no longer referenced once it has been inlined
        _pop_cte(inner_scope)
        outer_scope.clear_cache()

    return expression
def _mergeable(
    outer_scope: Scope, inner_scope: Scope, leave_tables_isolated: bool, from_or_join: FromOrJoin
) -> bool:
    """
    Return True if the select of `inner_scope` can be merged into the outer query.

    Args:
        outer_scope: scope of the query that selects from the inner query.
        inner_scope: scope of the derived table or CTE being considered.
        leave_tables_isolated: if True, refuse to merge when the outer scope
            selects from more than one source.
        from_or_join: the FROM or JOIN node through which the outer query
            references the inner query.

    Returns:
        True only if every condition in the final conjunction holds.
    """
    inner_select = inner_scope.expression.unnest()

    def _is_a_window_expression_in_unmergable_operation():
        """
        Check whether an inner projection containing a window is referenced by the
        outer query under WHERE, GROUP, ORDER, JOIN, HAVING or an aggregate function.
        """
        window_aliases = {s.alias_or_name for s in inner_select.selects if s.find(exp.Window)}
        if not window_aliases:
            return False
        inner_select_name = from_or_join.alias_or_name
        # Outer columns located somewhere below one of the listed node types
        unmergable_window_columns = [
            column
            for column in outer_scope.columns
            if column.find_ancestor(
                exp.Where, exp.Group, exp.Order, exp.Join, exp.Having, exp.AggFunc
            )
        ]
        return any(
            column.table == inner_select_name and column.name in window_aliases
            for column in unmergable_window_columns
        )

    def _outer_select_joins_on_inner_select_join():
        """
        All columns from the inner select in the ON clause must be from the first FROM table.

        That is, this can be merged (`q.a` is `y.a`, and `y` is the inner FROM table):
            SELECT * FROM x JOIN (SELECT y.a AS a FROM y JOIN z) AS q ON x.a = q.a

        But this can't (`q.a` is `z.a`, and `z` is a joined table of the inner query):
            SELECT * FROM x JOIN (SELECT z.a AS a FROM y JOIN z) AS q ON x.a = q.a
        """
        if not isinstance(from_or_join, exp.Join):
            return False

        alias = from_or_join.alias_or_name

        on = from_or_join.args.get("on")
        if not on:
            return False
        # Names of the inner query's columns that the ON clause references
        selections = [c.name for c in on.find_all(exp.Column) if c.table == alias]
        inner_from = inner_scope.expression.args.get("from_")
        if not inner_from:
            return False
        inner_from_table = inner_from.alias_or_name
        inner_projections = {s.alias_or_name: s for s in inner_scope.expression.selects}
        # NOTE(review): `inner_projections[selection]` raises KeyError if the ON clause
        # references a name that is not among the inner projections (e.g. possibly an
        # unexpanded `SELECT *`) - presumably callers qualify/expand first; confirm.
        return any(
            col.table != inner_from_table
            for selection in selections
            for col in inner_projections[selection].find_all(exp.Column)
        )

    def _is_recursive():
        # Recursive CTEs look like this:
        #     WITH RECURSIVE cte AS (
        #       SELECT * FROM x  <-- inner scope
        #       UNION ALL
        #       SELECT * FROM cte  <-- outer scope
        #     )
        cte = inner_scope.expression.parent
        node = outer_scope.expression.parent

        # Walk up from the outer query; reaching the inner query's own parent
        # means the outer query lives inside the definition it selects from.
        while node:
            if node is cte:
                return True
            node = node.parent
        return False

    return (
        # The outer query must be a SELECT that is not a star select
        isinstance(outer_scope.expression, exp.Select)
        and not outer_scope.expression.is_star
        # The inner query must be a SELECT with a FROM and none of the unmergeable args
        and isinstance(inner_select, exp.Select)
        and not any(inner_select.args.get(arg) for arg in UNMERGABLE_ARGS)
        and inner_select.args.get("from_") is not None
        and not outer_scope.pivots
        # No inner projection may contain an aggregate, a nested select or an explode
        and not any(e.find(exp.AggFunc, exp.Select, exp.Explode) for e in inner_select.expressions)
        and not (leave_tables_isolated and len(outer_scope.selected_sources) > 1)
        # A joined inner query may not bring joins of its own
        and not (isinstance(from_or_join, exp.Join) and inner_select.args.get("joins"))
        # A joined inner query with a WHERE is rejected for FULL/LEFT/RIGHT joins
        and not (
            isinstance(from_or_join, exp.Join)
            and inner_select.args.get("where")
            and from_or_join.side in ("FULL", "LEFT", "RIGHT")
        )
        # An inner query in FROM with a WHERE is rejected if any outer join is FULL/RIGHT
        and not (
            isinstance(from_or_join, exp.From)
            and inner_select.args.get("where")
            and any(
                j.side in ("FULL", "RIGHT") for j in outer_scope.expression.args.get("joins", [])
            )
        )
        and not _outer_select_joins_on_inner_select_join()
        and not _is_a_window_expression_in_unmergable_operation()
        and not _is_recursive()
        # An inner ORDER BY is not merged when the outer scope is a union
        and not (inner_select.args.get("order") and outer_scope.is_union)
        and not isinstance(seq_get(inner_select.expressions, 0), exp.QueryTransform)
    )
""" new_subquery = inner_scope.expression.args["from_"].this new_subquery.set("joins", node_to_replace.args.get("joins")) node_to_replace.replace(new_subquery) for join_hint in outer_scope.join_hints: tables = join_hint.find_all(exp.Table) for table in tables: if table.alias_or_name == node_to_replace.alias_or_name: table.set("this", exp.to_identifier(new_subquery.alias_or_name)) outer_scope.remove_source(alias) outer_scope.add_source( new_subquery.alias_or_name, inner_scope.sources[new_subquery.alias_or_name] ) def _merge_joins(outer_scope: Scope, inner_scope: Scope, from_or_join: FromOrJoin) -> None: """ Merge JOIN clauses of inner query into outer query. """ new_joins = [] joins = inner_scope.expression.args.get("joins") or [] for join in joins: new_joins.append(join) outer_scope.add_source(join.alias_or_name, inner_scope.sources[join.alias_or_name]) if new_joins: outer_joins = outer_scope.expression.args.get("joins", []) # Maintain the join order if isinstance(from_or_join, exp.From): position = 0 else: position = outer_joins.index(from_or_join) + 1 outer_joins[position:position] = new_joins outer_scope.expression.set("joins", outer_joins) def _merge_expressions(outer_scope: Scope, inner_scope: Scope, alias: str) -> None: """ Merge projections of inner query into outer query. 
Args: outer_scope (sqlglot.optimizer.scope.Scope) inner_scope (sqlglot.optimizer.scope.Scope) alias (str) """ # Collect all columns that reference the alias of the inner query outer_columns = defaultdict(list) for column in outer_scope.columns: if column.table == alias: outer_columns[column.name].append(column) # Replace columns with the projection expression in the inner query for expression in inner_scope.expression.expressions: projection_name = expression.alias_or_name if not projection_name: continue columns_to_replace = outer_columns.get(projection_name, []) if not columns_to_replace: continue expression = expression.unalias() must_wrap_expression = not isinstance(expression, SAFE_TO_REPLACE_UNWRAPPED) is_number = expression.is_number last = len(columns_to_replace) - 1 for i, column in enumerate(columns_to_replace): parent = column.parent # Ensures that we don't merge literal numbers in GROUP BY as they have positional context # e.g don't trasform `SELECT a FROM (SELECT 6 AS a) GROUP BY a` to `SELECT 6 AS a GROUP BY 6`, # as this would attempt to GROUP BY the 6th projection instead of the column `a` if is_number and isinstance(parent, exp.Group): column.replace(exp.to_identifier(column.name)) continue # Ensures we don't alter the intended operator precedence if there's additional # context surrounding the outer expression (i.e. it's not a simple projection). 
if isinstance(parent, (exp.Unary, exp.Binary)) and must_wrap_expression: expression = exp.paren(expression, copy=False) # make sure we do not accidentally change the name of the column if isinstance(parent, exp.Select) and column.name != expression.name: expression = exp.alias_(expression, column.name) # Skip the expensive deep copy for the last reference since the inner query # is about to be removed, so we can move the expression directly column.replace(expression.copy() if i < last else expression) def _merge_where(outer_scope: Scope, inner_scope: Scope, from_or_join: FromOrJoin) -> None: """ Merge WHERE clause of inner query into outer query. Args: outer_scope (sqlglot.optimizer.scope.Scope) inner_scope (sqlglot.optimizer.scope.Scope) from_or_join (exp.From|exp.Join) """ where = inner_scope.expression.args.get("where") if not where or not where.this: return expression = outer_scope.expression if isinstance(from_or_join, exp.Join): # Merge predicates from an outer join to the ON clause # if it only has columns that are already joined from_ = expression.args.get("from_") sources = {from_.alias_or_name} if from_ else set() for join in expression.args["joins"]: source = join.alias_or_name sources.add(source) if source == from_or_join.alias_or_name: break if exp.column_table_names(where.this) <= sources: from_or_join.on(where.this, copy=False) from_or_join.set("on", from_or_join.args.get("on")) return t.cast(exp.Select, expression).where(where.this, copy=False) def _merge_order(outer_scope: Scope, inner_scope: Scope) -> None: """ Merge ORDER clause of inner query into outer query. 
def _pop_cte(inner_scope: Scope) -> None:
    """
    Detach the CTE that wraps the expression of `inner_scope` from the AST.

    If that CTE is the only entry of its WITH clause, the whole clause is popped
    instead. Nothing happens when the expression has no parent, or that parent has
    no parent of its own.

    Args:
        inner_scope (sqlglot.optimizer.scope.Scope)
    """
    cte = inner_scope.expression.parent
    with_ = cte.parent if cte else None
    if not with_:
        return

    # Popping the last remaining CTE would leave an empty WITH behind
    obsolete = with_ if len(with_.expressions) == 1 else cte
    obsolete.pop()
def normalized(expression: exp.Expr, dnf: bool = False) -> bool:
    """
    Tell whether `expression` already is in the requested normal form.

    The check looks for a connector of one kind nested under a connector of the other
    kind: an OR below an AND rules out DNF, an AND below an OR rules out CNF.

    Example:
        >>> from sqlglot import parse_one
        >>> normalized(parse_one("(a AND b) OR c OR (d AND e)"), dnf=True)
        True
        >>> normalized(parse_one("(a OR b) AND c"))  # Checks CNF by default
        True
        >>> normalized(parse_one("a AND (b OR c)"), dnf=True)
        False

    Args:
        expression: The expression to check if it's normalized.
        dnf: Whether to check for Disjunctive Normal Form (DNF) instead of the
            default Conjunctive Normal Form (CNF).
    """
    if dnf:
        searched, forbidden_ancestor = exp.Or, exp.And
    else:
        searched, forbidden_ancestor = exp.And, exp.Or

    for connector in find_all_in_scope(expression, searched):
        if connector.find_ancestor(forbidden_ancestor):
            return False
    return True
""" if depth > max_: yield depth return expression = expression.unnest() if not isinstance(expression, exp.Connector): yield 1 return depth += 1 left, right = expression.args.values() if isinstance(expression, exp.And if dnf else exp.Or): for a in _predicate_lengths(left, dnf, max_, depth): for b in _predicate_lengths(right, dnf, max_, depth): yield a + b else: yield from _predicate_lengths(left, dnf, max_, depth) yield from _predicate_lengths(right, dnf, max_, depth) def distributive_law(expression, dnf, max_distance, simplifier=None): """ x OR (y AND z) -> (x OR y) AND (x OR z) (x AND y) OR (y AND z) -> (x OR y) AND (x OR z) AND (y OR y) AND (y OR z) """ if normalized(expression, dnf=dnf): return expression distance = normalization_distance(expression, dnf=dnf, max_=max_distance) if distance > max_distance: raise OptimizeError(f"Normalization distance {distance} exceeds max {max_distance}") exp.replace_children(expression, lambda e: distributive_law(e, dnf, max_distance)) to_exp, from_exp = (exp.Or, exp.And) if dnf else (exp.And, exp.Or) if isinstance(expression, from_exp): a, b = expression.unnest_operands() from_func = exp.and_ if from_exp == exp.And else exp.or_ to_func = exp.and_ if to_exp == exp.And else exp.or_ simplifier = simplifier or Simplifier(annotate_new_expressions=False) if isinstance(a, to_exp) and isinstance(b, to_exp): if len(tuple(a.find_all(exp.Connector))) > len(tuple(b.find_all(exp.Connector))): return _distribute(a, b, from_func, to_func, simplifier) return _distribute(b, a, from_func, to_func, simplifier) if isinstance(a, to_exp): return _distribute(b, a, from_func, to_func, simplifier) if isinstance(b, to_exp): return _distribute(a, b, from_func, to_func, simplifier) return expression def _distribute(a, b, from_func, to_func, simplifier): if isinstance(a, exp.Connector): exp.replace_children( a, lambda c: to_func( simplifier.uniq_sort(flatten(from_func(c, b.left))), simplifier.uniq_sort(flatten(from_func(c, b.right))), copy=False, ), ) 
def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
    """
    Normalize identifiers by converting them to either lower or upper case,
    ensuring the semantics are preserved in each case (e.g. by respecting
    case-sensitivity).

    The result reflects how the engine behind each SQL dialect would resolve the
    identifiers, which makes this transformation an important part of standardizing
    the AST.

    An identifier can be excluded by placing a special comment next to it:

        SELECT a /* sqlglot.meta case_sensitive */ FROM table

    Here, the identifier `a` will not be normalized.

    Note:
        Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even
        when they're quoted, so in these cases all identifiers are normalized.

    Example:
        >>> import sqlglot
        >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar')
        >>> normalize_identifiers(expression).sql()
        'SELECT bar.a AS a FROM "Foo".bar'
        >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake")
        'FOO'

    Args:
        expression: The expression to transform. A string is first parsed as an identifier.
        dialect: The dialect to use in order to decide how to normalize identifiers.
        store_original_column_identifiers: Whether to store the original column identifiers
            in the meta data of the expression in case we want to undo the normalization
            at a later point.

    Returns:
        The transformed expression.
    """
    dialect = Dialect.get_or_raise(dialect)

    if isinstance(expression, str):
        expression = exp.parse_identifier(expression, dialect=dialect)

    def _is_case_sensitive(candidate):
        return bool(candidate.meta.get("case_sensitive"))

    # Pruning stops the walk from descending below a case-sensitive node, but the
    # node itself is still yielded, hence the explicit skip inside the loop.
    for node in expression.walk(prune=_is_case_sensitive):
        if node.meta.get("case_sensitive"):
            continue

        if store_original_column_identifiers and isinstance(node, exp.Column):
            # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key
            outermost = node
            while outermost and isinstance(outermost.parent, exp.Dot):
                outermost = outermost.parent

            node.meta["dot_parts"] = [part.name for part in outermost.parts]

        dialect.normalize_identifier(node)

    return expression
def reorder_joins(expression):
    """
    Sort the joins of every FROM's parent topologically, based on the tables that
    each join's predicate references. Join lists that are not reorderable are left
    untouched.
    """
    for from_ in expression.find_all(exp.From):
        query = from_.parent
        joins = query.args.get("joins", [])

        if _is_reorderable(joins):
            joins_by_name = {join.alias_or_name: join for join in joins}
            dependencies = {
                name: other_table_names(join) for name, join in joins_by_name.items()
            }
            from_name = from_.alias_or_name

            # The sort can also yield names that are only referenced (such as the
            # FROM source); keep just the names that belong to an actual join.
            ordered_joins = []
            for name in tsort(dependencies):
                if name != from_name and name in joins_by_name:
                    ordered_joins.append(joins_by_name[name])

            query.set("joins", ordered_joins)

    return expression
""" for join in expression.find_all(exp.Join): if not any(join.args.get(k) for k in JOIN_ATTRS): join.set("kind", "CROSS") if join.kind == "CROSS": join.set("on", None) else: if join.kind in ("INNER", "OUTER"): join.set("kind", None) if not join.args.get("on") and not join.args.get("using"): join.set("on", exp.true()) return expression def other_table_names(join: exp.Join) -> t.Set[str]: on = join.args.get("on") return exp.column_table_names(on, join.alias_or_name) if on else set() def _is_reorderable(joins: t.List[exp.Join]) -> bool: """ Checks if joins can be reordered without changing query semantics. Joins with a side (LEFT, RIGHT, FULL) cannot be reordered easily, the order affects which rows are included in the result. Example: >>> from sqlglot import parse_one, exp >>> from sqlglot.optimizer.optimize_joins import _is_reorderable >>> ast = parse_one("SELECT * FROM x JOIN y ON x.id = y.id JOIN z ON y.id = z.id") >>> _is_reorderable(ast.find(exp.Select).args.get("joins", [])) True >>> ast = parse_one("SELECT * FROM x LEFT JOIN y ON x.id = y.id JOIN z ON y.id = z.id") >>> _is_reorderable(ast.find(exp.Select).args.get("joins", [])) False """ return not any(join.side for join in joins) ================================================ FILE: sqlglot/optimizer/optimizer.py ================================================ from __future__ import annotations import inspect import typing as t from collections.abc import Sequence from sqlglot import Schema, exp from sqlglot.dialects.dialect import DialectType from sqlglot.optimizer.annotate_types import annotate_types from sqlglot.optimizer.canonicalize import canonicalize from sqlglot.optimizer.eliminate_ctes import eliminate_ctes from sqlglot.optimizer.eliminate_joins import eliminate_joins from sqlglot.optimizer.eliminate_subqueries import eliminate_subqueries from sqlglot.optimizer.merge_subqueries import merge_subqueries from sqlglot.optimizer.normalize import normalize from sqlglot.optimizer.optimize_joins import 
def optimize(
    expression: str | exp.Expr,
    schema: t.Optional[dict | Schema] = None,
    db: t.Optional[str | exp.Identifier] = None,
    catalog: t.Optional[str | exp.Identifier] = None,
    dialect: DialectType = None,
    rules: Sequence[t.Callable] = RULES,
    sql: t.Optional[str] = None,
    **kwargs,
) -> exp.Expr:
    """
    Rewrite a sqlglot AST into an optimized form.

    The input is parsed (if it is a string) and copied, then each rule in `rules` is applied
    in order, feeding the output of one rule into the next.

    Args:
        expression: expression to optimize
        schema: database schema.
            This can either be an instance of `sqlglot.Schema` or a mapping in one of
            the following forms:
                1. {table: {col: type}}
                2. {db: {table: {col: type}}}
                3. {catalog: {db: {table: {col: type}}}}
            If no schema is provided then the default schema defined at `sqlglot.schema` will be used
        db: specify the default database, as might be set by a `USE DATABASE db` statement
        catalog: specify the default catalog, as might be set by a `USE CATALOG c` statement
        dialect: The dialect to parse the sql string.
        rules: sequence of optimizer rules to use.
            Many of the rules require tables and columns to be qualified.
            Do not remove `qualify` from the sequence of rules unless you know what you're doing!
        sql: Original SQL string for error highlighting. If not provided, errors will not include
            highlighting. Requires that the expression has position metadata from parsing.
        **kwargs: If a rule has a keyword argument with a same name in **kwargs, it will be passed in.
            Entries here also override the built-in defaults below (`isolate_tables=True`,
            `quote_identifiers=False`) as well as `db`, `catalog`, `schema`, `dialect` and `sql`.

    Returns:
        The optimized expression.
    """
    schema = ensure_schema(schema, dialect=dialect)

    # Pool of values that may be handed to a rule. `**kwargs` is unpacked last, so
    # caller-supplied entries take precedence over the defaults listed before it.
    possible_kwargs = {
        "db": db,
        "catalog": catalog,
        "schema": schema,
        "dialect": dialect,
        "sql": sql,
        "isolate_tables": True,  # needed for other optimizations to perform well
        "quote_identifiers": False,
        **kwargs,
    }

    # copy=True: the rules below work on a copy, not on the caller's expression
    optimized = exp.maybe_parse(expression, dialect=dialect, copy=True)
    for rule in rules:
        # Find any additional rule parameters, beyond `expression`
        # NOTE: only names in `getfullargspec(rule).args` are matched, i.e. keyword-only
        # parameters of a rule are not considered here.
        rule_params = inspect.getfullargspec(rule).args
        rule_kwargs = {
            param: possible_kwargs[param] for param in rule_params if param in possible_kwargs
        }
        optimized = rule(optimized, **rule_kwargs)

    return optimized
def pushdown(condition, sources, scope_ref_count, dialect, join_index=None):
    """
    Simplify `condition` in place, split it into predicates and push them down.

    Nothing happens for a falsy `condition`. Otherwise the condition is replaced by its
    simplified form and classified: if it is normalized (CNF), or is not in DNF either,
    it is treated as CNF-like and handled by `pushdown_cnf`; otherwise it is handled by
    `pushdown_dnf`. The predicates are the flattened operands when the condition's top-level
    node is the matching connector (AND for CNF-like, OR for DNF), else the condition itself.
    """
    if not condition:
        return

    simplified = simplify(condition, dialect=dialect)
    condition = condition.replace(simplified)

    is_cnf_like = normalized(condition) or not normalized(condition, dnf=True)
    if is_cnf_like:
        connector = exp.And
        push = pushdown_cnf
    else:
        connector = exp.Or
        push = pushdown_dnf

    if isinstance(condition, connector):
        predicates = list(condition.flatten())
    else:
        predicates = [condition]

    push(predicates, sources, scope_ref_count, join_index=join_index)
""" for predicate in predicates: for node in nodes_for_predicate(predicate, sources, scope_ref_count).values(): if isinstance(node, exp.Join): name = node.alias_or_name predicate_tables = exp.column_table_names(predicate, name) if join_index: # Don't push the predicate if it references tables that appear in later joins this_index = join_index[name] if all(join_index.get(table, -1) < this_index for table in predicate_tables): predicate.replace(exp.true()) node.on(predicate, copy=False) break if isinstance(node, exp.Select): predicate.replace(exp.true()) inner_predicate = replace_aliases(node, predicate) if find_in_scope(inner_predicate, exp.AggFunc): node.having(inner_predicate, copy=False) else: node.where(inner_predicate, copy=False) def pushdown_dnf(predicates, sources, scope_ref_count, join_index=None): """ If the predicates are in DNF form, we can only push down conditions that are in all blocks. Additionally, we can't remove predicates from their original form. """ # find all the tables that can be pushdown too # these are tables that are referenced in all blocks of a DNF # (a.x AND b.x) OR (a.y AND c.y) # only table a can be push down pushdown_tables = set() for a in predicates: a_tables = exp.column_table_names(a) for b in predicates: a_tables &= exp.column_table_names(b) pushdown_tables.update(a_tables) conditions = {} # pushdown all predicates to their respective nodes for table in sorted(pushdown_tables): for predicate in predicates: nodes = nodes_for_predicate(predicate, sources, scope_ref_count) if table not in nodes: continue conditions[table] = ( exp.or_(conditions[table], predicate) if table in conditions else predicate ) for name, node in nodes.items(): if name not in conditions: continue predicate = conditions[name] if isinstance(node, exp.Join): if join_index: this_index = join_index[name] predicate_tables = exp.column_table_names(predicate, name) if not all(join_index.get(t, -1) < this_index for t in predicate_tables): continue node.on(predicate, 
def nodes_for_predicate(predicate, sources, scope_ref_count):
    """
    Find, for each table referenced by `predicate`, the node the predicate may be pushed into.

    Args:
        predicate: the predicate expression to analyze.
        sources: mapping of source name to a `(node, source)` pair.
        scope_ref_count: mapping indexed by `id(source)`; a SELECT target is only accepted
            when its count is below 2.

    Returns:
        A dict of table name -> target node (a Join or a Select). An empty dict is returned
        as soon as a table resolves to a recursive CTE or to a join with a side other than RIGHT.
    """
    nodes = {}
    tables = exp.column_table_names(predicate)
    # True when the ancestor found among (Join, Where) is a Where, i.e. the predicate is
    # treated as living in a WHERE clause rather than in a join condition
    where_condition = isinstance(predicate.find_ancestor(exp.Join, exp.Where), exp.Where)

    # sorted() keeps the iteration order (and thus the result's key order) deterministic
    for table in sorted(tables):
        # tables missing from `sources` yield (None, None) and fall through all checks below
        node, source = sources.get(table) or (None, None)

        # if the predicate is in a where statement we can try to push it down
        # we want to find the root join or from statement
        if node and where_condition:
            node = node.find_ancestor(exp.Join, exp.From)

        # a node can reference a CTE which should be pushed down
        if isinstance(node, exp.From) and not isinstance(source, exp.Table):
            with_ = source.parent.expression.args.get("with_")
            # give up entirely when the enclosing WITH is recursive
            if with_ and with_.recursive:
                return {}
            node = source.expression

        if isinstance(node, exp.Join):
            # joins with a side are only accepted when that side is RIGHT
            if node.side and node.side != "RIGHT":
                return {}
            nodes[table] = node
        elif isinstance(node, exp.Select) and len(tables) == 1:
            # We can't push down window expressions
            has_window_expression = any(
                select for select in node.selects if select.find(exp.Window)
            )
            # we can't push down predicates to select statements if they are referenced in
            # multiple places.
            if (
                not node.args.get("group")
                and scope_ref_count[id(source)] < 2
                and not has_window_expression
            ):
                nodes[table] = node
    return nodes
Example: >>> import sqlglot >>> sql = "SELECT y.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x) AS y" >>> expression = sqlglot.parse_one(sql) >>> pushdown_projections(expression).sql() 'SELECT y.a AS a FROM (SELECT x.a AS a FROM x) AS y' Args: expression (sqlglot.Expr): expression to optimize remove_unused_selections (bool): remove selects that are unused Returns: sqlglot.Expr: optimized expression """ # Map of Scope to all columns being selected by outer queries. schema = ensure_schema(schema, dialect=dialect) source_column_alias_count: t.Dict[exp.Expr | Scope, int] = {} referenced_columns: t.DefaultDict[Scope, t.Set[str | object]] = defaultdict(set) # We build the scope tree (which is traversed in DFS postorder), then iterate # over the result in reverse order. This should ensure that the set of selected # columns for a particular scope are completely build by the time we get to it. for scope in reversed(traverse_scope(expression)): parent_selections = referenced_columns.get(scope, {SELECT_ALL}) alias_count = source_column_alias_count.get(scope, 0) # We can't remove columns SELECT DISTINCT nor UNION DISTINCT. 
if scope.expression.args.get("distinct"): parent_selections = {SELECT_ALL} if isinstance(scope.expression, exp.SetOperation): set_op = scope.expression if set_op.kind or set_op.side: # Do not optimize this set operation if it's using the BigQuery specific # kind / side syntax (e.g INNER UNION ALL BY NAME) which changes the semantics of the operation continue left, right = scope.union_scopes le = left.expression re = right.expression if not (isinstance(le, exp.Selectable) and isinstance(re, exp.Selectable)): continue if len(le.selects) != len(re.selects): scope_sql = scope.expression.sql(dialect=dialect) raise OptimizeError(f"Invalid set operation due to column mismatch: {scope_sql}.") referenced_columns[left] = parent_selections if re.is_star: referenced_columns[right] = parent_selections elif not le.is_star: if scope.expression.args.get("by_name"): referenced_columns[right] = referenced_columns[left] else: referenced_columns[right] = { re.selects[i].alias_or_name for i, select in enumerate(le.selects) if SELECT_ALL in parent_selections or select.alias_or_name in parent_selections } if isinstance(scope.expression, exp.Select): if remove_unused_selections: _remove_unused_selections(scope, parent_selections, schema, alias_count) if scope.expression.is_star: continue # Group columns by source name selects: t.Dict[str, t.Set[object]] = defaultdict(set) for col in scope.columns: table_name = col.table col_name = col.name selects[table_name].add(col_name) # Push the selected columns down to the next scope for name, (node, source) in scope.selected_sources.items(): if isinstance(source, Scope) and isinstance(source.expression, exp.Selectable): select = seq_get(source.expression.selects, 0) if scope.pivots or isinstance(select, exp.QueryTransform): columns: t.Set[object] = {SELECT_ALL} else: columns = selects.get(name) or set() referenced_columns[source].update(columns) column_aliases = node.alias_column_names if column_aliases: source_column_alias_count[source] = 
def _remove_unused_selections(scope, parent_selections, schema, alias_count):
    """
    Rewrite the projections of `scope.expression`, keeping only those that are needed.

    A projection is kept when `parent_selections` contains `SELECT_ALL` or the projection's
    name, when an unqualified ORDER BY column refers to it, or while `alias_count` is still
    positive.

    Args:
        scope: the scope whose expression's projection list is replaced in place.
        parent_selections: the names (or `SELECT_ALL`) requested from this scope.
        schema: passed to `Resolver` when a removed star has to be expanded.
        alias_count: number of leading projections to keep regardless of name.
    """
    order = scope.expression.args.get("order")

    if order:
        # Assume columns without a qualified table are references to output columns
        order_refs = {c.name for c in order.find_all(exp.Column) if not c.table}
    else:
        order_refs = set()

    new_selections = []
    removed = False
    star = False
    is_agg = False

    select_all = SELECT_ALL in parent_selections

    for selection in scope.expression.selects:
        name = selection.alias_or_name

        if select_all or name in parent_selections or name in order_refs or alias_count > 0:
            new_selections.append(selection)
            # decremented for every kept projection, so only the leading ones are protected
            alias_count -= 1
        else:
            if selection.is_star:
                star = True
            removed = True

        # remember whether any projection contained an aggregate, see default_selection below
        if not is_agg and selection.find(exp.AggFunc):
            is_agg = True

    if star:
        # A star projection was dropped: add an explicit column for every requested name
        # that is not already selected.
        resolver = Resolver(scope, schema)
        names = {s.alias_or_name for s in new_selections}

        for name in sorted(parent_selections):
            if name not in names:
                new_selections.append(
                    alias(exp.column(name, table=resolver.get_table(name)), name, copy=False)
                )

    # If there are no remaining selections, just select a single constant
    if not new_selections:
        new_selections.append(default_selection(is_agg))

    scope.expression.select(*new_selections, append=False, copy=False)

    # the scope's cache is only cleared when something was actually dropped
    if removed:
        scope.clear_cache()
qualify( expression: exp.Expr, dialect: DialectType = None, db: t.Optional[str] = None, catalog: t.Optional[str] = None, schema: t.Optional[dict | Schema] = None, expand_alias_refs: bool = True, expand_stars: bool = True, infer_schema: t.Optional[bool] = None, isolate_tables: bool = False, qualify_columns: bool = True, allow_partial_qualification: bool = False, validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, canonicalize_table_aliases: bool = False, on_qualify: t.Optional[t.Callable[[exp.Expr], None]] = None, sql: t.Optional[str] = None, ) -> exp.Expr: """ Rewrite sqlglot AST to have normalized and qualified tables and columns. This step is necessary for all further SQLGlot optimizations. Example: >>> import sqlglot >>> schema = {"tbl": {"col": "INT"}} >>> expression = sqlglot.parse_one("SELECT col FROM tbl") >>> qualify(expression, schema=schema).sql() 'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"' Args: expression: Expr to qualify. db: Default database name for tables. catalog: Default catalog name for tables. schema: Schema to infer column names and types. expand_alias_refs: Whether to expand references to aliases. expand_stars: Whether to expand star queries. This is a necessary step for most of the optimizer's rules to work; do not set to False unless you know what you're doing! infer_schema: Whether to infer the schema if missing. isolate_tables: Whether to isolate table selects. qualify_columns: Whether to qualify columns. allow_partial_qualification: Whether to allow partial qualification. validate_qualify_columns: Whether to validate columns. quote_identifiers: Whether to run the quote_identifiers step. This step is necessary to ensure correctness for case sensitive queries. But this flag is provided in case this step is performed at a later time. identify: If True, quote all identifiers, else only necessary ones. canonicalize_table_aliases: Whether to use canonical aliases (_0, _1, ...) 
def qualify_columns(
    expression: exp.Expr,
    schema: dict | Schema,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    allow_partial_qualification: bool = False,
    dialect: DialectType = None,
) -> exp.Expr:
    """
    Rewrite sqlglot AST to have fully qualified columns.

    Example:
        >>> import sqlglot
        >>> schema = {"tbl": {"col": "INT"}}
        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
        >>> qualify_columns(expression, schema).sql()
        'SELECT tbl.col AS col FROM tbl'

    Args:
        expression: Expr to qualify.
        schema: Database schema.
        expand_alias_refs: Whether to expand references to aliases.
        expand_stars: Whether to expand star queries. This is a necessary step
            for most of the optimizer's rules to work; do not set to False unless you
            know what you're doing!
        infer_schema: Whether to infer the schema if missing. When None, this defaults to
            whether the schema is empty.
        allow_partial_qualification: Whether to allow partial qualification.
        dialect: Passed to `ensure_schema` when normalizing `schema`. The dialect used for the
            rest of the qualification is then read back from the resulting schema
            (`schema.dialect`), falling back to a default `Dialect()`.

    Returns:
        The qualified expression.

    Notes:
        - Currently only handles a single PIVOT or UNPIVOT operator
    """
    schema = ensure_schema(schema, dialect=dialect)
    annotator = TypeAnnotator(schema)
    infer_schema = schema.empty if infer_schema is None else infer_schema
    # From here on `dialect` is the schema's dialect, not the raw argument
    dialect = schema.dialect or Dialect()
    pseudocolumns = dialect.PSEUDOCOLUMNS

    for scope in traverse_scope(expression):
        if dialect.PREFER_CTE_ALIAS_COLUMN:
            pushdown_cte_alias_columns(scope)

        scope_expression = scope.expression
        # captured before the steps below run
        is_select = isinstance(scope_expression, exp.Select)

        _separate_pseudocolumns(scope, pseudocolumns)

        resolver = Resolver(scope, schema, infer_schema=infer_schema)
        _pop_table_column_aliases(scope.ctes)
        _pop_table_column_aliases(scope.derived_tables)
        using_column_tables = _expand_using(scope, resolver)

        # Early alias expansion: without a schema, or when the dialect forces it
        if (schema.empty or dialect.FORCE_EARLY_ALIAS_REF_EXPANSION) and expand_alias_refs:
            _expand_alias_refs(
                scope,
                resolver,
                dialect,
                expand_only_groupby=dialect.EXPAND_ONLY_GROUP_ALIAS_REF,
            )

        _convert_columns_to_dots(scope, resolver)
        _qualify_columns(
            scope,
            resolver,
            allow_partial_qualification=allow_partial_qualification,
        )

        # With a schema available, aliases are expanded after columns have been qualified
        if not schema.empty and expand_alias_refs:
            _expand_alias_refs(scope, resolver, dialect)

        if is_select:
            if expand_stars:
                _expand_stars(
                    scope,
                    resolver,
                    using_column_tables,
                    pseudocolumns,
                    annotator,
                )
            qualify_outputs(scope)

        _expand_group_by(scope, dialect)

        # DISTINCT ON and ORDER BY follow the same rules (tested in DuckDB, Postgres, ClickHouse)
        # https://www.postgresql.org/docs/current/sql-select.html#SQL-DISTINCT
        _expand_order_by_and_distinct_on(scope, resolver)

        if dialect.ANNOTATE_ALL_SCOPES:
            annotator.annotate_scope(scope)

    return expression
def _separate_pseudocolumns(scope: Scope, pseudocolumns: t.Set[str]) -> None:
    """
    Replace the scope's columns that name a pseudocolumn with `exp.Pseudocolumn` nodes.

    Column names are compared upper-cased against `pseudocolumns`. A column named LEVEL is
    only converted when the scope's expression is a SELECT that has a `connect` arg. The
    scope's cache is cleared if at least one column was replaced. Does nothing when
    `pseudocolumns` is empty.
    """
    if not pseudocolumns:
        return

    select = scope.expression
    replaced_any = False

    for col in scope.columns:
        upper_name = col.name.upper()
        if upper_name not in pseudocolumns:
            continue

        if upper_name == "LEVEL" and not (
            isinstance(select, exp.Select) and select.args.get("connect")
        ):
            continue

        col.replace(exp.Pseudocolumn(**col.args))
        replaced_any = True

    if replaced_any:
        scope.clear_cache()
def _expand_using(scope: Scope, resolver: Resolver) -> t.Dict[str, t.Any]:
    """
    Rewrite every `JOIN ... USING (...)` in the scope into an explicit `ON` condition.

    Unqualified references to a USING column elsewhere in the scope are replaced by a
    COALESCE over the tables that take part in the join on that column.

    Args:
        scope: the scope whose joins and columns are rewritten in place.
        resolver: used to look up the column names of each source.

    Returns:
        A mapping of USING column name to a dict whose keys are the participating source
        names, in insertion order (the values are always None).

    Raises:
        OptimizeError: if the scope has joins but no other selected source, or if a USING
            column cannot be matched while the column names of both sides are known.
    """
    # column name -> first source (in join order) that exposes it
    columns = {}

    def _update_source_columns(source_name: str) -> None:
        for column_name in resolver.get_source_columns(source_name):
            if column_name not in columns:
                columns[column_name] = source_name

    joins = list(scope.find_all(exp.Join))
    names = {join.alias_or_name for join in joins}
    # sources that are not introduced by a join, in selection order
    ordered = [key for key in scope.selected_sources if key not in names]

    if names and not ordered:
        raise OptimizeError(f"Joins {names} missing source table {scope.expression}")

    # Mapping of automatically joined column names to an ordered set of source names (dict).
    column_tables: t.Dict[str, t.Dict[str, t.Any]] = {}

    for source_name in ordered:
        _update_source_columns(source_name)

    for i, join in enumerate(joins):
        # the most recently seen source, i.e. the previous join's table after the first iteration
        source_table = ordered[-1]
        if source_table:
            _update_source_columns(source_table)

        join_table = join.alias_or_name
        ordered.append(join_table)

        using = join.args.get("using")
        if not using:
            continue

        join_columns = resolver.get_source_columns(join_table)
        conditions = []
        using_identifier_count = len(using)
        is_semi_or_anti_join = join.is_semi_or_anti_join

        for identifier in using:
            identifier = identifier.name
            table = columns.get(identifier)

            if not table or identifier not in join_columns:
                # only an error when the columns of both sides are actually known
                if (columns and "*" not in columns) and join_columns:
                    raise OptimizeError(f"Cannot automatically join: {identifier}")

            table = table or source_table

            if i == 0 or using_identifier_count == 1:
                lhs: exp.Expr = exp.column(identifier, table=table)
            else:
                # every earlier source (excluding the table being joined) that has this column
                coalesce_columns = [
                    exp.column(identifier, table=t)
                    for t in ordered[:-1]
                    if identifier in resolver.get_source_columns(t)
                ]
                if len(coalesce_columns) > 1:
                    lhs = exp.func("coalesce", *coalesce_columns)
                else:
                    lhs = exp.column(identifier, table=table)

            conditions.append(lhs.eq(exp.column(identifier, table=join_table)))

            # Set all values in the dict to None, because we only care about the key ordering
            tables = column_tables.setdefault(identifier, {})

            # Do not update the dict if this was a SEMI/ANTI join in
            # order to avoid generating COALESCE columns for this join pair
            if not is_semi_or_anti_join:
                if table not in tables:
                    tables[table] = None
                if join_table not in tables:
                    tables[join_table] = None

        # swap the USING clause for the equivalent ON condition
        join.set("using", None)
        join.set("on", exp.and_(*conditions, copy=False))

    if column_tables:
        for column in scope.columns:
            if not column.table and column.name in column_tables:
                tables = column_tables[column.name]
                coalesce_args = [exp.column(column.name, table=table) for table in tables]
                replacement: exp.Expr = exp.func("coalesce", *coalesce_args)

                if isinstance(column.parent, exp.Select):
                    # Ensure the USING column keeps its name if it's projected
                    replacement = alias(replacement, alias=column.name, copy=False)
                elif isinstance(column.parent, exp.Struct):
                    # Ensure the USING column keeps its name if it's an anonymous STRUCT field
                    replacement = exp.PropertyEQ(
                        this=exp.to_identifier(column.name), expression=replacement
                    )

                scope.replace(column, replacement)

    return column_tables
Example: SELECT y.foo AS bar, bar * 2 AS baz FROM y => SELECT y.foo AS bar, y.foo * 2 AS baz FROM y """ expression = scope.expression if not isinstance(expression, exp.Select) or dialect.DISABLES_ALIAS_REF_EXPANSION: return alias_to_expression: t.Dict[str, t.Tuple[exp.Expr, int]] = {} projections = {s.alias_or_name for s in expression.selects} replaced = False def replace_columns( node: t.Optional[exp.Expr], resolve_table: bool = False, literal_index: bool = False ) -> None: nonlocal replaced is_group_by = isinstance(node, exp.Group) is_having = isinstance(node, exp.Having) if not node or (expand_only_groupby and not is_group_by): return for column in walk_in_scope(node, prune=lambda node: node.is_star): if not isinstance(column, exp.Column): continue # BigQuery's GROUP BY allows alias expansion only for standalone names, e.g: # SELECT FUNC(col) AS col FROM t GROUP BY col --> Can be expanded # SELECT FUNC(col) AS col FROM t GROUP BY FUNC(col) --> Shouldn't be expanded, will result to FUNC(FUNC(col)) # This not required for the HAVING clause as it can evaluate expressions using both the alias & the table columns if expand_only_groupby and is_group_by and column.parent is not node: continue skip_replace = False table = resolver.get_table(column.name) if resolve_table and not column.table else None alias_expr, i = alias_to_expression.get(column.name, (None, 1)) if alias_expr: skip_replace = bool( alias_expr.find(exp.AggFunc) and column.find_ancestor(exp.AggFunc) and not isinstance(column.find_ancestor(exp.Window, exp.Select), exp.Window) ) # BigQuery's having clause gets confused if an alias matches a source. 
# SELECT x.a, max(x.b) as x FROM x GROUP BY 1 HAVING x > 1; # If "HAVING x" is expanded to "HAVING max(x.b)", BQ would blindly replace the "x" reference with the projection MAX(x.b) # i.e HAVING MAX(MAX(x.b).b), resulting in the error: "Aggregations of aggregations are not allowed" if is_having and dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES: skip_replace = skip_replace or any( node.parts[0].name in projections for node in alias_expr.find_all(exp.Column) ) elif dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES and (is_group_by or is_having): column_table = table.name if table else column.table if column_table in projections: # BigQuery's GROUP BY and HAVING clauses get confused if the column name # matches a source name and a projection. For instance: # SELECT id, ARRAY_AGG(col) AS custom_fields FROM custom_fields GROUP BY id HAVING id >= 1 # We should not qualify "id" with "custom_fields" in either clause, since the aggregation shadows the actual table # and we'd get the error: "Column custom_fields contains an aggregation function, which is not allowed in GROUP BY clause" column.replace(exp.to_identifier(column.name)) replaced = True return if table and (not alias_expr or skip_replace): column.set("table", table) elif not column.table and alias_expr and not skip_replace: if (isinstance(alias_expr, exp.Literal) or alias_expr.is_number) and ( literal_index or resolve_table ): if literal_index: column.replace(exp.Literal.number(i)) replaced = True else: replaced = True column = column.replace(exp.paren(alias_expr)) simplified = simplify_parens(column, dialect) if simplified is not column: column.replace(simplified) for i, projection in enumerate(expression.selects): replace_columns(projection) if isinstance(projection, exp.Alias): alias_to_expression[projection.alias] = (projection.this, i + 1) parent_scope: t.Optional[Scope] = scope on_right_sub_tree = False while parent_scope and not parent_scope.is_cte: if parent_scope := parent_scope.parent: if 
def _expand_group_by(scope: Scope, dialect: Dialect) -> None:
    """
    Expand the positional references (e.g. `GROUP BY 1`) of the scope's GROUP BY clause.

    The expansion itself is delegated to `_expand_positional_references`. Scopes whose
    expression has no `group` arg are left untouched.
    """
    query = scope.expression
    group_by = query.args.get("group")

    if group_by:
        expanded = _expand_positional_references(scope, group_by.expressions, dialect)
        group_by.set("expressions", expanded)
        query.set("group", group_by)
def _select_by_pos(expression: exp.Selectable, node: exp.Literal) -> exp.Alias:
    """
    Return the projection that the 1-based positional literal `node` refers to.

    Args:
        expression: The query whose `selects` are indexed.
        node: Integer literal holding the position, e.g. the `2` in `ORDER BY 2`.

    Returns:
        The projection at that position, asserted to be an `exp.Alias`.

    Raises:
        OptimizeError: If the position is smaller than 1 or larger than the number of
            projections.
    """
    position = int(node.this)
    selects = expression.selects

    # Positions are 1-based. Without the lower bound check, position 0 would be translated
    # to index -1 and silently resolve to the last projection instead of being rejected.
    if position < 1 or position > len(selects):
        raise OptimizeError(f"Unknown output column: {node.name}")

    return selects[position - 1].assert_is(exp.Alias)
def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None:
    """
    Converts `Column` instances that represent STRUCT or JSON field lookup into chained `Dots`.

    These lookups may be parsed as columns (e.g. "col"."field"."field2"), but they need to be
    normalized to `Dot(Dot(...(<table>.<column>, field1), field2, ...))` to be qualified properly.

    Args:
        scope: The scope whose columns and stars are inspected. Converted nodes are replaced
            in place, and the scope's cache is cleared if at least one node was converted.
        resolver: Used to look up the source of the leading column part when that part isn't
            itself the name of a source in `scope`.
    """
    converted = False
    for column in itertools.chain(scope.columns, scope.stars):
        # Nodes that are already Dot chains need no conversion
        if isinstance(column, exp.Dot):
            continue

        column_table: str | exp.Identifier | None = column.table
        # Note: the "dot_parts" meta entry is popped even if the column ends up not being converted
        dot_parts = column.meta.pop("dot_parts", [])
        # Only columns whose qualifier isn't a source of this scope are candidates. A qualifier
        # that names a source of the parent scope is left alone when this scope is a correlated
        # subquery.
        if (
            column_table
            and column_table not in scope.sources
            and (
                not scope.parent
                or column_table not in scope.parent.sources
                or not scope.is_correlated_subquery
            )
        ):
            root, *parts = column.parts

            if isinstance(root, exp.Identifier) and root.name in scope.sources:
                # The struct is already qualified, but we still need to change the AST
                column_table = root
                root, *parts = parts
                was_qualified = True
            else:
                column_table = resolver.get_table(root.name)
                was_qualified = False

            # If no source could be determined for the root part, the column is left as is
            if column_table:
                converted = True
                new_column = exp.column(root, table=column_table)

                if dot_parts:
                    # Remove the actual column parts from the rest of dot parts
                    new_column.meta["dot_parts"] = dot_parts[2 if was_qualified else 1 :]

                column.replace(exp.Dot.build([new_column, *parts]))

    if converted:
        # We want to re-aggregate the converted columns, otherwise they'd be skipped in
        # a `for column in scope.columns` iteration, even though they shouldn't be
        scope.clear_cache()
def _expand_struct_stars_no_parens(
    expression: exp.Dot,
) -> t.List[exp.Alias]:
    """
    [BigQuery] Flatten `foo.bar.*`, where `bar` is a struct column, into one aliased
    projection per struct field.

    An empty list is returned when the star can't be expanded: the first column found under
    `expression` is missing or isn't typed as a STRUCT, a path segment doesn't match a nested
    struct field, or a field is anonymous or shares its name with another field.
    """
    column = expression.find(exp.Column)
    if not (isinstance(column, exp.Column) and column.is_type(exp.DType.STRUCT)):
        return []

    # Nested struct members are ColumnDefs, so wrap the leading column in one as well,
    # which lets every level of the walk below be handled the same way
    column = column.copy()
    current_struct = exp.ColumnDef(this=column.this, kind=column.type)

    # Drop the table name (first part) and the star (last part)
    path = expression.parts[1:-1]

    # For a nested lookup such as t.c.f1.f2.*, descend until the innermost struct (f2) is reached
    for segment in path[1:]:
        nested_struct = None

        for member in t.cast(exp.DataType, current_struct.kind).expressions:
            if not isinstance(member.this, exp.Identifier):
                # The star can only be expanded if all fields are named
                return []

            if member.name == segment.name and member.kind.is_type(exp.DType.STRUCT):
                nested_struct = member
                break

        if nested_struct is None:
            # None of the struct's fields matches this path segment
            return []

        current_struct = nested_struct

    seen_names = set()
    projections = []

    for member in t.cast(exp.DataType, current_struct.kind).expressions:
        member_name = member.name

        # Anonymous or duplicated field names make the expansion ambiguous
        if member_name in seen_names or not isinstance(member.this, exp.Identifier):
            return []

        seen_names.add(member_name)

        identifier = member.this.copy()
        head, *tail = [part.copy() for part in itertools.chain(path, [identifier])]
        projection = exp.column(
            t.cast(exp.Identifier, head),
            table=column.args.get("table"),
            fields=t.cast(t.List[exp.Identifier], tail),
        )
        projections.append(alias(projection, identifier, copy=False).assert_is(exp.Alias))

    return projections
def _expand_stars(
    scope: Scope,
    resolver: Resolver,
    using_column_tables: t.Dict[str, t.Any],
    pseudocolumns: t.Set[str],
    annotator: TypeAnnotator,
) -> None:
    """
    Expand stars to lists of column selections

    The projections of `scope.expression` are rebuilt so that `*`, `<table>.*` and (for
    dialects that support it) struct stars are replaced by explicit columns. The projection
    list is only written back if `scope.expression` is an `exp.Select` and at least one
    selection was produced.

    Args:
        scope: The scope whose projections are expanded.
        resolver: Provides the dialect and the columns of each source.
        using_column_tables: Maps a column name to the tables it is coalesced across; such a
            column is emitted once as `COALESCE(...)` (presumably these are JOIN ... USING
            columns - confirm against the caller).
        pseudocolumns: Upper-cased names that are left out of the expansion if the dialect
            sets `EXCLUDES_PSEUDOCOLUMNS_FROM_STAR`.
        annotator: Used to annotate the scope's types before struct stars are expanded.

    Raises:
        OptimizeError: If a star references a table that isn't a source of `scope`.
    """

    new_selections: t.List[exp.Expr] = []
    # The three maps below are keyed by `id()` of the table name object, as registered by the
    # `_add_*_columns` helpers, and are looked up with `id(table)` further down
    except_columns: t.Dict[int, t.Set[str]] = {}
    replace_columns: t.Dict[int, t.Dict[str, exp.Alias]] = {}
    rename_columns: t.Dict[int, t.Dict[str, str]] = {}

    # Names already emitted as a COALESCE, so that they aren't selected again for another table
    coalesced_columns = set()
    dialect = resolver.dialect

    pivot_output_columns = None
    pivot_exclude_columns: t.Set[str] = set()

    # Only the first pivot of the scope is considered
    pivot = t.cast(t.Optional[exp.Pivot], seq_get(scope.pivots, 0))
    if isinstance(pivot, exp.Pivot) and not pivot.alias_column_names:
        if pivot.unpivot:
            pivot_output_columns = [c.output_name for c in _unpivot_columns(pivot)]

            for field in pivot.fields:
                if isinstance(field, exp.In):
                    pivot_exclude_columns.update(
                        c.output_name for e in field.expressions for c in e.find_all(exp.Column)
                    )

        else:
            pivot_exclude_columns = set(c.output_name for c in pivot.find_all(exp.Column))

            pivot_output_columns = [c.output_name for c in pivot.args.get("columns", [])]
            if not pivot_output_columns:
                pivot_output_columns = [c.alias_or_name for c in pivot.expressions]

    if dialect.SUPPORTS_STRUCT_STAR_EXPANSION and any(
        isinstance(col, exp.Dot) for col in scope.stars
    ):
        # Found struct expansion, annotate scope ahead of time
        annotator.annotate_scope(scope)

    scope_expression = scope.expression
    if not isinstance(scope_expression, exp.Selectable):
        return

    for expression in scope_expression.selects:
        # Names of the sources that this projection expands over; stays empty for non-stars
        tables: t.List[str] = []
        if isinstance(expression, exp.Star):
            tables.extend(scope.selected_sources)
            _add_except_columns(expression, tables, except_columns)
            _add_replace_columns(expression, tables, replace_columns)
            _add_rename_columns(expression, tables, rename_columns)
        elif expression.is_star:
            if isinstance(expression, exp.Column):
                tables.append(expression.table)
                _add_except_columns(expression.this, tables, except_columns)
                _add_replace_columns(expression.this, tables, replace_columns)
                _add_rename_columns(expression.this, tables, rename_columns)
            elif isinstance(expression, exp.Dot):
                if (
                    dialect.SUPPORTS_STRUCT_STAR_EXPANSION
                    and not dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS
                ):
                    struct_fields = _expand_struct_stars_no_parens(expression)
                    if struct_fields:
                        new_selections.extend(struct_fields)
                        continue
                elif dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS:
                    struct_fields = _expand_struct_stars_with_parens(expression)
                    if struct_fields:
                        new_selections.extend(struct_fields)
                        continue

        # Not a table star (or a struct star that couldn't be expanded): keep it unchanged
        if not tables:
            new_selections.append(expression)
            continue

        for table in tables:
            if table not in scope.sources:
                raise OptimizeError(f"Unknown table: {table}")

            columns = resolver.get_source_columns(table, only_visible=True)
            columns = columns or scope.outer_columns

            if pseudocolumns and dialect.EXCLUDES_PSEUDOCOLUMNS_FROM_STAR:
                columns = [name for name in columns if name.upper() not in pseudocolumns]

            # The source's columns are unknown, so give up on the whole expansion and leave
            # the original projections in place
            if not columns or "*" in columns:
                return

            table_id = id(table)
            columns_to_exclude = except_columns.get(table_id) or set()
            renamed_columns = rename_columns.get(table_id, {})
            replaced_columns = replace_columns.get(table_id, {})

            if pivot:
                if pivot_output_columns and pivot_exclude_columns:
                    pivot_columns = [c for c in columns if c not in pivot_exclude_columns]
                    pivot_columns.extend(pivot_output_columns)
                else:
                    pivot_columns = pivot.alias_column_names

                if pivot_columns:
                    # Pivoted columns are qualified with the pivot's alias instead of the table
                    new_selections.extend(
                        alias(exp.column(name, table=pivot.alias), name, copy=False)
                        for name in pivot_columns
                        if name not in columns_to_exclude
                    )
                    continue

            for name in columns:
                if name in columns_to_exclude or name in coalesced_columns:
                    continue
                if name in using_column_tables and table in using_column_tables[name]:
                    coalesced_columns.add(name)
                    # TODO (mypyc): use a separate variable to avoid reusing `tables` (list) with dict type
                    using_tables = using_column_tables[name]
                    coalesce_args = [exp.column(name, table=table) for table in using_tables]

                    new_selections.append(
                        alias(exp.func("coalesce", *coalesce_args), alias=name, copy=False)
                    )
                else:
                    alias_ = renamed_columns.get(name, name)
                    selection_expr = replaced_columns.get(name) or exp.column(name, table=table)
                    # Only wrap the selection in an alias if the column was actually renamed
                    new_selections.append(
                        alias(selection_expr, alias_, copy=False)
                        if alias_ != name
                        else selection_expr
                    )

    # Ensures we don't overwrite the initial selections with an empty list
    if new_selections and isinstance(scope_expression, exp.Select):
        scope_expression.set("expressions", new_selections)
def pushdown_cte_alias_columns(scope: Scope) -> None:
    """
    Pushes down the CTE alias columns into the projection,

    This step is useful in Snowflake where the CTE alias columns can be referenced in the HAVING.

    Only CTEs that declare alias columns and whose query is an `exp.Select` are rewritten.
    Alias columns are matched with the projections by position. Projections that have no
    corresponding alias column are kept unchanged, and surplus alias columns are ignored.

    Args:
        scope: Scope to find ctes to pushdown aliases.
    """
    for cte in scope.ctes:
        alias_names = cte.alias_column_names
        query = cte.this

        if not alias_names or not isinstance(query, exp.Select):
            continue

        projections = query.expressions
        new_expressions = []

        for _alias, projection in zip(alias_names, projections):
            if isinstance(projection, exp.Alias):
                projection.set("alias", exp.to_identifier(_alias))
            else:
                projection = alias(projection, alias=_alias)
            new_expressions.append(projection)

        # `zip` stops at the shorter input, so when there are fewer alias columns than
        # projections the remaining projections must be carried over explicitly, otherwise
        # they would be silently removed from the SELECT
        new_expressions.extend(projections[len(new_expressions) :])

        query.set("expressions", new_expressions)
Defaults to False. Returns: The qualified expression. """ dialect = Dialect.get_or_raise(dialect) next_alias_name = name_sequence("_") if db := db or None: db = exp.parse_identifier(db, dialect=dialect) db.meta["is_table"] = True db = normalize_identifiers(db, dialect=dialect) if catalog := catalog or None: catalog = exp.parse_identifier(catalog, dialect=dialect) catalog.meta["is_table"] = True catalog = normalize_identifiers(catalog, dialect=dialect) def _qualify(table: exp.Table) -> None: if isinstance(table.this, exp.Identifier): if db and not table.args.get("db"): table.set("db", db.copy()) if catalog and not table.args.get("catalog") and table.args.get("db"): table.set("catalog", catalog.copy()) if (db or catalog) and not isinstance(expression, exp.Query): with_ = expression.args.get("with_") or exp.With() cte_names = {cte.alias_or_name for cte in with_.expressions} for node in expression.walk(prune=lambda n: isinstance(n, exp.Query)): if isinstance(node, exp.Table) and node.name not in cte_names: _qualify(node) def _set_alias( expression: exp.Expr, canonical_aliases: t.Dict[str, str], target_alias: t.Optional[str] = None, scope: t.Optional[Scope] = None, normalize: bool = False, columns: t.Optional[t.List[t.Union[str, exp.Identifier]]] = None, ) -> None: alias = expression.args.get("alias") or exp.TableAlias() if canonicalize_table_aliases: new_alias_name = next_alias_name() canonical_aliases[alias.name or target_alias or ""] = new_alias_name elif not alias.name: new_alias_name = target_alias or next_alias_name() if normalize and target_alias: new_alias_name = normalize_identifiers(new_alias_name, dialect=dialect).name else: return alias.set("this", exp.to_identifier(new_alias_name)) if columns: alias.set("columns", [exp.to_identifier(c) for c in columns]) expression.set("alias", alias) if scope: scope.rename_source(None, new_alias_name) for scope in traverse_scope(expression): local_columns = scope.local_columns canonical_aliases: t.Dict[str, str] = {} for 
query in scope.subqueries: subquery = query.parent if isinstance(subquery, exp.Subquery): subquery.unwrap().replace(subquery) for derived_table in scope.derived_tables: unnested = derived_table.unnest() if isinstance(unnested, exp.Table): joins = unnested.args.get("joins") unnested.set("joins", None) derived_table.this.replace(exp.select("*").from_(unnested.copy(), copy=False)) derived_table.this.set("joins", joins) _set_alias(derived_table, canonical_aliases, scope=scope) if pivot := seq_get(derived_table.args.get("pivots") or [], 0): _set_alias(pivot, canonical_aliases) table_aliases = {} for name, source in scope.sources.items(): if isinstance(source, exp.Table): # When the name is empty, it means that we have a non-table source, e.g. a pivoted cte is_real_table_source = bool(name) if pivot := seq_get(source.args.get("pivots") or [], 0): name = source.name table_this = source.this table_alias = source.args.get("alias") function_columns: t.List[t.Union[str, exp.Identifier]] = [] if isinstance(table_this, exp.Func): if not table_alias: function_columns = ensure_list( dialect.DEFAULT_FUNCTIONS_COLUMN_NAMES.get(type(table_this)) ) elif columns := table_alias.columns: function_columns = columns elif type(table_this) in dialect.DEFAULT_FUNCTIONS_COLUMN_NAMES: function_columns = ensure_list(source.alias_or_name) source.set("alias", None) name = "" _set_alias( source, canonical_aliases, target_alias=name or source.name or None, normalize=True, columns=function_columns, ) source_fqn = ".".join(p.name for p in source.parts) had_explicit_alias = table_alias and table_alias.name if not had_explicit_alias or source_fqn not in table_aliases: table_aliases[source_fqn] = source.args["alias"].this.copy() if pivot: target_alias = source.alias if pivot.unpivot else None _set_alias(pivot, canonical_aliases, target_alias=target_alias, normalize=True) # This case corresponds to a pivoted CTE, we don't want to qualify that if isinstance(scope.sources.get(source.alias_or_name), Scope): 
continue if is_real_table_source: _qualify(source) if on_qualify: on_qualify(source) elif isinstance(source, Scope) and source.is_udtf: _set_alias(udtf := source.expression, canonical_aliases) table_alias = udtf.args["alias"] if isinstance(udtf, exp.Values) and not table_alias.columns: column_aliases = [ normalize_identifiers(i, dialect=dialect) for i in dialect.generate_values_aliases(udtf) ] table_alias.set("columns", column_aliases) for table in scope.tables: if not table.alias and isinstance(table.parent, (exp.From, exp.Join)): _set_alias(table, canonical_aliases, target_alias=table.name) for column in local_columns: column_table = column.table if column.db: table_alias = table_aliases.get(".".join(p.name for p in column.parts[0:-1])) if table_alias: for p in exp.COLUMN_PARTS[1:]: column.set(p, None) column.set("table", table_alias.copy()) elif ( canonical_aliases and column_table and (canonical_table := canonical_aliases.get(column_table, "")) != column_table ): # Amend existing aliases, e.g. t.c -> _0.c if t is aliased to _0 column.set("table", exp.to_identifier(canonical_table)) return expression ================================================ FILE: sqlglot/optimizer/resolver.py ================================================ from __future__ import annotations import itertools import typing as t from sqlglot import exp from sqlglot.dialects.dialect import Dialect from sqlglot.errors import OptimizeError from sqlglot.helper import seq_get, SingleValuedMapping from sqlglot.optimizer.scope import Scope if t.TYPE_CHECKING: from sqlglot.schema import Schema from collections.abc import Sequence, Mapping class Resolver: """ Helper for resolving columns. This is a class so we can lazily load some things and easily share them across functions. 
""" def __init__(self, scope: Scope, schema: Schema, infer_schema: bool = True): self.scope = scope self.schema = schema self.dialect = schema.dialect or Dialect() self._source_columns: t.Optional[dict[str, Sequence[str]]] = None self._unambiguous_columns: t.Optional[Mapping[str, str]] = None self._all_columns: t.Optional[set[str]] = None self._infer_schema = infer_schema self._get_source_columns_cache: dict[tuple[str, bool], Sequence[str]] = {} def get_table(self, column: str | exp.Column) -> t.Optional[exp.Identifier]: """ Get the table for a column name. Args: column: The column expression (or column name) to find the table for. Returns: The table name if it can be found/inferred. """ column_name = column if isinstance(column, str) else column.name table_name = self._get_table_name_from_sources(column_name) if not table_name and isinstance(column, exp.Column): # Fall-back case: If we couldn't find the `table_name` from ALL of the sources, # attempt to disambiguate the column based on other characteristics e.g if this column is in a join condition, # we may be able to disambiguate based on the source order. 
def get_source_columns_from_set_op(self, expression: exp.Expr) -> t.List[str]:
    """
    Compute the output column names of a set operation, or of one of its operands.

    Args:
        expression: An `exp.Select`, an `exp.SetOperation`, or an `exp.Subquery` that wraps
            an `exp.SetOperation`.

    Returns:
        The column names produced by `expression`. For set operations that carry an ON
        column list these are the ON columns; for LEFT / FULL / INNER modifiers they are
        derived from the columns of both operands; otherwise they are the set operation's
        `named_selects`.

    Raises:
        OptimizeError: If `expression` is none of the supported node types.
    """
    if isinstance(expression, exp.Select):
        return expression.named_selects
    if isinstance(expression, exp.Subquery) and isinstance(expression.this, exp.SetOperation):
        # Different types of SET modifiers can be chained together if they're explicitly grouped by nesting
        return self.get_source_columns_from_set_op(expression.this)
    if not isinstance(expression, exp.SetOperation):
        raise OptimizeError(f"Unknown set operation: {expression}")

    set_op = expression

    # BigQuery specific set operations modifiers, e.g INNER UNION ALL BY NAME
    on_column_list = set_op.args.get("on")

    if on_column_list:
        # The resulting columns are the columns in the ON clause:
        # {INNER | LEFT | FULL} UNION ALL BY NAME ON (col1, col2, ...)
        columns = [col.name for col in on_column_list]
    elif set_op.side or set_op.kind:
        side = set_op.side
        kind = set_op.kind

        # Visit the children UNIONs (if any) in a post-order traversal
        left = self.get_source_columns_from_set_op(set_op.left)
        right = self.get_source_columns_from_set_op(set_op.right)

        # We use dict.fromkeys to deduplicate keys and maintain insertion order
        if side == "LEFT":
            columns = left
        elif side == "FULL":
            columns = list(dict.fromkeys(left + right))
        elif kind == "INNER":
            # Intersecting two key views would yield an unordered set, so the common columns
            # are filtered out of the left operand instead, which keeps its column order
            right_columns = set(right)
            columns = [col for col in dict.fromkeys(left) if col in right_columns]
        else:
            # Unrecognized modifier: treat it like a plain set operation instead of failing
            # on an unbound `columns` variable
            columns = set_op.named_selects
    else:
        columns = set_op.named_selects

    return columns
if self.dialect.UNNEST_COLUMN_ONLY and isinstance(source.expression, exp.Unnest): unnest = source.expression # if type is not annotated yet, try to get it from the schema if not unnest.type or unnest.type.is_type(exp.DType.UNKNOWN): unnest_expr = seq_get(unnest.expressions, 0) if isinstance(unnest_expr, exp.Column) and self.scope.parent: col_type = self._get_unnest_column_type(unnest_expr) # extract element type if it's an ARRAY if col_type and col_type.is_type(exp.DType.ARRAY): element_types = col_type.expressions if element_types: unnest.type = element_types[0].copy() else: if col_type: unnest.type = col_type.copy() # check if the result type is a STRUCT - extract struct field names if unnest.is_type(exp.DType.STRUCT): for k in unnest.type.expressions: # type: ignore columns.append(k.name) elif isinstance(source, Scope) and isinstance(source.expression, exp.SetOperation): columns = self.get_source_columns_from_set_op(source.expression) else: selectable = source.expression.assert_is(exp.Selectable) select = seq_get(selectable.selects, 0) if isinstance(select, exp.QueryTransform): # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-qry-select-transform.html schema = select.args.get("schema") columns = [c.name for c in schema.expressions] if schema else ["key", "value"] else: columns = selectable.named_selects node, _ = self.scope.selected_sources.get(name) or (None, None) if isinstance(node, Scope): column_aliases = node.expression.alias_column_names elif isinstance(node, exp.Expr): column_aliases = node.alias_column_names else: column_aliases = [] if column_aliases: # If the source's columns are aliased, their aliases shadow the corresponding column names. # This can be expensive if there are lots of columns, so only do this if column_aliases exist. 
def _get_column_join_context(self, column: exp.Column) -> t.Optional[exp.Join]:
    """
    Find the join whose source order may be used to qualify `column`.

    Returns:
        The closest `exp.Join` ancestor of `column`, provided that it joins one of the scope's
        selected sources; `None` otherwise.
    """
    query_args = self.scope.expression.args

    if not query_args.get("joins"):
        return None

    if query_args.get("laterals") or query_args.get("pivots"):
        # Feature gap: no disambiguation is attempted when other kinds of sources
        # (e.g laterals, pivots) exist alongside the joins
        return None

    enclosing = column.find_ancestor(exp.Join, exp.Select)
    if not isinstance(enclosing, exp.Join):
        return None

    # The ancestor must be a join over an actual source, e.g in Clickhouse `b` is
    # an array expression in `a ARRAY JOIN b`
    if enclosing.alias_or_name not in self.scope.selected_sources:
        return None

    return enclosing
For columns in JOIN conditions, this only includes tables that have been joined up to that point. Example: ``` SELECT * FROM t_1 INNER JOIN ... INNER JOIN t_n ON t_1.a = c INNER JOIN t_n+1 ON ... ``` ^ | +----------------------------------+ | ⌄ The unqualified column `c` is not ambiguous if no other sources up until that join i.e t_1, ..., t_n, contain a column named `c`. """ args = self.scope.expression.args # Collect tables in order: FROM clause tables + joined tables up to current join from_name = args["from_"].alias_or_name available_sources = {from_name: self.get_source_columns(from_name)} for join in args["joins"][: t.cast(int, join_ancestor.index) + 1]: available_sources[join.alias_or_name] = self.get_source_columns(join.alias_or_name) return available_sources def _get_unambiguous_columns( self, source_columns: dict[str, Sequence[str]] ) -> Mapping[str, str]: """ Find all the unambiguous columns in sources. Args: source_columns: Mapping of names to source columns. Returns: Mapping of column name to source name. """ if not source_columns: return {} source_columns_pairs = list(source_columns.items()) first_table, first_columns = source_columns_pairs[0] if len(source_columns_pairs) == 1: # Performance optimization - avoid copying first_columns if there is only one table. return SingleValuedMapping(first_columns, first_table) # For BigQuery UNNEST_COLUMN_ONLY, build a mapping of original UNNEST aliases # from alias.columns[0] to their source names. This is used to resolve shadowing # where an UNNEST alias shadows a column name from another table. 
unnest_original_aliases: t.Dict[str, str] = {} if self.dialect.UNNEST_COLUMN_ONLY: unnest_original_aliases = { alias_arg.columns[0].name: source_name for source_name, source in self.scope.sources.items() if ( isinstance(source.expression, exp.Unnest) and (alias_arg := source.expression.args.get("alias")) and alias_arg.columns ) } unambiguous_columns = {col: first_table for col in first_columns} all_columns = set(unambiguous_columns) for table, columns in source_columns_pairs[1:]: unique = set(columns) ambiguous = all_columns.intersection(unique) all_columns.update(columns) for column in ambiguous: if column in unnest_original_aliases: unambiguous_columns[column] = unnest_original_aliases[column] continue unambiguous_columns.pop(column, None) for column in unique.difference(ambiguous): unambiguous_columns[column] = table return unambiguous_columns def _get_unnest_column_type(self, column: exp.Column) -> t.Optional[exp.DataType]: """ Get the type of a column being unnested, tracing through CTEs/subqueries to find the base table. Args: column: The column expression being unnested. Returns: The DataType of the column, or None if not found. """ scope = self.scope.parent assert scope # if column is qualified, use that table, otherwise disambiguate using the resolver if column.table: table_name = column.table else: # use the parent scope's resolver to disambiguate the column parent_resolver = Resolver(scope, self.schema, self._infer_schema) table_identifier = parent_resolver.get_table(column) if not table_identifier: return None table_name = table_identifier.name source = scope.sources.get(table_name) return self._get_column_type_from_scope(source, column) if source else None def _get_column_type_from_scope( self, source: t.Union[Scope, exp.Table], column: exp.Column ) -> t.Optional[exp.DataType]: """ Get a column's type by tracing through scopes/tables to find the base table. Args: source: The source to search - can be a Scope (to iterate its sources) or a Table. 
class ScopeType(Enum):
    """Kind of a `Scope`, describing how it relates to its parent scope."""

    # Default type of a `Scope` that is constructed without an explicit `scope_type`
    ROOT = auto()
    # Given to scopes branched for `Scope.subqueries`, and for Subquery expressions
    # found inside an Unnest/Lateral (see `_traverse_subqueries` / `_traverse_udtfs`)
    SUBQUERY = auto()
    # Given by `_traverse_tables` to FROM/JOIN expressions accepted by `_is_derived_table`
    DERIVED_TABLE = auto()
    # Given by `_traverse_ctes` to the scope of each CTE body
    CTE = auto()
    # Given by `_traverse_union` to the scope of each operand of a set operation
    UNION = auto()
    # Given by `_traverse_tables` to `exp.UDTF` expressions
    UDTF = auto()
cte_sources: Sources from CTES outer_columns: If this is a derived table or CTE, and the outer query defines a column list for the alias of this scope, this is that list of columns. For example: SELECT * FROM (SELECT ...) AS y(col1, col2) The inner query would have `["col1", "col2"]` for its `outer_columns` parent: Parent scope scope_type: Type of this scope, relative to it's parent subquery_scopes: List of all child scopes for subqueries cte_scopes: List of all child scopes for CTEs derived_table_scopes: List of all child scopes for derived_tables udtf_scopes: List of all child scopes for user defined tabular functions table_scopes: derived_table_scopes + udtf_scopes, in the order that they're defined union_scopes: If this Scope is for a Union expression, this will be a list of the left and right child scopes. """ _collected: bool _raw_columns: t.List[exp.Column] _table_columns: t.List[exp.TableColumn] _stars: t.List[exp.Column | exp.Dot] _derived_tables: t.List[exp.Subquery] _udtfs: t.List[exp.UDTF] _tables: t.List[exp.Table] _ctes: t.List[exp.CTE] _subqueries: t.List[exp.Select | exp.SetOperation] _join_hints: t.List[exp.JoinHint] _semi_anti_join_tables: t.Set[str] _column_index: t.Set[int] _selected_sources: t.Optional[t.Dict[str, t.Tuple[exp.Selectable, exp.Table | Scope]]] _columns: t.Optional[t.List[exp.Column]] _external_columns: t.Optional[t.List[exp.Column]] _local_columns: t.Optional[t.List[exp.Column]] _pivots: t.Optional[t.List[exp.Pivot]] _references: t.Optional[t.List[t.Tuple[str, exp.Selectable]]] def __init__( self, expression: exp.Expr, sources: t.Optional[t.Dict[str, exp.Table | Scope]] = None, outer_columns: t.Optional[t.List[str]] = None, parent: t.Optional[Scope] = None, scope_type: ScopeType = ScopeType.ROOT, lateral_sources: t.Optional[t.Dict[str, exp.Table | Scope]] = None, cte_sources: t.Optional[t.Dict[str, exp.Table | Scope]] = None, can_be_correlated: t.Optional[bool] = None, ) -> None: self.expression = expression self.sources = 
def _collect(self) -> None:
    """
    Walk this scope's expression once and sort its nodes into the per-kind
    collections that back the public properties (`tables`, `ctes`, `stars`, ...).

    The walk is done with `self.walk()`, i.e. it does not descend into child scopes.
    Sets `_collected` when finished so `_ensure_collected` does not repeat the work.
    """
    # Start from empty collections so that collecting again never accumulates
    # nodes from a previous pass.
    self._tables = []
    self._ctes = []
    self._subqueries = []
    self._derived_tables = []
    self._udtfs = []
    self._raw_columns = []
    self._table_columns = []
    self._stars = []
    self._join_hints = []
    self._semi_anti_join_tables = set()
    self._column_index = set()

    for node in self.walk():
        # The scope's own root expression is never classified.
        if node is self.expression:
            continue

        # NOTE: the order of the checks below matters, the first match wins.
        if isinstance(node, exp.Dot) and node.is_star:
            self._stars.append(node)
        elif type(node) is exp.Column:
            # Exact type match: subclasses of exp.Column are not handled here.
            self._column_index.add(id(node))

            # `t.*` style columns are recorded as stars, everything else as a raw column
            if isinstance(node.this, exp.Star):
                self._stars.append(node)
            else:
                self._raw_columns.append(node)
        elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
            # Tables that are direct children of a join hint are skipped entirely.
            parent = node.parent
            if isinstance(parent, exp.Join) and parent.is_semi_or_anti_join:
                # Remember the name so `selected_sources` can leave it out later
                self._semi_anti_join_tables.add(node.alias_or_name)

            self._tables.append(node)
        elif isinstance(node, exp.JoinHint):
            self._join_hints.append(node)
        elif isinstance(node, exp.UDTF):
            self._udtfs.append(node)
        elif isinstance(node, exp.CTE):
            self._ctes.append(node)
        elif _is_derived_table(node) and _is_from_or_join(node):
            # Derived tables only count when they sit in a FROM or JOIN clause
            self._derived_tables.append(t.cast(exp.Subquery, node))
        elif isinstance(node, exp.UNWRAPPED_QUERIES) and not _is_from_or_join(node):
            # Queries outside of FROM/JOIN are recorded as subqueries
            self._subqueries.append(node)
        elif isinstance(node, exp.TableColumn):
            self._table_columns.append(node)

    self._collected = True
@property
def columns(self) -> t.List[exp.Column]:
    """
    List of columns in this scope.

    The result is computed on first access and cached in `_columns`.

    Returns:
        list[exp.Column]: Column instances in this scope, plus any
            Columns that reference this scope from correlated subqueries.
    """
    if self._columns is None:
        self._ensure_collected()
        columns = self._raw_columns

        # Columns that child scopes could not resolve against their own sources.
        # Derived tables only contribute when they are flagged `can_be_correlated`.
        external_columns = [
            column
            for scope in itertools.chain(
                self.subquery_scopes,
                self.udtf_scopes,
                (dts for dts in self.derived_table_scopes if dts.can_be_correlated),
            )
            for column in scope.external_columns
        ]

        expr = self.expression
        named_selects = set(expr.named_selects) if isinstance(expr, exp.Query) else set()

        self._columns = []
        for column in columns + external_columns:
            # Closest enclosing node of one of these kinds; it decides whether an
            # unqualified column is kept.
            ancestor = column.find_ancestor(
                exp.Select,
                exp.Qualify,
                exp.Order,
                exp.Having,
                exp.Hint,
                exp.Table,
                exp.Star,
                exp.Distinct,
            )
            # A column is kept when any of the following holds:
            #   - it has no such ancestor, or it carries a table qualifier
            #   - the ancestor is a Select
            #   - the ancestor is a Table whose `this` is not a Func
            #   - the ancestor is an Order/Distinct that belongs to a Window/WithinGroup,
            #     or whose parent is not a Select, or the column name is not one of the
            #     query's named selects
            #   - the ancestor is a Star and the column is not its `except_` argument
            # Consequently, unqualified columns directly under Qualify/Having/Hint are
            # dropped (presumably because they may refer to select aliases -- TODO confirm).
            if (
                not ancestor
                or column.text("table")
                or isinstance(ancestor, exp.Select)
                or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
                or (
                    isinstance(ancestor, (exp.Order, exp.Distinct))
                    and (
                        isinstance(ancestor.parent, (exp.Window, exp.WithinGroup))
                        or not isinstance(ancestor.parent, exp.Select)
                        or column.name not in named_selects
                    )
                )
                or (isinstance(ancestor, exp.Star) and not column.arg_key == "except_")
            ):
                self._columns.append(column)

    return self._columns
@property
def external_columns(self) -> t.List[exp.Column]:
    """
    Columns that appear to reference sources in outer scopes.

    For a set operation this is the concatenation of the external columns of its
    left and right scopes. The result is cached after the first access.

    Returns:
        list[exp.Column]: Column instances whose table is neither one of this
            scope's sources nor one of its semi/anti join tables.
    """
    cached = self._external_columns
    if cached is not None:
        return cached

    if isinstance(self.expression, exp.SetOperation):
        left_scope, right_scope = self.union_scopes
        found = left_scope.external_columns + right_scope.external_columns
    else:
        found = []
        for column in self.columns:
            table = column.text("table")
            if table in self.sources or table in self.semi_or_anti_join_tables:
                continue
            found.append(column)

    self._external_columns = found
    return found
@property
def pivots(self) -> t.List[exp.Pivot]:
    """
    Pivots attached to the nodes referenced by this scope, in reference order.

    The list is built on first access and cached in `_pivots`.
    """
    cached = self._pivots
    if cached is None:
        cached = []
        for _, node in self.references:
            cached.extend(node.args.get("pivots") or [])
        self._pivots = cached
    return cached
def rename_source(self, old_name: t.Optional[str], new_name: str) -> None:
    """
    Rename a source in this scope.

    A missing or empty `old_name` refers to the source registered under "".
    Nothing happens when no source is registered under the old name.
    """
    # NOTE(review): unlike add_source/remove_source, this does not call
    # clear_cache() -- confirm that keeping the cached properties is intended.
    registry = self.sources
    key = old_name if old_name else ""
    if key not in registry:
        return
    registry[new_name] = registry.pop(key)
def ref_count(self) -> t.Dict[int, int]:
    """
    Count how often each source is referenced across this scope tree.

    Returns:
        dict[int, int]: Mapping of `id(source)` to the number of references found
            while traversing the tree. Unknown ids read as 0.
    """
    counts: t.Dict[int, int] = defaultdict(int)

    for scope in self.traverse():
        for selected in scope.selected_sources.values():
            counts[id(selected[1])] += 1

        # semi/anti join sources are not actually selected but we still need to
        # increment their ref count to avoid them being optimized away
        known_sources = scope.sources
        for name in scope._semi_anti_join_tables:
            if name in known_sources:
                counts[id(known_sources[name])] += 1

    return counts
def _traverse_select(scope: Scope) -> Iterator[Scope]:
    """Yield the child scopes of a SELECT: first its CTEs, then its tables, then its subqueries."""
    for traverse_part in (_traverse_ctes, _traverse_tables, _traverse_subqueries):
        yield from traverse_part(scope)
def _is_derived_table(expression: exp.Expr) -> bool:
    """
    Tell whether `expression` is a Subquery that acts as a derived table.

    We represent (tbl1 JOIN tbl2) as a Subquery, but it's not really a "derived table",
    as it doesn't introduce a new scope. If an alias is present, it shadows all names
    under the Subquery, so that's one exception to this rule.
    """
    if not isinstance(expression, exp.Subquery):
        return False
    if expression.alias:
        return True
    return isinstance(expression.this, exp.UNWRAPPED_QUERIES)
""" parent = expression.parent # Subqueries can be arbitrarily nested while type(parent) is exp.Subquery: parent = parent.parent return type(parent) in (exp.From, exp.Join) def _traverse_tables(scope: Scope) -> Iterator[Scope]: sources: dict[str, exp.Table | Scope] = {} # Traverse FROMs, JOINs, and LATERALs in the order they are defined expressions: list[exp.Expr] = [] from_ = scope.expression.args.get("from_") if from_: expressions.append(from_.this) for join in scope.expression.args.get("joins") or []: expressions.append(join.this) if isinstance(scope.expression, exp.Table): expressions.append(scope.expression) expressions.extend(scope.expression.args.get("laterals") or []) for expression in expressions: if isinstance(expression, exp.Final): expression = expression.this if isinstance(expression, exp.Table): table_name = expression.name source_name = expression.alias_or_name if table_name in scope.sources and not expression.db: # This is a reference to a parent source (e.g. a CTE), not an actual table, unless # it is pivoted, because then we get back a new table and hence a new source. 
pivots = expression.args.get("pivots") if pivots: sources[pivots[0].alias] = expression else: sources[source_name] = scope.sources[table_name] elif source_name in sources: sources[find_new_name(sources, table_name)] = expression else: sources[source_name] = expression # Make sure to not include the joins twice if expression is not scope.expression: expressions.extend(join.this for join in expression.args.get("joins") or []) continue if not isinstance(expression, exp.DerivedTable): continue # TODO (mypyc): rebind to exp.Expr to avoid DerivedTable/UDTF trait vtable dispatch node: exp.Expr = expression if isinstance(expression, exp.UDTF): lateral_sources = sources scope_type = ScopeType.UDTF scopes = scope.udtf_scopes elif _is_derived_table(expression): lateral_sources = None scope_type = ScopeType.DERIVED_TABLE scopes = scope.derived_table_scopes expressions.extend(join.this for join in node.args.get("joins") or []) else: # Makes sure we check for possible sources in nested table constructs expressions.append(node.this) expressions.extend(join.this for join in node.args.get("joins") or []) continue child_scope: t.Optional[Scope] = None for child_scope in _traverse_scope( scope.branch( node, lateral_sources=lateral_sources, outer_columns=node.alias_column_names, scope_type=scope_type, ) ): yield child_scope # Tables without aliases will be set as "" # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything. # Until then, this means that only a single, unaliased derived table is allowed (rather, # the latest one wins. 
def walk_in_scope(
    expression: exp.Expr,
    prune: t.Optional[t.Callable[[exp.Expr], bool]] = None,
) -> Iterator[exp.Expr]:
    """
    Returns a generator object which visits all nodes in the syntax tree, stopping at
    nodes that start child scopes.

    This does a custom DFS traversal rather than using expression.walk() because
    the nested generators aren't optimized in mypyc.

    Args:
        expression: the root of the traversal. It is always yielded and always
            descended into, even if it is itself a scope-starting node.
        prune: callable that returns True if
            the generator should stop traversing this branch of the tree.
            The pruned node itself is still yielded.

    Yields:
        exp.Expr: each node in scope
    """
    stack: t.List[exp.Expr] = [expression]

    while stack:
        node = stack.pop()
        yield node

        # Nodes that start a child scope are yielded (above) but not descended into:
        # CTEs, derived tables directly under FROM/JOIN, queries directly under a UDTF,
        # and any unwrapped query.
        if node is not expression and (
            isinstance(node, exp.CTE)
            or (isinstance(node.parent, (exp.From, exp.Join)) and _is_derived_table(node))
            or (isinstance(node.parent, exp.UDTF) and isinstance(node, exp.Query))
            or isinstance(node, exp.UNWRAPPED_QUERIES)
        ):
            # The joins, laterals and pivots hanging off a Subquery/UDTF are still walked
            # (presumably because they belong to the enclosing scope -- TODO confirm).
            # NOTE(review): `prune` is not forwarded to this recursive call -- confirm
            # whether that is intentional.
            if isinstance(node, (exp.Subquery, exp.UDTF)):
                for key in ("joins", "laterals", "pivots"):
                    for arg in node.args.get(key) or []:
                        yield from walk_in_scope(arg)
            continue

        if prune and prune(node):
            continue

        # Push children in reverse so that they are popped, and hence yielded,
        # in the order in which they appear in `node.args`.
        for vs in reversed(node.args.values()):
            if isinstance(vs, list):
                for v in reversed(vs):
                    if isinstance(v, exp.Expr):
                        stack.append(v)
            elif isinstance(vs, exp.Expr):
                stack.append(vs)
""" return next(find_all_in_scope(expression, *expression_types), None) def _get_source_alias(expression: exp.Expr) -> str: alias_arg = expression.args.get("alias") alias_name = expression.alias if not alias_name and isinstance(alias_arg, exp.TableAlias) and len(alias_arg.columns) == 1: alias_name = alias_arg.columns[0].name return alias_name ================================================ FILE: sqlglot/optimizer/simplify.py ================================================ from __future__ import annotations import datetime import logging import functools import itertools import typing as t from collections import deque, defaultdict from functools import reduce, wraps import sqlglot from sqlglot import Dialect, exp from sqlglot.helper import first, merge_ranges, while_changing from sqlglot.optimizer.annotate_types import TypeAnnotator from sqlglot.optimizer.scope import find_all_in_scope, walk_in_scope from sqlglot.schema import ensure_schema if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType DateRange = t.Tuple[datetime.date, datetime.date] DateTruncBinaryTransform = t.Callable[ [exp.Expr, datetime.date, str, Dialect, exp.DataType], t.Optional[exp.Expr] ] logger = logging.getLogger("sqlglot") # Final means that an expression should not be simplified FINAL = "final" SIMPLIFIABLE = ( exp.Binary, exp.Func, exp.Lambda, exp.Predicate, exp.Unary, ) def simplify( expression: exp.Expr, constant_propagation: bool = False, coalesce_simplification: bool = False, dialect: DialectType = None, ): """ Rewrite sqlglot AST to simplify expressions. Example: >>> import sqlglot >>> expression = sqlglot.parse_one("TRUE AND TRUE") >>> simplify(expression).sql() 'TRUE' Args: expression: expression to simplify constant_propagation: whether the constant propagation rule should be used coalesce_simplification: whether the simplify coalesce rule should be used. 
def catch(*exceptions):
    """
    Decorator that ignores a simplification function if any of `exceptions` are raised.

    When the decorated callable raises one of `exceptions`, the wrapper returns the
    expression it was asked to simplify, unchanged. Any other exception propagates.

    The expression is looked up by the parameter named `expression`, so the decorator
    works both on plain functions (`f(expression, ...)`) and on methods
    (`f(self, expression, ...)`). If the callable has no such parameter, the first
    positional argument is returned.

    Args:
        *exceptions: the exception types that should be swallowed.

    Returns:
        A decorator producing a wrapper with the same call signature as the
        decorated callable.
    """

    def decorator(func):
        import inspect

        # Find where `expression` sits among the positional parameters. For a method
        # index 0 is `self`, and returning that instead of the expression would hand
        # the caller the wrong object.
        try:
            parameters = tuple(inspect.signature(func).parameters)
        except (TypeError, ValueError):
            # Some callables (e.g. certain builtins) expose no signature
            parameters = ()
        index = parameters.index("expression") if "expression" in parameters else 0

        @wraps(func)
        def wrapped(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except exceptions:
                if "expression" in kwargs:
                    return kwargs["expression"]
                return args[index]

        return wrapped

    return decorator
(exp.SubqueryPredicate, exp.Bracket)): return expression if ( Dialect.get_or_raise(dialect).REQUIRES_PARENTHESIZED_STRUCT_ACCESS and isinstance(parent, exp.Dot) and (isinstance(parent.right, (exp.Identifier, exp.Star))) ): return expression if isinstance(this, exp.Predicate) and ( not ( parent_is_predicate or isinstance(parent, exp.Neg) or (isinstance(parent, exp.Binary) and not isinstance(parent, exp.Connector)) ) ): return this if ( not isinstance(parent, (exp.Condition, exp.Binary)) or isinstance(parent, exp.Paren) or ( not isinstance(this, exp.Binary) and not (isinstance(this, (exp.Not, exp.Is)) and parent_is_predicate) ) or (isinstance(this, exp.Add) and isinstance(parent, exp.Add)) or (isinstance(this, exp.Mul) and isinstance(parent, exp.Mul)) or (isinstance(this, exp.Mul) and isinstance(parent, (exp.Add, exp.Sub))) ): return this return expression def propagate_constants(expression, root=True): """ Propagate constants for conjunctions in DNF: SELECT * FROM t WHERE a = b AND b = 5 becomes SELECT * FROM t WHERE a = 5 AND b = 5 Reference: https://www.sqlite.org/optoverview.html """ if ( isinstance(expression, exp.And) and (root or not expression.same_parent) and sqlglot.optimizer.normalize.normalized(expression, dnf=True) ): constant_mapping = {} for expr in walk_in_scope(expression, prune=lambda node: isinstance(node, exp.If)): if isinstance(expr, exp.EQ): l, r = expr.left, expr.right # TODO: create a helper that can be used to detect nested literal expressions such # as CAST(123456 AS BIGINT), since we usually want to treat those as literals too if isinstance(l, exp.Column) and isinstance(r, exp.Literal): constant_mapping[l] = (id(l), r) if constant_mapping: for column in find_all_in_scope(expression, exp.Column): parent = column.parent column_id, constant = constant_mapping.get(column) or (None, None) if ( column_id is not None and id(column) != column_id and not (isinstance(parent, exp.Is) and isinstance(parent.expression, exp.Null)) ): 
def _datetrunc_neq(
    left: exp.Expr,
    date: datetime.date,
    unit: str,
    dialect: Dialect,
    target_type: t.Optional[exp.DataType],
) -> t.Optional[exp.Expr]:
    """
    Build the range form of `DATE_TRUNC(unit, left) <> date`.

    The truncated value differs from `date` exactly when `left` falls outside the
    half-open range [start, end) that truncates to `date`, i.e. when
    `left < start OR left >= end`.

    Args:
        left: the expression being truncated.
        date: the date the truncation is compared against.
        unit: the truncation unit.
        dialect: the dialect, forwarded to `_datetrunc_range`.
        target_type: the type used for the generated date literals.

    Returns:
        The disjunction described above, or None when `_datetrunc_range` yields no
        range (`date` is not aligned to `unit`), in which case nothing is rewritten.
    """
    drange = _datetrunc_range(date, unit, dialect)
    if not drange:
        return None

    # The two bounds are mutually exclusive, so they must be OR-ed: AND-ing them
    # yields a predicate that can never be satisfied.
    return exp.or_(
        left < date_literal(drange[0], target_type),
        left >= date_literal(drange[1], target_type),
        copy=False,
    )
def cast_as_date(value: t.Any) -> t.Optional[datetime.date]:
    """
    Convert `value` to a `datetime.date`.

    Args:
        value: a `datetime.datetime` (its date part is taken), a `datetime.date`
            (returned as is) or an ISO-8601 formatted string.

    Returns:
        The date, or None when `value` is neither a date object nor a string that
        `datetime.datetime.fromisoformat` can parse.
    """
    # datetime must be checked first since it is a subclass of date
    if isinstance(value, datetime.datetime):
        return value.date()
    if isinstance(value, datetime.date):
        return value
    try:
        return datetime.datetime.fromisoformat(value).date()
    except (TypeError, ValueError):
        # TypeError: not a string at all; ValueError: not ISO formatted
        return None


def cast_as_datetime(value: t.Any) -> t.Optional[datetime.datetime]:
    """
    Convert `value` to a `datetime.datetime`.

    Args:
        value: a `datetime.datetime` (returned as is), a `datetime.date` (promoted
            to midnight of that day) or an ISO-8601 formatted string.

    Returns:
        The datetime, or None when `value` is neither a date object nor a string
        that `datetime.datetime.fromisoformat` can parse.
    """
    # datetime must be checked first since it is a subclass of date
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return datetime.datetime(year=value.year, month=value.month, day=value.day)
    try:
        return datetime.datetime.fromisoformat(value)
    except (TypeError, ValueError):
        # TypeError: not a string at all; ValueError: not ISO formatted
        return None
def date_floor(d: datetime.date, unit: str, dialect: Dialect) -> datetime.date:
    """
    Truncate `d` down to the start of the `unit` that contains it.

    Args:
        d: the date (or datetime) to truncate. For a datetime the time-of-day is
            reset to midnight, since every supported unit is a day or coarser.
        unit: one of "year", "quarter", "month", "week" or "day".
        dialect: supplies `WEEK_OFFSET`, the first day of the week expressed
            relative to Monday (0 means Monday).
            NOTE(review): presumably -1 means Sunday; confirm against the dialect
            definitions.

    Returns:
        A value of the same type as `d`, never later than `d`.

    Raises:
        UnsupportedUnit: if `unit` is not one of the supported units.
    """
    if isinstance(d, datetime.datetime):
        # Without this the "floor" of a datetime would keep its time-of-day and
        # compare equal to values that are not on a unit boundary.
        d = d.replace(hour=0, minute=0, second=0, microsecond=0)

    if unit == "year":
        return d.replace(month=1, day=1)
    if unit == "quarter":
        # First month of the quarter: 1, 4, 7 or 10
        return d.replace(month=d.month - (d.month - 1) % 3, day=1)
    if unit == "month":
        return d.replace(day=1)
    if unit == "week":
        # weekday() is 0 for Monday .. 6 for Sunday. The modulo keeps the step
        # within [0, 6], so the result is never a week early or in the future
        # when WEEK_OFFSET is non-zero.
        return d - datetime.timedelta(days=(d.weekday() - dialect.WEEK_OFFSET) % 7)
    if unit == "day":
        return d

    raise UnsupportedUnit(f"Unsupported unit: {unit}")
DateTruncBinaryTransform] = { exp.LT: lambda l, dt, u, d, t: ( l < date_literal( dt if dt == date_floor(dt, u, d) else date_floor(dt, u, d) + interval(u), t ) ), exp.GT: lambda l, dt, u, d, t: l >= date_literal(date_floor(dt, u, d) + interval(u), t), exp.LTE: lambda l, dt, u, d, t: l < date_literal(date_floor(dt, u, d) + interval(u), t), exp.GTE: lambda l, dt, u, d, t: l >= date_literal(date_ceil(dt, u, d), t), exp.EQ: _datetrunc_eq, exp.NEQ: _datetrunc_neq, } DATETRUNC_COMPARISONS = {exp.In, *DATETRUNC_BINARY_COMPARISONS} DATETRUNCS = (exp.DateTrunc, exp.TimestampTrunc) SAFE_CONNECTOR_ELIMINATION_RESULT = (exp.Connector, exp.Boolean) # CROSS joins result in an empty table if the right table is empty. # So we can only simplify certain types of joins to CROSS. # Or in other words, LEFT JOIN x ON TRUE != CROSS JOIN x JOINS = { ("", ""), ("", "INNER"), ("RIGHT", ""), ("RIGHT", "OUTER"), } def simplify( self, expression: exp.Expr, constant_propagation: bool = False, coalesce_simplification: bool = False, ): wheres = [] joins = [] for node in expression.walk( prune=lambda n: bool(isinstance(n, exp.Condition) or n.meta.get(FINAL)) ): if node.meta.get(FINAL): continue # group by expressions cannot be simplified, for example # select x + 1 + 1 FROM y GROUP BY x + 1 + 1 # the projection must exactly match the group by key group = node.args.get("group") if group and hasattr(node, "selects"): groups = set(group.expressions) group.meta[FINAL] = True for s in node.selects: for n in s.walk(): if n in groups: s.meta[FINAL] = True break having = node.args.get("having") if having: for n in having.walk(): if n in groups: having.meta[FINAL] = True break if isinstance(node, exp.Condition): simplified = while_changing( node, lambda e: self._simplify(e, constant_propagation, coalesce_simplification) ) if node is expression: expression = simplified elif isinstance(node, exp.Where): wheres.append(node) elif isinstance(node, exp.Join): # snowflake match_conditions have very strict ordering 
rules if match := node.args.get("match_condition"): match.meta[FINAL] = True joins.append(node) for where in wheres: if always_true(where.this): where.pop() for join in joins: if ( always_true(join.args.get("on")) and not join.args.get("using") and not join.args.get("method") and (join.side, join.kind) in self.JOINS ): join.args["on"].pop() join.set("side", None) join.set("kind", "CROSS") return expression def _simplify( self, expression: exp.Expr, constant_propagation: bool, coalesce_simplification: bool ): pre_transformation_stack = [expression] post_transformation_stack = [] while pre_transformation_stack: original = pre_transformation_stack.pop() node = original if not isinstance(node, SIMPLIFIABLE): if isinstance(node, exp.Query): self.simplify(node, constant_propagation, coalesce_simplification) continue parent = node.parent root = node is expression node = self.rewrite_between(node) node = self.uniq_sort(node, root) node = self.absorb_and_eliminate(node, root) node = self.simplify_concat(node) node = self.simplify_conditionals(node) if constant_propagation: node = propagate_constants(node, root) if node is not original: original.replace(node) for n in node.iter_expressions(reverse=True): if n.meta.get(FINAL): raise pre_transformation_stack.extend( n for n in node.iter_expressions(reverse=True) if not n.meta.get(FINAL) ) post_transformation_stack.append((node, parent)) while post_transformation_stack: original, parent = post_transformation_stack.pop() root = original is expression # Resets parent, arg_key, index pointers– this is needed because some of the # previous transformations mutate the AST, leading to an inconsistent state for k, v in tuple(original.args.items()): original.set(k, v) # Post-order transformations node = self.simplify_not(original) node = flatten(node) node = self.simplify_connectors(node, root) node = self.remove_complements(node, root) if coalesce_simplification: node = self.simplify_coalesce(node) node.parent = parent node = 
self.simplify_literals(node, root) node = self.simplify_equality(node) node = simplify_parens(node, dialect=self.dialect) node = self.simplify_datetrunc(node) node = self.sort_comparison(node) node = self.simplify_startswith(node) if node is not original: original.replace(node) return node @annotate_types_on_change def rewrite_between(self, expression: exp.Expr) -> exp.Expr: """Rewrite x between y and z to x >= y AND x <= z. This is done because comparison simplification is only done on lt/lte/gt/gte. """ if isinstance(expression, exp.Between): negate = isinstance(expression.parent, exp.Not) expression = exp.and_( exp.GTE(this=expression.this.copy(), expression=expression.args["low"]), exp.LTE(this=expression.this.copy(), expression=expression.args["high"]), copy=False, ) if negate: expression = exp.paren(expression, copy=False) return expression @annotate_types_on_change def simplify_not(self, expression: exp.Expr) -> exp.Expr: """ Demorgan's Law NOT (x OR y) -> NOT x AND NOT y NOT (x AND y) -> NOT x OR NOT y """ if isinstance(expression, exp.Not): this = expression.this if is_null(this): return exp.and_(exp.null(), exp.true(), copy=False) if this.__class__ in self.COMPLEMENT_COMPARISONS: right = this.expression complement_subquery_predicate = self.COMPLEMENT_SUBQUERY_PREDICATES.get( right.__class__ ) if complement_subquery_predicate: right = complement_subquery_predicate(this=right.this) return self.COMPLEMENT_COMPARISONS[this.__class__](this=this.this, expression=right) if isinstance(this, exp.Paren): condition = this.unnest() if isinstance(condition, exp.And): return exp.paren( exp.or_( exp.not_(condition.left, copy=False), exp.not_(condition.right, copy=False), copy=False, ), copy=False, ) if isinstance(condition, exp.Or): return exp.paren( exp.and_( exp.not_(condition.left, copy=False), exp.not_(condition.right, copy=False), copy=False, ), copy=False, ) if is_null(condition): return exp.and_(exp.null(), exp.true(), copy=False) if always_true(this): return 
    @annotate_types_on_change
    def simplify_connectors(self, expression, root=True):
        """
        Fold AND / OR connectors whose operands are statically known.

        Args:
            expression: the node to simplify; anything but a connector is returned as is.
            root: whether `expression` is the root of the current simplification
                pass (forwarded to `_flat_simplify`).

        Returns:
            The simplified expression.
        """

        def _simplify_connectors(expression, left, right):
            # Pairwise rule handed to `_flat_simplify`: returns the replacement for
            # `left <connector> right`. Connectors other than AND / OR fall through
            # and return None.
            if isinstance(expression, exp.And):
                if is_false(left) or is_false(right):
                    return exp.false()
                if is_zero(left) or is_zero(right):
                    return exp.false()
                # NULL AND NULL, NULL AND TRUE, TRUE AND NULL -> NULL
                if (
                    (is_null(left) and is_null(right))
                    or (is_null(left) and always_true(right))
                    or (always_true(left) and is_null(right))
                ):
                    return exp.null()
                if always_true(left) and always_true(right):
                    return exp.true()
                if always_true(left):
                    return right
                if always_true(right):
                    return left
                return self._simplify_comparison(expression, left, right)
            elif isinstance(expression, exp.Or):
                if always_true(left) or always_true(right):
                    return exp.true()
                # NULL OR NULL, NULL OR <falsy>, <falsy> OR NULL -> NULL
                if (
                    (is_null(left) and is_null(right))
                    or (is_null(left) and always_false(right))
                    or (always_false(left) and is_null(right))
                ):
                    return exp.null()
                if is_false(left):
                    return right
                if is_false(right):
                    return left
                return self._simplify_comparison(expression, left, right, or_=True)

        if isinstance(expression, exp.Connector):
            # Captured before folding, since the result may be a different node
            original_parent = expression.parent
            expression = self._flat_simplify(expression, _simplify_connectors, root)

            # If we reduced a connector to, e.g., a column (t1 AND ... AND tn -> Tk), then we need
            # to ensure that the resulting type is boolean. We know this is true only for connectors,
            # boolean values and columns that are essentially operands to a connector:
            #
            # A AND (((B)))
            #          ~ this is safe to keep because it will eventually be part of another connector
            if not isinstance(
                expression, self.SAFE_CONNECTOR_ELIMINATION_RESULT
            ) and not expression.is_type(exp.DType.BOOLEAN):
                # Walk up through any wrapping parentheses looking for an enclosing connector
                while True:
                    if isinstance(original_parent, exp.Connector):
                        break
                    if not isinstance(original_parent, exp.Paren):
                        # No enclosing connector: keep the result boolean with `<expr> AND TRUE`
                        expression = expression.and_(exp.true(), copy=False)
                        break

                    original_parent = original_parent.parent

        return expression
isinstance(b, exp.LT): return exp.false() if av >= bv else a if isinstance(b, exp.LTE): return exp.false() if av > bv else a if isinstance(b, exp.GT): return exp.false() if av <= bv else a if isinstance(b, exp.GTE): return exp.false() if av < bv else a if isinstance(b, exp.NEQ): return exp.false() if av == bv else a return None @annotate_types_on_change def remove_complements(self, expression, root=True): """ Removing complements. A AND NOT A -> FALSE (only for non-NULL A) A OR NOT A -> TRUE (only for non-NULL A) """ if isinstance(expression, self.AND_OR) and (root or not expression.same_parent): ops = set(expression.flatten()) for op in ops: if isinstance(op, exp.Not) and op.this in ops: if expression.meta.get("nonnull") is True: return exp.false() if isinstance(expression, exp.And) else exp.true() return expression @annotate_types_on_change def uniq_sort(self, expression, root=True): """ Uniq and sort a connector. C AND A AND B AND B -> A AND B AND C """ if isinstance(expression, exp.Connector) and (root or not expression.same_parent): flattened = tuple(expression.flatten()) if isinstance(expression, exp.Xor): result_func = exp.xor # Do not deduplicate XOR as A XOR A != A if A == True deduped = None arr = tuple((gen(e), e) for e in flattened) else: result_func = exp.and_ if isinstance(expression, exp.And) else exp.or_ deduped = {gen(e): e for e in flattened} arr = tuple(deduped.items()) # check if the operands are already sorted, if not sort them # A AND C AND B -> A AND B AND C for i, (sql, e) in enumerate(arr[1:]): if sql < arr[i][0]: expression = result_func(*(e for _, e in sorted(arr)), copy=False) break else: # we didn't have to sort but maybe we need to dedup if deduped and len(deduped) < len(flattened): unique_operand = flattened[0] if len(deduped) == 1: expression = unique_operand.and_(exp.true(), copy=False) else: expression = result_func(*deduped.values(), copy=False) return expression @annotate_types_on_change def absorb_and_eliminate(self, expression, 
root=True): """ absorption: A AND (A OR B) -> A A OR (A AND B) -> A A AND (NOT A OR B) -> A AND B A OR (NOT A AND B) -> A OR B elimination: (A AND B) OR (A AND NOT B) -> A (A OR B) AND (A OR NOT B) -> A """ if isinstance(expression, self.AND_OR) and (root or not expression.same_parent): kind = exp.Or if isinstance(expression, exp.And) else exp.And ops = tuple(expression.flatten()) # Initialize lookup tables: # Set of all operands, used to find complements for absorption. op_set = set() # Sub-operands, used to find subsets for absorption. subops = defaultdict(list) # Pairs of complements, used for elimination. pairs = defaultdict(list) # Populate the lookup tables for op in ops: op_set.add(op) if not isinstance(op, kind): # In cases like: A OR (A AND B) # Subop will be: ^ subops[op].append({op}) continue # In cases like: (A AND B) OR (A AND B AND C) # Subops will be: ^ ^ subset = set(op.flatten()) for i in subset: subops[i].append(subset) a, b = op.unnest_operands() if isinstance(a, exp.Not): pairs[frozenset((a.this, b))].append((op, b)) if isinstance(b, exp.Not): pairs[frozenset((a, b.this))].append((op, a)) for op in ops: if not isinstance(op, kind): continue a, b = op.unnest_operands() # Absorb if isinstance(a, exp.Not) and a.this in op_set: a.replace(exp.true() if kind == exp.And else exp.false()) continue if isinstance(b, exp.Not) and b.this in op_set: b.replace(exp.true() if kind == exp.And else exp.false()) continue superset = set(op.flatten()) if any(any(subset < superset for subset in subops[i]) for i in superset): op.replace(exp.false() if kind == exp.And else exp.true()) continue # Eliminate for other, complement in pairs[frozenset((a, b))]: op.replace(complement) other.replace(complement) return expression @annotate_types_on_change @catch(ModuleNotFoundError, UnsupportedUnit) def simplify_equality(self, expression: exp.Expr) -> exp.Expr: """ Use the subtraction and addition properties of equality to simplify expressions: x + 1 = 3 becomes x = 2 There are 
two binary operations in the above expression: + and = Here's how we reference all the operands in the code below: l r x + 1 = 3 a b """ if isinstance(expression, self.COMPARISONS): l, r = expression.left, expression.right if l.__class__ not in self.INVERSE_OPS: return expression if r.is_number: a_predicate = _is_number b_predicate = _is_number elif _is_date_literal(r): a_predicate = _is_date_literal b_predicate = _is_interval else: return expression if l.__class__ in self.INVERSE_DATE_OPS: l = t.cast(exp.IntervalOp, l) a: exp.Expr = l.this b: exp.Expr = l.interval() else: l = t.cast(exp.Binary, l) a, b = l.left, l.right if not a_predicate(a) and b_predicate(b): pass elif not a_predicate(b) and b_predicate(a): a, b = b, a else: return expression return expression.__class__( this=a, expression=self.INVERSE_OPS[l.__class__](this=r, expression=b) ) return expression @annotate_types_on_change def simplify_literals(self, expression, root=True): if isinstance(expression, exp.Binary) and not isinstance(expression, exp.Connector): return self._flat_simplify(expression, self._simplify_binary, root) if isinstance(expression, exp.Neg) and isinstance(expression.this, exp.Neg): return expression.this.this if type(expression) in self.INVERSE_DATE_OPS: return ( self._simplify_binary(expression, expression.this, expression.interval()) or expression ) return expression def _simplify_integer_cast(self, expr: exp.Expr) -> exp.Expr: if isinstance(expr, exp.Cast) and isinstance(expr.this, exp.Cast): this = self._simplify_integer_cast(expr.this) else: this = expr.this if isinstance(expr, exp.Cast) and this.is_int: num = this.to_py() # Remove the (up)cast from small (byte-sized) integers in predicates which is side-effect free. 
Downcasts on any # integer type might cause overflow, thus the cast cannot be eliminated and the behavior is # engine-dependent if ( self.TINYINT_MIN <= num <= self.TINYINT_MAX and expr.to.this in exp.DataType.SIGNED_INTEGER_TYPES ) or ( self.UTINYINT_MIN <= num <= self.UTINYINT_MAX and expr.to.this in exp.DataType.UNSIGNED_INTEGER_TYPES ): return this return expr def _simplify_binary(self, expression, a, b): if isinstance(expression, self.COMPARISONS): a = self._simplify_integer_cast(a) b = self._simplify_integer_cast(b) if isinstance(expression, exp.Is): if isinstance(b, exp.Not): c = b.this not_ = True else: c = b not_ = False if is_null(c): if isinstance(a, exp.Literal): return exp.true() if not_ else exp.false() if is_null(a): return exp.false() if not_ else exp.true() elif isinstance(expression, self.NULL_OK): return None elif (is_null(a) or is_null(b)) and isinstance(expression.parent, exp.If): return exp.null() if a.is_number and b.is_number: num_a = a.to_py() num_b = b.to_py() if isinstance(expression, exp.Add): return exp.Literal.number(num_a + num_b) if isinstance(expression, exp.Mul): return exp.Literal.number(num_a * num_b) # We only simplify Sub, Div if a and b have the same parent because they're not associative if isinstance(expression, exp.Sub): return exp.Literal.number(num_a - num_b) if a.parent is b.parent else None if isinstance(expression, exp.Div): # engines have differing int div behavior so intdiv is not safe if (isinstance(num_a, int) and isinstance(num_b, int)) or a.parent is not b.parent: return None return exp.Literal.number(num_a / num_b) boolean = eval_boolean(expression, num_a, num_b) if boolean: return boolean elif a.is_string and b.is_string: boolean = eval_boolean(expression, a.this, b.this) if boolean: return boolean elif _is_date_literal(a) and isinstance(b, exp.Interval): date, b = extract_date(a), extract_interval(b) if date and b: if isinstance(expression, (exp.Add, exp.DateAdd, exp.DatetimeAdd)): return date_literal(date + 
b, extract_type(a)) if isinstance(expression, (exp.Sub, exp.DateSub, exp.DatetimeSub)): return date_literal(date - b, extract_type(a)) elif isinstance(a, exp.Interval) and _is_date_literal(b): a, date = extract_interval(a), extract_date(b) # you cannot subtract a date from an interval if a and b and isinstance(expression, exp.Add): return date_literal(a + date, extract_type(b)) elif _is_date_literal(a) and _is_date_literal(b): if isinstance(expression, exp.Predicate): a, b = extract_date(a), extract_date(b) boolean = eval_boolean(expression, a, b) if boolean: return boolean return None @annotate_types_on_change def simplify_coalesce(self, expression: exp.Expr) -> exp.Expr: # COALESCE(x) -> x if ( isinstance(expression, exp.Coalesce) and (not expression.expressions or _is_nonnull_constant(expression.this)) # COALESCE is also used as a Spark partitioning hint and not isinstance(expression.parent, exp.Hint) ): return expression.this if self.dialect.COALESCE_COMPARISON_NON_STANDARD: return expression if not isinstance(expression, self.COMPARISONS): return expression if isinstance(expression.left, exp.Coalesce): coalesce = expression.left other = expression.right elif isinstance(expression.right, exp.Coalesce): coalesce = expression.right other = expression.left else: return expression # This transformation is valid for non-constants, # but it really only does anything if they are both constants. if not _is_constant(other): return expression # Find the first constant arg for arg_index, arg in enumerate(coalesce.expressions): if _is_constant(arg): break else: return expression coalesce.set("expressions", coalesce.expressions[:arg_index]) # Remove the COALESCE function. This is an optimization, skipping a simplify iteration, # since we already remove COALESCE at the top of this function. 
coalesce = coalesce if coalesce.expressions else coalesce.this # This expression is more complex than when we started, but it will get simplified further return exp.paren( exp.or_( exp.and_( coalesce.is_(exp.null()).not_(copy=False), expression.copy(), copy=False, ), exp.and_( coalesce.is_(exp.null()), type(expression)(this=arg.copy(), expression=other.copy()), copy=False, ), copy=False, ), copy=False, ) @annotate_types_on_change def simplify_concat(self, expression): """Reduces all groups that contain string literals by concatenating them.""" if not isinstance(expression, self.CONCATS) or ( # We can't reduce a CONCAT_WS call if we don't statically know the separator isinstance(expression, exp.ConcatWs) and not expression.expressions[0].is_string ): return expression if isinstance(expression, exp.ConcatWs): sep_expr, *expressions = expression.expressions sep = sep_expr.name concat_type = exp.ConcatWs args = {} else: expressions = expression.expressions sep = "" concat_type = exp.Concat args = { "safe": expression.args.get("safe"), "coalesce": expression.args.get("coalesce"), } new_args = [] for is_string_group, group in itertools.groupby( expressions or expression.flatten(), lambda e: e.is_string ): if is_string_group: new_args.append(exp.Literal.string(sep.join(string.name for string in group))) else: new_args.extend(group) if len(new_args) == 1 and new_args[0].is_string: return new_args[0] if concat_type is exp.ConcatWs: new_args = [sep_expr] + new_args elif isinstance(expression, exp.DPipe): return reduce(lambda x, y: exp.DPipe(this=x, expression=y), new_args) return concat_type(expressions=new_args, **args) @annotate_types_on_change def simplify_conditionals(self, expression): """Simplifies expressions like IF, CASE if their condition is statically known.""" if isinstance(expression, exp.Case): this = expression.this for case in expression.args["ifs"]: cond = case.this if this: # Convert CASE x WHEN matching_value ... to CASE WHEN x = matching_value ... 
cond = cond.replace(this.pop().eq(cond)) if always_true(cond): return case.args["true"] if always_false(cond): case.pop() if not expression.args["ifs"]: return expression.args.get("default") or exp.null() elif isinstance(expression, exp.If) and not isinstance(expression.parent, exp.Case): if always_true(expression.this): return expression.args["true"] if always_false(expression.this): return expression.args.get("false") or exp.null() return expression @annotate_types_on_change def simplify_startswith(self, expression: exp.Expr) -> exp.Expr: """ Reduces a prefix check to either TRUE or FALSE if both the string and the prefix are statically known. Example: >>> from sqlglot import parse_one >>> Simplifier().simplify_startswith(parse_one("STARTSWITH('foo', 'f')")).sql() 'TRUE' """ if ( isinstance(expression, exp.StartsWith) and expression.this.is_string and expression.expression.is_string ): return exp.convert(expression.name.startswith(expression.expression.name)) return expression def _is_datetrunc_predicate(self, left: exp.Expr, right: exp.Expr) -> bool: return isinstance(left, self.DATETRUNCS) and _is_date_literal(right) @annotate_types_on_change @catch(ModuleNotFoundError, UnsupportedUnit) def simplify_datetrunc(self, expression: exp.Expr) -> exp.Expr: """Simplify expressions like `DATE_TRUNC('year', x) >= CAST('2021-01-01' AS DATE)`""" comparison = expression.__class__ if isinstance(expression, self.DATETRUNCS): this = expression.this trunc_type = extract_type(this) date = extract_date(this) if date and expression.unit: return date_literal( date_floor(date, expression.unit.name.lower(), self.dialect), trunc_type ) elif comparison not in self.DATETRUNC_COMPARISONS: return expression if isinstance(expression, exp.Binary): l, r = expression.left, expression.right if not self._is_datetrunc_predicate(l, r): return expression l = t.cast(exp.DateTrunc, l) trunc_arg = l.this unit = l.unit.name.lower() date = extract_date(r) if not date: return expression return ( 
self.DATETRUNC_BINARY_COMPARISONS[comparison]( trunc_arg, date, unit, self.dialect, extract_type(r) ) or expression ) if isinstance(expression, exp.In): l = expression.this rs = expression.expressions if rs and all(self._is_datetrunc_predicate(l, r) for r in rs): l = t.cast(exp.DateTrunc, l) unit = l.unit.name.lower() ranges = [] for r in rs: date = extract_date(r) if not date: return expression drange = _datetrunc_range(date, unit, self.dialect) if drange: ranges.append(drange) if not ranges: return expression ranges = merge_ranges(ranges) target_type = extract_type(*rs) return exp.or_( *[_datetrunc_eq_expression(l, drange, target_type) for drange in ranges], copy=False, ) return expression @annotate_types_on_change def sort_comparison(self, expression: exp.Expr) -> exp.Expr: if expression.__class__ in self.COMPLEMENT_COMPARISONS: l, r = expression.this, expression.expression l_column = isinstance(l, exp.Column) r_column = isinstance(r, exp.Column) l_const = _is_constant(l) r_const = _is_constant(r) if ( (l_column and not r_column) or (r_const and not l_const) or isinstance(r, exp.SubqueryPredicate) ): return expression if (r_column and not l_column) or (l_const and not r_const) or (gen(l) > gen(r)): return self.INVERSE_COMPARISONS.get(expression.__class__, expression.__class__)( this=r, expression=l ) return expression def _flat_simplify(self, expression, simplifier, root=True): if root or not expression.same_parent: operands = [] queue = deque(expression.flatten(unnest=False)) size = len(queue) while queue: a = queue.popleft() for b in queue: result = simplifier(expression, a, b) if result and result is not expression: queue.remove(b) queue.appendleft(result) break else: operands.append(a) if len(operands) < size: return functools.reduce( lambda a, b: expression.__class__(this=a, expression=b), operands ) return expression def gen(expression: t.Any, comments: bool = False) -> str: """Simple pseudo sql generator for quickly generating sortable and uniq strings. 
Sorting and deduping sql is a necessary step for optimization. Calling the actual generator is expensive so we have a bare minimum sql generator here. Args: expression: the expression to convert into a SQL string. comments: whether to include the expression's comments. """ return Gen().gen(expression, comments=comments) class Gen: def __init__(self): self.stack = [] self.sqls = [] def gen(self, expression: exp.Expr, comments: bool = False) -> str: self.stack = [expression] self.sqls.clear() while self.stack: node = self.stack.pop() if isinstance(node, exp.Expr): if comments and node.comments: self.stack.append(f" /*{','.join(node.comments)}*/") exp_handler_name = f"{node.key}_sql" if hasattr(self, exp_handler_name): getattr(self, exp_handler_name)(node) elif isinstance(node, exp.Func): self._function(node) else: key = node.key.upper() self.stack.append(f"{key} " if self._args(node) else key) elif type(node) is list: for n in reversed(node): if n is not None: self.stack.extend((n, ",")) if node: self.stack.pop() else: if node is not None: self.sqls.append(str(node)) return "".join(self.sqls) def add_sql(self, e: exp.Add) -> None: self._binary(e, " + ") def alias_sql(self, e: exp.Alias) -> None: self.stack.extend( ( e.args.get("alias"), " AS ", e.args.get("this"), ) ) def and_sql(self, e: exp.And) -> None: self._binary(e, " AND ") def anonymous_sql(self, e: exp.Anonymous) -> None: this = e.this if isinstance(this, str): name = this.upper() elif isinstance(this, exp.Identifier): name = this.this name = f'"{name}"' if this.quoted else name.upper() else: raise ValueError( f"Anonymous.this expects a str or an Identifier, got '{this.__class__.__name__}'." 
) self.stack.extend( ( ")", e.expressions, "(", name, ) ) def between_sql(self, e: exp.Between) -> None: self.stack.extend( ( e.args.get("high"), " AND ", e.args.get("low"), " BETWEEN ", e.this, ) ) def boolean_sql(self, e: exp.Boolean) -> None: self.stack.append("TRUE" if e.this else "FALSE") def bracket_sql(self, e: exp.Bracket) -> None: self.stack.extend( ( "]", e.expressions, "[", e.this, ) ) def column_sql(self, e: exp.Column) -> None: for p in reversed(e.parts): self.stack.extend((p, ".")) self.stack.pop() def datatype_sql(self, e: exp.DataType) -> None: self._args(e, 1) self.stack.append(f"{e.this.name} ") def div_sql(self, e: exp.Div) -> None: self._binary(e, " / ") def dot_sql(self, e: exp.Dot) -> None: self._binary(e, ".") def eq_sql(self, e: exp.EQ) -> None: self._binary(e, " = ") def from_sql(self, e: exp.From) -> None: self.stack.extend((e.this, "FROM ")) def gt_sql(self, e: exp.GT) -> None: self._binary(e, " > ") def gte_sql(self, e: exp.GTE) -> None: self._binary(e, " >= ") def identifier_sql(self, e: exp.Identifier) -> None: self.stack.append(f'"{e.this}"' if e.quoted else e.this) def ilike_sql(self, e: exp.ILike) -> None: self._binary(e, " ILIKE ") def in_sql(self, e: exp.In) -> None: self.stack.append(")") self._args(e, 1) self.stack.extend( ( "(", " IN ", e.this, ) ) def intdiv_sql(self, e: exp.IntDiv) -> None: self._binary(e, " DIV ") def is_sql(self, e: exp.Is) -> None: self._binary(e, " IS ") def like_sql(self, e: exp.Like) -> None: self._binary(e, " Like ") def literal_sql(self, e: exp.Literal) -> None: self.stack.append(f"'{e.this}'" if e.is_string else e.this) def lt_sql(self, e: exp.LT) -> None: self._binary(e, " < ") def lte_sql(self, e: exp.LTE) -> None: self._binary(e, " <= ") def mod_sql(self, e: exp.Mod) -> None: self._binary(e, " % ") def mul_sql(self, e: exp.Mul) -> None: self._binary(e, " * ") def neg_sql(self, e: exp.Neg) -> None: self._unary(e, "-") def neq_sql(self, e: exp.NEQ) -> None: self._binary(e, " <> ") def not_sql(self, 
e: exp.Not) -> None: self._unary(e, "NOT ") def null_sql(self, e: exp.Null) -> None: self.stack.append("NULL") def or_sql(self, e: exp.Or) -> None: self._binary(e, " OR ") def paren_sql(self, e: exp.Paren) -> None: self.stack.extend( ( ")", e.this, "(", ) ) def sub_sql(self, e: exp.Sub) -> None: self._binary(e, " - ") def subquery_sql(self, e: exp.Subquery) -> None: self._args(e, 2) alias = e.args.get("alias") if alias: self.stack.append(alias) self.stack.extend((")", e.this, "(")) def table_sql(self, e: exp.Table) -> None: self._args(e, 4) alias = e.args.get("alias") if alias: self.stack.append(alias) for p in reversed(e.parts): self.stack.extend((p, ".")) self.stack.pop() def tablealias_sql(self, e: exp.TableAlias) -> None: columns = e.columns if columns: self.stack.extend((")", columns, "(")) self.stack.extend((e.this, " AS ")) def var_sql(self, e: exp.Var) -> None: self.stack.append(e.this) def _binary(self, e: exp.Binary, op: str) -> None: self.stack.extend((e.expression, op, e.this)) def _unary(self, e: exp.Unary, op: str) -> None: self.stack.extend((e.this, op)) def _function(self, e: exp.Func) -> None: self.stack.extend( ( ")", list(e.args.values()), "(", e.sql_name(), ) ) def _args(self, node: exp.Expr, arg_index: int = 0) -> bool: kvs = [] arg_types = list(node.arg_types)[arg_index:] if arg_index else node.arg_types for k in arg_types: v = node.args.get(k) if v is not None: kvs.append([f":{k}", v]) if kvs: self.stack.append(kvs) return True return False ================================================ FILE: sqlglot/optimizer/unnest_subqueries.py ================================================ from sqlglot import exp from sqlglot.helper import name_sequence from sqlglot.optimizer.scope import ScopeType, find_in_scope, traverse_scope def unnest_subqueries(expression): """ Rewrite sqlglot AST to convert some predicates with subqueries into joins. Convert scalar subqueries into cross joins. 
Convert correlated or vectorized subqueries into a group by so it is not a many to many left join. Example: >>> import sqlglot >>> expression = sqlglot.parse_one("SELECT * FROM x AS x WHERE (SELECT y.a AS a FROM y AS y WHERE x.a = y.a) = 1 ") >>> unnest_subqueries(expression).sql() 'SELECT * FROM x AS x LEFT JOIN (SELECT y.a AS a FROM y AS y WHERE TRUE GROUP BY y.a) AS _u_0 ON x.a = _u_0.a WHERE _u_0.a = 1' Args: expression (sqlglot.Expr): expression to unnest Returns: sqlglot.Expr: unnested expression """ next_alias_name = name_sequence("_u_") for scope in traverse_scope(expression): select = scope.expression parent = select.parent_select if not parent: continue if scope.external_columns: decorrelate(select, parent, scope.external_columns, next_alias_name) elif scope.scope_type == ScopeType.SUBQUERY: unnest(select, parent, next_alias_name) return expression


def unnest(select, parent_select, next_alias_name):
    """Turn the subquery `select` into a join on `parent_select`, modifying the tree in place.

    The function returns None. It leaves the tree untouched when the subquery has more
    than one projection, when no enclosing condition qualifies (see the checks below), or
    when an IN / ANY subquery contains a LIMIT or OFFSET.

    Args:
        select: the subquery expression to unnest.
        parent_select: the query that contains `select` and receives the join.
        next_alias_name: zero-argument callable returning a fresh alias on every call.
    """
    if len(select.selects) > 1:
        return

    predicate = select.find_ancestor(exp.Condition)
    if (
        not predicate
        # Do not unnest subqueries inside table-valued functions such as
        # FROM GENERATE_SERIES(...), FROM UNNEST(...) etc in order to preserve join order
        or (
            isinstance(predicate, exp.Func)
            and isinstance(predicate.parent, (exp.Table, exp.From, exp.Join))
        )
        or parent_select is not predicate.parent_select
        or not parent_select.args.get("from_")
    ):
        return

    # A set operation is first wrapped into a plain SELECT over an aliased subquery
    if isinstance(select, exp.SetOperation):
        select = exp.select(*select.selects).from_(select.subquery(next_alias_name()))

    alias = next_alias_name()
    clause = predicate.find_ancestor(exp.Having, exp.Where, exp.Join)

    # This subquery returns a scalar and can just be converted to a cross join
    if not isinstance(predicate, (exp.In, exp.Any)):
        column = exp.column(select.selects[0].alias_or_name, alias)

        clause_parent_select = clause.parent_select if clause else None

        # The joined column is wrapped in MAX when it is referenced from the parent's HAVING,
        # or from outside the parent's own HAVING / WHERE / JOIN while the parent groups or
        # aggregates
        if (isinstance(clause, exp.Having) and clause_parent_select is parent_select) or (
            (not clause or clause_parent_select is not parent_select)
            and (
                parent_select.args.get("group")
                or any(find_in_scope(select, exp.AggFunc) for select in parent_select.selects)
            )
        ):
            column = exp.Max(this=column)
        elif not isinstance(select.parent, exp.Subquery):
            return

        join_type = "CROSS"
        on_clause = None
        if isinstance(predicate, exp.Exists):
            # If a subquery returns no rows, cross-joining against it incorrectly eliminates all rows
            # from the parent query. Therefore, we use a LEFT JOIN that always matches (ON TRUE), then
            # check for non-NULL column values to determine whether the subquery contained rows.
            column = column.is_(exp.null()).not_()
            join_type = "LEFT"
            on_clause = exp.true()

        _replace(select.parent, column)
        parent_select.join(select, on=on_clause, join_type=join_type, join_alias=alias, copy=False)
        return

    if select.find(exp.Limit, exp.Offset):
        return

    # For ANY, the predicate to rewrite is the enclosing equality
    if isinstance(predicate, exp.Any):
        predicate = predicate.find_ancestor(exp.EQ)

        if not predicate or parent_select is not predicate.parent_select:
            return

    column = _other_operand(predicate)
    value = select.selects[0]

    join_key = exp.column(value.alias, alias)
    join_key_not_null = join_key.is_(exp.null()).not_()

    # Inside a JOIN clause the predicate becomes TRUE and the NOT NULL check moves to the
    # parent's WHERE; otherwise the NOT NULL check replaces the predicate directly
    if isinstance(clause, exp.Join):
        _replace(predicate, exp.true())
        parent_select.where(join_key_not_null, copy=False)
    else:
        _replace(predicate, join_key_not_null)

    group = select.args.get("group")

    if group:
        # Grouped by something other than exactly the projected value: wrap the subquery
        # and group the wrapper by that value
        if {value.this} != set(group.expressions):
            select = (
                exp.select(exp.alias_(exp.column(value.alias, "_q"), value.alias))
                .from_(select.subquery("_q", copy=False), copy=False)
                .group_by(exp.column(value.alias, "_q"), copy=False)
            )
    elif not find_in_scope(value.this, exp.AggFunc):
        select = select.group_by(value.this, copy=False)

    parent_select.join(
        select,
        on=column.eq(join_key),
        join_type="LEFT",
        join_alias=alias,
        copy=False,
    )


def decorrelate(select, parent_select, external_columns, next_alias_name):
    """Turn the correlated subquery `select` into a LEFT JOIN on `parent_select`, in place.

    The function returns None. It leaves the tree untouched when the subquery has no
    WHERE clause, when that WHERE contains an OR, when the subquery contains a LIMIT or
    OFFSET, when an external column is not used inside a binary predicate of that WHERE,
    when none of those predicates is an equality, or when the subquery is neither below
    a predicate nor a direct projection of the parent.

    Args:
        select: the correlated subquery expression.
        parent_select: the query that contains `select` and receives the join.
        external_columns: the columns referenced in `select` that belong to an outer query.
        next_alias_name: zero-argument callable returning a fresh alias on every call.
    """
    where = select.args.get("where")

    if not where or where.find(exp.Or) or select.find(exp.Limit, exp.Offset):
        return

    table_alias = next_alias_name()
    keys = []

    # for all external columns in the where statement, find the relevant predicate
    # keys to convert it into a join
    for column in external_columns:
        if column.find_ancestor(exp.Where) is not where:
            return

        predicate = column.find_ancestor(exp.Predicate)

        if not predicate or predicate.find_ancestor(exp.Where) is not where:
            return

        if isinstance(predicate, exp.Binary):
            # the key is the operand that does not contain the external column
            key = (
                predicate.right
                if any(node is column for node in predicate.left.walk())
                else predicate.left
            )
        else:
            return

        keys.append((key, column, predicate))

    if not any(isinstance(predicate, exp.EQ) for *_, predicate in keys):
        return

    # whether the subquery itself is one of the parent's projections (ignoring its alias)
    is_subquery_projection = any(
        node is select.parent
        for node in map(lambda s: s.unalias(), parent_select.selects)
        if isinstance(node, exp.Subquery)
    )

    value = select.selects[0]
    key_aliases = {}
    group_by = []

    for key, _, predicate in keys:
        # if we filter on the value of the subquery, it needs to be unique
        if key == value.this:
            key_aliases[key] = value.alias
            group_by.append(key)
        else:
            if key not in key_aliases:
                key_aliases[key] = next_alias_name()
            # all predicates that are equalities must also be in the unique
            # so that we don't do a many to many join
            if isinstance(predicate, exp.EQ) and key not in group_by:
                group_by.append(key)

    parent_predicate = select.find_ancestor(exp.Predicate)

    # When the subquery is embedded inside a function (e.g. COALESCE, TRIM) in the SELECT list,
    # the ancestor chain contains no Predicate node AND the subquery is not a direct projection.
    if parent_predicate is None and not is_subquery_projection:
        return

    # if the value of the subquery is not an agg or a key, we need to collect it into an array
    # so that it can be grouped. For subquery projections, we use a MAX aggregation instead.
    agg_func = exp.Max if is_subquery_projection else exp.ArrayAgg
    if not value.find(exp.AggFunc) and value.this not in group_by:
        select.select(
            exp.alias_(agg_func(this=value.this), value.alias, quoted=False),
            append=False,
            copy=False,
        )

    # exists queries should not have any selects as it only checks if there are any rows
    # all selects will be added by the optimizer and only used for join keys
    if isinstance(parent_predicate, exp.Exists):
        select.set("expressions", [])

    for key, alias in key_aliases.items():
        if key in group_by:
            # add all keys to the projections of the subquery
            # so that we can use it as a join key
            if isinstance(parent_predicate, exp.Exists) or key != value.this:
                select.select(f"{key} AS {alias}", copy=False)
        else:
            # keys that are not grouped are projected through the aggregation instead
            select.select(exp.alias_(agg_func(this=key.copy()), alias, quoted=False), copy=False)

    alias = exp.column(value.alias, table_alias)
    other = _other_operand(parent_predicate)
    op_type = type(parent_predicate.parent) if parent_predicate else None

    # rewrite the predicate that wraps the subquery so that it refers to the joined columns
    if isinstance(parent_predicate, exp.Exists):
        alias = exp.column(list(key_aliases.values())[0], table_alias)
        parent_predicate = _replace(parent_predicate, f"NOT {alias} IS NULL")
    elif isinstance(parent_predicate, exp.All):
        assert issubclass(op_type, exp.Binary)
        predicate = op_type(this=other, expression=exp.column("_x"))
        parent_predicate = _replace(
            parent_predicate.parent, f"ARRAY_ALL({alias}, _x -> {predicate})"
        )
    elif isinstance(parent_predicate, exp.Any):
        assert issubclass(op_type, exp.Binary)
        if value.this in group_by:
            predicate = op_type(this=other, expression=alias)
            parent_predicate = _replace(parent_predicate.parent, predicate)
        else:
            predicate = op_type(this=other, expression=exp.column("_x"))
            parent_predicate = _replace(parent_predicate, f"ARRAY_ANY({alias}, _x -> {predicate})")
    elif isinstance(parent_predicate, exp.In):
        if value.this in group_by:
            parent_predicate = _replace(parent_predicate, f"{other} = {alias}")
        else:
            parent_predicate = _replace(
                parent_predicate,
                f"ARRAY_ANY({alias}, _x -> _x = {parent_predicate.this})",
            )
    else:
        if is_subquery_projection and select.parent.alias:
            alias = exp.alias_(alias, select.parent.alias)

        # COUNT always returns 0 on empty datasets, so we need take that into consideration here
        # by transforming all counts into 0 and using that as the coalesced value
        if value.find(exp.Count):

            def remove_aggs(node):
                if isinstance(node, exp.Count):
                    return exp.Literal.number(0)
                elif isinstance(node, exp.AggFunc):
                    return exp.null()
                return node

            alias = exp.Coalesce(this=alias, expressions=[value.this.transform(remove_aggs)])

        select.parent.replace(alias)

    for key, column, predicate in keys:
        # the correlated predicate is neutralized inside the subquery ...
        predicate.replace(exp.true())
        nested = exp.column(key_aliases[key], table_alias)

        if is_subquery_projection:
            key.replace(nested)
            # ... and, unless it is an equality (those become the join condition below),
            # re-applied as a filter on the parent query
            if not isinstance(predicate, exp.EQ):
                parent_select.where(predicate, copy=False)
            continue

        if key in group_by:
            key.replace(nested)
        elif isinstance(predicate, exp.EQ):
            parent_predicate = _replace(
                parent_predicate,
                f"({parent_predicate} AND ARRAY_CONTAINS({nested}, {column}))",
            )
        else:
            key.replace(exp.to_identifier("_x"))
            parent_predicate = _replace(
                parent_predicate,
                f"({parent_predicate} AND ARRAY_ANY({nested}, _x -> {predicate}))",
            )

    # join on the equality predicates, which by now reference the joined subquery's columns
    parent_select.join(
        select.group_by(*group_by, copy=False),
        on=[predicate for *_, predicate in keys if isinstance(predicate, exp.EQ)],
        join_type="LEFT",
        join_alias=table_alias,
        copy=False,
    )


def _replace(expression, condition):
    """Replace `expression` in its tree with `condition` and return the result of `replace`.

    `condition` is passed through `exp.condition`, so callers in this module hand in
    either an expression or a SQL string.
    """
    return expression.replace(exp.condition(condition))


def _other_operand(expression):
    """Return the operand that a subquery predicate is compared against, if any.

    For IN this is the left-hand value; for ANY / ALL the lookup is delegated to the
    parent node; for a binary node it is the side that is not a subquery or a subquery
    predicate. Any other input, including None, yields None.
    """
    if isinstance(expression, exp.In):
        return expression.this

    if isinstance(expression, (exp.Any, exp.All)):
        return _other_operand(expression.parent)

    if isinstance(expression, exp.Binary):
        return (
            expression.right
            if isinstance(expression.left, (exp.Subquery, exp.Any, exp.Exists, exp.All))
            else expression.left
        )

    return None


================================================ FILE: sqlglot/parser.py ================================================ from __future__ import annotations import itertools
import logging import re import typing as t from collections import defaultdict from sqlglot import exp from sqlglot.errors import ( ErrorLevel, ParseError, TokenError, concat_messages, highlight_sql, merge_errors, ) from sqlglot.expressions import apply_index_offset from sqlglot.helper import ensure_list, i64, seq_get from sqlglot.trie import new_trie from sqlglot.time import format_time from sqlglot.tokens import Token, Tokenizer, TokenType from sqlglot.trie import TrieResult, in_trie from collections.abc import Sequence if t.TYPE_CHECKING: from sqlglot._typing import E from sqlglot.dialects.dialect import Dialect, DialectType from re import Pattern T = t.TypeVar("T") TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) logger = logging.getLogger("sqlglot") OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]] # Used to detect alphabetical characters and +/- in timestamp literals TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") def build_var_map(args: list) -> exp.StarMap | exp.VarMap: if len(args) == 1 and args[0].is_star: return exp.StarMap(this=args[0]) keys = [] values = [] for i in range(0, len(args), 2): keys.append(args[i]) values.append(args[i + 1]) return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) def build_like(args: t.List) -> exp.Escape | exp.Like: like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like def binary_range_parser( expr_type: t.Type[exp.Expr], reverse_args: bool = False ) -> t.Callable[[Parser, t.Optional[exp.Expr]], t.Optional[exp.Expr]]: def _parse_binary_range(self: Parser, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]: expression = self._parse_bitwise() if reverse_args: this, expression = expression, this return self._parse_escape(self.expression(expr_type(this=this, expression=expression))) return _parse_binary_range def build_logarithm(args: t.List, dialect: 
Dialect) -> exp.Func: # Default argument order is base, expression this = seq_get(args, 0) expression = seq_get(args, 1) if expression: if not dialect.LOG_BASE_FIRST: this, expression = expression, this return exp.Log(this=this, expression=expression) return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: arg = seq_get(args, 0) return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) def build_lower(args: t.List) -> exp.Lower | exp.Hex: # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation arg = seq_get(args, 0) return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) def build_upper(args: t.List) -> exp.Upper | exp.Hex: # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation arg = seq_get(args, 0) return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: def _builder(args: t.List, dialect: Dialect) -> E: expression = expr_type( this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) ) if len(args) > 2 and expr_type is exp.JSONExtract: expression.set("expressions", args[2:]) if expr_type is exp.JSONExtractScalar: expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY) return expression return _builder def build_mod(args: t.List) -> exp.Mod: this = seq_get(args, 0) expression = seq_get(args, 1) # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression return exp.Mod(this=this, expression=expression) def build_pad(args: t.List, is_left: bool = True): return exp.Pad( this=seq_get(args, 0), expression=seq_get(args, 1), fill_pattern=seq_get(args, 2), is_left=is_left, ) def build_array_constructor( exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect ) -> exp.Expr: array_exp = exp_class(expressions=args) if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) return array_exp def build_convert_timezone( args: t.List, default_source_tz: t.Optional[str] = None ) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: if len(args) == 2: source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None return exp.ConvertTimezone( source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) ) return exp.ConvertTimezone.from_arg_list(args) def build_trim(args: t.List, is_left: bool = True, reverse_args: bool = False): this, expression = seq_get(args, 0), seq_get(args, 1) if expression and reverse_args: this, expression = expression, this return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING") def build_coalesce( args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None ) -> exp.Coalesce: return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) def build_locate_strposition(args: t.List): return exp.StrPosition( this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2), ) def build_array_append(args: t.List, dialect: Dialect) -> exp.ArrayAppend: """ Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead. Args: args: Function arguments [array, element] dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from Returns: ArrayAppend expression with appropriate null_propagation flag """ return exp.ArrayAppend( this=seq_get(args, 0), expression=seq_get(args, 1), null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, ) def build_array_prepend(args: t.List, dialect: Dialect) -> exp.ArrayPrepend: """ Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead. Args: args: Function arguments [array, element] dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from Returns: ArrayPrepend expression with appropriate null_propagation flag """ return exp.ArrayPrepend( this=seq_get(args, 0), expression=seq_get(args, 1), null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, ) def build_array_concat(args: t.List, dialect: Dialect) -> exp.ArrayConcat: """ Builds ArrayConcat with NULL propagation semantics based on the dialect configuration. Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. Args: args: Function arguments [array1, array2, ...] (variadic) dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from Returns: ArrayConcat expression with appropriate null_propagation flag """ return exp.ArrayConcat( this=seq_get(args, 0), expressions=args[1:], null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, ) def build_array_remove(args: t.List, dialect: Dialect) -> exp.ArrayRemove: """ Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return empty array due to NULL comparison semantics. Args: args: Function arguments [array, value_to_remove] dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from Returns: ArrayRemove expression with appropriate null_propagation flag """ return exp.ArrayRemove( this=seq_get(args, 0), expression=seq_get(args, 1), null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, ) def _resolve_dialect(dialect: t.Any) -> t.Any: from sqlglot.dialects.dialect import Dialect return Dialect.get_or_raise(dialect) SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL") class Parser: """ Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. Args: error_level: The desired error level. Default: ErrorLevel.IMMEDIATE error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100 max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. 
Default: 3 """ __slots__ = ( "error_level", "error_message_context", "max_errors", "dialect", "sql", "errors", "_tokens", "_index", "_curr", "_next", "_prev", "_prev_comments", "_pipe_cte_counter", "_chunks", "_chunk_index", "_tokens_size", ) FUNCTIONS: t.ClassVar[t.Dict[str, t.Callable]] = { **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), "ARRAY": lambda args, dialect: exp.Array(expressions=args), "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None ), "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None ), "ARRAY_APPEND": build_array_append, "ARRAY_CAT": build_array_concat, "ARRAY_CONCAT": build_array_concat, "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), "ARRAY_PREPEND": build_array_prepend, "ARRAY_REMOVE": build_array_remove, "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), "CONCAT": lambda args, dialect: exp.Concat( expressions=args, safe=not dialect.STRICT_STRING_CONCAT, coalesce=dialect.CONCAT_COALESCE, ), "CONCAT_WS": lambda args, dialect: exp.ConcatWs( expressions=args, safe=not dialect.STRICT_STRING_CONCAT, coalesce=dialect.CONCAT_COALESCE, ), "CONVERT_TIMEZONE": build_convert_timezone, "DATE_TO_DATE_STR": lambda args: exp.Cast( this=seq_get(args, 0), to=exp.DataType(this=exp.DType.TEXT), ), "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( start=seq_get(args, 0), end=seq_get(args, 1), step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), ), "GENERATE_UUID": lambda args, dialect: exp.Uuid( is_string=dialect.UUID_IS_STRING_TYPE or None ), "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 
# (entries below continue a function-name -> builder mapping that opens above this chunk)
"GREATEST": lambda args, dialect: exp.Greatest(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "LEAST": lambda args, dialect: exp.Least(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "LIKE": build_like,
        "LOG": build_logarithm,
        # LOG2 / LOG10 are expressed as exp.Log with an explicit base literal.
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        # With exactly two args both `this` and `expression` are set; otherwise only
        # the first arg is used, as `expression`.
        "SCOPE_RESOLUTION": lambda args: (
            exp.ScopeResolution(expression=seq_get(args, 0))
            if len(args) != 2
            else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1))
        ),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DType.TEXT),
        ),
        "TO_HEX": build_hex,
        # Cast to TEXT, then keep characters 1..10 of the result.
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DType.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        # `or None` turns a falsy dialect flag into None instead of False.
        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
        "VAR_MAP": build_var_map,
    }

    # Token type -> expression class built for that token.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (same as CURRENT_DATE) —
    # confirm this is intentional rather than a copy/paste slip.
    NO_PAREN_FUNCTIONS: t.ClassVar[t.Dict] = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
        TokenType.CURRENT_ROLE: exp.CurrentRole,
    }

    # Struct-like type tokens; splatted into NESTED_TYPE_TOKENS below.
    STRUCT_TYPE_TOKENS: t.ClassVar = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    # Superset of STRUCT_TYPE_TOKENS.
    NESTED_TYPE_TOKENS: t.ClassVar = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS: t.ClassVar = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS: t.ClassVar = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # Explicit type tokens plus the enum / nested / aggregate groups above.
    # OBJECT and DYNAMIC are listed explicitly and also arrive through the
    # splatted groups; the duplication is harmless in a set literal.
    TYPE_TOKENS: t.ClassVar = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.BIGNUM,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIME_NS,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.DECFLOAT,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    # Signed type token -> its unsigned counterpart.
    SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    # ANY and SOME both build exp.Any.
    SUBQUERY_PREDICATES: t.ClassVar = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    SUBQUERY_TOKENS: t.ClassVar = {
        TokenType.SELECT,
        TokenType.WITH,
        TokenType.FROM,
    }

    # Every token type in Tokenizer.SINGLE_TOKENS plus SELECT, with IDENTIFIER removed.
    RESERVED_TOKENS: t.ClassVar = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES: t.ClassVar = {
        TokenType.DATABASE,
TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    # Superset of DB_CREATABLES.
    CREATABLES: t.ClassVar = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.TRIGGER,
        *DB_CREATABLES,
    }

    TRIGGER_EVENTS: t.ClassVar = {
        TokenType.INSERT,
        TokenType.UPDATE,
        TokenType.DELETE,
        TokenType.TRUNCATE,
    }

    ALTERABLES: t.ClassVar = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    # The splatted dicts (SUBQUERY_PREDICATES, NO_PAREN_FUNCTIONS) contribute their
    # keys. UNION, which arrives via TYPE_TOKENS, is removed again at the end.
    ID_VAR_TOKENS: t.ClassVar[t.Set] = {
        TokenType.ALL,
        TokenType.ANALYZE,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILE,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.INOUT,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MATCH,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVER,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.STREAM,
        TokenType.STREAMLIT,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        TokenType.CURRENT_CATALOG,
        TokenType.LOCALTIME,
        TokenType.LOCALTIMESTAMP,
        TokenType.SESSION_USER,
        TokenType.STRAIGHT_JOIN,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    } - {TokenType.UNION}

    # ID_VAR_TOKENS minus the join-side / join-kind style keywords listed here.
    TABLE_ALIAS_TOKENS: t.ClassVar[t.Set] = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    # Both names are bound to the very same set object as ID_VAR_TOKENS (no copy).
    ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS: t.ClassVar = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that indicate a simple column reference
    IDENTIFIER_TOKENS: t.ClassVar[t.FrozenSet] = frozenset({TokenType.VAR, TokenType.IDENTIFIER})

    BRACKETS: t.ClassVar[t.FrozenSet] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE})

    # Postfix tokens that prevent the bare column fast path
    COLUMN_POSTFIX_TOKENS: t.ClassVar[t.FrozenSet] = frozenset(
        {
            TokenType.L_PAREN,
            TokenType.L_BRACKET,
            TokenType.L_BRACE,
            TokenType.COLON,
            TokenType.JOIN_MARKER,
        }
    )

    TABLE_POSTFIX_TOKENS: t.ClassVar[t.FrozenSet] = frozenset(
        {
            TokenType.L_PAREN,
            TokenType.L_BRACKET,
            TokenType.L_BRACE,
            TokenType.PIVOT,
            TokenType.UNPIVOT,
            TokenType.TABLE_SAMPLE,
        }
    )

    # Explicit tokens plus every TYPE_TOKENS member and the SUBQUERY_PREDICATES keys.
    FUNC_TOKENS: t.ClassVar = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.CURRENT_CATALOG,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.LOCALTIME,
        TokenType.LOCALTIMESTAMP,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.SESSION_USER,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The tables below each map an operator token to the binary expression class
    # it produces. NOTE(review): presumably one table per precedence level —
    # confirm against the methods that consume them.
    CONJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expr]]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expr]]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expr]]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY: t.ClassVar = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON: t.ClassVar = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE: t.ClassVar = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM: t.ClassVar = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR: t.ClassVar = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Empty here. NOTE(review): presumably populated by dialect subclasses — verify.
    EXPONENT: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expr]]] = {}

    TIMES: t.ClassVar = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS: t.ClassVar = {
        TokenType.TIMESTAMP,
TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS: t.ClassVar = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS: t.ClassVar = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES: t.ClassVar = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS: t.ClassVar = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    # Empty here. NOTE(review): presumably overridden by dialects that support join hints.
    JOIN_HINTS: t.ClassVar[t.Set[str]] = set()

    # Tokens that unambiguously end a table reference on the fast path
    TABLE_TERMINATORS: t.ClassVar[t.FrozenSet] = frozenset(
        {
            TokenType.COMMA,
            TokenType.GROUP_BY,
            TokenType.HAVING,
            TokenType.JOIN,
            TokenType.LIMIT,
            TokenType.ON,
            TokenType.ORDER_BY,
            TokenType.R_PAREN,
            TokenType.SEMICOLON,
            TokenType.SENTINEL,
            TokenType.WHERE,
            *SET_OPERATIONS,
            *JOIN_KINDS,
            *JOIN_METHODS,
            *JOIN_SIDES,
        }
    )

    # Arrow token -> callable(self, expressions) that parses the right-hand side.
    # ARROW builds an exp.Lambda over the already-parsed parameter list;
    # FARROW builds an exp.Kwarg named after the first parsed expression.
    LAMBDAS: t.ClassVar = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda(
                this=self._replace_lambda(
                    self._parse_disjunction(),
                    expressions,
                ),
                expressions=expressions,
            )
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction())
        ),
    }

    # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake
    TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False

    LAMBDA_ARG_TERMINATORS: t.ClassVar[t.FrozenSet] = frozenset(
        {TokenType.COMMA, TokenType.R_PAREN}
    )

    # Operator token -> callable(self, this, rhs) building the resulting node.
    # DOT maps to None. NOTE(review): presumably handled inline by the column
    # parser rather than through this table — confirm.
    COLUMN_OPERATORS: t.ClassVar = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        # ARROW / DARROW route the path through the dialect's JSON-path conversion;
        # the HASH_ARROW variants below pass the path through unchanged.
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract(
                this=this,
                expression=self.dialect.to_json_path(path),
                only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
            )
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar(
                this=this,
                expression=self.dialect.to_json_path(path),
                only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
                scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY,
            )
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract(this=this, expression=path)
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar(this=this, expression=path)
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains(this=this, expression=key)
        ),
    }

    # The two COLUMN_OPERATORS entries above whose right-hand side is named `to`.
    CAST_COLUMN_OPERATORS: t.ClassVar = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    # Expression class -> callable(self) that parses an instance of that class.
    EXPRESSION_PARSERS: t.ClassVar = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()),
        exp.Condition: lambda self: self._parse_disjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expr: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
    }

    # Token type -> callable(self) that parses a statement.
    # NOTE(review): presumably keyed on the statement's leading token — confirm in the dispatcher.
    # DESC/DESCRIBE, COMMIT/ROLLBACK and PIVOT/UNPIVOT each share a parser method.
    STATEMENT_PARSERS: t.ClassVar = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        # Built directly instead of through self.expression(...), unlike the other entries.
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix-operator token -> callable(self) that parses the operand and wraps it.
    UNARY_PARSERS: t.ClassVar = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        # NOT parses its operand with _parse_equality, the others with _parse_unary.
        TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())),
        TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())),
    }

    # String-literal token -> callable(self, token) building the literal node.
    # HEREDOC_STRING and RAW_STRING both become exp.RawString.
    STRING_PARSERS: t.ClassVar = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National(this=token.text), token
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(
            exp.RawString(this=token.text), token
        ),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal(this=token.text, is_string=True), token
        ),
        # `escape` is only parsed when a UESCAPE keyword follows the literal.
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString(
                this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string()
            ),
            token,
        ),
    }

    NUMERIC_PARSERS: t.ClassVar = {
TokenType.BIT_STRING: lambda self, token: self.expression(
            exp.BitString(this=token.text), token
        ),
        # `or None` turns a falsy dialect flag into None instead of False.
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString(
                this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None
            ),
            token,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString(
                this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None
            ),
            token,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal(this=token.text, is_string=False), token
        ),
    }

    # All string and numeric literal parsers plus the literal-like tokens below;
    # every callable takes (self, token).
    PRIMARY_PARSERS: t.ClassVar = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null()),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS: t.ClassVar = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # A colon is a named placeholder only when followed by a token from
        # COLON_PLACEHOLDER_TOKENS; otherwise the parser yields None.
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder(this=self._prev.text))
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    # Token type -> callable(self, this), where `this` is the already-parsed
    # left-hand operand.
    RANGE_PARSERS: t.ClassVar = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        # Same node as AT_GT, built with reverse_args=True.
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }

    # Pipe-syntax operator keyword -> callable(self, query) returning the
    # transformed query. PIVOT and UNPIVOT share one parser method.
    PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    # Property keyword -> callable(self[, **kwargs]) that parses that property.
    PROPERTY_PARSERS: t.ClassVar[t.Dict[str, t.Callable]] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary))
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty(this=self._parse_var(any_token=True))
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs:
self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is recorded as the same StabilityProperty("IMMUTABLE") that
        # the IMMUTABLE keyword produces further down.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty()),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        # NOTE(review): the class is spelled `EnviromentProperty` (single "n");
        # presumably this matches its declaration in the expressions module —
        # verify before "correcting" the spelling here.
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment))
        ),
        "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        # FORMAT and USING (below) both produce a FileFormatProperty.
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty()),
        "HEAP": lambda self: self.expression(exp.HeapProperty()),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty()),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table))
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        # MULTISET and SET (below) share SetProperty and differ only in `multi`.
        "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        # Three spellings, one parser.
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty()),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        # `this` is only parsed when a BY keyword follows SAMPLE.
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise())
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty()),
        "SECURITY": lambda self: self._parse_sql_security(),
        "SQL SECURITY": lambda self: self._parse_sql_security(),
        "SET": lambda self: self.expression(exp.SetProperty(multi=False)),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty(this=exp.Literal.string("STABLE"))
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty()),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty()),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression))
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Constraint keyword -> callable(self) that parses that constraint.
    CONSTRAINT_PARSERS: t.ClassVar = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self:
self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint(this=self._parse_var_or_string())
        ),
        "CHECK": lambda self: self._parse_check_constraint(),
        # Falls back to a column parse when no identifier can be parsed.
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column())
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint(this=self._parse_string())
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint(this=self._parse_bitwise())
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint(this=self._parse_bitwise())
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint(this=self._parse_index_params())
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint(this=self._parse_var_or_string())
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        # IDENTITY shares the auto-increment parser with AUTOINCREMENT / AUTO_INCREMENT.
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        # A bare NULL is recorded as NotNullColumnConstraint with allow_null=True.
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)),
        # `ON UPDATE <function>` yields an OnUpdateColumnConstraint; any other
        # `ON <id>` falls through to an OnProperty.
        "ON": lambda self: (
            (
                self._match(TokenType.UPDATE)
                and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function()))
            )
            or self.expression(exp.OnProperty(this=self._parse_id_var()))
        ),
        "PATH": lambda self:
self.expression(exp.PathColumnConstraint(this=self._parse_string())),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint(this=self._parse_var_or_string())
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()),
        "WITH": lambda self: self.expression(
            exp.Properties(expressions=self._parse_wrapped_properties())
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expr]:
        """Parse a `BUCKET(...)` or `TRUNCATE(...)` partition transform.

        The keyword itself is read back from `self._prev`, so it must already
        have been consumed when this method runs.

        Returns:
            A `PartitionedByBucket` (keyword BUCKET) or `PartitionByTruncate`
            (any other keyword) expression whose `this` / `expression` are the
            first two parsed arguments, swapped if the first one is a literal.
            Returns None, after stepping the parser back one token, when the
            keyword is not followed by an opening parenthesis.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        # Each argument is tried as a primary first, then as a column.
        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num chars>, <col name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col name>, <num chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass(this=this, expression=expression))

    # Keyword -> callable(self) that parses one ALTER action.
    ALTER_PARSERS: t.ClassVar = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        # The table is only parsed when a WITH token follows SWAP.
        "SWAP": lambda self: self.expression(
            exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True))
        ),
    }

    ALTER_ALTER_PARSERS: t.ClassVar = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Every name here is also a CONSTRAINT_PARSERS key.
    SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS: t.ClassVar = {
        "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot(this=self._parse_column())
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS: t.ClassVar = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"}

    KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> callable(self) for functions parsed by a dedicated method.
    FUNCTION_PARSERS: t.ClassVar[t.Dict[str, t.Callable]] = {
        # One entry per SQL name of ArgMax / ArgMin. The lambdas do not reference
        # the loop variable, so late binding is not an issue.
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax)
            for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin)
            for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self:
self._parse_cast(False, safe=True), "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), "XMLELEMENT": lambda self: self._parse_xml_element(), "XMLTABLE": lambda self: self._parse_xml_table(), } QUERY_MODIFIER_PARSERS: t.ClassVar = { TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), TokenType.WHERE: lambda self: ("where", self._parse_where()), TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), TokenType.HAVING: lambda self: ("having", self._parse_having()), TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), TokenType.FETCH: lambda self: ("limit", self._parse_limit()), TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), TokenType.FOR: lambda self: ("locks", self._parse_locks()), TokenType.LOCK: lambda self: ("locks", self._parse_locks()), TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), TokenType.CLUSTER_BY: lambda self: ( "cluster", self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), ), TokenType.DISTRIBUTE_BY: lambda self: ( "distribute", self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), ), TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), } QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) SET_PARSERS: t.ClassVar = { "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), "SESSION": 
# Options table for transaction characteristics: each key is a leading keyword and
# its value lists the keyword sequences that are allowed to follow it.
TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # Standard spelling of the isolation level (previously missing from the table).
        ("LEVEL", "READ", "UNCOMMITTED"),
        # Misspelled variant that used to be the only accepted form; it is kept so
        # that any input which matched this table before still matches.
        ("LEVEL", "READ", "UNCOMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}
"EVOLUTION"), tuple()), } PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {} EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( ("CALLER", "SELF", "OWNER"), tuple() ) KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { "NOT": ("ENFORCED",), "MATCH": ( "FULL", "PARTIAL", "SIMPLE", ), "INITIALLY": ("DEFERRED", "IMMEDIATE"), "USING": ( "BTREE", "HASH", ), **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), } WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { "NO": ("OTHERS",), "CURRENT": ("ROW",), **dict.fromkeys(("GROUP", "TIES"), tuple()), } INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} ADD_CONSTRAINT_TOKENS: t.ClassVar = { TokenType.CONSTRAINT, TokenType.FOREIGN_KEY, TokenType.INDEX, TokenType.KEY, TokenType.PRIMARY_KEY, TokenType.UNIQUE, } DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { "FILE_FORMAT", "COPY_OPTIONS", 
"FORMAT_OPTIONS", "CREDENTIAL", } IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} ODBC_DATETIME_LITERALS: t.ClassVar[t.Dict[str, t.Type[exp.Expr]]] = {} ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} # The style options for the DESCRIBE statement DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"} # The style options for the ANALYZE statement ANALYZE_STYLES: t.ClassVar = { "BUFFER_USAGE_LIMIT", "FULL", "LOCAL", "NO_WRITE_TO_BINLOG", "SAMPLE", "SKIP_LOCKED", "VERBOSE", } ANALYZE_EXPRESSION_PARSERS: t.ClassVar = { "ALL": lambda self: self._parse_analyze_columns(), "COMPUTE": lambda self: self._parse_analyze_statistics(), "DELETE": lambda self: self._parse_analyze_delete(), "DROP": lambda self: self._parse_analyze_histogram(), "ESTIMATE": lambda self: self._parse_analyze_statistics(), "LIST": lambda self: self._parse_analyze_list(), "PREDICATE": lambda self: self._parse_analyze_columns(), "UPDATE": lambda self: self._parse_analyze_histogram(), "VALIDATE": lambda self: self._parse_analyze_validate(), } PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"} AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET) OPERATION_MODIFIERS: t.ClassVar[t.Set[str]] = set() RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"} SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"} MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) STRICT_CAST: t.ClassVar = True PREFIXED_PIVOT_COLUMNS: t.ClassVar = False IDENTIFY_PIVOT_STRINGS: t.ClassVar = False LOG_DEFAULTS_TO_LN: t.ClassVar = False # Whether the table sample clause expects CSV syntax TABLESAMPLE_CSV: t.ClassVar = False # The default method used for table sampling DEFAULT_SAMPLING_METHOD: t.ClassVar[t.Optional[str]] 
= None # Whether the SET command needs a delimiter (e.g. "=") for assignments SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True # Whether the TRIM function expects the characters to trim as its first argument TRIM_PATTERN_FIRST: t.ClassVar = False # Whether string aliases are supported `SELECT COUNT(*) 'count'` STRING_ALIASES: t.ClassVar = False # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"} # Whether to parse IF statements that aren't followed by a left parenthesis as commands NO_PAREN_IF_COMMANDS: t.ClassVar = True # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False # Whether the `:` operator is used to extract a value from a VARIANT column COLON_IS_VARIANT_EXTRACT: t.ClassVar = False # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. # If this is True and '(' is not found, the keyword will be treated as an identifier VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS INTERVAL_SPANS: t.ClassVar = True # Whether a PARTITION clause can follow a table reference SUPPORTS_PARTITION_SELECTION: t.ClassVar = False # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True # Whether the 'AS' keyword is optional in the CTE definition syntax OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True # Whether Alter statements are allowed to contain Partition specifications ALTER_TABLE_PARTITIONS: t.ClassVar = False # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such # as BigQuery, where all joins have the same precedence. JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False # Whether TIMESTAMP can produce a zone-aware timestamp ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False # Whether map literals support arbitrary expressions as keys. # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). # When False, keys are typically restricted to identifiers. 
def reset(self) -> None:
    """Clears all per-parse state so that the instance can be reused for new input."""
    # Source text and the errors collected while parsing it.
    self.sql, self.errors = "", []

    # Statement chunks and the position of the next chunk to process.
    self._chunks, self._chunk_index = [], 0

    # Token buffer of the active chunk together with the cursor into it.
    self._tokens, self._tokens_size, self._index = [], 0, 0

    # Window of tokens around the cursor; the sentinel stands for "no token".
    self._prev = self._curr = self._next = SENTINEL_NONE
    self._prev_comments = []

    self._pipe_cte_counter = 0
def _match(
    self, token_type: TokenType, advance: bool = True, expression: t.Optional[exp.Expr] = None
) -> bool:
    """
    Checks whether the current token has the given type.

    Args:
        token_type: The token type the current token is compared against.
        advance: Whether to step past the current token when it matches.
        expression: Handed to `_add_comments` when the token matches.

    Returns:
        True if the current token matched, False otherwise.
    """
    if self._curr.token_type != token_type:
        return False

    if advance:
        self._advance()

    self._add_comments(expression)
    return True
def parse(self, raw_tokens: t.List[Token], sql: str) -> t.List[t.Optional[exp.Expr]]:
    """
    Turns a list of tokens into syntax trees, producing one tree for each SQL statement.

    Args:
        raw_tokens: The tokens to parse.
        sql: The original SQL text.

    Returns:
        A list with the syntax trees that were produced.
    """
    # The unbound method is handed over; `_parse` receives it as `parse_method`.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
def expression(
    self,
    instance: E,
    token: t.Optional[Token] = None,
    comments: t.Optional[t.List[str]] = None,
) -> E:
    """
    Finalizes a freshly built expression node.

    Args:
        instance: The node to finalize.
        token: If given, `instance.update_positions` is called with it.
        comments: If non-empty, these are added to the node; otherwise the node is
            handed to `_add_comments` instead.

    Returns:
        The node itself when it is primitive, else the result of `validate_expression`.
    """
    if token:
        instance.update_positions(token)

    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    if instance.is_primitive:
        return instance
    return self.validate_expression(instance)
def _parse(
    self,
    parse_method: t.Callable[[Parser], t.Optional[exp.Expr]],
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expr]]:
    """
    Splits the tokens into semicolon-delimited chunks and parses them one by one.

    Args:
        parse_method: The callable forwarded to `_parse_batch_statements`.
        raw_tokens: The full list of tokens.
        sql: The original SQL string; an empty string is stored when it is missing.

    Returns:
        Whatever `_parse_batch_statements` produces for the chunks.
    """
    self.reset()
    self.sql = sql or ""

    last_position = len(raw_tokens) - 1
    statements: t.List[t.List[Token]] = [[]]

    for position, tok in enumerate(raw_tokens):
        if tok.token_type != TokenType.SEMICOLON:
            statements[-1].append(tok)
            continue

        # A semicolon that carries comments is stored as a chunk of its own ...
        if tok.comments:
            statements.append([tok])

        # ... and every semicolon except a trailing one opens a fresh chunk.
        if position < last_position:
            statements.append([])

    self._chunks = statements
    return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False)
def _parse_comment(self, allow_exists: bool = True) -> exp.Expr:
    """
    Parses the remainder of a COMMENT statement into an `exp.Comment`.

    Falls back to `_parse_as_command` when no creatable kind follows.

    Args:
        allow_exists: Whether an existence clause is looked for via `_parse_exists`.
    """
    start = self._prev

    exists = None
    if allow_exists:
        exists = self._parse_exists()

    self._match(TokenType.ON)
    materialized = self._match_text_seq("MATERIALIZED")

    matched_kind = self._match_set(self.CREATABLES)
    kind = self._prev if matched_kind else matched_kind
    if not kind:
        return self._parse_as_command(start)

    # The kind of object decides how the commented-on target is parsed.
    kind_type = kind.token_type
    if kind_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        target = self._parse_user_defined_function(kind=kind_type)
    elif kind_type == TokenType.TABLE:
        target = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif kind_type == TokenType.COLUMN:
        target = self._parse_column()
    else:
        target = self._parse_id_var()

    self._match(TokenType.IS)
    text = self._parse_string()

    comment = exp.Comment(
        this=target,
        kind=kind.text,
        expression=text,
        exists=exists,
        materialized=materialized,
    )
    return self.expression(comment)
def _parse_statement(self) -> t.Optional[exp.Expr]:
    """
    Parses a single statement starting at the current token.

    Dispatch order: a registered statement parser, then a tokenizer command, then a
    WHILE block, and finally a plain expression / SELECT with its query modifiers.

    Returns:
        The parsed statement, or None when there is no current token.
    """
    if not self._curr:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        # Grab the comments before the handler runs and moves the cursor.
        leading_comments = self._prev_comments
        handler = self.STATEMENT_PARSERS[self._prev.token_type]
        statement = handler(self)
        statement.add_comments(leading_comments, prepend=True)
        return statement

    if self._match_set(self.dialect.tokenizer_class.COMMANDS):
        return self._parse_command()

    if self._match_text_seq("WHILE"):
        return self._parse_whileblock()

    parsed = self._parse_expression()
    if parsed:
        parsed = self._parse_set_operations(parsed)
    else:
        parsed = self._parse_select()

    return self._parse_query_modifiers(parsed)
def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
    """
    Matches an `IF [NOT] EXISTS` clause at the current position.

    Args:
        not_: When true, a NOT token is required between IF and EXISTS.

    Returns:
        A truthy value if the whole clause was matched, a falsy one otherwise.
    """
    matched = self._match_text_seq("IF")
    if not matched:
        return matched

    if not_:
        matched = self._match(TokenType.NOT)
        if not matched:
            return matched

    return self._match(TokenType.EXISTS)
None: nonlocal properties if properties and temp_props: properties.expressions.extend(temp_props.expressions) elif temp_props: properties = temp_props if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): this = self._parse_user_defined_function(kind=create_token_type) # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) extend_props(self._parse_properties()) expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None extend_props(self._parse_properties()) if not expression: if self._match(TokenType.COMMAND): expression = self._parse_as_command(self._prev) else: begin = self._match(TokenType.BEGIN) return_ = self._match_text_seq("RETURN") if self._match(TokenType.STRING, advance=False): # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement expression = self._parse_string() extend_props(self._parse_properties()) else: expression = ( self._parse_user_defined_function_expression() if create_token_type == TokenType.FUNCTION else self._parse_block() ) if return_: expression = self.expression(exp.Return(this=expression)) elif create_token_type == TokenType.INDEX: # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) if not self._match(TokenType.ON): index = self._parse_id_var() anonymous = False else: index = None anonymous = True this = self._parse_index(index=index, anonymous=anonymous) elif ( create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) ) or create_token_type == TokenType.TRIGGER: if is_constraint := (create_token_type == TokenType.CONSTRAINT): create_token = self._prev trigger_name = self._parse_id_var() if not trigger_name: return self._parse_as_command(start) timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) timing = timing_var.this if timing_var else None if not timing: return self._parse_as_command(start) events = self._parse_trigger_events() if not self._match(TokenType.ON): self.raise_error("Expected ON in trigger definition") table = self._parse_table_parts() referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None deferrable, initially = self._parse_trigger_deferrable() referencing = self._parse_trigger_referencing() for_each = self._parse_trigger_for_each() when = self._match_text_seq("WHEN") and self._parse_wrapped( self._parse_disjunction, optional=True ) execute = self._parse_trigger_execute() if execute is None: return self._parse_as_command(start) trigger_props = self.expression( exp.TriggerProperties( table=table, timing=timing, events=events, execute=execute, constraint=is_constraint, referenced_table=referenced_table, deferrable=deferrable, initially=initially, referencing=referencing, for_each=for_each, when=when, ) ) this = trigger_name extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) elif create_token_type in self.DB_CREATABLES: table_parts = self._parse_table_parts( schema=True, is_db_reference=create_token_type == TokenType.SCHEMA ) # exp.Properties.Location.POST_NAME self._match(TokenType.COMMA) extend_props(self._parse_properties(before=True)) this = self._parse_schema(this=table_parts) 
# exp.Properties.Location.POST_SCHEMA and POST_WITH extend_props(self._parse_properties()) has_alias = self._match(TokenType.ALIAS) if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): # exp.Properties.Location.POST_ALIAS extend_props(self._parse_properties()) if create_token_type == TokenType.SEQUENCE: expression = self._parse_types() props = self._parse_properties() if props: sequence_props = exp.SequenceProperties() options = [] for prop in props: if isinstance(prop, exp.SequenceProperties): for arg, value in prop.args.items(): if arg == "options": options.extend(value) else: sequence_props.set(arg, value) prop.pop() if options: sequence_props.set("options", options) props.append("expressions", sequence_props) extend_props(props) else: expression = self._parse_ddl_select() # Some dialects also support using a table as an alias instead of a SELECT. # Here we fallback to this as an alternative. if not expression and has_alias: expression = self._try_parse(self._parse_table_parts) if create_token_type == TokenType.TABLE: # exp.Properties.Location.POST_EXPRESSION extend_props(self._parse_properties()) indexes = [] while True: index = self._parse_index() # exp.Properties.Location.POST_INDEX extend_props(self._parse_properties()) if not index: break else: self._match(TokenType.COMMA) indexes.append(index) elif create_token_type == TokenType.VIEW: if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): no_schema_binding = True elif create_token_type in (TokenType.SINK, TokenType.SOURCE): extend_props(self._parse_properties()) shallow = self._match_text_seq("SHALLOW") if self._match_texts(self.CLONE_KEYWORDS): copy = self._prev.text.lower() == "copy" clone = self.expression( exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) ) if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): return self._parse_as_command(start) create_kind_text = create_token.text.upper() return self.expression( exp.Create( 
def _parse_trigger_events(self) -> t.List[exp.TriggerEvent]:
    """
    Parses one or more trigger events separated by OR.

    An UPDATE event may be followed by `OF` and a comma-separated column list.

    Returns:
        The parsed events, in the order they appear.
    """
    parsed = []
    has_more = True

    while has_more:
        matched = self._match_set(self.TRIGGER_EVENTS)
        event_type = self._prev.text.upper() if matched else matched
        if not event_type:
            self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")

        columns = None
        if event_type == "UPDATE" and self._match_text_seq("OF"):
            columns = self._parse_csv(self._parse_column)

        event = exp.TriggerEvent(this=event_type, columns=columns)
        parsed.append(self.expression(event))

        # Keep going only while events are chained with OR.
        has_more = self._match(TokenType.OR)

    return parsed
_parse_trigger_deferrable( self, ) -> t.Tuple[t.Optional[str], t.Optional[str]]: deferrable_var = self._parse_var_from_options( self.TRIGGER_DEFERRABLE, raise_unmatched=False ) deferrable = deferrable_var.this if deferrable_var else None initially = None if deferrable and self._match_text_seq("INITIALLY"): initially = ( self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None ) return deferrable, initially def _parse_trigger_referencing_clause(self, keyword: str) -> t.Optional[exp.Expr]: if not self._match_text_seq(keyword): return None if not self._match_text_seq("TABLE"): self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") self._match_text_seq("AS") return self._parse_id_var() def _parse_trigger_referencing(self) -> t.Optional[exp.TriggerReferencing]: if not self._match_text_seq("REFERENCING"): return None old_alias = None new_alias = None while True: if alias := self._parse_trigger_referencing_clause("OLD"): if old_alias is not None: self.raise_error("Duplicate OLD clause in REFERENCING") old_alias = alias elif alias := self._parse_trigger_referencing_clause("NEW"): if new_alias is not None: self.raise_error("Duplicate NEW clause in REFERENCING") new_alias = alias else: break if old_alias is None and new_alias is None: self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias)) def _parse_trigger_for_each(self) -> t.Optional[str]: if not self._match_text_seq("FOR", "EACH"): return None return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None def _parse_trigger_execute(self) -> t.Optional[exp.TriggerExecute]: if not self._match(TokenType.EXECUTE): return None if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") func_call = self._parse_function(anonymous=True, optional_parens=False) return 
self.expression(exp.TriggerExecute(this=func_call)) def _parse_property_before(self) -> exp.Expr | t.List[exp.Expr] | None: # only used for teradata currently self._match(TokenType.COMMA) kwargs = { "no": self._match_text_seq("NO"), "dual": self._match_text_seq("DUAL"), "before": self._match_text_seq("BEFORE"), "default": self._match_text_seq("DEFAULT"), "local": (self._match_text_seq("LOCAL") and "LOCAL") or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), "after": self._match_text_seq("AFTER"), "minimum": self._match_texts(("MIN", "MINIMUM")), "maximum": self._match_texts(("MAX", "MAXIMUM")), } if self._match_texts(self.PROPERTY_PARSERS): parser = self.PROPERTY_PARSERS[self._prev.text.upper()] try: return parser(self, **{k: v for k, v in kwargs.items() if v}) except TypeError: self.raise_error(f"Cannot parse property '{self._prev.text}'") return None def _parse_wrapped_properties(self) -> t.List[exp.Expr | t.List[exp.Expr]]: return self._parse_wrapped_csv(self._parse_property) def _parse_property(self) -> exp.Expr | t.List[exp.Expr] | None: if self._match_texts(self.PROPERTY_PARSERS): return self.PROPERTY_PARSERS[self._prev.text.upper()](self) if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) if self._match_text_seq("COMPOUND", "SORTKEY"): return self._parse_sortkey(compound=True) if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"): return self.expression(exp.ParameterStyleProperty(this="PANDAS")) index = self._index seq_props = self._parse_sequence_properties() if seq_props: return seq_props self._retreat(index) key = self._parse_column() if not self._match(TokenType.EQ): self._retreat(index) return None # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise if isinstance(key, exp.Column): key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) value = self._parse_bitwise() or 
self._parse_var(any_token=True) # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) if isinstance(value, exp.Column): value = exp.var(value.name) return self.expression(exp.Property(this=key, value=value)) def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: if self._match_text_seq("BY"): return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string())) self._match(TokenType.ALIAS) input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None return self.expression( exp.FileFormatProperty( this=( self.expression( exp.InputOutputFormat( input_format=input_format, output_format=output_format ) ) if input_format or output_format else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() ), hive_format=True, ) ) def _parse_unquoted_field(self) -> t.Optional[exp.Expr]: field = self._parse_field() if isinstance(field, exp.Identifier) and not field.quoted: field = exp.var(field) return field def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: self._match(TokenType.EQ) self._match(TokenType.ALIAS) return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs)) def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: properties = [] while True: if before: prop = self._parse_property_before() else: prop = self._parse_property() if not prop: break for p in ensure_list(prop): properties.append(p) if properties: return self.expression(exp.Properties(expressions=properties)) return None def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: return self.expression( exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION")) ) def _parse_sql_security(self) -> exp.SqlSecurityProperty: return self.expression( exp.SqlSecurityProperty( 
this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper() ) ) def _parse_settings_property(self) -> exp.SettingsProperty: return self.expression( exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment)) ) def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: if self._index >= 2: pre_volatile_token = self._tokens[self._index - 2] else: pre_volatile_token = None if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: return exp.VolatileProperty() return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE"))) def _parse_retention_period(self) -> exp.Var: # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | DAY | DAYS | MONTH ...} number = self._parse_number() number_str = f"{number} " if number else "" unit = self._parse_var(any_token=True) return exp.var(f"{number_str}{unit}") def _parse_system_versioning_property( self, with_: bool = False ) -> exp.WithSystemVersioningProperty: self._match(TokenType.EQ) prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_)) if self._match_text_seq("OFF"): prop.set("on", False) return prop self._match(TokenType.ON) if self._match(TokenType.L_PAREN): while self._curr and not self._match(TokenType.R_PAREN): if self._match_text_seq("HISTORY_TABLE", "="): prop.set("this", self._parse_table_parts()) elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): prop.set("retention_period", self._parse_retention_period()) self._match(TokenType.COMMA) return prop def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: self._match(TokenType.EQ) on = self._match_text_seq("ON") or not self._match_text_seq("OFF") prop = self.expression(exp.DataDeletionProperty(on=on)) if self._match(TokenType.L_PAREN): while self._curr and not self._match(TokenType.R_PAREN): 
if self._match_text_seq("FILTER_COLUMN", "="): prop.set("filter_column", self._parse_column()) elif self._match_text_seq("RETENTION_PERIOD", "="): prop.set("retention_period", self._parse_retention_period()) self._match(TokenType.COMMA) return prop def _parse_distributed_property(self) -> exp.DistributedByProperty: kind = "HASH" expressions: t.Optional[t.List[exp.Expr]] = None if self._match_text_seq("BY", "HASH"): expressions = self._parse_wrapped_csv(self._parse_id_var) elif self._match_text_seq("BY", "RANDOM"): kind = "RANDOM" # If the BUCKETS keyword is not present, the number of buckets is AUTO buckets: t.Optional[exp.Expr] = None if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): buckets = self._parse_number() return self.expression( exp.DistributedByProperty( expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order() ) ) def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: self._match_text_seq("KEY") expressions = self._parse_wrapped_id_vars() return self.expression(expr_type(expressions=expressions)) def _parse_with_property(self) -> t.Optional[exp.Expr] | t.List[exp.Expr]: if self._match_text_seq("(", "SYSTEM_VERSIONING"): prop = self._parse_system_versioning_property(with_=True) self._match_r_paren() return prop if self._match(TokenType.L_PAREN, advance=False): result: t.List[exp.Expr] = [] for i in self._parse_wrapped_properties(): result.extend(i) if isinstance(i, list) else result.append(i) return result if self._match_text_seq("JOURNAL"): return self._parse_withjournaltable() if self._match_texts(self.VIEW_ATTRIBUTES): return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper())) if self._match_text_seq("DATA"): return self._parse_withdata(no=False) elif self._match_text_seq("NO", "DATA"): return self._parse_withdata(no=True) if self._match(TokenType.SERDE_PROPERTIES, advance=False): return self._parse_serde_properties(with_=True) if self._match(TokenType.SCHEMA): return 
self.expression( exp.WithSchemaBindingProperty( this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS) ) ) if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): return self.expression( exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option)) ) if not self._next: return None return self._parse_withisolatedloading() def _parse_procedure_option(self) -> exp.Expr | None: if self._match_text_seq("EXECUTE", "AS"): return self.expression( exp.ExecuteAsProperty( this=self._parse_var_from_options( self.EXECUTE_AS_OPTIONS, raise_unmatched=False ) or self._parse_string() ) ) return self._parse_var_from_options(self.PROCEDURE_OPTIONS) # https://dev.mysql.com/doc/refman/8.0/en/create-view.html def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: self._match(TokenType.EQ) user = self._parse_id_var() self._match(TokenType.PARAMETER) host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) if not user or not host: return None return exp.DefinerProperty(this=f"{user}@{host}") def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: self._match(TokenType.TABLE) self._match(TokenType.EQ) return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts())) def _parse_log(self, no: bool = False) -> exp.LogProperty: return self.expression(exp.LogProperty(no=no)) def _parse_journal(self, **kwargs) -> exp.JournalProperty: return self.expression(exp.JournalProperty(**kwargs)) def _parse_checksum(self) -> exp.ChecksumProperty: self._match(TokenType.EQ) on = None if self._match(TokenType.ON): on = True elif self._match_text_seq("OFF"): on = False return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT))) def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: return self.expression( exp.Cluster( expressions=( self._parse_wrapped_csv(self._parse_ordered) if wrapped else self._parse_csv(self._parse_ordered) ) ) ) def _parse_clustered_by(self) -> 
exp.ClusteredByProperty: self._match_text_seq("BY") self._match_l_paren() expressions = self._parse_csv(self._parse_column) self._match_r_paren() if self._match_text_seq("SORTED", "BY"): self._match_l_paren() sorted_by = self._parse_csv(self._parse_ordered) self._match_r_paren() else: sorted_by = None self._match(TokenType.INTO) buckets = self._parse_number() self._match_text_seq("BUCKETS") return self.expression( exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets) ) def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: if not self._match_text_seq("GRANTS"): self._retreat(self._index - 1) return None return self.expression(exp.CopyGrantsProperty()) def _parse_freespace(self) -> exp.FreespaceProperty: self._match(TokenType.EQ) return self.expression( exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT)) ) def _parse_mergeblockratio( self, no: bool = False, default: bool = False ) -> exp.MergeBlockRatioProperty: if self._match(TokenType.EQ): return self.expression( exp.MergeBlockRatioProperty( this=self._parse_number(), percent=self._match(TokenType.PERCENT) ) ) return self.expression(exp.MergeBlockRatioProperty(no=no, default=default)) def _parse_datablocksize( self, default: t.Optional[bool] = None, minimum: t.Optional[bool] = None, maximum: t.Optional[bool] = None, ) -> exp.DataBlocksizeProperty: self._match(TokenType.EQ) size = self._parse_number() units = None if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): units = self._prev.text return self.expression( exp.DataBlocksizeProperty( size=size, units=units, default=default, minimum=minimum, maximum=maximum ) ) def _parse_blockcompression(self) -> exp.BlockCompressionProperty: self._match(TokenType.EQ) always = self._match_text_seq("ALWAYS") manual = self._match_text_seq("MANUAL") never = self._match_text_seq("NEVER") default = self._match_text_seq("DEFAULT") autotemp = None if self._match_text_seq("AUTOTEMP"): autotemp = 
self._parse_schema() return self.expression( exp.BlockCompressionProperty( always=always, manual=manual, never=never, default=default, autotemp=autotemp ) ) def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: index = self._index no = self._match_text_seq("NO") concurrent = self._match_text_seq("CONCURRENT") if not self._match_text_seq("ISOLATED", "LOADING"): self._retreat(index) return None target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) return self.expression( exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target) ) def _parse_locking(self) -> exp.LockingProperty: if self._match(TokenType.TABLE): kind = "TABLE" elif self._match(TokenType.VIEW): kind = "VIEW" elif self._match(TokenType.ROW): kind = "ROW" elif self._match_text_seq("DATABASE"): kind = "DATABASE" else: kind = None if kind in ("DATABASE", "TABLE", "VIEW"): this = self._parse_table_parts() else: this = None if self._match(TokenType.FOR): for_or_in = "FOR" elif self._match(TokenType.IN): for_or_in = "IN" else: for_or_in = None if self._match_text_seq("ACCESS"): lock_type = "ACCESS" elif self._match_texts(("EXCL", "EXCLUSIVE")): lock_type = "EXCLUSIVE" elif self._match_text_seq("SHARE"): lock_type = "SHARE" elif self._match_text_seq("READ"): lock_type = "READ" elif self._match_text_seq("WRITE"): lock_type = "WRITE" elif self._match_text_seq("CHECKSUM"): lock_type = "CHECKSUM" else: lock_type = None override = self._match_text_seq("OVERRIDE") return self.expression( exp.LockingProperty( this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override ) ) def _parse_partition_by(self) -> t.List[exp.Expr]: if self._match(TokenType.PARTITION_BY): return self._parse_csv(self._parse_disjunction) return [] def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: def _parse_partition_bound_expr() -> t.Optional[exp.Expr]: if self._match_text_seq("MINVALUE"): return exp.var("MINVALUE") if 
self._match_text_seq("MAXVALUE"): return exp.var("MAXVALUE") return self._parse_bitwise() this: t.Optional[exp.Expr | t.List[exp.Expr]] = None expression = None from_expressions = None to_expressions = None if self._match(TokenType.IN): this = self._parse_wrapped_csv(self._parse_bitwise) elif self._match(TokenType.FROM): from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) self._match_text_seq("TO") to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) elif self._match_text_seq("WITH", "(", "MODULUS"): this = self._parse_number() self._match_text_seq(",", "REMAINDER") expression = self._parse_number() self._match_r_paren() else: self.raise_error("Failed to parse partition bound spec.") return self.expression( exp.PartitionBoundSpec( this=this, expression=expression, from_expressions=from_expressions, to_expressions=to_expressions, ) ) # https://www.postgresql.org/docs/current/sql-createtable.html def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: if not self._match_text_seq("OF"): self._retreat(self._index - 1) return None this = self._parse_table(schema=True) if self._match(TokenType.DEFAULT): expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") elif self._match_text_seq("FOR", "VALUES"): expression = self._parse_partition_bound_spec() else: self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") return self.expression(exp.PartitionedOfProperty(this=this, expression=expression)) def _parse_partitioned_by(self) -> exp.PartitionedByProperty: self._match(TokenType.EQ) return self.expression( exp.PartitionedByProperty( this=self._parse_schema() or self._parse_bracket(self._parse_field()) ) ) def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: if self._match_text_seq("AND", "STATISTICS"): statistics = True elif self._match_text_seq("AND", "NO", "STATISTICS"): statistics = False else: statistics = None return self.expression(exp.WithDataProperty(no=no, 
statistics=statistics)) def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: if self._match_text_seq("SQL"): return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL")) return None def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: if self._match_text_seq("SQL", "DATA"): return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA")) return None def _parse_no_property(self) -> t.Optional[exp.Expr]: if self._match_text_seq("PRIMARY", "INDEX"): return exp.NoPrimaryIndexProperty() if self._match_text_seq("SQL"): return self.expression(exp.SqlReadWriteProperty(this="NO SQL")) return None def _parse_on_property(self) -> t.Optional[exp.Expr]: if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): return exp.OnCommitProperty() if self._match_text_seq("COMMIT", "DELETE", "ROWS"): return exp.OnCommitProperty(delete=True) return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var()))) def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: if self._match_text_seq("SQL", "DATA"): return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA")) return None def _parse_distkey(self) -> exp.DistKeyProperty: return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var))) def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: table = self._parse_table(schema=True) options = [] while self._match_texts(("INCLUDING", "EXCLUDING")): this = self._prev.text.upper() id_var = self._parse_id_var() if not id_var: return None options.append( self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper()))) ) return self.expression(exp.LikeProperty(this=table, expressions=options)) def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: return self.expression( exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound) ) def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 
self._match(TokenType.EQ) return self.expression( exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default) ) def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: self._match_text_seq("WITH", "CONNECTION") return self.expression( exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts()) ) def _parse_returns(self) -> exp.ReturnsProperty: value: t.Optional[exp.Expr] null = None is_table = self._match(TokenType.TABLE) if is_table: if self._match(TokenType.LT): value = self.expression( exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types)) ) if not self._match(TokenType.GT): self.raise_error("Expecting >") else: value = self._parse_schema(exp.var("TABLE")) elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): null = True value = None else: value = self._parse_types() return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null)) def _parse_describe(self) -> exp.Describe: kind = self._prev.text if self._match_set(self.CREATABLES) else None style: t.Optional[str] = ( self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None ) if self._match(TokenType.DOT): style = None self._retreat(self._index - 2) format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None if self._match_set(self.STATEMENT_PARSERS, advance=False): this = self._parse_statement() else: this = self._parse_table(schema=True) properties = self._parse_properties() expressions = properties.expressions if properties else None partition = self._parse_partition() return self.expression( exp.Describe( this=this, style=style, kind=kind, expressions=expressions, partition=partition, format=format, as_json=self._match_text_seq("AS", "JSON"), ) ) def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: kind = self._prev.text.upper() expressions = [] def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: if 
self._match(TokenType.WHEN): expression = self._parse_disjunction() self._match(TokenType.THEN) else: expression = None else_ = self._match(TokenType.ELSE) if not self._match(TokenType.INTO): return None return self.expression( exp.ConditionalInsert( this=self.expression( exp.Insert( this=self._parse_table(schema=True), expression=self._parse_derived_table_values(), ) ), expression=expression, else_=else_, ) ) expression = parse_conditional_insert() while expression is not None: expressions.append(expression) expression = parse_conditional_insert() return self.expression( exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()), comments=comments, ) def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: comments = [] hint = self._parse_hint() overwrite = self._match(TokenType.OVERWRITE) ignore = self._match(TokenType.IGNORE) local = self._match_text_seq("LOCAL") alternative = None is_function = None if self._match_text_seq("DIRECTORY"): this: t.Optional[exp.Expr] = self.expression( exp.Directory( this=self._parse_var_or_string(), local=local, row_format=self._parse_row_format(match_row=True), ) ) else: if self._match_set((TokenType.FIRST, TokenType.ALL)): comments += ensure_list(self._prev_comments) return self._parse_multitable_inserts(comments) if self._match(TokenType.OR): alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text self._match(TokenType.INTO) comments += ensure_list(self._prev_comments) self._match(TokenType.TABLE) is_function = self._match(TokenType.FUNCTION) this = self._parse_function() if is_function else self._parse_insert_table() returning = self._parse_returning() # TSQL allows RETURNING before source return self.expression( exp.Insert( hint=hint, is_function=is_function, this=this, stored=self._match_text_seq("STORED") and self._parse_stored(), by_name=self._match_text_seq("BY", "NAME"), exists=self._parse_exists(), where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 
and self._parse_disjunction(), partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), default=self._match_text_seq("DEFAULT", "VALUES"), expression=self._parse_derived_table_values() or self._parse_ddl_select(), conflict=self._parse_on_conflict(), returning=returning or self._parse_returning(), overwrite=overwrite, alternative=alternative, ignore=ignore, source=self._match(TokenType.TABLE) and self._parse_table(), ), comments=comments, ) def _parse_insert_table(self) -> t.Optional[exp.Expr]: this = self._parse_table(schema=True, parse_partition=True) if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): this.set("alias", self._parse_table_alias()) return this def _parse_kill(self) -> exp.Kill: kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None return self.expression(exp.Kill(this=self._parse_primary(), kind=kind)) def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: conflict = self._match_text_seq("ON", "CONFLICT") duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") if not conflict and not duplicate: return None conflict_keys = None constraint = None if conflict: if self._match_text_seq("ON", "CONSTRAINT"): constraint = self._parse_id_var() elif self._match(TokenType.L_PAREN): conflict_keys = self._parse_csv(self._parse_id_var) self._match_r_paren() index_predicate = self._parse_where() action = self._parse_var_from_options(self.CONFLICT_ACTIONS) if self._prev.token_type == TokenType.UPDATE: self._match(TokenType.SET) expressions = self._parse_csv(self._parse_equality) else: expressions = None return self.expression( exp.OnConflict( duplicate=duplicate, expressions=expressions, action=action, conflict_keys=conflict_keys, index_predicate=index_predicate, constraint=constraint, where=self._parse_where(), ) ) def _parse_returning(self) -> t.Optional[exp.Returning]: if not 
self._match(TokenType.RETURNING): return None return self.expression( exp.Returning( expressions=self._parse_csv(self._parse_expression), into=self._match(TokenType.INTO) and self._parse_table_part(), ) ) def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: if not self._match(TokenType.FORMAT): return None return self._parse_row_format() def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: index = self._index with_ = with_ or self._match_text_seq("WITH") if not self._match(TokenType.SERDE_PROPERTIES): self._retreat(index) return None return self.expression( exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_) ) def _parse_row_format( self, match_row: bool = False ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): return None if self._match_text_seq("SERDE"): this = self._parse_string() serde_properties = self._parse_serde_properties() return self.expression( exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties) ) self._match_text_seq("DELIMITED") kwargs = {} if self._match_text_seq("FIELDS", "TERMINATED", "BY"): kwargs["fields"] = self._parse_string() if self._match_text_seq("ESCAPED", "BY"): kwargs["escaped"] = self._parse_string() if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): kwargs["collection_items"] = self._parse_string() if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): kwargs["map_keys"] = self._parse_string() if self._match_text_seq("LINES", "TERMINATED", "BY"): kwargs["lines"] = self._parse_string() if self._match_text_seq("NULL", "DEFINED", "AS"): kwargs["null"] = self._parse_string() return self.expression(exp.RowFormatDelimitedProperty(**kwargs)) # type: ignore def _parse_load(self) -> exp.LoadData | exp.Command: if self._match_text_seq("DATA"): local = self._match_text_seq("LOCAL") 
self._match_text_seq("INPATH") inpath = self._parse_string() overwrite = self._match(TokenType.OVERWRITE) self._match_pair(TokenType.INTO, TokenType.TABLE) return self.expression( exp.LoadData( this=self._parse_table(schema=True), local=local, overwrite=overwrite, inpath=inpath, partition=self._parse_partition(), input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), serde=self._match_text_seq("SERDE") and self._parse_string(), ) ) return self._parse_as_command(self._prev) def _parse_delete(self) -> exp.Delete: # This handles MySQL's "Multiple-Table Syntax" # https://dev.mysql.com/doc/refman/8.0/en/delete.html tables = None if not self._match(TokenType.FROM, advance=False): tables = self._parse_csv(self._parse_table) or None returning = self._parse_returning() return self.expression( exp.Delete( tables=tables, this=self._match(TokenType.FROM) and self._parse_table(joins=True), using=self._match(TokenType.USING) and self._parse_csv(lambda: self._parse_table(joins=True)), cluster=self._match(TokenType.ON) and self._parse_on_property(), where=self._parse_where(), returning=returning or self._parse_returning(), order=self._parse_order(), limit=self._parse_limit(), ) ) def _parse_update(self) -> exp.Update: kwargs: t.Dict[str, object] = { "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), } while self._curr: if self._match(TokenType.SET): kwargs["expressions"] = self._parse_csv(self._parse_equality) elif self._match(TokenType.RETURNING, advance=False): kwargs["returning"] = self._parse_returning() elif self._match(TokenType.FROM, advance=False): from_ = self._parse_from(joins=True) table = from_.this if from_ else None if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): table.set("joins", list(self._parse_joins()) or None) kwargs["from_"] = from_ elif self._match(TokenType.WHERE, advance=False): kwargs["where"] = self._parse_where() elif self._match(TokenType.ORDER_BY, advance=False): 
kwargs["order"] = self._parse_order() elif self._match(TokenType.LIMIT, advance=False): kwargs["limit"] = self._parse_limit() else: break return self.expression(exp.Update(**kwargs)) def _parse_use(self) -> exp.Use: return self.expression( exp.Use( kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), this=self._parse_table(schema=False), ) ) def _parse_uncache(self) -> exp.Uncache: if not self._match(TokenType.TABLE): self.raise_error("Expecting TABLE after UNCACHE") return self.expression( exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) ) def _parse_cache(self) -> exp.Cache: lazy = self._match_text_seq("LAZY") self._match(TokenType.TABLE) table = self._parse_table(schema=True) options = [] if self._match_text_seq("OPTIONS"): self._match_l_paren() k = self._parse_string() self._match(TokenType.EQ) v = self._parse_string() options = [k, v] self._match_r_paren() self._match(TokenType.ALIAS) return self.expression( exp.Cache( this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) ) ) def _parse_partition(self) -> t.Optional[exp.Partition]: if not self._match_texts(self.PARTITION_KEYWORDS): return None return self.expression( exp.Partition( subpartition=self._prev.text.upper() == "SUBPARTITION", expressions=self._parse_wrapped_csv(self._parse_disjunction), ) ) def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: def _parse_value_expression() -> t.Optional[exp.Expr]: if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): return exp.var(self._prev.text.upper()) return self._parse_expression() if self._match(TokenType.L_PAREN): expressions = self._parse_csv(_parse_value_expression) self._match_r_paren() return self.expression(exp.Tuple(expressions=expressions)) # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
expression = self._parse_expression() if expression: return self.expression(exp.Tuple(expressions=[expression])) return None def _parse_projections( self, ) -> t.Tuple[t.List[exp.Expr], t.Optional[t.List[exp.Expr]]]: return self._parse_expressions(), None def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expr]: if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): this: t.Optional[exp.Expr] = self._parse_simplified_pivot( is_unpivot=self._prev.token_type == TokenType.UNPIVOT ) elif self._match(TokenType.FROM): from_ = self._parse_from(skip_from_token=True, consume_pipe=True) # Support parentheses for duckdb FROM-first syntax select = self._parse_select(from_=from_) if select: if not select.args.get("from_"): select.set("from_", from_) this = select else: this = exp.select("*").from_(t.cast(exp.From, from_)) this = self._parse_query_modifiers(self._parse_set_operations(this)) else: this = ( self._parse_table(consume_pipe=True) if table else self._parse_select(nested=True, parse_set_operation=False) ) # Transform exp.Values into a exp.Table to pass through parse_query_modifiers # in case a modifier (e.g. 
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
    consume_pipe: bool = True,
    from_: t.Optional[exp.From] = None,
) -> t.Optional[exp.Expr]:
    """Parse a select-like query and, optionally, a pipe-syntax continuation.

    The query itself is delegated to `_parse_select_query`. If `consume_pipe` is set
    and the next token is `|>`, the result is fed to `_parse_pipe_syntax_query`.

    Args:
        nested: Forwarded to `_parse_select_query`.
        table: Forwarded to `_parse_select_query`; also makes a piped query be
            returned as a subquery.
        parse_subquery_alias: Forwarded to `_parse_select_query`.
        parse_set_operation: Forwarded to `_parse_select_query`.
        consume_pipe: Whether a following `|>` should be handled here.
        from_: FROM clause used to build `SELECT * FROM ...` when no query was
            parsed but a pipe operator follows.

    Returns:
        The parsed expression, or None if nothing was parsed.
    """
    parsed = self._parse_select_query(
        nested=nested,
        table=table,
        parse_subquery_alias=parse_subquery_alias,
        parse_set_operation=parse_set_operation,
    )

    # Peek only: the pipe token itself is not consumed here
    if not consume_pipe or not self._match(TokenType.PIPE_GT, advance=False):
        return parsed

    if not parsed and from_:
        parsed = exp.select("*").from_(from_)

    if not isinstance(parsed, exp.Query):
        return parsed

    piped = self._parse_pipe_syntax_query(parsed)
    if piped and table:
        return piped.subquery(copy=False)
    return piped
else None ) distinct: t.Optional[exp.Expr] = ( self.expression( exp.Distinct( on=self._parse_value(values=False) if self._match(TokenType.ON) else None ) ) if matched_distinct else None ) if all_ and distinct: self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") operation_modifiers = [] while self._curr and self._match_texts(self.OPERATION_MODIFIERS): operation_modifiers.append(exp.var(self._prev.text.upper())) limit = self._parse_limit(top=True) projections, exclude = self._parse_projections() this = self.expression( exp.Select( kind=kind, hint=hint, distinct=distinct, expressions=projections, limit=limit, exclude=exclude, operation_modifiers=operation_modifiers or None, ) ) this.comments = comments into = self._parse_into() if into: this.set("into", into) if not from_: from_ = self._parse_from() if from_: this.set("from_", from_) this = self._parse_query_modifiers(this) elif (table or nested) and self._match(TokenType.L_PAREN): comments = self._prev_comments this = self._parse_wrapped_select(table=table) if this: this.add_comments(comments, prepend=True) # We return early here so that the UNION isn't attached to the subquery by the # following call to _parse_set_operations, but instead becomes the parent node self._match_r_paren() return self._parse_subquery(this, parse_alias=parse_subquery_alias) elif self._match(TokenType.VALUES, advance=False): this = self._parse_derived_table_values() elif from_: this = exp.select("*").from_(from_.this, copy=False) this = self._parse_query_modifiers(this) elif self._match(TokenType.SUMMARIZE): table = self._match(TokenType.TABLE) this = self._parse_select() or self._parse_string() or self._parse_table() return self.expression(exp.Summarize(this=this, table=table)) elif self._match(TokenType.DESCRIBE): this = self._parse_describe() else: this = None return self._parse_set_operations(this) if parse_set_operation else this def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its list of CTEs.

    Args:
        skip_with_token: If True, the WITH keyword is not required at the current position.

    Returns:
        An `exp.With` node, or None when the WITH keyword is required but absent.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    ctes = []
    pending_comments = None
    has_more = True
    while has_more:
        parsed = self._parse_cte()
        if isinstance(parsed, exp.CTE):
            ctes.append(parsed)
            if pending_comments:
                parsed.add_comments(pending_comments)

        # CTEs are separated by a comma, by a repeated WITH, or by a comma followed by WITH
        has_more = bool(self._match(TokenType.COMMA) or self._match(TokenType.WITH))
        if has_more:
            self._match(TokenType.WITH)
            pending_comments = self._prev_comments

    search = self._parse_recursive_with_search()
    with_node = exp.With(
        expressions=ctes,
        recursive=is_recursive or None,
        search=search,
    )
    return self.expression(with_node, comments=with_comments)
def _implicit_unnests_to_explicit(self, this: E) -> E:
    """Rewrite joined tables that refer to an earlier source into explicit UNNEST nodes.

    The FROM source and every join seen so far contribute a normalized alias/name to
    a set of known references. A joined `exp.Table` without an ON condition whose
    first part matches one of those references is replaced, in place, by an
    `exp.Unnest` over the equivalent column.

    Args:
        this: The expression whose "from_" and "joins" args are inspected.

    Returns:
        The same expression, with matching join targets replaced.
    """
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

    refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}

    for join in this.args.get("joins") or []:
        table = join.this

        # Normalize a copy so the original node keeps its spelling
        normalized_table = table.copy()
        normalized_table.meta["maybe_column"] = True
        normalized_table = _norm(normalized_table, dialect=self.dialect)

        if (
            isinstance(table, exp.Table)
            and not join.args.get("on")
            and normalized_table.parts[0].name in refs
        ):
            table_as_column = table.to_column()
            unnest = exp.Unnest(expressions=[table_as_column])

            # Table.to_column creates a parent Alias node that we want to convert to
            # a TableAlias and attach to the Unnest, so it matches the parser's output
            if isinstance(table.args.get("alias"), exp.TableAlias):
                table_as_column.replace(table_as_column.this)
                exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

            table.replace(unnest)

        # Later joins may refer to this one, whether or not it was rewritten
        refs.add(normalized_table.alias_or_name)

    return this
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE clause.

    The sub-clauses are read in a fixed order: partition, order, MEASURES, the
    rows-per-match option, AFTER MATCH SKIP, PATTERN, DEFINE and finally an
    optional table alias after the closing parenthesis.

    Returns:
        An `exp.MatchRecognize` node, or None if the current token is not MATCH_RECOGNIZE.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_csv(self._parse_match_recognize_measure)

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # Optional qualifiers, tried in order; at most one is consumed
        empty_match_options = (
            (("SHOW", "EMPTY", "MATCHES"), " SHOW EMPTY MATCHES"),
            (("OMIT", "EMPTY", "MATCHES"), " OMIT EMPTY MATCHES"),
            (("WITH", "UNMATCHED", "ROWS"), " WITH UNMATCHED ROWS"),
        )
        for words, suffix in empty_match_options:
            if self._match_text_seq(*words):
                rows_text += suffix
                break
        rows = exp.var(rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        skip_text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            skip_text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            skip_text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            skip_text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            skip_text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(skip_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed; its raw SQL text is captured by scanning to the
        # parenthesis that balances the one opened above
        depth = 1
        start = self._curr

        while self._curr and depth > 0:
            token_type = self._curr.token_type
            if token_type == TokenType.L_PAREN:
                depth += 1
            elif token_type == TokenType.R_PAREN:
                depth -= 1

            # Recorded before advancing, so the balancing parenthesis is excluded
            end = self._prev
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_csv(self._parse_name_as_expression)

    self._match_r_paren()

    alias = self._parse_table_alias()
    return self.expression(
        exp.MatchRecognize(
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=alias,
        )
    )
def _parse_using_identifiers(self) -> t.List[exp.Expr]:
    """Parse the optionally parenthesized, comma-separated list of a USING clause.

    Each item is parsed as a column; an `exp.Column` result is reduced to its inner
    `this` node, anything else is returned unchanged.
    """

    def _unwrapped_column() -> t.Optional[exp.Expr]:
        column = self._parse_column()
        return column.this if isinstance(column, exp.Column) else column

    identifiers = self._parse_wrapped_csv(_unwrapped_column, optional=True)
    return identifiers
self._try_parse(self._parse_table) cross_join = self.expression(exp.Join(this=table)) if table else None if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: cross_join.set("kind", "CROSS") return cross_join index = self._index method, side, kind = self._parse_join_parts() directed = self._match_text_seq("DIRECTED") hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) join_comments = self._prev_comments if not skip_join_token and not join: self._retreat(index) kind = None method = None side = None outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) if not skip_join_token and not join and not outer_apply and not cross_apply: return None kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): kwargs["expressions"] = self._parse_csv( lambda: self._parse_table(parse_bracket=parse_bracket) ) if method: kwargs["method"] = method.text.upper() if side: kwargs["side"] = side.text.upper() if kind: kwargs["kind"] = kind.text.upper() if hint: kwargs["hint"] = hint if self._match(TokenType.MATCH_CONDITION): kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) if self._match(TokenType.ON): kwargs["on"] = self._parse_disjunction() elif self._match(TokenType.USING): kwargs["using"] = self._parse_using_identifiers() elif ( not method and not (outer_apply or cross_apply) and not isinstance(kwargs["this"], exp.Unnest) and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) ): index = self._index joins: t.Optional[list] = list(self._parse_joins()) if joins and self._match(TokenType.ON): kwargs["on"] = self._parse_disjunction() elif joins and self._match(TokenType.USING): kwargs["using"] = self._parse_using_identifiers() else: joins = None 
def _parse_index_params(self) -> exp.IndexParameters:
    """Parse the parameters that may follow an index name.

    The optional pieces are read in this order: USING, the wrapped column list,
    INCLUDE, partition-by, WITH storage properties, USING INDEX TABLESPACE, WHERE
    and ON.
    """
    using = None
    if self._match(TokenType.USING):
        using = self._parse_var(any_token=True)

    # Peek for the parenthesis; the wrapped-list parser consumes it itself
    columns = (
        self._parse_wrapped_csv(self._parse_with_operator)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    include = None
    if self._match_text_seq("INCLUDE"):
        include = self._parse_wrapped_id_vars()

    partition_by = self._parse_partition_by()

    # Keeps the (falsy) match result when WITH is absent
    with_storage = self._match(TokenType.WITH)
    if with_storage:
        with_storage = self._parse_wrapped_properties()

    tablespace = None
    if self._match_text_seq("USING", "INDEX", "TABLESPACE"):
        tablespace = self._parse_var(any_token=True)

    where = self._parse_where()

    on = None
    if self._match(TokenType.ON):
        on = self._parse_field()

    params = exp.IndexParameters(
        using=using,
        columns=columns,
        include=include,
        partition_by=partition_by,
        where=where,
        with_storage=with_storage,
        tablespace=tablespace,
        on=on,
    )
    return self.expression(params)
def _parse_table_parts_fast(self) -> t.Optional[exp.Table]:
    """Fast path for parsing a plain dotted table name made only of identifier tokens.

    Consumes `a`, `a.b`, `a.b.c`, ... as long as every part is one of
    `IDENTIFIER_TOKENS`. The attempt is abandoned, and the parser position restored,
    when a dot is not followed by an identifier token, or when the name is directly
    followed by one of `TABLE_POSTFIX_TOKENS`.

    Returns:
        An `exp.Table` whose first three parts map to catalog, db and table name
        (further parts are nested in `exp.Dot`), or None if the fast path does
        not apply.
    """
    index = self._index
    parts: t.List[exp.Identifier] = []
    all_comments: t.List[str] = []

    while self._match_set(self.IDENTIFIER_TOKENS):
        token = self._prev
        comments = self._prev_comments

        has_dot = self._match(TokenType.DOT)
        curr_tt = self._curr.token_type

        # A dot must be followed by another identifier; a finished name must not be
        # followed by a postfix token
        if has_dot:
            unsupported = curr_tt not in self.IDENTIFIER_TOKENS
        else:
            unsupported = curr_tt in self.TABLE_POSTFIX_TOKENS

        if unsupported:
            self._retreat(index)
            return None

        if comments:
            # Collected here and attached to the resulting Table below
            all_comments.extend(comments)
            self._prev_comments = []

        parts.append(
            self.expression(
                exp.Identifier(this=token.text, quoted=token.token_type == TokenType.IDENTIFIER),
                token,
            )
        )

        if not has_dot:
            break

    if not parts:
        return None

    if len(parts) == 1:
        table = exp.Table(this=parts[0])
    elif len(parts) == 2:
        table = exp.Table(this=parts[1], db=parts[0])
    else:
        # Parts beyond catalog.db.table are folded into nested Dot expressions
        this: exp.Identifier | exp.Dot = parts[2]
        for part in parts[3:]:
            this = exp.Dot(this=this, expression=part)
        table = exp.Table(this=this, db=parts[1], catalog=parts[0])

    if all_comments:
        table.add_comments(all_comments)

    return table
Bubble up comments from identifier parts to the Table comments = [] for part in table.parts: if part_comments := part.pop_comments(): comments.extend(part_comments) if comments: table.add_comments(comments) changes = self._parse_changes() if changes: table.set("changes", changes) at_before = self._parse_historical_data() if at_before: table.set("when", at_before) pivots = self._parse_pivots() if pivots: table.set("pivots", pivots) return table def _parse_table( self, schema: bool = False, joins: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None, parse_bracket: bool = False, is_db_reference: bool = False, parse_partition: bool = False, consume_pipe: bool = False, ) -> t.Optional[exp.Expr]: if not schema and not is_db_reference and not consume_pipe and not joins: index = self._index table = self._parse_table_parts(fast=True) if table is not None: curr_tt = self._curr.token_type next_tt = self._next.token_type fast_terminators = self.TABLE_TERMINATORS # only return the table if we're sure there are no other operators # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: return table postfix_tokens = self.TABLE_POSTFIX_TOKENS if curr_tt not in postfix_tokens and next_tt not in postfix_tokens: if alias := self._parse_table_alias( alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS ): table.set("alias", alias) if self._curr.token_type in fast_terminators: return table self._retreat(index) if stream := self._parse_stream(): return stream if lateral := self._parse_lateral(): return lateral if unnest := self._parse_unnest(): return unnest if values := self._parse_derived_table_values(): return values if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): if not subquery.args.get("pivots"): subquery.set("pivots", self._parse_pivots()) return subquery bracket = parse_bracket and self._parse_bracket(None) bracket = 
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a table version / time-travel clause.

    Returns:
        An `exp.Version` whose `this` is "TIMESTAMP" or "VERSION", or None when
        neither snapshot token is present.
    """
    snapshot_kinds = (
        (TokenType.TIMESTAMP_SNAPSHOT, "TIMESTAMP"),
        (TokenType.VERSION_SNAPSHOT, "VERSION"),
    )
    # Lazily tries each token in order and stops at the first one that matches
    this = next(
        (label for token_type, label in snapshot_kinds if self._match(token_type)),
        None,
    )
    if this is None:
        return None

    expression: t.Optional[exp.Expr]
    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        expression = self.expression(exp.Tuple(expressions=[lower, upper]))
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        expression = self.expression(exp.Tuple(expressions=bounds))
    elif self._match(TokenType.ALL):
        kind = "ALL"
        expression = None
    else:
        # The AS OF keywords are optional here
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    version = exp.Version(this=this, expression=expression, kind=kind)
    return self.expression(version)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause.

    Args:
        as_modifier: If True, `USING SAMPLE` is also accepted as the clause introducer,
            in addition to the TABLE_SAMPLE token.

    Returns:
        An `exp.TableSample` node, or None if no sampling clause starts at the
        current position.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # A bare number is a percentage in dialects with TABLESAMPLE_SIZE_IS_PERCENT
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing "(method[, seed])" form; overrides any method parsed earlier
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample(
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
    )
ON (col1, col2, col3) AS row_val return self._parse_alias(this) return this this = self._parse_table() expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) into = self._parse_unpivot_columns() using = self._match(TokenType.USING) and self._parse_csv( lambda: self._parse_alias(self._parse_column()) ) group = self._parse_group() return self.expression( exp.Pivot( this=this, expressions=expressions, using=using, group=group, unpivot=is_unpivot, into=into, ) ) def _parse_pivot_in(self) -> exp.In: def _parse_aliased_expression() -> t.Optional[exp.Expr]: this = self._parse_select_or_expression() self._match(TokenType.ALIAS) alias = self._parse_bitwise() if alias: if isinstance(alias, exp.Column) and not alias.db: alias = alias.this return self.expression(exp.PivotAlias(this=this, alias=alias)) return this value = self._parse_column() if not self._match(TokenType.IN): self.raise_error("Expecting IN") if self._match(TokenType.L_PAREN): if self._match(TokenType.ANY): exprs: t.List[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) else: exprs = self._parse_csv(_parse_aliased_expression) self._match_r_paren() return self.expression(exp.In(this=value, expressions=exprs)) return self.expression(exp.In(this=value, field=self._parse_id_var())) def _parse_pivot_aggregation(self) -> t.Optional[exp.Expr]: func = self._parse_function() if not func: if self._prev.token_type == TokenType.COMMA: return None self.raise_error("Expecting an aggregation function in PIVOT") return self._parse_alias(func) def _parse_pivot(self) -> t.Optional[exp.Pivot]: index = self._index include_nulls = None if self._match(TokenType.PIVOT): unpivot = False elif self._match(TokenType.UNPIVOT): unpivot = True # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax if self._match_text_seq("INCLUDE", "NULLS"): include_nulls = True elif self._match_text_seq("EXCLUDE", "NULLS"): include_nulls = False else: return None expressions = [] if 
not self._match(TokenType.L_PAREN): self._retreat(index) return None if unpivot: expressions = self._parse_csv(self._parse_column) else: expressions = self._parse_csv(self._parse_pivot_aggregation) if not expressions: self.raise_error("Failed to parse PIVOT's aggregation list") if not self._match(TokenType.FOR): self.raise_error("Expecting FOR") fields = [] while True: field = self._try_parse(self._parse_pivot_in) if not field: break fields.append(field) default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( self._parse_bitwise ) group = self._parse_group() self._match_r_paren() pivot = self.expression( exp.Pivot( expressions=expressions, fields=fields, unpivot=unpivot, include_nulls=include_nulls, default_on_null=default_on_null, group=group, ) ) if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): pivot.set("alias", self._parse_table_alias()) if not unpivot: names = self._pivot_column_names(t.cast(t.List[exp.Expr], expressions)) columns: t.List[exp.Expr] = [] all_fields = [] for pivot_field in pivot.fields: pivot_field_expressions = pivot_field.expressions # The `PivotAny` expression corresponds to `ANY ORDER BY `; we can't infer in this case. if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): continue all_fields.append( [ fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name for fld in pivot_field_expressions ] ) if all_fields: if names: all_fields.append(names) # Generate all possible combinations of the pivot columns # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] for fld_parts_tuple in itertools.product(*all_fields): fld_parts = list(fld_parts_tuple) if names and self.PREFIXED_PIVOT_COLUMNS: # Move the "name" to the front of the list fld_parts.insert(0, fld_parts.pop(-1)) columns.append(exp.to_identifier("_".join(fld_parts))) pivot.set("columns", columns) return pivot def _pivot_column_names(self, aggregations: t.List[exp.Expr]) -> t.List[str]: return [agg.alias for agg in aggregations if agg.alias] def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: if not skip_where_token and not self._match(TokenType.PREWHERE): return None comments = self._prev_comments return self.expression( exp.PreWhere(this=self._parse_disjunction()), comments=comments, ) def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: if not skip_where_token and not self._match(TokenType.WHERE): return None comments = self._prev_comments return self.expression( exp.Where(this=self._parse_disjunction()), comments=comments, ) def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: if not skip_group_by_token and not self._match(TokenType.GROUP_BY): return None comments = self._prev_comments elements: t.Dict[str, t.Any] = defaultdict(list) if self._match(TokenType.ALL): elements["all"] = True elif self._match(TokenType.DISTINCT): elements["all"] = False if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): return self.expression(exp.Group(**elements), comments=comments) # type: ignore while True: index = self._index elements["expressions"].extend( self._parse_csv( lambda: ( None if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) else self._parse_disjunction() ) ) ) before_with_index = self._index with_prefix = self._match(TokenType.WITH) if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): key = 
"rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" elements[key].append(cube_or_rollup) elif grouping_sets := self._parse_grouping_sets(): elements["grouping_sets"].append(grouping_sets) elif self._match_text_seq("TOTALS"): elements["totals"] = True # type: ignore if before_with_index <= self._index <= before_with_index + 1: self._retreat(before_with_index) break if index == self._index: break return self.expression(exp.Group(**elements), comments=comments) # type: ignore def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: if self._match(TokenType.CUBE): kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube elif self._match(TokenType.ROLLUP): kind = exp.Rollup else: return None return self.expression( kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) ) def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: if self._match(TokenType.GROUPING_SETS): return self.expression( exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) ) return None def _parse_grouping_set(self) -> t.Optional[exp.Expr]: return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: if not skip_having_token and not self._match(TokenType.HAVING): return None comments = self._prev_comments return self.expression( exp.Having(this=self._parse_disjunction()), comments=comments, ) def _parse_qualify(self) -> t.Optional[exp.Qualify]: if not self._match(TokenType.QUALIFY): return None return self.expression(exp.Qualify(this=self._parse_disjunction())) def _parse_connect_with_prior(self) -> t.Optional[exp.Expr]: self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( exp.Prior(this=self._parse_bitwise()) ) connect = self._parse_disjunction() self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") return connect def _parse_connect(self, skip_start_token: bool = False) -> 
def _parse_order(
    self, this: t.Optional[exp.Expr] = None, skip_order_token: bool = False
) -> t.Optional[exp.Expr]:
    """Parse an ORDER BY / ORDER SIBLINGS BY clause into `exp.Order`.

    Args:
        this: Expression the ordering is attached to; returned unchanged when no
            ordering keyword is found.
        skip_order_token: When True, do not look for the leading keyword and parse
            the ordered items directly.

    Returns:
        The `exp.Order` node, or `this` when no ordering keyword matches.
    """
    siblings = None

    if not (skip_order_token or self._match(TokenType.ORDER_BY)):
        if not self._match(TokenType.ORDER_SIBLINGS_BY):
            return this
        siblings = True

    # Capture the keyword's comments before parsing moves past it
    order_comments = self._prev_comments
    ordered_items = self._parse_csv(self._parse_ordered)
    node = exp.Order(this=this, expressions=ordered_items, siblings=siblings)

    return self.expression(node, comments=order_comments)
def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]:
    """Parse the optional PERCENT / ROW(S) / ONLY / WITH TIES suffix of a limit.

    Returns:
        An `exp.LimitOptions` node, or None when none of percent, rows or
        WITH TIES was matched.
    """
    is_percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
    has_rows = self._match_set((TokenType.ROW, TokenType.ROWS))
    # ONLY is consumed when present but is not recorded on the node
    self._match_text_seq("ONLY")
    ties = self._match_text_seq("WITH", "TIES")

    if not any((is_percent, has_rows, ties)):
        return None

    options = exp.LimitOptions(percent=is_percent, rows=has_rows, with_ties=ties)
    return self.expression(options)
def _parse_offset(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]:
    """Parse an OFFSET clause into `exp.Offset`.

    Args:
        this: Expression the offset is attached to; returned unchanged when the
            next token is not OFFSET.

    Returns:
        The `exp.Offset` node, or `this` when there is no OFFSET token.
    """
    if self._match(TokenType.OFFSET):
        row_count = self._parse_term()
        # Optional trailing ROW / ROWS keyword
        self._match_set((TokenType.ROW, TokenType.ROWS))
        by_expressions = self._parse_limit_by()
        return self.expression(
            exp.Offset(this=this, expression=row_count, expressions=by_expressions)
        )

    return this
is a special construct that should not be consumed by limit/offset if self._next.token_type == TokenType.MATCH_CONDITION: result = False return result def _parse_limit_by(self) -> t.Optional[t.List[exp.Expr]]: return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None def _parse_locks(self) -> t.List[exp.Lock]: locks = [] while True: update, key = None, None if self._match_text_seq("FOR", "UPDATE"): update = True elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( "LOCK", "IN", "SHARE", "MODE" ): update = False elif self._match_text_seq("FOR", "KEY", "SHARE"): update, key = False, True elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): update, key = True, True else: break expressions = None if self._match_text_seq("OF"): expressions = self._parse_csv(lambda: self._parse_table(schema=True)) wait: t.Optional[bool | exp.Expr] = None if self._match_text_seq("NOWAIT"): wait = True elif self._match_text_seq("WAIT"): wait = self._parse_primary() elif self._match_text_seq("SKIP", "LOCKED"): wait = False locks.append( self.expression( exp.Lock(update=update, expressions=expressions, wait=wait, key=key) ) ) return locks def parse_set_operation( self, this: t.Optional[exp.Expr], consume_pipe: bool = False ) -> t.Optional[exp.Expr]: start = self._index _, side_token, kind_token = self._parse_join_parts() side = side_token.text if side_token else None kind = kind_token.text if kind_token else None if not self._match_set(self.SET_OPERATIONS): self._retreat(start) return None token_type = self._prev.token_type if token_type == TokenType.UNION: operation: t.Type[exp.SetOperation] = exp.Union elif token_type == TokenType.EXCEPT: operation = exp.Except else: operation = exp.Intersect comments = self._prev.comments if self._match(TokenType.DISTINCT): distinct: t.Optional[bool] = True elif self._match(TokenType.ALL): distinct = False else: distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] if distinct is None: 
def _parse_set_operations(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:
    """Fold any chain of set operations following `this` into a single expression.

    `parse_set_operation` is applied repeatedly, each result becoming the left side
    of the next attempt. When `MODIFIERS_ATTACHED_TO_SET_OP` is set, the modifiers
    listed in `SET_OP_MODIFIERS` are moved from the right-hand operand onto the
    resulting set operation.

    Returns:
        The combined expression, or `this` unchanged when no set operation follows.
    """
    while this:
        combined = self.parse_set_operation(this)
        if not combined:
            break
        this = combined

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if not right_operand:
        return this

    for modifier_name in self.SET_OP_MODIFIERS:
        modifier = right_operand.args.get(modifier_name)
        if modifier:
            # Detach from the operand and re-attach to the set operation itself
            this.set(modifier_name, modifier.pop())

    return this
expression=self._parse_conjunction() ), comments=comments, ) return this def _parse_conjunction(self) -> t.Optional[exp.Expr]: this = self._parse_equality() while self._match_set(self.CONJUNCTION): comments = self._prev_comments this = self.expression( self.CONJUNCTION[self._prev.token_type]( this=this, expression=self._parse_equality() ), comments=comments, ) return this def _parse_equality(self) -> t.Optional[exp.Expr]: this = self._parse_comparison() while self._match_set(self.EQUALITY): comments = self._prev_comments this = self.expression( self.EQUALITY[self._prev.token_type]( this=this, expression=self._parse_comparison() ), comments=comments, ) return this def _parse_comparison(self) -> t.Optional[exp.Expr]: this = self._parse_range() while self._match_set(self.COMPARISON): comments = self._prev_comments this = self.expression( self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), comments=comments, ) return this def _parse_range(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]: this = this or self._parse_bitwise() negate = self._match(TokenType.NOT) if self._match_set(self.RANGE_PARSERS): expression = self.RANGE_PARSERS[self._prev.token_type](self, this) if not expression: return this this = expression elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): this = self.expression(exp.Is(this=this, expression=exp.Null())) # Postgres supports ISNULL and NOTNULL for conditions. 
def _parse_is(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:
    """Parse what follows an IS keyword: [NOT] DISTINCT FROM, [NOT] JSON ..., or a value.

    Args:
        this: The left-hand operand of IS.

    Returns:
        A null-safe (in)equality node for `[NOT] DISTINCT FROM`; otherwise an
        `exp.Is` node (wrapped in `exp.Not` when negated) passed through
        `_parse_column_ops`. Returns None, after retreating, when no right-hand
        side can be parsed.
    """
    # One token before the current position (the IS token when called from `_parse_range`)
    restore_to = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM -> null-safe equality, IS DISTINCT FROM -> its negation
        node_type = exp.NullSafeEQ if negated else exp.NullSafeNEQ
        return self.expression(node_type(this=this, expression=self._parse_bitwise()))

    operand: t.Optional[exp.Expr]
    if not self._match(TokenType.JSON):
        operand = self._parse_null() or self._parse_bitwise()
        if not operand:
            self._retreat(restore_to)
            return None
    else:
        json_kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False
        else:
            with_keys = None

        is_unique = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")
        operand = self.expression(exp.JSON(this=json_kind, with_=with_keys, unique=is_unique))

    result = self.expression(exp.Is(this=this, expression=operand))
    if negated:
        result = self.expression(exp.Not(this=result))

    return self._parse_column_ops(result)
def _parse_between(self, this: t.Optional[exp.Expr]) -> exp.Between:
    """Parse the `[SYMMETRIC | ASYMMETRIC] low AND high` part of a BETWEEN predicate.

    Args:
        this: The expression being range-tested.

    Returns:
        An `exp.Between` node; `symmetric` is True/False when the respective
        keyword was present and None otherwise.
    """
    if self._match_text_seq("SYMMETRIC"):
        symmetric = True
    elif self._match_text_seq("ASYMMETRIC"):
        symmetric = False
    else:
        symmetric = None

    lower_bound = self._parse_bitwise()
    # The AND separator is consumed if present; its absence is not reported here
    self._match(TokenType.AND)
    upper_bound = self._parse_bitwise()

    node = exp.Between(this=this, low=lower_bound, high=upper_bound, symmetric=symmetric)
    return self.expression(node)
def _parse_interval(self, require_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression, chaining consecutive value/unit pairs with `exp.Add`.

    Args:
        require_interval: When True, return None unless the INTERVAL token is matched.
            The recursive call passes False to parse the follow-up pairs, which are
            not introduced by their own INTERVAL keyword.

    Returns:
        An `exp.Interval`, an `exp.Add` of intervals, or None (after retreating to the
        starting position) when the tokens do not form an interval.
    """
    # Remember where we started so the INTERVAL token can be un-consumed
    index = self._index

    if not self._match(TokenType.INTERVAL) and require_interval:
        return None

    # A string literal is taken as-is; anything else is parsed as a term
    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Back out when nothing was parsed, or when an unquoted, table-less column is
    # followed by a token that is not a valid interval unit for the dialect
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and self._curr
        and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
    ):
        self._retreat(index)
        return None

    interval = self._parse_interval_span(this)

    # From here on, `index` marks the position right after the first interval so
    # that a speculatively consumed `+` can be given back
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(exp.Add(this=interval, expression=self._parse_interval(False)))

    self._retreat(index)
    return interval
self.FACTOR[self._prev.token_type] comments = self._prev_comments expression = parse_method() if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): self._retreat(self._index - 1) return this this = self.expression(klass(this=this, expression=expression), comments=comments) if isinstance(this, exp.Div): this.set("typed", self.dialect.TYPED_DIVISION) this.set("safe", self.dialect.SAFE_DIVISION) return this def _parse_exponent(self) -> t.Optional[exp.Expr]: this = self._parse_unary() while self._match_set(self.EXPONENT): comments = self._prev_comments this = self.expression( self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), comments=comments, ) return this def _parse_unary(self) -> t.Optional[exp.Expr]: if self._match_set(self.UNARY_PARSERS): return self.UNARY_PARSERS[self._prev.token_type](self) return self._parse_type() def _parse_type( self, parse_interval: bool = True, fallback_to_identifier: bool = False ) -> t.Optional[exp.Expr]: if not fallback_to_identifier and (atom := self._parse_atom()) is not None: return atom if interval := parse_interval and self._parse_interval(): return self._parse_column_ops(interval) index = self._index data_type = self._parse_types(check_func=True, allow_identifiers=False) # parse_types() returns a Cast if we parsed BQ's inline constructor () e.g. 
# STRUCT(1, 'foo'), which is canonicalized to CAST( AS ) if isinstance(data_type, exp.Cast): # This constructor can contain ops directly after it, for instance struct unnesting: # STRUCT(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT 1: self._retreat(index2) return self._parse_column_ops(data_type) self._retreat(index) if fallback_to_identifier: return self._parse_id_var() return self._parse_column() def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: this = self._parse_type() if not this: return None if isinstance(this, exp.Column) and not this.table: this = exp.var(this.name.upper()) return self.expression( exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True)) ) def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expr]: type_name = identifier.name while self._match(TokenType.DOT): type_name = f"{type_name}.{self._advance_any() and self._prev.text}" return exp.DataType.build(type_name, dialect=self.dialect, udt=True) def _parse_types( self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expr]: index = self._index this: t.Optional[exp.Expr] = None if self._match_set(self.TYPE_TOKENS): type_token = self._prev.token_type else: type_token = None identifier = allow_identifiers and self._parse_id_var( any_token=False, tokens=(TokenType.VAR,) ) if isinstance(identifier, exp.Identifier): try: tokens = self.dialect.tokenize(identifier.name) except TokenError: tokens = None if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS: if len(tokens) > 1: return exp.DataType.build(identifier.name, dialect=self.dialect) elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: this = self._parse_user_defined_type(identifier) else: self._retreat(self._index - 1) return None else: return None if type_token == TokenType.PSEUDO_TYPE: return self.expression(exp.PseudoType(this=self._prev.text.upper())) if type_token == TokenType.OBJECT_IDENTIFIER: return 
self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) # https://materialize.com/docs/sql/types/map/ if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): key_type = self._parse_types( check_func=check_func, schema=schema, allow_identifiers=allow_identifiers ) if not self._match(TokenType.FARROW): self._retreat(index) return None value_type = self._parse_types( check_func=check_func, schema=schema, allow_identifiers=allow_identifiers ) if not self._match(TokenType.R_BRACKET): self._retreat(index) return None return exp.DataType( this=exp.DType.MAP, expressions=[key_type, value_type], nested=True, ) nested = type_token in self.NESTED_TYPE_TOKENS is_struct = type_token in self.STRUCT_TYPE_TOKENS is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS expressions = None maybe_func = False if self._match(TokenType.L_PAREN): if is_struct: expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) elif nested: expressions = self._parse_csv( lambda: self._parse_types( check_func=check_func, schema=schema, allow_identifiers=allow_identifiers ) ) if type_token == TokenType.NULLABLE and len(expressions) == 1: this = expressions[0] this.set("nullable", True) self._match_r_paren() return this elif type_token in self.ENUM_TYPE_TOKENS: expressions = self._parse_csv(self._parse_equality) elif type_token == TokenType.JSON: # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value) # https://clickhouse.com/docs/sql-reference/data-types/newjson expressions = self._parse_csv(self._parse_json_type_arg) elif is_aggregate: func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( any_token=False, tokens=(TokenType.VAR, TokenType.ANY) ) if not func_or_ident: return None expressions = [func_or_ident] if self._match(TokenType.COMMA): expressions.extend( self._parse_csv( lambda: self._parse_types( check_func=check_func, schema=schema, allow_identifiers=allow_identifiers, ) ) ) else: expressions = 
self._parse_csv(self._parse_type_size) # https://docs.snowflake.com/en/sql-reference/data-types-vector if type_token == TokenType.VECTOR and len(expressions) == 2: expressions = self._parse_vector_expressions(expressions) if not self._match(TokenType.R_PAREN): self._retreat(index) return None maybe_func = True values: t.Optional[t.List[exp.Expr]] = None if nested and self._match(TokenType.LT): if is_struct: expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) else: expressions = self._parse_csv( lambda: self._parse_types( check_func=check_func, schema=schema, allow_identifiers=allow_identifiers ) ) if not self._match(TokenType.GT): self.raise_error("Expecting >") if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): values = self._parse_csv(self._parse_disjunction) if not values and is_struct: values = None self._retreat(self._index - 1) else: self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) if type_token in self.TIMESTAMPS: if self._match_text_seq("WITH", "TIME", "ZONE"): maybe_func = False tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ this = exp.DataType(this=tz_type, expressions=expressions) elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): maybe_func = False this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): maybe_func = False elif type_token == TokenType.INTERVAL: if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: unit = self._parse_var(upper=True) if self._match_text_seq("TO"): unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) else: this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) elif type_token == TokenType.VOID: this = exp.DataType(this=exp.DType.NULL) if maybe_func and check_func: index2 = self._index peek = self._parse_string() if not peek: 
self._retreat(index) return None self._retreat(index2) if not this: assert type_token is not None if self._match_text_seq("UNSIGNED"): unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) if not unsigned_type_token: self.raise_error(f"Cannot convert {type_token.name} to unsigned.") type_token = unsigned_type_token or type_token # NULLABLE without parentheses can be a column (Presto/Trino) if type_token == TokenType.NULLABLE and not expressions: self._retreat(index) return None this = exp.DataType( this=exp.DType[type_token.name], expressions=expressions, nested=nested, ) # Empty arrays/structs are allowed if values is not None: cls = exp.Struct if is_struct else exp.Array this = exp.cast(cls(expressions=values), this, copy=False) elif expressions: this.set("expressions", expressions) # https://materialize.com/docs/sql/types/list/#type-name while self._match(TokenType.LIST): this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) index = self._index # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] matched_array = self._match(TokenType.ARRAY) while self._curr: datatype_token = self._prev.token_type matched_l_bracket = self._match(TokenType.L_BRACKET) if (not matched_l_bracket and not matched_array) or ( datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) ): # Postgres allows casting empty arrays such as ARRAY[]::INT[], # not to be confused with the fixed size array parsing break matched_array = False values = self._parse_csv(self._parse_disjunction) or None if ( values and not schema and ( not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY or not self._match(TokenType.R_BRACKET, advance=False) ) ): # Retreating here means that we should not parse the following values as part of the data type, e.g. 
def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expr]:
    """Parse a single member inside a struct type definition.

    The member is first read as a name (or a type that may fall back to an identifier),
    an optional colon is skipped, and the result is handed to `_parse_column_def`.

    Args:
        type_required: when True and the parsed member is not an `exp.DataType` and is not
            followed by a type token, the parser rewinds to where it started and the member
            is parsed with `_parse_types` instead.
    """
    start = self._index

    curr, upcoming = self._curr, self._next
    name_is_type_token = bool(
        curr
        and upcoming
        and curr.token_type in self.TYPE_TOKENS
        and upcoming.token_type in self.TYPE_TOKENS
    )

    if name_is_type_token:
        # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
        # also a type token. Without this, the list will be parsed as a type and we'll
        # eventually crash
        member = self._parse_id_var()
    else:
        member = self._parse_type(parse_interval=False, fallback_to_identifier=True)
        if not member:
            member = self._parse_id_var()

    # The colon between name and type is optional, so the match result is ignored
    self._match(TokenType.COLON)

    if not type_required or isinstance(member, exp.DataType):
        return self._parse_column_def(member)

    if self._match_set(self.TYPE_TOKENS, advance=False):
        return self._parse_column_def(member)

    # Nothing type-like follows: start over and treat the whole member as a type
    self._retreat(start)
    return self._parse_types()
def _build_json_extract(
    self,
    this: t.Optional[exp.Expr],
    json_path: t.List[str],
    escape: t.Optional[bool],
) -> exp.JSONExtract:
    """Wrap `this` in an `exp.JSONExtract` flagged as a variant extract.

    Args:
        this: the expression the path is extracted from.
        json_path: path segments; they are joined with "." into one string literal that is
            passed to the dialect's `to_json_path`.
        escape: value stored under the "escape" arg of the converted path, when the
            conversion returns something truthy.

    Returns:
        The `exp.JSONExtract` node, created through `self.expression`.
    """
    dotted_path = exp.Literal.string(".".join(json_path))
    path_node = self.dialect.to_json_path(dotted_path)

    if path_node:
        path_node.set("escape", escape)

    extract = exp.JSONExtract(
        this=this,
        expression=path_node,
        variant_extract=True,
        requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
    )
    return self.expression(extract)
def _parse_dcolon(self) -> t.Optional[exp.Expr]:
    """Parse the operand of a cast column operator by delegating to `_parse_types`.

    `_parse_column_ops` calls this right after consuming an operator listed in
    `CAST_COLUMN_OPERATORS` and reports "Expected type" when the result is falsy.

    Returns:
        Whatever `_parse_types` returns, which may be None.
    """
    return self._parse_types()
anonymous_func=True) # Function calls can be qualified, e.g., x.y.FOO() # This converts the final AST to a series of Dots leading to the function call # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules if isinstance(field, (exp.Func, exp.Window)) and this: this = this.transform( lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n ) if op: this = op(self, this, field) elif isinstance(this, exp.Column) and not this.args.get("catalog"): this = self.expression( exp.Column( this=field, table=this.this, db=this.args.get("table"), catalog=this.args.get("db"), ), comments=this.comments, ) elif isinstance(field, exp.Window): # Move the exp.Dot's to the window's function window_func = self.expression(exp.Dot(this=this, expression=field.this)) field.set("this", window_func) this = field else: this = self.expression(exp.Dot(this=this, expression=field)) if field and field.comments: t.cast(exp.Expr, this).add_comments(field.pop_comments()) this = self._parse_bracket(this) return this def _parse_paren(self) -> t.Optional[exp.Expr]: if not self._match(TokenType.L_PAREN): return None comments = self._prev_comments query = self._parse_select() if query: expressions = [query] else: expressions = self._parse_expressions() this = seq_get(expressions, 0) if not this and self._match(TokenType.R_PAREN, advance=False): this = self.expression(exp.Tuple()) elif isinstance(this, exp.UNWRAPPED_QUERIES): this = self._parse_subquery(this=this, parse_alias=False) elif isinstance(this, (exp.Subquery, exp.Values)): this = self._parse_subquery( this=self._parse_query_modifiers(self._parse_set_operations(this)), parse_alias=False, ) elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: this = self.expression(exp.Tuple(expressions=expressions)) else: this = self.expression(exp.Paren(this=this)) if this: this.add_comments(comments) self._match_r_paren(expression=this) if isinstance(this, exp.Paren) and 
def _parse_function(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expr]:
    """Parse a function call, optionally wrapped in the `{fn ...}` escape syntax.

    All arguments are forwarded unchanged to `_parse_function_call`. When the call was
    introduced by `{fn`, a closing brace is consumed afterwards if present.
    """
    # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
    # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    brace_ahead = self._match(TokenType.L_BRACE, advance=False)
    following = self._next
    fn_syntax = bool(brace_ahead and following and following.text.upper() == "FN")

    if fn_syntax:
        # Skip both the opening brace and the FN marker
        self._advance(2)

    func = self._parse_function_call(
        functions=functions,
        anonymous=anonymous,
        optional_parens=optional_parens,
        any_token=any_token,
    )

    if fn_syntax:
        self._match(TokenType.R_BRACE)

    return func
self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False, optional_parens: bool = True, any_token: bool = False, ) -> t.Optional[exp.Expr]: if not self._curr: return None comments = self._curr.comments prev = self._prev token = self._curr token_type = self._curr.token_type this: str | exp.Expr = self._curr.text upper = self._curr.text.upper() parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: self._advance() return self._parse_window(parser(self)) if self._next.token_type != TokenType.L_PAREN: if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: self._advance() return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) return None if any_token: if token_type in self.RESERVED_TOKENS: return None elif token_type not in self.FUNC_TOKENS: return None self._advance(2) parser = self.FUNCTION_PARSERS.get(upper) if parser and not anonymous: result = parser(self) else: subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) if subquery_predicate: expr = None if self._curr.token_type in self.SUBQUERY_TOKENS: expr = self._parse_select() self._match_r_paren() elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): # Backtrack one token since we've consumed the L_PAREN here. 
Instead, we'd like # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren self._advance(-1) expr = self._parse_bitwise() if expr: return self.expression(subquery_predicate(this=expr), comments=comments) if functions is None: functions = self.FUNCTIONS function = functions.get(upper) known_function = function and not anonymous alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS args = self._parse_function_args(alias) post_func_comments = self._curr.comments if self._curr else None if known_function and post_func_comments: # If the user-inputted comment "/* sqlglot.anonymous */" is following the function # call we'll construct it as exp.Anonymous, even if it's "known" if any( comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) for comment in post_func_comments ): known_function = False if alias and known_function: args = self._kv_to_prop_eq(args) if known_function: func_builder = t.cast(t.Callable, function) # mypyc compiled functions don't have __code__, so we use # try/except to check if func_builder accepts 'dialect'. 
def _kv_to_prop_eq(
    self, expressions: t.List[exp.Expr], parse_map: bool = False
) -> t.List[exp.Expr]:
    """Normalize key-value style arguments into `exp.PropertyEQ` nodes.

    Args:
        expressions: the parsed arguments.
        parse_map: when True the key expression is kept as-is; otherwise it is reduced to
            an identifier built from the key's name.

    Returns:
        A new list in which every instance of `KEY_VALUE_DEFINITIONS` is a `PropertyEQ`
        and every other item has been passed through `_to_prop_eq`.
    """
    result = []

    for position, item in enumerate(expressions):
        if not isinstance(item, self.KEY_VALUE_DEFINITIONS):
            result.append(self._to_prop_eq(item, position))
            continue

        if isinstance(item, exp.Alias):
            # `value AS key` turns into `key = value`
            item = self.expression(
                exp.PropertyEQ(this=item.args.get("alias"), expression=item.this)
            )

        if not isinstance(item, exp.PropertyEQ):
            key = item.this if parse_map else exp.to_identifier(item.this.name)
            item = self.expression(exp.PropertyEQ(this=key, expression=item.expression))

        key_node = item.this
        if isinstance(key_node, exp.Column):
            # Replace a column key with the node stored in its `this` arg
            key_node.replace(key_node.this)

        result.append(item)

    return result
def _parse_schema(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]:
    """Parse a parenthesized list of constraints and field definitions into `exp.Schema`.

    Returns `this` unchanged when no opening parenthesis follows, or when the parenthesis
    is followed by a token from `SELECT_START_TOKENS` (the parser is rewound in that case).
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        return this

    # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def parse_entry():
        # A constraint takes precedence over a plain field definition
        return self._parse_constraint() or self._parse_field_def()

    entries = self._parse_csv(parse_entry)
    self._match_r_paren()

    return self.expression(exp.Schema(this=this, expressions=entries))
def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Build an `exp.CompressColumnConstraint` from what follows.

    If the next token is an opening parenthesis, a wrapped comma-separated list is parsed;
    otherwise a single bitwise-level expression is parsed.
    """
    if self._match(TokenType.L_PAREN, advance=False):
        value = self._parse_wrapped_csv(self._parse_bitwise)
    else:
        value = self._parse_bitwise()

    return self.expression(exp.CompressColumnConstraint(this=value))
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Build an `exp.InlineLengthColumnConstraint`, skipping an optional LENGTH keyword."""
    # LENGTH is optional here, so the match result is intentionally discarded
    self._match_text_seq("LENGTH")

    length = self._parse_bitwise()
    constraint = exp.InlineLengthColumnConstraint(this=length)
    return self.expression(constraint)
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expr]:
    """Parse a constraint that is not introduced by the CONSTRAINT keyword.

    Args:
        constraints: keywords accepted at this position; defaults to every key of
            `CONSTRAINT_PARSERS` when empty or None.

    Returns:
        The parsed constraint, or None when the current token is an IDENTIFIER token or
        does not match an accepted keyword. If the selected parser yields nothing, the
        parser is rewound to where it started and that falsy result is returned.
    """
    start = self._index

    # An IDENTIFIER token is never interpreted as a constraint keyword
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    accepted = constraints or self.CONSTRAINT_PARSERS
    if not self._match_texts(accepted):
        return None

    keyword = self._prev.text.upper()
    parsers = self.CONSTRAINT_PARSERS
    if keyword not in parsers:
        self.raise_error(f"No parser found for schema constraint {keyword}.")

    parsed = parsers[keyword](self)
    if not parsed:
        self._retreat(start)

    return parsed
def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse the body of a foreign key constraint into `exp.ForeignKey`.

    Consumes, in order: a wrapped list of column identifiers (skipped when the next token
    is REFERENCES), an optional REFERENCES clause, any number of
    `ON {DELETE | UPDATE} <action>` clauses, and trailing key constraint options.

    Returns:
        An `exp.ForeignKey` whose extra keyword args are keyed by the lower-cased
        DELETE/UPDATE keyword and hold the upper-cased action text.

    Raises:
        Through `self.raise_error`, when ON is not followed by DELETE or UPDATE, or when
        SET is not followed by NULL or DEFAULT.
    """
    expressions = (
        self._parse_wrapped_id_vars()
        if not self._match(TokenType.REFERENCES, advance=False)
        else None
    )
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # SET has to be followed by NULL or DEFAULT. The match result used to be ignored,
            # so a malformed clause built its action from the SET token itself ("SET SET").
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT after SET")
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token (e.g. CASCADE, RESTRICT) is taken verbatim as the action
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey(
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,
        )
    )
We parse the column into the corresponding types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the same as we did for `DATE('yyyy-mm-dd')`. Reference: https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals """ self._match(TokenType.VAR) exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] expression = self.expression(exp_class(this=self._parse_string())) if not self._match(TokenType.R_BRACE): self.raise_error("Expected }") return expression def _parse_bracket(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]: if not self._match_set(self.BRACKETS): return this if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: map_token = seq_get(self._tokens, self._index - 2) parse_map = map_token is not None and map_token.text.upper() == "MAP" else: parse_map = False bracket_kind = self._prev.token_type if ( bracket_kind == TokenType.L_BRACE and self._curr and self._curr.token_type == TokenType.VAR and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS ): return self._parse_odbc_datetime_literal() expressions = self._parse_csv( lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) ) if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): self.raise_error("Expected ]") elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): self.raise_error("Expected }") # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs if bracket_kind == TokenType.L_BRACE: this = self.expression( exp.Struct( expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map) ) ) elif not this: this = build_array_constructor( exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect ) else: constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) if constructor_type: return build_array_constructor( constructor_type, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect, ) 
def _parse_case(self) -> t.Optional[exp.Expr]:
    """Parse a CASE expression: optional operand, WHEN/THEN arms, optional ELSE, END.

    Returns:
        An `exp.Case` node, or None (after stepping back one token) when the next
        token is a dot, so that the preceding token can be parsed as something else.
    """
    if self._match(TokenType.DOT, advance=False):
        # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
        self._retreat(self._index - 1)
        return None

    # Captured before any further parsing moves past the previous token
    leading_comments = self._prev_comments
    operand = self._parse_disjunction()

    arms = []
    while self._match(TokenType.WHEN):
        condition = self._parse_disjunction()
        self._match(TokenType.THEN)
        outcome = self._parse_disjunction()
        arms.append(self.expression(exp.If(this=condition, true=outcome)))

    fallback = self._parse_disjunction() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # If the ELSE branch was parsed as an interval whose operand renders as END,
        # the ELSE value is replaced by a column named "interval"
        # (presumably the END keyword was swallowed by the interval parser).
        swallowed_end = (
            isinstance(fallback, exp.Interval) and fallback.this.sql().upper() == "END"
        )
        if swallowed_end:
            fallback = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    case = exp.Case(this=operand, ifs=arms, default=fallback)
    return self.expression(case, comments=leading_comments)
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of EXTRACT into an `exp.Extract` node.

    The part to extract comes first: a function call, or failing that a string or
    variable (parsed with `upper=True`). It must be followed by FROM or a comma,
    after which the source expression is parsed.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first; a comma is only looked for when FROM is absent
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    source = self._parse_bitwise()
    return self.expression(exp.Extract(this=part, expression=source))
def _parse_string_agg(self) -> exp.GroupConcat:
    """Parse the arguments of a STRING_AGG / LISTAGG style call into `exp.GroupConcat`.

    Three outcomes are possible:

    * The closing paren does not follow the arguments: an ORDER BY (and LIMIT) is
      parsed inside the call and wrapped around the first argument.
    * The closing paren follows and `WITHIN GROUP` comes next: the ordering is parsed
      from the WITHIN GROUP clause and attached to the first argument.
    * Otherwise the parser retreats to just before the closing paren and the node is
      built straight from the argument list.

    Note that only the WITHIN GROUP path passes `on_overflow` to the resulting node.
    """
    if self._match(TokenType.DISTINCT):
        # Only the first argument is wrapped in Distinct; the rest follow a comma
        args: t.List[t.Optional[exp.Expr]] = [
            self.expression(exp.Distinct(expressions=[self._parse_disjunction()]))
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_disjunction))
    else:
        args = self._parse_csv(self._parse_disjunction)  # type: ignore

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow: t.Optional[exp.Expr] = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior(
                    this=self._parse_string(),
                    # True for WITH COUNT, and also when WITHOUT COUNT is absent
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
            )
    else:
        on_overflow = None

    # Remembered so the closing paren can be handed back to the caller below
    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1)))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat(
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )
    )
def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
    """Parse a comma-separated list of XML namespace declarations.

    Each entry is a string. Unless it is introduced by DEFAULT, the string is also
    passed through `_parse_alias`. At least one entry is always parsed.
    """
    declared = []

    more = True
    while more:
        is_default = self._match(TokenType.DEFAULT)

        uri = self._parse_string()
        if not is_default:
            uri = self._parse_alias(uri)

        declared.append(self.expression(exp.XMLNamespace(this=uri)))
        more = self._match(TokenType.COMMA)

    return declared
def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str] | t.Optional[exp.Expr]:
    """Parse the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL).

    Args:
        on: The keyword expected after ON (the "Y" part).
        values: Candidate keywords for the "X" part, tried in the given order.

    Returns:
        The string "X ON Y" for the first candidate that matches, the parsed default
        expression for the DEFAULT form, or None when neither form is present. A
        DEFAULT that is not followed by "ON Y" is un-consumed again.
    """
    # Lazily evaluated, so candidates after the first match are never tried
    keyword = next((value for value in values if self._match_text_seq(value, "ON", on)), None)
    if keyword is not None:
        return f"{keyword} ON {on}"

    checkpoint = self._index
    if not self._match(TokenType.DEFAULT):
        return None

    fallback = self._parse_bitwise()
    if self._match_text_seq("ON", on):
        return fallback

    self._retreat(checkpoint)
    return None
def _parse_json_object(self, agg=False):
    """Parse the arguments of a JSON object constructor.

    Consumes, in order: either a star or a comma-separated list of key/value pairs
    (each optionally followed by FORMAT JSON), a "NULL ON NULL" / "ABSENT ON NULL"
    clause, WITH / WITHOUT UNIQUE [KEYS], RETURNING <type> [FORMAT JSON] and
    ENCODING <var>.

    Args:
        agg: When truthy an `exp.JSONObjectAgg` is built, otherwise an `exp.JSONObject`.
    """

    def _entry():
        return self._parse_format_json(self._parse_json_key_value())

    wildcard = self._parse_star()
    if wildcard:
        members = [wildcard]
    else:
        members = self._parse_csv(_entry)

    on_null = self._parse_on_handling("NULL", "NULL", "ABSENT")

    if self._match_text_seq("WITH", "UNIQUE"):
        uniqueness = True
    elif self._match_text_seq("WITHOUT", "UNIQUE"):
        uniqueness = False
    else:
        uniqueness = None

    # KEYS is optional noise after WITH / WITHOUT UNIQUE
    self._match_text_seq("KEYS")

    returning = self._match_text_seq("RETURNING") and self._parse_format_json(
        self._parse_type()
    )
    charset = self._match_text_seq("ENCODING") and self._parse_var()

    node_type = exp.JSONObjectAgg if agg else exp.JSONObject
    return self.expression(
        node_type(
            expressions=members,
            null_handling=on_null,
            unique_keys=uniqueness,
            return_type=returning,
            encoding=charset,
        )
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `MATCH (<columns>) AGAINST (<string> [<modifier>])`.

    The searched targets are either a single table (after the TABLE keyword) or a
    comma-separated list of columns. The modifier, when present, is stored as the
    upper-case text of the search mode.
    """
    if self._match_text_seq("TABLE"):
        # parse SingleStore MATCH(TABLE ...) syntax
        # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
        table = self._parse_table()
        targets = [table] if table else []
    else:
        targets = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    needle = self._parse_string()

    search_mode = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        search_mode = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            search_mode = "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        search_mode = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        search_mode = "WITH QUERY EXPANSION"

    node = exp.MatchAgainst(this=needle, expressions=targets, modifier=search_mode)
    return self.expression(node)
def _parse_substring(self) -> exp.Substring:
    """Parse SUBSTRING arguments, including the keyword form with FROM / FOR.

    FROM and FOR may appear in either order and are appended to the positional
    arguments as start and length. A FOR without a preceding FROM gets a start of 1.
    """
    # Postgres supports the form: substring(string [from int] [for int])
    # (despite being undocumented, the reverse order also works)
    # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
    args = t.cast(t.List[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise))

    start = None
    length = None

    while self._curr:
        if self._match(TokenType.FROM):
            start = self._parse_bitwise()
            continue

        if not self._match(TokenType.FOR):
            break

        if not start:
            start = exp.Literal.number(1)
        length = self._parse_bitwise()

    args.extend(bound for bound in (start, length) if bound)

    return self.validate_expression(exp.Substring.from_arg_list(args), args)
def _parse_window(
    self, this: t.Optional[exp.Expr], alias: bool = False
) -> t.Optional[exp.Expr]:
    """Wrap `this` in the window-related constructs that may follow it.

    In order, this consumes an optional `WITHIN GROUP (<order>)`, an optional
    `FILTER (WHERE ...)`, an optional IGNORE / RESPECT NULLS, and finally the
    window itself, which is either a bare name or a parenthesized specification.

    Args:
        this: The expression the window applies to.
        alias: When True, no token from `WINDOW_BEFORE_PAREN_TOKENS` is required;
            an optional alias token is skipped instead and `over` is left as None.

    Returns:
        `this` (possibly wrapped in WithinGroup / Filter / IgnoreNulls / RespectNulls)
        when no window follows, otherwise an `exp.Window`. If another token from
        `WINDOW_BEFORE_PAREN_TOKENS` follows the window, the result is parsed again
        as the operand of a further window.
    """
    func = this
    # Comments are taken from the original expression and re-attached to the Window node
    comments = func.comments if isinstance(func, exp.Expr) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup(this=this, expression=order))

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        # The WHERE keyword is optional here, hence skip_where_token below
        self._match(TokenType.WHERE)
        this = self.expression(
            exp.Filter(this=this, expression=self._parse_where(skip_where_token=True))
        )
        self._match_r_paren()

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
    # Some dialects choose to implement and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # There is some code above in _parse_lambda that handles
    #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The below changes handle
    #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    #   and Snowflake chose to do the same for familiarity
    #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

        # Hoist a nested IGNORE/RESPECT NULLS node so that it wraps the whole aggregate
        if ignore_respect and ignore_respect is not this:
            ignore_respect.replace(ignore_respect.this)
            this = self.expression(ignore_respect.__class__(this=this))

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        return this
    else:
        # Upper-cased text of the token that introduced the window
        over = self._prev.text.upper()

    if comments and isinstance(func, exp.Expr):
        func.pop_comments()

    if not self._match(TokenType.L_PAREN):
        # No parenthesized specification: the window is referenced by name
        return self.expression(
            exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # first is True for FIRST, False for LAST and None when neither is present
    first: t.Optional[bool] = True if self._match(TokenType.FIRST) else None
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()
    # Text of the frame keyword (ROWS / RANGE / GROUPS), or a falsy value if there is none
    kind = (
        self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS")
    ) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()

        # An empty dict stands in for a missing end bound, so .get() yields None below
        end = self._parse_window_spec() if self._match(TokenType.AND) else {}
        exclude = (
            self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
            if self._match_text_seq("EXCLUDE")
            else None
        )

        spec = self.expression(
            exp.WindowSpec(
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        )
    else:
        spec = None

    self._match_r_paren()

    window = self.expression(
        exp.Window(
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        ),
        comments=comments,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return self._parse_window(window, alias=alias)

    return window
def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
    """Consume the current token, whatever its type, unless it is reserved.

    Args:
        ignore_reserved: When True, tokens listed in `RESERVED_TOKENS` are consumed too.

    Returns:
        The token that was consumed, or None if there is no current token or it is
        reserved (and `ignore_reserved` is False). Nothing is consumed in that case.
    """
    current = self._curr
    if not current:
        return None

    if not ignore_reserved and current.token_type in self.RESERVED_TOKENS:
        return None

    self._advance()
    return self._prev
def _parse_placeholder(self) -> t.Optional[exp.Expr]:
    """Try to parse a placeholder using the parser registered for the matched token.

    Returns:
        Whatever the registered parser produced, if it is truthy. Otherwise None;
        when a token was matched but its parser produced nothing, the parser steps
        back by one token first.
    """
    if not self._match_set(self.PLACEHOLDER_PARSERS):
        return None

    build = self.PLACEHOLDER_PARSERS[self._prev.token_type]
    node = build(self)
    if node:
        return node

    # The matched token did not yield a placeholder, so hand it back
    self._advance(-1)
    return None
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a statement that starts a transaction.

    Consumes an optional kind from `TRANSACTION_KIND`, an optional TRANSACTION / WORK
    keyword and then a comma-separated list of modes. Each mode is the run of
    consecutive VAR / NOT tokens joined by single spaces; empty runs are skipped.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        words = []
        while self._match(TokenType.VAR) or self._match(TokenType.NOT):
            words.append(self._prev.text)

        if words:
            modes.append(" ".join(words))

        more = self._match(TokenType.COMMA)

    return self.expression(exp.Transaction(this=kind, modes=modes))
def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
    """Parse the column definition that follows an ADD keyword.

    Returns:
        The parsed column definition, with a `position` argument attached when a
        FIRST / AFTER clause follows it. None if the previous token is not ADD
        (case-insensitively) or no column definition could be parsed.
    """
    if self._prev.text.upper() != "ADD":
        return None

    column_def = self._parse_column_def_with_exists()
    if not column_def:
        return None

    # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
    if not self._match_texts(("FIRST", "AFTER")):
        return column_def

    placement = self._prev.text
    anchor = self._parse_column()
    column_def.set(
        "position", self.expression(exp.ColumnPosition(this=anchor, position=placement))
    )

    return column_def
def _parse_alter_table_alter(self) -> t.Optional[exp.Expr]:
    """Parse the action that follows an `ALTER` keyword inside an ALTER statement.

    If the current token text is a key of `ALTER_ALTER_PARSERS`, the matching
    parser is invoked and its result returned. Otherwise the input is treated
    as `[COLUMN] <field> <action>` and an `exp.AlterColumn` is built.

    The action matchers below are tried in order and the first one that
    matches wins; when none matches, the remainder is parsed as an optional
    `SET DATA` / `TYPE` prefix followed by a data type with optional
    `COLLATE` and `USING` clauses.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    # DROP DEFAULT / SET DEFAULT <expr>
    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn(this=column, drop=True))
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction()))
    # COMMENT <string>
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn(this=column, comment=self._parse_string()))
    # DROP NOT NULL / SET NOT NULL
    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True))
    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(exp.AlterColumn(this=column, allow_null=False))
    # SET VISIBLE / SET INVISIBLE
    if self._match_text_seq("SET", "VISIBLE"):
        return self.expression(exp.AlterColumn(this=column, visible="VISIBLE"))
    if self._match_text_seq("SET", "INVISIBLE"):
        return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE"))

    # Fallback: both keyword prefixes are optional, so a bare data type is
    # accepted here as well.
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn(
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_disjunction(),
        )
    )
def _parse_alter(self) -> exp.Alter | exp.Command:
    """Parse an ALTER statement, falling back to an opaque command.

    Returns:
        An `exp.Alter` when the altered object kind is in `ALTERABLES`, an
        action parser registered in `ALTER_PARSERS` produced at least one
        action, and no tokens are left over. In every other case the whole
        statement is returned via `_parse_as_command`, starting from the token
        that was current on entry.
    """
    # Remembered so the fallback can re-capture the statement from its start.
    start = self._prev

    alter_token = self._match_set(self.ALTERABLES) and self._prev
    if not alter_token:
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")

    if alter_token.token_type == TokenType.SESSION:
        # No target is parsed for SESSION; `_prev` is left untouched, so the
        # ALTER_PARSERS lookup below uses the token consumed just before this
        # point.
        this = None
        check = None
        cluster = None
    else:
        this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
        check = self._match_text_seq("WITH", "CHECK")
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        # Step over the action keyword so that it becomes `_prev` for the
        # ALTER_PARSERS lookup below.
        if self._next:
            self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))
        not_valid = self._match_text_seq("NOT", "VALID")
        options = self._parse_csv(self._parse_property)
        cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

        # Only accept the structured form when everything was consumed.
        if not self._curr and actions:
            return self.expression(
                exp.Alter(
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                    cascade=cascade,
                )
            )

    return self._parse_as_command(start)
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse the `[DELTA] STATISTICS ...` tail of an ANALYZE statement.

    On entry `_prev` is the keyword that selected this parser; its upper-cased
    text becomes `kind`. An optional `DELTA` keyword is recorded as `option`.
    After the mandatory `STATISTICS` keyword one of the following is accepted:

    * `NOSCAN`
    * `FOR ALL COLUMNS`
    * `FOR COLUMNS <col>, ...`
    * `SAMPLE <number> [PERCENT]`

    Returns:
        An `exp.AnalyzeStatistics` node. `this` holds the matched clause text
        (or None) and `expressions` holds the column references or the single
        `exp.AnalyzeSample`, if any.

    Raises:
        Whatever `raise_error` raises when `STATISTICS` is missing.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # The keyword is matched with `_match_text_seq` like the clause above.
        # Every other `_match_texts` call in this file passes a collection of
        # texts; a bare string would presumably turn the lookup into a
        # substring test. `elif` keeps a stray COLUMNS token from overwriting
        # the FOR ALL COLUMNS result.
        elif self._match_text_seq("COLUMNS"):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        expressions = [
            self.expression(
                exp.AnalyzeSample(
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            )
        ]

    return self.expression(
        exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions)
    )
def _parse_merge(self) -> exp.Merge:
    """Parse the body of a MERGE statement into an `exp.Merge`.

    Grammar consumed, in order: optional `INTO`, the target table (with an
    optional alias), optional `USING`, the source table, an optional `ON`
    condition, an optional second `USING` identifier list, the WHEN clauses
    and an optional RETURNING clause.
    """
    self._match(TokenType.INTO)
    target = self._parse_table()

    # Peek only: `_parse_table_alias` consumes the alias tokens itself.
    if target and self._match(TokenType.ALIAS, advance=False):
        alias = self._parse_table_alias()
        target.set("alias", alias)

    self._match(TokenType.USING)
    source = self._parse_table()

    # Each clause is parsed in source order; a missing keyword leaves the
    # falsy result of `_match` in place of the clause.
    on_condition = self._match(TokenType.ON) and self._parse_disjunction()
    using_condition = self._match(TokenType.USING) and self._parse_using_identifiers()
    when_clauses = self._parse_when_matched()
    returning_clause = self._parse_returning()

    merge = exp.Merge(
        this=target,
        using=source,
        on=on_condition,
        using_cond=using_condition,
        whens=when_clauses,
        returning=returning_clause,
    )
    return self.expression(merge)
def _parse_set_item_assignment(self, kind: t.Optional[str] = None) -> t.Optional[exp.Expr]:
    """Parse one `<name> <delimiter> <value>` item of a SET statement.

    Args:
        kind: Optional scope keyword already consumed by the caller. For
            `"GLOBAL"` / `"SESSION"` a following `TRANSACTION` keyword
            delegates to `_parse_set_transaction`.

    Returns:
        An `exp.SetItem` wrapping an `exp.EQ`, or None (with the token
        position restored) when no left-hand side could be parsed, or when
        the dialect requires a delimiter and none was found.
    """
    start_index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        is_global = kind == "GLOBAL"
        return self._parse_set_transaction(global_=is_global)

    target = self._parse_primary() or self._parse_column()
    has_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS)

    if not target:
        self._retreat(start_index)
        return None
    if self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not has_delimiter:
        self._retreat(start_index)
        return None

    value = self._parse_statement() or self._parse_id_var()
    # Bare names on the right-hand side are kept as plain variables.
    if isinstance(value, (exp.Column, exp.Identifier)):
        value = exp.var(value.name)

    assignment = self.expression(exp.EQ(this=target, expression=value))
    return self.expression(exp.SetItem(this=assignment, kind=kind))
def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> t.Optional[exp.Var]:
    """Parse a possibly multi-word option into a single `exp.Var`.

    The upper-cased text of the current token is looked up in `options`. The
    value found there is a sequence of allowed continuations, each either one
    keyword string or a sequence of keywords. The first continuation that
    matches the upcoming tokens is appended to the option text.

    Args:
        options: Mapping from the leading keyword to its allowed continuations.
        raise_unmatched: Whether to call `raise_error` when the option is
            unknown or none of its continuations match.

    Returns:
        The `exp.Var` for the matched option, or None when there is no current
        token or nothing matched (in which case the token position is
        restored).
    """
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    # None: keyword is not in `options`. Empty: keyword is valid on its own.
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        # Normalize a single keyword to a 1-tuple so it can be splatted below.
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        # Reached only when no continuation matched (or there were none).
        # That is a failure if continuations were required (non-empty) or the
        # leading keyword itself is unknown (None); an empty sequence falls
        # through and the bare keyword is returned.
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)
def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Find the sub-parser whose (possibly multi-word) key matches the upcoming tokens.

    Tokens are consumed one at a time and their upper-cased text is walked
    through `trie`. As soon as the consumed words form a complete key, the
    corresponding entry of `parsers` is returned and the consumed tokens stay
    consumed.

    Args:
        parsers: Mapping from space-joined keyword sequences to parser callables.
        trie: Trie built over the keys of `parsers`.

    Returns:
        The matching callable, or None when no key matches. On failure the
        token position is restored to where it was on entry.
    """
    if not self._curr:
        return None

    index = self._index
    this = []

    # Re-check `_curr` on every iteration: the input may end while the words
    # consumed so far are still only a prefix of a key. In that case there is
    # no match, and the loop must not dereference a missing token.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            return parsers[" ".join(this)]

    self._retreat(index)
    return None
def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expr]]:
    """Parse an option list of the form `[=] [(] <option> ... )`.

    The leading `=` and `(` are both optional. Options are collected until a
    closing parenthesis is consumed or the input ends. Parsing stops early,
    after reporting an error, if an option cannot be parsed.
    """
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    collected: t.List[t.Optional[exp.Expr]] = []
    parsed: exp.Expr | t.List[exp.Expr] | None

    while True:
        # Stop at end of input, or once the closing parenthesis is consumed.
        if not self._curr or self._match(TokenType.R_PAREN):
            break

        # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
        parsed = (
            self._parse_format_name()
            if self._match_text_seq("FORMAT_NAME", "=")
            else self._parse_property()
        )

        if parsed is None:
            self.raise_error("Unable to parse option")
            break

        collected.extend(ensure_list(parsed))

    return collected
def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    """Parse the optional credential clauses of a COPY statement.

    Each of `STORAGE_INTEGRATION =`, `CREDENTIALS`, `ENCRYPTION`, `IAM_ROLE`
    and `REGION` is optional and checked once, in that order. An
    `exp.Credentials` node is always returned, even when no clause is present.
    """
    credentials = self.expression(exp.Credentials())

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        credentials.set("storage", self._parse_field())

    if self._match_text_seq("CREDENTIALS"):
        # Snowflake writes `CREDENTIALS = (...)`; Redshift follows the keyword
        # with a single field instead.
        if self._match(TokenType.EQ):
            value = self._parse_wrapped_options()
        else:
            value = self._parse_field()
        credentials.set("credentials", value)

    if self._match_text_seq("ENCRYPTION"):
        credentials.set("encryption", self._parse_wrapped_options())

    if self._match_text_seq("IAM_ROLE"):
        if self._match(TokenType.DEFAULT):
            role = exp.var(self._prev.text)
        else:
            role = self._parse_field()
        credentials.set("iam_role", role)

    if self._match_text_seq("REGION"):
        credentials.set("region", self._parse_field())

    return credentials
def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
    """Parse a single privilege of a GRANT / REVOKE privilege list.

    The privilege name is the space-joined, upper-cased text of every token up
    to (not including) the first token in `PRIVILEGE_FOLLOW_TOKENS`. If a
    parenthesized list follows, it is parsed as the privilege's column list.
    """
    words = []

    # Keep consuming consecutive keywords until comma (end of this privilege) or ON
    # (end of privilege list) or L_PAREN (start of column list) are met
    while True:
        if not self._curr:
            break
        if self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            break
        words.append(self._curr.text.upper())
        self._advance()

    privilege = exp.var(" ".join(words))

    columns = None
    if self._match(TokenType.L_PAREN, advance=False):
        columns = self._parse_wrapped_csv(self._parse_column)

    return self.expression(exp.GrantPrivilege(this=privilege, expressions=columns))
def _parse_revoke(self) -> exp.Revoke | exp.Command:
    """Parse a REVOKE statement, falling back to an opaque command.

    Returns:
        An `exp.Revoke` when the securable parses, the `FROM` keyword is
        present and no tokens remain after the optional `CASCADE` /
        `RESTRICT`. Otherwise the statement is returned via
        `_parse_as_command`, starting from the token current on entry.
    """
    first_token = self._prev

    has_grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

    privileges, kind, securable = self._parse_grant_revoke_common()
    if not (securable and self._match_text_seq("FROM")):
        return self._parse_as_command(first_token)

    principals = self._parse_csv(self._parse_grant_principal)

    cascade = (
        self._prev.text.upper() if self._match_texts(("CASCADE", "RESTRICT")) else None
    )

    # Anything left over means the statement was not fully understood.
    if self._curr:
        return self._parse_as_command(first_token)

    revoke = exp.Revoke(
        privileges=privileges,
        kind=kind,
        securable=securable,
        principals=principals,
        grant_option=has_grant_option,
        cascade=cascade,
    )
    return self.expression(revoke)
def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
    """Apply a pipe-syntax `LIMIT ... [OFFSET ...]` operator to `query` in place.

    A new limit replaces the existing one only when it is not larger, so
    chained LIMIT operators keep the smallest value. A new offset is added to
    the existing one.

    Args:
        query: The select being built; it is mutated and returned.

    Returns:
        The same `query` object.
    """
    limit = self._parse_limit()
    offset = self._parse_offset()
    if limit:
        # `args.get("limit", limit)` would return None when the key is present
        # with a None value, and the comparison below would then fail on
        # `None.expression`. Fall back on any falsy value, mirroring the
        # offset branch below.
        curr_limit = query.args.get("limit") or limit
        if curr_limit.expression.to_py() >= limit.expression.to_py():
            query.limit(limit, copy=False)
    if offset:
        curr_offset = query.args.get("offset")
        curr_offset = curr_offset.expression.to_py() if curr_offset else 0
        query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

    return query
def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
    """Parse a table sample and attach it to the pipe query.

    When `query` has a `with_` arg, the sample is set on the `this` of its last
    expression; otherwise it is set directly on `query`.

    Returns:
        The same `query` object that was passed in.
    """
    parsed_sample = self._parse_table_sample()

    with_clause = query.args.get("with_")
    sample_target = with_clause.expressions[-1].this if with_clause else query
    sample_target.set("sample", parsed_sample)

    return query
def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
    """Parse a single item of a DECLARE statement.

    Accepts an optional leading `VAR` / `VARIABLE` keyword, a comma-separated
    list of names, an optional `TokenType.ALIAS` token, a kind (a schema when
    `TABLE` follows, otherwise a type) and an optional default introduced by
    `DEFAULT` or `=`.

    Returns:
        The `exp.DeclareItem`, or None when no name could be parsed.
    """
    self._match_texts(("VAR", "VARIABLE"))

    names = self._parse_csv(self._parse_id_var)
    if not names:
        return None

    self._match(TokenType.ALIAS)

    if self._match(TokenType.TABLE):
        kind = self._parse_schema()
    else:
        kind = self._parse_types()

    # Keeps the falsy match result as the default when neither token is present
    has_default = self._match(TokenType.DEFAULT) or self._match(TokenType.EQ)
    default = has_default and self._parse_bitwise()

    return self.expression(exp.DeclareItem(this=names, kind=kind, default=default))
def _parse_group_concat(self) -> t.Optional[exp.Expr]:
    """Parse the arguments of GROUP_CONCAT into an `exp.GroupConcat`.

    Multiple value arguments are folded into a single `exp.Concat`. A trailing
    `exp.Order` keeps its position as the outermost node, with the (possibly
    concatenated) values placed underneath it. An optional `SEPARATOR <field>`
    is parsed afterwards.
    """

    def concat_exprs(node: t.Optional[exp.Expr], exprs: t.List[exp.Expr]) -> exp.Expr:
        # DISTINCT a, b, ... -> DISTINCT CONCAT(a, b, ...)
        if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
            merged = self.expression(
                exp.Concat(
                    expressions=node.expressions,
                    safe=True,
                    coalesce=self.dialect.CONCAT_COALESCE,
                )
            )
            node.set("expressions", [merged])
            return node

        if len(exprs) == 1:
            return exprs[0]

        return self.expression(
            exp.Concat(expressions=exprs, safe=True, coalesce=self.dialect.CONCAT_COALESCE)
        )

    args = self._parse_csv(self._parse_lambda)

    if not args:
        this = None
    else:
        tail = args[-1]
        order = tail if isinstance(tail, exp.Order) else None

        if order:
            # Order By is the last (or only) expression in the list and has consumed the 'expr'
            # before it, so take 'expr' out of exp.Order and put it back into args
            args[-1] = order.this
            order.set("this", concat_exprs(order.this, args))

        this = order or concat_exprs(args[0], args)

    separator = None
    if self._match(TokenType.SEPARATOR):
        separator = self._parse_field()

    return self.expression(exp.GroupConcat(this=this, separator=separator))
class AthenaParser(Parser):
    """Parser that hands token streams to either a Hive parser or a Trino parser.

    A stream whose first token has type `TokenType.HIVE_TOKEN_STREAM` is passed,
    without that first token, to the Hive parser. Every other stream goes to an
    `AthenaTrinoParser`.
    """

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: t.Any = None,
        hive: t.Any = None,
        trino: t.Any = None,
        **kwargs: t.Any,
    ) -> None:
        """Build the two delegate parsers.

        Args:
            error_level, error_message_context, max_errors: Passed to this parser
                and to both delegates.
            dialect: Passed to the base `Parser` initializer only.
            hive: Dialect object whose `parser(...)` factory builds the Hive
                delegate; defaults to a new `Hive()`.
            trino: Dialect handed to `AthenaTrinoParser`; defaults to a new `Trino()`.
            **kwargs: Accepted but not forwarded anywhere.
        """
        from sqlglot.dialects import Hive, Trino

        hive = hive or Hive()
        trino = trino or Trino()

        error_settings: t.Dict[str, t.Any] = {
            "error_level": error_level,
            "error_message_context": error_message_context,
            "max_errors": max_errors,
        }

        super().__init__(dialect=dialect, **error_settings)

        self._hive_parser = hive.parser(**error_settings)
        self._trino_parser = AthenaTrinoParser(dialect=trino, **error_settings)

    def parse(self, raw_tokens: t.List[Token], sql: str) -> t.List[t.Optional[exp.Expr]]:
        """Parse `raw_tokens` with the delegate selected by the first token."""
        is_hive_stream = (
            bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        )
        if not is_hive_stream:
            return self._trino_parser.parse(raw_tokens, sql)

        # Drop the marker token before delegating
        return self._hive_parser.parse(raw_tokens[1:], sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expr]]:
        """Same routing as `parse`, but calling the delegate's `parse_into`."""
        is_hive_stream = (
            bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        )
        if not is_hive_stream:
            return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

        # Drop the marker token before delegating
        return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expr] = None
) -> t.Callable:
    """Create a builder for REGEXP_EXTRACT-style functions.

    Args:
        expr_type: Expression class to instantiate.
        default_group: Value for the `group` arg when the pattern does not have
            exactly one capturing group.

    Returns:
        A `(args, dialect)` callable that builds an `expr_type` node.
    """

    def _builder(args: t.List, dialect: t.Any) -> E:
        # seq_get avoids an uncaught IndexError when the pattern argument is missing;
        # the node is then built with `expression=None` instead of crashing here.
        pattern = seq_get(args, 1)

        try:
            single_group = pattern is not None and re.compile(pattern.name).groups == 1
        except re.error:
            # Not a pattern Python's `re` can compile: fall back to the default group
            single_group = False

        extra_args = {}
        if expr_type is exp.RegexpExtract:
            extra_args["null_if_pos_overflow"] = (
                dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL
            )

        return expr_type(
            this=seq_get(args, 0),
            expression=pattern,
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            # With exactly one capturing group, that group is what gets extracted
            group=exp.Literal.number(1) if single_group else default_group,
            **extra_args,
        )

    return _builder
"BIT_XOR": exp.BitwiseXorAgg.from_arg_list, "BIT_COUNT": exp.BitwiseCount.from_arg_list, "BOOL": exp.JSONBool.from_arg_list, "CONTAINS_SUBSTR": _build_contains_substring, "DATE": _build_date, "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), "DATE_DIFF": build_date_diff, "DATE_SUB": build_date_delta_with_interval(exp.DateSub), "DATE_TRUNC": lambda args: exp.DateTrunc( unit=seq_get(args, 1), this=seq_get(args, 0), zone=seq_get(args, 2), ), "DATETIME": _build_datetime, "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), "DIV": binary_from_function(exp.IntDiv), "EDIT_DISTANCE": _build_levenshtein, "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list, "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), "JSON_STRIP_NULLS": _build_json_strip_nulls, "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), "MD5": exp.MD5Digest.from_arg_list, "SHA1": exp.SHA1Digest.from_arg_list, "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize( this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True ), "OCTET_LENGTH": exp.ByteLength.from_arg_list, "TO_HEX": _build_to_hex, "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( [seq_get(args, 1), seq_get(args, 0)] ), "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( [seq_get(args, 1), 
# Maps an upper-case function name to a callback that receives the parser
# instance and parses that function's arguments itself. The table starts from
# the base parser's entries, leaving out "TRIM", and then adds the entries below.
FUNCTION_PARSERS = {
    **{k: v for k, v in parser.Parser.FUNCTION_PARSERS.items() if k != "TRIM"},
    # The argument of ARRAY(...) is parsed as a whole statement
    "ARRAY": lambda self: self.expression(
        exp.Array(expressions=[self._parse_statement()], struct_name_inheritance=True)
    ),
    "JSON_ARRAY": lambda self: self.expression(
        exp.JSONArray(expressions=self._parse_csv(self._parse_bitwise))
    ),
    "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
    # The entries routed to _parse_ml share one argument parser and differ only in
    # the expression class (and extra kwargs) they produce
    "PREDICT": lambda self: self._parse_ml(exp.Predict),
    "TRANSLATE": lambda self: self._parse_translate(),
    "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
    "GENERATE_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding),
    "GENERATE_TEXT_EMBEDDING": lambda self: self._parse_ml(exp.GenerateEmbedding, is_text=True),
    "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
    "FORECAST": lambda self: self._parse_ml(exp.MLForecast),
}
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expr]:
    """Parse one part of a table name, allowing dashed and number-leading names.

    The base parser is tried first; if it yields nothing, a number is parsed
    instead. An identifier followed by dashes is extended with the raw SQL text
    of the dashed tail, and a numeric literal is turned into a quoted identifier
    (with a directly following var appended to its name).

    Args:
        schema: Forwarded unchanged to the base implementation.

    Returns:
        The parsed part, rebuilt as an `exp.Identifier` when either case above
        applied; otherwise whatever the base parser / number parser returned.
    """
    this = super()._parse_table_part(schema=schema) or self._parse_number()

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
    if isinstance(this, exp.Identifier):
        table_name = this.name
        # Peek for a dash (without consuming it) that still has a token after it
        while self._match(TokenType.DASH, advance=False) and self._next:
            start = self._curr
            # Consume tokens until one is no longer "connected" or a follow token
            # (DOT / L_PAREN / R_PAREN) is reached.
            # NOTE(review): `_is_connected` presumably means "no whitespace between
            # the previous and current token" -- confirm against the base parser.
            while self._is_connected() and not self._match_set(
                self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
            ):
                self._advance()

            # Nothing was consumed, so stop to avoid looping forever
            if start == self._curr:
                break

            # Append the original SQL text spanning the consumed tokens
            table_name += self._find_sql(start, self._prev)

        # Rebuild the identifier, keeping the quoted flag and source positions
        this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")).update_positions(
            this
        )
    elif isinstance(this, exp.Literal):
        table_name = this.name

        # A var glued to the literal is taken as the rest of the name
        if self._is_connected() and self._parse_var(any_token=True):
            table_name += self._prev.text

        this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

    return this
else: previous_this = table.this parts = table.name.split(".") if len(parts) == 2 and not table.this.quoted: table.set("db", exp.Identifier(this=parts[0]).update_positions(previous_this)) table.set("this", exp.Identifier(this=parts[1]).update_positions(previous_this)) if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): alias = table.this catalog, db, this_id, *rest = ( exp.to_identifier(p, quoted=True) for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) ) for part in (catalog, db, this_id): if part: part.update_positions(table.this) this: t.Optional[exp.Expr] = this_id if rest and this: this = exp.Dot.build([this, *rest]) # type: ignore[list-item] table = exp.Table(this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")) table.meta["quoted_table"] = True else: alias = None # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or # dataset, so if the project identifier is omitted we need to fix the ast so that # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. # Otherwise, we wouldn't correctly qualify a `Table` node that references these # views, because it would seem like the "catalog" part is set, when it'd actually # be the region/dataset. Merging the two identifiers into a single one is done to # avoid producing a 4-part Table reference, which would cause issues in the schema # module, when there are 3-part table names mixed with information schema views. # # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax table_parts = table.parts if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": # We need to alias the table here to avoid breaking existing qualified columns. # This is expected to be safe, because if there's an actual alias coming up in # the token stream, it will overwrite this one. 
def _parse_column(self) -> t.Optional[exp.Expr]:
    """Parse a column, splitting parts whose names contain dots.

    When any part of the parsed `exp.Column` has a "." in its name, the part
    names are joined with ".", split again into at least four words and rebuilt
    as quoted catalog / db / table / column identifiers. Surplus words are
    chained onto the column with `exp.Dot`. Such columns are tagged with
    `meta["quoted_column"] = True`.
    """
    parsed = super()._parse_column()
    if not isinstance(parsed, exp.Column):
        return parsed

    part_names = [part.name for part in parsed.parts]
    if not any("." in name for name in part_names):
        return parsed

    words = split_num_words(".".join(part_names), ".", 4)
    identifiers = [exp.to_identifier(word, quoted=True) for word in words]
    catalog, db, table, this, *rest = identifiers

    if rest and this:
        this = exp.Dot.build([this, *rest])  # type: ignore

    rebuilt = exp.Column(this=this, table=table, db=db, catalog=catalog)
    rebuilt.meta["quoted_column"] = True
    return rebuilt
def _parse_json_object(self, agg=False):
    """Parse JSON_OBJECT arguments, normalizing BigQuery's array-pair signature.

    Args:
        agg: Forwarded to the base implementation. The overloads declared for
            this method promise an `exp.JSONObjectAgg` for `agg=True`, so the
            flag must not be dropped on the way to `super()`.

    Returns:
        The node produced by the base parser. If its first key/value pair holds
        two arrays, the expressions are replaced by one `exp.JSONKeyValue` per
        zipped (key, value) element.
    """
    json_object = super()._parse_json_object(agg=agg)
    array_kv_pair = seq_get(json_object.expressions, 0)

    # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
    if (
        array_kv_pair
        and isinstance(array_kv_pair.this, exp.Array)
        and isinstance(array_kv_pair.expression, exp.Array)
    ):
        keys = array_kv_pair.this.expressions
        values = array_kv_pair.expression.expressions

        json_object.set(
            "expressions",
            [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
        )

    return json_object
def _parse_translate(self) -> exp.Translate | exp.MLTranslate:
    """Parse TRANSLATE(...), telling `ML.TRANSLATE` apart from the string function.

    The decision is made by looking back in the token list: for `ML.TRANSLATE(`
    the `ML` token sits four positions before the current index and is followed
    by a dot.

    Returns:
        An `exp.MLTranslate` (via `_parse_ml`) for the `ML.`-prefixed form,
        otherwise an `exp.Translate` built from the regular function arguments.
    """
    prefix_index = self._index - 4

    # A negative index must not reach seq_get: it would wrap around to the END of
    # the token list, so e.g. `SELECT TRANSLATE(a, b, c) FROM ml` would be taken
    # for ML.TRANSLATE.
    if prefix_index >= 0:
        prefix = seq_get(self._tokens, prefix_index)
        separator = seq_get(self._tokens, prefix_index + 1)

        # Require the dot as well, so `SELECT ml, TRANSLATE(...)` is not misread
        if (
            prefix
            and separator
            and prefix.text.upper() == "ML"
            and separator.token_type == TokenType.DOT
        ):
            return self._parse_ml(exp.MLTranslate)

    return exp.Translate.from_arg_list(self._parse_function_args())
def _parse_column_ops(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:
    """Parse column operators, wrapping `NET.`- and `SAFE.`-prefixed functions.

    The base implementation runs first. If it produced an `exp.Dot` whose right
    side is a function and whose left side is named NET or SAFE, the parser
    retreats and re-parses the function, then wraps it in `exp.NetFunc` or
    `exp.SafeFunc` respectively.
    """
    # Token index to come back to; must be captured before the base parser advances
    resume_index = self._index + 1
    parsed = super()._parse_column_ops(this)

    if not (isinstance(parsed, exp.Dot) and isinstance(parsed.expression, exp.Func)):
        return parsed

    wrappers: t.Dict[str, t.Type[exp.Func]] = {"NET": exp.NetFunc, "SAFE": exp.SafeFunc}
    wrapper = wrappers.get(parsed.this.name.upper())
    if not wrapper:
        return parsed

    # Retreat to try and parse a known function instead of an anonymous one,
    # which is parsed by the base column ops parser due to anonymous_func=true
    self._retreat(resume_index)
    return wrapper(this=self._parse_function(any_token=True))
def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous:
    """Build the node for `splitByChar(separator, ...)`.

    An `exp.Split` is produced only when the separator is a literal whose Python
    value is a string that encodes to exactly one UTF-8 byte. Any other call is
    kept as an anonymous `splitByChar` function with its arguments untouched.
    """
    separator = seq_get(args, 0)
    literal_value = separator.to_py() if isinstance(separator, exp.Literal) else None

    is_single_byte = (
        isinstance(literal_value, str) and len(literal_value.encode("utf-8")) == 1
    )
    if not is_single_byte:
        return exp.Anonymous(this="splitByChar", expressions=args)

    return _build_split(exp.Split)(args)
"groupArrayInsertAt", "groupArrayMovingAvg", "groupArrayMovingSum", "groupArraySample", "groupBitAnd", "groupBitOr", "groupBitXor", "groupBitmap", "groupBitmapAnd", "groupBitmapOr", "groupBitmapXor", "sumWithOverflow", "sumMap", "minMap", "maxMap", "skewSamp", "skewPop", "kurtSamp", "kurtPop", "uniq", "uniqExact", "uniqCombined", "uniqCombined64", "uniqHLL12", "uniqTheta", "quantile", "quantiles", "quantileExact", "quantilesExact", "quantilesExactExclusive", "quantileExactLow", "quantilesExactLow", "quantileExactHigh", "quantilesExactHigh", "quantileExactWeighted", "quantilesExactWeighted", "quantileTiming", "quantilesTiming", "quantileTimingWeighted", "quantilesTimingWeighted", "quantileDeterministic", "quantilesDeterministic", "quantileTDigest", "quantilesTDigest", "quantileTDigestWeighted", "quantilesTDigestWeighted", "quantileBFloat16", "quantilesBFloat16", "quantileBFloat16Weighted", "quantilesBFloat16Weighted", "simpleLinearRegression", "stochasticLinearRegression", "stochasticLogisticRegression", "categoricalInformationValue", "contingency", "cramersV", "cramersVBiasCorrected", "theilsU", "maxIntersections", "maxIntersectionsPosition", "meanZTest", "quantileInterpolatedWeighted", "quantilesInterpolatedWeighted", "quantileGK", "quantilesGK", "sparkBar", "sumCount", "largestTriangleThreeBuckets", "histogram", "sequenceMatch", "sequenceCount", "windowFunnel", "retention", "uniqUpTo", "sequenceNextNode", "exponentialTimeDecayedAvg", } # Sorted longest-first so that compound suffixes (e.g. "SimpleState") are matched # before their sub-suffixes (e.g. "State") when resolving multi-combinator functions. 
AGG_FUNCTIONS_SUFFIXES: t.List[str] = sorted(
    [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ],
    key=len,
    reverse=True,
)

# Precomputed lookup for every 0- and 1-suffix aggregate function name:
# "<func><suffix>" -> (func, suffix) and "<func>" -> (func, None).
AGG_FUNC_MAPPING: Mapping[str, tuple[str, str | None]] = {
    f"{f}{sfx}": (f, sfx) for sfx in AGG_FUNCTIONS_SUFFIXES for f in AGG_FUNCTIONS
} | {f: (f, None) for f in AGG_FUNCTIONS}


class ClickHouseParser(parser.Parser):
    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False
    OPTIONAL_ALIAS_TOKEN_CTE = False
    JOINS_HAVE_EQUAL_PRECEDENCE = True

    # Function-name -> builder table. The base parser's entries for TRANSFORM and
    # APPROX_TOP_SUM are dropped, then ClickHouse-specific builders are layered on top.
    FUNCTIONS = {
        **{
            k: v
            for k, v in parser.Parser.FUNCTIONS.items()
            if k not in ("TRANSFORM", "APPROX_TOP_SUM")
        },
        # One TOSTARTOF<unit> builder per supported truncation unit
        **{f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit) for unit in TIMESTAMP_TRUNC_UNITS},
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYCOMPACT": exp.ArrayCompact.from_arg_list,
        "ARRAYCONCAT": exp.ArrayConcat.from_arg_list,
        "ARRAYDISTINCT": exp.ArrayDistinct.from_arg_list,
        "ARRAYEXCEPT": exp.ArrayExcept.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "ARRAYMAX": exp.ArrayMax.from_arg_list,
        "ARRAYMIN": exp.ArrayMin.from_arg_list,
        "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
        "ARRAYSLICE": exp.ArraySlice.from_arg_list,
        "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
        "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
        "COUNTIF": _build_count_if,
        "CITYHASH64": exp.CityHash64.from_arg_list,
        "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
        "VERSION": exp.CurrentVersion.from_arg_list,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
        "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
        "HAS": exp.ArrayContains.from_arg_list,
        "ILIKE": build_like(exp.ILike),
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        # LENGTH is parsed with the `binary` flag set
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LIKE": build_like(exp.Like),
        # NOTE(review): this is the only mixed-case key in the table; every other key is
        # upper-case. Confirm the function lookup is case-insensitive, otherwise this
        # entry is never matched.
        "L2Distance": exp.EuclideanDistance.from_arg_list,
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "NOTLIKE": build_like(exp.Like, not_like=True),
        "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "STR_TO_DATE": _build_str_to_date,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        # toMonday is modelled as a truncation to the WEEK unit
        "TOMONDAY": _build_timestamp_trunc("WEEK"),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        # SHA256 / SHA512 are both represented as SHA2 with an explicit digest length
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLITBYCHAR": _build_split_by_char,
        "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
        "SPLITBYSTRING": _build_split(exp.Split),
        "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
        "TOTYPENAME": exp.Typeof.from_arg_list,
        "EDITDISTANCE": exp.Levenshtein.from_arg_list,
        "JAROWINKLERSIMILARITY": exp.JarowinklerSimilarity.from_arg_list,
        "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        "UTCTIMESTAMP": exp.UtcTimestamp.from_arg_list,
    }

    # Expose the module-level table of base aggregate names on the class
    AGG_FUNCTIONS = AGG_FUNCTIONS
# --- ClickHouseParser class body (continued) ---
    AGG_FUNCTIONS_SUFFIXES = AGG_FUNCTIONS_SUFFIXES

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.AND,
        TokenType.OR,
        TokenType.SET,
    }

    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.LIKE,
    }

    AGG_FUNC_MAPPING = AGG_FUNC_MAPPING

    @classmethod
    def _resolve_clickhouse_agg(cls, name: str) -> t.Optional[tuple[str, Sequence[str]]]:
        """Split `name` into a base aggregate function and its combinator suffixes.

        Returns `(base_name, suffixes)` where `suffixes` lists the combinators in the
        order they appear in `name` (empty when there are none), or `None` when `name`
        cannot be reduced to an entry of `AGG_FUNC_MAPPING`.
        """
        # ClickHouse allows chaining multiple combinators on aggregate functions.
        # See https://clickhouse.com/docs/sql-reference/aggregate-functions/combinators
        # N.B. this resolution allows any suffix stack, including ones that ClickHouse rejects
        # syntactically such as sumMergeMerge (due to repeated adjacent suffixes)

        # Until we are able to identify a 1- or 0-suffix aggregate function by name,
        # repeatedly strip and queue suffixes (checking longer suffixes first, see comment on
        # AGG_FUNCTIONS_SUFFIXES). This loop only runs for 2 or more suffixes,
        # as AGG_FUNC_MAPPING memoizes all 0- and 1-suffix aggregate function names.
        accumulated_suffixes: t.Deque[str] = deque()
        while (parts := AGG_FUNC_MAPPING.get(name)) is None:
            for suffix in AGG_FUNCTIONS_SUFFIXES:
                # A name that consists of nothing but the suffix is not stripped
                if name.endswith(suffix) and len(name) != len(suffix):
                    accumulated_suffixes.appendleft(suffix)
                    name = name[: -len(suffix)]
                    break
            else:
                # No suffix could be stripped and the name is still unknown
                return None

        # We now have a 0- or 1-suffix aggregate
        agg_func_name, inner_suffix = parts
        if inner_suffix:
            # this is a 1-suffix aggregate (either naturally or via repeated suffix
            # stripping). prepend the innermost suffix.
            accumulated_suffixes.appendleft(inner_suffix)

        return (agg_func_name, accumulated_suffixes)

    # The base parser's MATCH function parser is removed; MATCH is handled through FUNCTIONS
    FUNCTION_PARSERS = {
        **{k: v for k, v in parser.Parser.FUNCTION_PARSERS.items() if k != "MATCH"},
        "ARRAYJOIN": lambda self: self.expression(exp.Explode(this=self._parse_expression())),
        "QUANTILE": lambda self: self._parse_quantile(),
        "MEDIAN": lambda self: self._parse_quantile(),
        "COLUMNS": lambda self: self._parse_columns(),
        "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
        "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
        "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
    }

    PROPERTY_PARSERS = {
        **{k: v for k, v in parser.Parser.PROPERTY_PARSERS.items() if k != "DYNAMIC"},
        "ENGINE": lambda self: self._parse_engine_property(),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        k: v for k, v in parser.Parser.NO_PAREN_FUNCTION_PARSERS.items() if k != "ANY"
    }

    NO_PAREN_FUNCTIONS = {
        k: v
        for k, v in parser.Parser.NO_PAREN_FUNCTIONS.items()
        if k != TokenType.CURRENT_TIMESTAMP
    }

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
    }

    COLUMN_OPERATORS = {
        **{k: v for k, v in parser.Parser.COLUMN_OPERATORS.items() if k != TokenType.PLACEHOLDER},
        TokenType.DOTCARET: lambda self, this, field: self.expression(
            exp.NestedJSONSelect(this=this, expression=field)
        ),
    }

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ALL,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ALL,
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.ASOF,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    # Each modifier parser returns a (modifier_name, parsed_value) pair; `self._advance()`
    # is called first and its falsy result lets `or` fall through to the actual parse call.
    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
        "ASSUME": lambda self: self._parse_assume_constraint(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "MODIFY": lambda self: self._parse_alter_table_modify(),
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    } - {"CHECK"}

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.DETACH: lambda self: self._parse_detach(),
    }

    def _parse_wrapped_select_or_assignment(self) -> t.Optional[exp.Expr]:
        """Parse an optionally parenthesized SELECT or, failing that, an assignment."""
        return self._parse_wrapped(
            lambda: self._parse_select() or self._parse_assignment(), optional=True
        )

    def _parse_check_constraint(self) -> t.Optional[exp.CheckColumnConstraint]:
        """Parse the body of a CHECK constraint into a `CheckColumnConstraint`."""
        return self.expression(
            exp.CheckColumnConstraint(this=self._parse_wrapped_select_or_assignment())
        )

    def _parse_assume_constraint(self) -> t.Optional[exp.AssumeColumnConstraint]:
        """Parse the body of an ASSUME constraint into an `AssumeColumnConstraint`."""
        return self.expression(
            exp.AssumeColumnConstraint(this=self._parse_wrapped_select_or_assignment())
        )

    def _parse_engine_property(self) -> exp.EngineProperty:
        """Parse `ENGINE [=] <engine>`; the `=` sign is optional."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.EngineProperty(this=self._parse_field(any_token=True, anonymous_func=True))
        )

    # https://clickhouse.com/docs/en/sql-reference/statements/create/function
    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expr]:
        """Parse the body of a user-defined function as a lambda."""
        return self._parse_lambda()

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expr]:
        """Parse a type and flag it as non-nullable unless it is already marked nullable."""
        dtype = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
            # Mark every type as non-nullable which is ClickHouse's default, unless it's
            # already marked as nullable. This marker helps us transpile types from other
            # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
            # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
            # fail in ClickHouse without the `Nullable` type constructor.
            dtype.set("nullable", False)

        return dtype

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse `extract(...)`.

        The `extract(<part> FROM <expr>)` form is delegated to the base parser after
        rewinding; the two-argument form is kept as an anonymous `extract` call.
        """
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            # Rewind so the base implementation sees the whole argument list
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous(this="extract", expressions=[this, self._parse_bitwise()])
        )

    def _parse_assignment(self) -> t.Optional[exp.Expr]:
        """Parse an assignment, additionally supporting the `cond ? a : b` ternary form."""
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            # `false` is only parsed when a colon follows the `true` branch
            return self.expression(
                exp.If(
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )
            )

        return this

    def _parse_query_parameter(self) -> t.Optional[exp.Expr]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        index = self._index

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            # Not a query parameter after all: rewind and let the caller try something else
            self._retreat(index)
            return None
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        if isinstance(this, exp.Identifier) and not this.quoted:
            # Unquoted parameter names are stored as variables rather than identifiers
            this = exp.var(this.name)

        return self.expression(exp.Placeholder(this=this, kind=kind))

    def _parse_bracket(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]:
        """Parse bracket syntax, handling `expr[]...` JSON casts and `{k: v}` map literals."""
        if this:
            bracket_json_type = None

            # Every trailing `[]` pair wraps the type built so far in one more ARRAY level,
            # starting from a non-nullable JSON type.
            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                bracket_json_type = exp.DataType(
                    this=exp.DType.ARRAY,
                    expressions=[
                        bracket_json_type
                        or exp.DataType.build(
                            dtype=exp.DType.JSON, dialect=self.dialect, nullable=False
                        )
                    ],
                    nested=True,
                )

            if bracket_json_type:
                return self.expression(exp.JSONCast(this=this, to=bracket_json_type))

        # Peek (without consuming) whether the bracket is a `{` so a Struct result can be
        # converted into a VarMap below.
        l_brace = self._match(TokenType.L_BRACE, advance=False)
        bracket = super()._parse_bracket(this)

        if l_brace and isinstance(bracket, exp.Struct):
            varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
            for expression in bracket.expressions:
                # Conversion stops at the first entry that is not a `key: value` pair
                if not isinstance(expression, exp.PropertyEQ):
                    break

                varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                varmap.args["values"].append("expressions", expression.expression)

            return varmap

        return bracket

    def _parse_global_in(self, this: t.Optional[exp.Expr]) -> exp.Not | exp.In:
        """Parse the remainder of `<this> GLOBAL [NOT] IN ...` and flag the IN as global."""
        is_negated = self._match(TokenType.NOT)
        in_expr: t.Optional[exp.In] = None
        if self._match(TokenType.IN):
            in_expr = self._parse_in(this)
            in_expr.set("is_global", True)
        # NOTE(review): when IN does not follow, `in_expr` is still None here and is
        # returned (or wrapped in Not) as such despite the declared return type.
        return self.expression(exp.Not(this=in_expr)) if is_negated else t.cast(exp.In, in_expr)

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expr]:
        """Parse a table reference, handling `generate_series` aliases and the FINAL modifier."""
        # NOTE(review): `parse_partition` and `consume_pipe` are accepted but not forwarded
        # to the base implementation — confirm this is intentional.
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if isinstance(this, exp.Table):
            inner = this.this
            alias = this.args.get("alias")

            # An aliased generate_series(...) without explicit column aliases gets a single
            # column named `generate_series`.
            if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                alias.set("columns", [exp.to_identifier("generate_series")])

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final(this=this))

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse `position(...)`, always treating the haystack as the first argument."""
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a CTE, falling back to the scalar `WITH <expression> AS <identifier>` form."""
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE(this=self._parse_assignment(), alias=self._parse_table_alias(), scalar=True)
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Parse the tokens before JOIN: optional GLOBAL, then kind and side in either order."""
        is_global = self._prev if self._match(TokenType.GLOBAL) else None
        # A join kind may appear before the side (`kind_pre`) or after it (`kind`)
        kind_pre = self._prev if self._match_set(self.JOIN_KINDS) else None
        side = self._prev if self._match_set(self.JOIN_SIDES) else None
        kind = self._prev if self._match_set(self.JOIN_KINDS) else None
        return is_global, side or kind, kind_pre or kind

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a join, moving the GLOBAL marker to `global_` and fixing ARRAY JOIN operands."""
        # Bracket parsing is always enabled for the joined operand, regardless of the argument
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            # The first element returned by `_parse_join_parts` (the GLOBAL token) arrives
            # in the `method` slot; move it to `global_`.
            method = join.args.get("method")
            join.set("method", None)
            join.set("global_", method)

            # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
            # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
            if join.kind == "ARRAY":
                for table in join.find_all(exp.Table):
                    table.replace(table.to_column())

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expr]:
        """Parse a function call, upgrading anonymous calls that name a ClickHouse aggregate.

        Recognised aggregates become `AnonymousAggFunc` / `CombinedAggFunc`, or their
        parameterized variants when a second parenthesized parameter list follows.
        """
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = self._resolve_clickhouse_agg(func.this) if isinstance(func, exp.Anonymous) else None

        if parts:
            anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
            params = self._parse_func_params(anon_func)

            # parts[1] holds the combinator suffixes; a non-empty list selects the
            # "Combined" expression classes.
            if len(parts[1]) > 0:
                exp_class: Type[exp.Expr] = (
                    exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                )
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            instance = exp_class(this=anon_func.this, expressions=anon_func.expressions)
            if params:
                instance.set("params", params)
            func = self.expression(instance)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(self, this: t.Optional[exp.Func] = None) -> t.Optional[t.List[exp.Expr]]:
        """Parse a trailing parenthesized list, e.g. the `(x)` in `quantile(0.5)(x)`.

        Returns `None` when no such list follows.
        """
        # `)(`: the first list's closing paren has not been consumed yet; the closing paren
        # of the second list is left for the caller.
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        """Parse `quantile(level)(expr)`; without a second list the level defaults to 0.5."""
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            # The first list held the level, the second one holds the aggregated expression
            return self.expression(exp.Quantile(this=params[0], quantile=this))
        return self.expression(exp.Quantile(this=this, quantile=exp.Literal.number(0.5)))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expr]:
        """Parse a list of identifiers; the surrounding parentheses are always optional."""
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_column_def(
        self, this: t.Optional[exp.Expr], computed_column: bool = True
    ) -> t.Optional[exp.Expr]:
        """Parse a column definition, returning a `Dot` when a `.` follows the name."""
        if self._match(TokenType.DOT):
            return exp.Dot(this=this, expression=self._parse_id_var())

        return super()._parse_column_def(this, computed_column=computed_column)

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY; parentheses become optional when parsed as a property."""
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props,
            in_props=in_props,
            named_primary_key=named_primary_key,
        )

    def _parse_on_property(self) -> t.Optional[exp.Expr]:
        """Parse `CLUSTER <name>` (after ON) into `OnCluster`, rewinding if no name follows."""
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_string() or self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster(this=this))
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(self, kind: t.Optional[str] = None) -> exp.IndexColumnConstraint:
        """Parse an INDEX constraint; the TYPE and GRANULARITY clauses are optional."""
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (self._parse_function() or self._parse_var())

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint(
                this=this, expression=expression, index_type=index_type, granularity=granularity
            )
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION ID <string>` or `PARTITION <expressions>`; None if no PARTITION."""
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string> syntax
            expressions: t.List[exp.Expr] = [
                self.expression(exp.PartitionId(this=self._parse_string()))
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition(expressions=expressions))

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expr]:
        """Parse `REPLACE PARTITION ... FROM <table>`; None if either part is missing."""
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition(expression=partition, source=self._parse_table_parts())
        )

    def _parse_alter_table_modify(self) -> t.Optional[exp.Expr]:
        """Parse `MODIFY <properties>` into `AlterModifySqlSecurity`; None without properties."""
        if properties := self._parse_properties():
            return self.expression(exp.AlterModifySqlSecurity(expressions=properties.expressions))
        return None

    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
self._match(TokenType.EQ) if self._match(TokenType.CURRENT_USER): return exp.DefinerProperty(this=exp.Var(this=self._prev.text.upper())) return exp.DefinerProperty(this=self._parse_string()) def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: if not self._match_text_seq("PROJECTION"): return None return self.expression( exp.ProjectionDef( this=self._parse_id_var(), expression=self._parse_wrapped(self._parse_statement) ) ) def _parse_constraint(self) -> t.Optional[exp.Expr]: return super()._parse_constraint() or self._parse_projection_def() def _parse_alias( self, this: t.Optional[exp.Expr], explicit: bool = False ) -> t.Optional[exp.Expr]: # In clickhouse "SELECT APPLY(...)" is a query modifier, # so "APPLY" shouldn't be parsed as 's alias. However, "SELECT apply" is a valid alias if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): return this return super()._parse_alias(this=this, explicit=explicit) def _parse_expression(self) -> t.Optional[exp.Expr]: this = super()._parse_expression() # Clickhouse allows "SELECT [APPLY(func)] [...]]" modifier while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) self._match(TokenType.R_PAREN) return this def _parse_columns(self) -> exp.Expr: this: exp.Expr = self.expression(exp.Columns(this=self._parse_lambda())) while self._next and self._match_text_seq(")", "APPLY", "("): self._match(TokenType.R_PAREN) this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) return this def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: value = super()._parse_value(values=values) if not value: return None # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 
# In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), # but the final result is not altered by the extra parentheses. # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression expressions = value.expressions if values and not isinstance(expressions[-1], exp.Tuple): value.set( "expressions", [self.expression(exp.Tuple(expressions=[expr])) for expr in expressions], ) return value def _parse_partitioned_by(self) -> exp.PartitionedByProperty: # ClickHouse allows custom expressions as partition key # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key return self.expression(exp.PartitionedByProperty(this=self._parse_assignment())) def _parse_detach(self) -> exp.Detach: kind = self._match_set(self.DB_CREATABLES) and self._prev.text.upper() exists = self._parse_exists() this = self._parse_table_parts() return self.expression( exp.Detach( this=this, kind=kind, exists=exists, cluster=self._parse_on_property() if self._match(TokenType.ON) else None, permanent=self._match_text_seq("PERMANENTLY"), sync=self._match_text_seq("SYNC"), ) ) ================================================ FILE: sqlglot/parsers/databricks.py ================================================ from __future__ import annotations from sqlglot import exp, parser from sqlglot.dialects.dialect import build_date_delta, build_formatted_time from sqlglot.helper import seq_get from sqlglot.parsers.spark import SparkParser from sqlglot.tokens import TokenType class DatabricksParser(SparkParser): LOG_DEFAULTS_TO_LN = True STRICT_CAST = True COLON_IS_VARIANT_EXTRACT = True FUNCTIONS = { **SparkParser.FUNCTIONS, "GETDATE": exp.CurrentTimestamp.from_arg_list, "DATEADD": build_date_delta(exp.DateAdd), "DATE_ADD": build_date_delta(exp.DateAdd), "DATEDIFF": build_date_delta(exp.DateDiff), "DATE_DIFF": build_date_delta(exp.DateDiff), "NOW": exp.CurrentTimestamp.from_arg_list, "TO_DATE": 
build_formatted_time(exp.TsOrDsToDate, "databricks"), "UNIFORM": lambda args: exp.Uniform( this=seq_get(args, 0), expression=seq_get(args, 1), seed=seq_get(args, 2) ), } NO_PAREN_FUNCTION_PARSERS = { **SparkParser.NO_PAREN_FUNCTION_PARSERS, "CURDATE": lambda self: self._parse_curdate(), } FACTOR = { **SparkParser.FACTOR, TokenType.COLON: exp.JSONExtract, } COLUMN_OPERATORS = { **parser.Parser.COLUMN_OPERATORS, TokenType.QDCOLON: lambda self, this, to: self.build_cast( False, this=this, to=to, ), } CAST_COLUMN_OPERATORS = { *SparkParser.CAST_COLUMN_OPERATORS, TokenType.QDCOLON, } def _parse_curdate(self) -> exp.CurrentDate: # CURDATE, an alias for CURRENT_DATE, has optional parentheses if self._match(TokenType.L_PAREN): self._match_r_paren() return self.expression(exp.CurrentDate()) ================================================ FILE: sqlglot/parsers/doris.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp from sqlglot.helper import seq_get from sqlglot.parsers.mysql import MySQLParser from sqlglot.tokens import TokenType # Accept both DATE_TRUNC(datetime, unit) and DATE_TRUNC(unit, datetime) def _build_date_trunc(args: t.List[exp.Expr]) -> exp.Expr: a0, a1 = seq_get(args, 0), seq_get(args, 1) def _is_unit_like(e: exp.Expr | None) -> bool: if not (isinstance(e, exp.Literal) and e.is_string): return False text = e.this return not any(ch.isdigit() for ch in text) # Determine which argument is the unit unit, this = (a0, a1) if _is_unit_like(a0) else (a1, a0) return exp.TimestampTrunc(this=this, unit=unit) class DorisParser(MySQLParser): FUNCTIONS = { **MySQLParser.FUNCTIONS, "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list, "DATE_TRUNC": _build_date_trunc, "L2_DISTANCE": exp.EuclideanDistance.from_arg_list, "MONTHS_ADD": exp.AddMonths.from_arg_list, "REGEXP": exp.RegexpLike.from_arg_list, "TO_DATE": exp.TsOrDsToDate.from_arg_list, } FUNCTION_PARSERS = { k: v for k, v in 
MySQLParser.FUNCTION_PARSERS.items() if k != "GROUP_CONCAT" } NO_PAREN_FUNCTIONS = { k: v for k, v in MySQLParser.NO_PAREN_FUNCTIONS.items() if k != TokenType.CURRENT_DATE } PROPERTY_PARSERS = { **MySQLParser.PROPERTY_PARSERS, "PROPERTIES": lambda self: self._parse_wrapped_properties(), "UNIQUE": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty), # Plain KEY without UNIQUE/DUPLICATE/AGGREGATE prefixes should be treated as UniqueKeyProperty with unique=False "KEY": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty), "BUILD": lambda self: self._parse_build_property(), "REFRESH": lambda self: self._parse_refresh_property(), } def _parse_partition_property( self, ) -> t.Optional[exp.Expr] | t.List[exp.Expr]: expr = super()._parse_partition_property() if not expr: return self._parse_partitioned_by() if isinstance(expr, exp.Property): return expr self._match_l_paren() if self._match_text_seq("FROM", advance=False): create_expressions = self._parse_csv(self._parse_partitioning_granularity_dynamic) else: create_expressions = None self._match_r_paren() return self.expression( exp.PartitionByRangeProperty( partition_expressions=expr, create_expressions=create_expressions ) ) def _parse_partitioning_granularity_dynamic(self) -> exp.PartitionByRangePropertyDynamic: self._match_text_seq("FROM") start = self._parse_wrapped(self._parse_string) self._match_text_seq("TO") end = self._parse_wrapped(self._parse_string) self._match_text_seq("INTERVAL") number = self._parse_number() unit = self._parse_var(any_token=True) every = self.expression(exp.Interval(this=number, unit=unit)) return self.expression( exp.PartitionByRangePropertyDynamic(start=start, end=end, every=every) ) def _parse_partition_range_value(self) -> t.Optional[exp.Expr]: expr = super()._parse_partition_range_value() if isinstance(expr, exp.Partition): return expr self._match_text_seq("VALUES") name = expr # Doris-specific bracket syntax: VALUES [(...), (...)) 
self._match(TokenType.L_BRACKET) values = self._parse_csv(lambda: self._parse_wrapped_csv(self._parse_expression)) self._match(TokenType.R_BRACKET) self._match(TokenType.R_PAREN) part_range = self.expression(exp.PartitionRange(this=name, expressions=values)) return self.expression(exp.Partition(expressions=[part_range])) def _parse_build_property(self) -> exp.BuildProperty: return self.expression(exp.BuildProperty(this=self._parse_var(upper=True))) def _parse_refresh_property(self) -> exp.RefreshTriggerProperty: method = self._parse_var(upper=True) self._match(TokenType.ON) kind = self._match_texts(("MANUAL", "COMMIT", "SCHEDULE")) and self._prev.text.upper() every = self._match_text_seq("EVERY") and self._parse_number() unit = self._parse_var(any_token=True) if every else None starts = self._match_text_seq("STARTS") and self._parse_string() return self.expression( exp.RefreshTriggerProperty( method=method, kind=kind, every=every, unit=unit, starts=starts ) ) ================================================ FILE: sqlglot/parsers/dremio.py ================================================ from __future__ import annotations import typing as t from sqlglot import expressions as exp from sqlglot import parser from sqlglot.dialects.dialect import ( build_date_delta, build_formatted_time, build_timetostr_or_tochar, ) from sqlglot.helper import seq_get from sqlglot.tokens import TokenType if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType DATE_DELTA = t.Union[exp.DateAdd, exp.DateSub] def to_char_is_numeric_handler(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar: expression = build_timetostr_or_tochar(args, dialect) fmt = seq_get(args, 1) if fmt and isinstance(expression, exp.ToChar) and fmt.is_string and "#" in fmt.name: # Only mark as numeric if format is a literal containing # expression.set("is_numeric", True) return expression def build_date_delta_with_cast_interval( expression_class: t.Type[DATE_DELTA], ) -> 
t.Callable[[t.List[exp.Expr]], exp.Expr]: fallback_builder = build_date_delta(expression_class) def _builder(args): if len(args) == 2: date_arg, interval_arg = args if ( isinstance(interval_arg, exp.Cast) and isinstance(interval_arg.to, exp.DataType) and isinstance(interval_arg.to.this, exp.Interval) ): return expression_class( this=date_arg, expression=interval_arg.this, unit=interval_arg.to.this.unit, ) return expression_class(this=date_arg, expression=interval_arg) return fallback_builder(args) return _builder def datetype_handler(args: t.List[exp.Expr], dialect: DialectType) -> exp.Expr: from sqlglot.dialects.dialect import Dialect year, month, day = args if all(isinstance(arg, exp.Literal) and arg.is_int for arg in (year, month, day)): date_str = f"{int(year.this):04d}-{int(month.this):02d}-{int(day.this):02d}" return exp.Date(this=exp.Literal.string(date_str)) dialect = Dialect.get_or_raise(dialect) return exp.Cast( this=exp.Concat( expressions=[ year, exp.Literal.string("-"), month, exp.Literal.string("-"), day, ], coalesce=dialect.CONCAT_COALESCE, ), to=exp.DataType.build("DATE"), ) class DremioParser(parser.Parser): LOG_DEFAULTS_TO_LN = True TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | { TokenType.ANTI, TokenType.SEMI, } NO_PAREN_FUNCTION_PARSERS = { **parser.Parser.NO_PAREN_FUNCTION_PARSERS, "CURRENT_DATE_UTC": lambda self: self._parse_current_date_utc(), } FUNCTIONS = { **parser.Parser.FUNCTIONS, "ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list, "BIT_AND": exp.BitwiseAndAgg.from_arg_list, "BIT_OR": exp.BitwiseOrAgg.from_arg_list, "DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd), "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"), "DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub), "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, "REPEATSTR": exp.Repeat.from_arg_list, "TO_CHAR": to_char_is_numeric_handler, "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"), "DATE_PART": exp.Extract.from_arg_list, 
"DATETYPE": datetype_handler, } def _parse_current_date_utc(self) -> exp.Cast: if self._match(TokenType.L_PAREN): self._match_r_paren() return exp.Cast( this=exp.AtTimeZone( this=exp.CurrentTimestamp(), zone=exp.Literal.string("UTC"), ), to=exp.DataType.build("DATE"), ) ================================================ FILE: sqlglot/parsers/drill.py ================================================ from __future__ import annotations from sqlglot import exp, parser from sqlglot.dialects.dialect import build_formatted_time from sqlglot.tokens import TokenType class DrillParser(parser.Parser): STRICT_CAST = False TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | { TokenType.ANTI, TokenType.SEMI, } FUNCTIONS = { **parser.Parser.FUNCTIONS, "REPEATED_COUNT": exp.ArraySize.from_arg_list, "TO_TIMESTAMP": exp.TimeStrToTime.from_arg_list, "TO_CHAR": build_formatted_time(exp.TimeToStr, "drill"), "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, } LOG_DEFAULTS_TO_LN = True ================================================ FILE: sqlglot/parsers/druid.py ================================================ from __future__ import annotations from sqlglot.parser import Parser class DruidParser(Parser): pass ================================================ FILE: sqlglot/parsers/duckdb.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, parser from sqlglot.trie import new_trie from sqlglot.dialects.dialect import ( binary_from_function, build_default_decimal_type, build_formatted_time, build_regexp_extract, date_trunc_to_time, pivot_column_names, ) from sqlglot.helper import seq_get from sqlglot.parser import binary_range_parser from sqlglot.tokens import TokenType from collections.abc import Collection def _build_sort_array_desc(args: t.List) -> exp.Expr: return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) def _build_array_prepend(args: t.List) -> exp.Expr: return 
def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    """Create a builder that turns function arguments into `exp.GenerateSeries`.

    Args:
        end_exclusive: Value stored under the node's `is_end_exclusive` arg.

    Returns:
        A callable that accepts the parsed argument list and returns the
        corresponding `exp.GenerateSeries` node.
    """

    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted.
            # A new list is built (instead of inserting into `args`) so that the
            # list owned by the caller is left untouched.
            args = [exp.Literal.number("0"), *args]

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder
"STRUCT_PACK"} SHOW_PARSERS = { "TABLES": _show_parser("TABLES"), "ALL TABLES": _show_parser("ALL TABLES"), } FUNCTIONS = { **{k: v for k, v in parser.Parser.FUNCTIONS.items() if k not in ("DATE_SUB", "GLOB")}, "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), "ARRAY_PREPEND": _build_array_prepend, "ARRAY_REVERSE_SORT": _build_sort_array_desc, "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), "ARRAY_SORT": exp.SortArray.from_arg_list, "BIT_AND": exp.BitwiseAndAgg.from_arg_list, "BIT_OR": exp.BitwiseOrAgg.from_arg_list, "BIT_XOR": exp.BitwiseXorAgg.from_arg_list, "CURRENT_LOCALTIMESTAMP": exp.Localtimestamp.from_arg_list, "DATEDIFF": _build_date_diff, "DATE_DIFF": _build_date_diff, "DATE_TRUNC": date_trunc_to_time, "DATETRUNC": date_trunc_to_time, "DECODE": lambda args: exp.Decode( this=seq_get(args, 0), charset=exp.Literal.string("utf-8") ), "EDITDIST3": exp.Levenshtein.from_arg_list, "ENCODE": lambda args: exp.Encode( this=seq_get(args, 0), charset=exp.Literal.string("utf-8") ), "EPOCH": exp.TimeToUnix.from_arg_list, "EPOCH_MS": lambda args: exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS), "GENERATE_SERIES": _build_generate_series(), "GET_CURRENT_TIME": exp.CurrentTime.from_arg_list, "GET_BIT": lambda args: exp.Getbit( this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True ), "JARO_WINKLER_SIMILARITY": exp.JarowinklerSimilarity.from_arg_list, "JSON": exp.ParseJSON.from_arg_list, "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), "LIST_APPEND": exp.ArrayAppend.from_arg_list, "LIST_CONCAT": parser.build_array_concat, "LIST_CONTAINS": exp.ArrayContains.from_arg_list, "LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list, "LIST_DISTANCE": exp.EuclideanDistance.from_arg_list, "LIST_FILTER": exp.ArrayFilter.from_arg_list, "LIST_HAS": exp.ArrayContains.from_arg_list, 
"LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, "LIST_MAX": exp.ArrayMax.from_arg_list, "LIST_MIN": exp.ArrayMin.from_arg_list, "LIST_PREPEND": _build_array_prepend, "LIST_REVERSE_SORT": _build_sort_array_desc, "LIST_SORT": exp.SortArray.from_arg_list, "LIST_TRANSFORM": exp.Transform.from_arg_list, "LIST_VALUE": lambda args: exp.Array(expressions=args), "MAKE_DATE": exp.DateFromParts.from_arg_list, "MAKE_TIME": exp.TimeFromParts.from_arg_list, "MAKE_TIMESTAMP": _build_make_timestamp, "QUANTILE_CONT": exp.PercentileCont.from_arg_list, "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, "RANGE": _build_generate_series(end_exclusive=True), "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, "REGEXP_REPLACE": lambda args: exp.RegexpReplace( this=seq_get(args, 0), expression=seq_get(args, 1), replacement=seq_get(args, 2), modifiers=seq_get(args, 3), single_replace=True, ), "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), "STRING_SPLIT": exp.Split.from_arg_list, "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, "STRING_TO_ARRAY": exp.Split.from_arg_list, "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), "STRUCT_PACK": exp.Struct.from_arg_list, "STR_SPLIT": exp.Split.from_arg_list, "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, "TODAY": exp.CurrentDate.from_arg_list, "TIME_BUCKET": exp.DateBin.from_arg_list, "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, "UNNEST": exp.Explode.from_arg_list, "VERSION": exp.CurrentVersion.from_arg_list, "XOR": binary_from_function(exp.BitwiseXor), } FUNCTION_PARSERS = { **{k: v for k, v in parser.Parser.FUNCTION_PARSERS.items() if k != "DECODE"}, **dict.fromkeys( ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() ), } NO_PAREN_FUNCTION_PARSERS = { 
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expr]:
    """Parse a lambda, adding support for the `LAMBDA <args> : <body>` form.

    Input that does not start with the `LAMBDA` keyword is delegated to the
    parent implementation. If `LAMBDA` and its argument list are not followed
    by a colon, the parser rewinds to where it started and returns None.
    """
    start = self._index

    if self._match_text_seq("LAMBDA"):
        params = self._parse_csv(self._parse_lambda_arg)

        if self._match(TokenType.COLON):
            body = self._replace_lambda(self._parse_assignment(), params)
            return self.expression(exp.Lambda(this=body, expressions=params, colon=True))

        # Not a lambda after all: undo everything consumed so far.
        self._retreat(start)
        return None

    return super()._parse_lambda(alias=alias)
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
    consume_pipe: bool = False,
) -> t.Optional[exp.Expr]:
    """Parse a table reference, additionally accepting a prefix alias.

    When the token after the current one is a colon (`FROM foo: bar`), the
    alias is parsed first and attached to whatever the parent implementation
    returns. All keyword arguments, including `consume_pipe`, are forwarded to
    the parent implementation unchanged.

    Returns:
        The parsed table expression (with the prefix alias set, if one was
        given), or None if the parent implementation parsed nothing.
    """
    # DuckDB supports prefix aliases, e.g. FROM foo: bar
    if self._next.token_type == TokenType.COLON:
        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        self._match(TokenType.COLON)
        # Copy the list: it is extended below, and extending `_prev_comments`
        # itself would leak the table's comments into parser-owned state.
        comments = list(self._prev_comments or [])
    else:
        alias = None
        comments = []

    table = super()._parse_table(
        schema=schema,
        joins=joins,
        alias_tokens=alias_tokens,
        parse_bracket=parse_bracket,
        is_db_reference=is_db_reference,
        parse_partition=parse_partition,
        consume_pipe=consume_pipe,
    )

    if isinstance(table, exp.Expr) and isinstance(alias, exp.TableAlias):
        # Moves the comment next to the alias in `alias: table /* comment */`
        comments += table.pop_comments() or []
        alias.comments = (alias.pop_comments() or []) + comments
        table.set("alias", alias)

    return table
def _parse_attach_detach(self, is_attach: bool = True) -> exp.Attach | exp.Detach:
    """Parse the remainder of an ATTACH or DETACH statement.

    Args:
        is_attach: True to produce `exp.Attach`, False to produce `exp.Detach`.

    Returns:
        The statement node. A parenthesized option list is parsed in both
        cases but only stored on `exp.Attach`.
    """

    def _option() -> exp.AttachOption:
        # Each option is a name followed by its value.
        name = self._parse_var(any_token=True)
        value = self._parse_field(any_token=True)
        return self.expression(exp.AttachOption(this=name, expression=value))

    # The DATABASE keyword is optional.
    self._match(TokenType.DATABASE)
    exists = self._parse_exists(not_=is_attach)
    target = self._parse_alias(self._parse_primary_or_var(), explicit=True)

    options = (
        self._parse_wrapped_csv(_option)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    if not is_attach:
        return self.expression(exp.Detach(this=target, exists=exists))

    return self.expression(exp.Attach(this=target, exists=exists, expressions=options))
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/zeroifnull.htm
def _build_zeroifnull(args: t.List) -> exp.If:
    """Build `IF <arg> IS NULL THEN 0 ELSE <arg>` from the first argument."""
    value = seq_get(args, 0)
    is_null = exp.Is(this=value, expression=exp.Null())
    return exp.If(this=is_null, true=exp.Literal.number(0), false=value)


# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/nullifzero.htm
def _build_nullifzero(args: t.List) -> exp.If:
    """Build `IF <arg> = 0 THEN NULL ELSE <arg>` from the first argument."""
    value = seq_get(args, 0)
    is_zero = exp.EQ(this=value, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=value)
exp.ApproxDistinct.from_arg_list, "BIT_AND": binary_from_function(exp.BitwiseAnd), "BIT_OR": binary_from_function(exp.BitwiseOr), "BIT_XOR": binary_from_function(exp.BitwiseXor), "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), "BIT_LSHIFT": binary_from_function(exp.BitwiseLeftShift), "BIT_RSHIFT": binary_from_function(exp.BitwiseRightShift), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/convert_tz.htm "CONVERT_TZ": lambda args: exp.ConvertTimezone( source_tz=seq_get(args, 1), target_tz=seq_get(args, 2), timestamp=seq_get(args, 0), options=seq_get(args, 3), ), "CURDATE": exp.CurrentDate.from_arg_list, # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/date_trunc.htm#DATE_TRUNC "DATE_TRUNC": lambda args: exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0)), "DIV": binary_from_function(exp.IntDiv), "EVERY": lambda args: exp.All(this=seq_get(args, 0)), "EDIT_DISTANCE": exp.Levenshtein.from_arg_list, "FROM_POSIX_TIME": exp.UnixToTime.from_arg_list, "HASH_SHA": exp.SHA.from_arg_list, "HASH_SHA1": exp.SHA.from_arg_list, "HASH_MD5": exp.MD5.from_arg_list, "HASHTYPE_MD5": exp.MD5Digest.from_arg_list, "HASH_SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), "HASH_SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), "NOW": exp.CurrentTimestamp.from_arg_list, "NULLIFZERO": _build_nullifzero, "REGEXP_LIKE": lambda args: exp.RegexpLike( this=seq_get(args, 0), expression=seq_get(args, 1), flag=seq_get(args, 2), full_match=True, ), "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, "REGEXP_REPLACE": lambda args: exp.RegexpReplace( this=seq_get(args, 0), expression=seq_get(args, 1), replacement=seq_get(args, 2), position=seq_get(args, 3), occurrence=seq_get(args, 4), ), "TRUNC": build_trunc, "TRUNCATE": build_trunc, "TO_CHAR": build_timetostr_or_tochar, "TO_DATE": build_formatted_time(exp.TsOrDsToDate, 
"exasol"), "USER": exp.CurrentUser.from_arg_list, "VAR_POP": exp.VariancePop.from_arg_list, "ZEROIFNULL": _build_zeroifnull, } CONSTRAINT_PARSERS = { **parser.Parser.CONSTRAINT_PARSERS, "COMMENT": lambda self: self.expression( exp.CommentColumnConstraint(this=self._match(TokenType.IS) and self._parse_string()) ), } RANGE_PARSERS = { **parser.Parser.RANGE_PARSERS, TokenType.RLIKE: lambda self, this: self.expression( exp.RegexpLike(this=this, expression=self._parse_bitwise(), full_match=True) ), } FUNC_TOKENS = { *parser.Parser.FUNC_TOKENS, TokenType.SYSTIMESTAMP, } NO_PAREN_FUNCTIONS = { **parser.Parser.NO_PAREN_FUNCTIONS, TokenType.SYSTIMESTAMP: exp.Systimestamp, TokenType.CURRENT_SCHEMA: exp.CurrentSchema, } FUNCTION_PARSERS = { **parser.Parser.FUNCTION_PARSERS, # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/listagg.htm # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/group_concat.htm **dict.fromkeys(("GROUP_CONCAT", "LISTAGG"), lambda self: self._parse_group_concat()), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/json_value.htm "JSON_VALUE": lambda self: self._parse_json_value(), # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/json_extract.htm "JSON_EXTRACT": lambda self: self._parse_json_extract(), } def _parse_column(self) -> t.Optional[exp.Expr]: column = super()._parse_column() if not isinstance(column, exp.Column): return column table_ident = column.args.get("table") if ( isinstance(table_ident, exp.Identifier) and table_ident.name.upper() == "LOCAL" and not bool(table_ident.args.get("quoted")) ): column.set("table", None) return column def _parse_json_extract(self) -> exp.JSONExtract: args = self._parse_expressions() self._match_r_paren() expression = exp.JSONExtract(expressions=args) if self._match_texts("EMITS"): expression.set("emits", self._parse_schema()) return expression ODBC_DATETIME_LITERALS 
class FabricParser(TSQLParser):
    """T-SQL parser variant that makes implicit CHAR/VARCHAR lengths explicit."""

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse CREATE and give length-less VARCHAR/CHAR table columns a length of 1.

        Only `CREATE TABLE` statements whose target is a schema are touched;
        any other result of the parent implementation is returned as is.
        """
        parsed = super()._parse_create()

        is_table_schema = (
            isinstance(parsed, exp.Create)
            and parsed.kind == "TABLE"
            and isinstance(parsed.this, exp.Schema)
        )
        if not is_table_schema:
            return parsed

        for definition in parsed.this.expressions:
            if not isinstance(definition, exp.ColumnDef):
                continue

            dtype = definition.kind
            if not isinstance(dtype, exp.DataType):
                continue

            if dtype.this in (exp.DType.VARCHAR, exp.DType.CHAR) and not dtype.expressions:
                # Add default precision of 1 to VARCHAR/CHAR without precision
                # When n isn't specified in a data definition or variable declaration
                # statement, the default length is 1.
                # https://learn.microsoft.com/en-us/sql/t-sql/data-types/char-and-varchar-transact-sql?view=sql-server-ver17#remarks
                dtype.set("expressions", [exp.Literal.number("1")])

        return parsed
def _build_date_add(args: t.List) -> exp.TsOrDsAdd:
    """Build a day-based `exp.TsOrDsAdd` whose amount is the negated second argument.

    Args:
        args: The date operand followed by the number of days.

    Returns:
        `exp.TsOrDsAdd` with unit `'DAY'`. A missing (falsy) amount is passed
        through without being negated.
    """
    amount = seq_get(args, 1)
    negated = amount * -1 if amount else amount

    return exp.TsOrDsAdd(
        this=seq_get(args, 0),
        expression=negated,
        unit=exp.Literal.string("DAY"),
    )
exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), "SEQUENCE": exp.GenerateSeries.from_arg_list, "SIZE": exp.ArraySize.from_arg_list, "SPLIT": exp.RegexpSplit.from_arg_list, "STR_TO_MAP": lambda args: exp.StrToMap( this=seq_get(args, 0), pair_delim=seq_get(args, 1) or exp.Literal.string(","), key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), ), "TO_DATE": _build_to_date, "TO_JSON": exp.JSONFormat.from_arg_list, "TRUNC": exp.TimestampTrunc.from_arg_list, "UNBASE64": exp.FromBase64.from_arg_list, "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)( args or [exp.CurrentTimestamp()] ), "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), } NO_PAREN_FUNCTION_PARSERS = { **parser.Parser.NO_PAREN_FUNCTION_PARSERS, "TRANSFORM": lambda self: self._parse_transform(), } NO_PAREN_FUNCTIONS = { k: v for k, v in parser.Parser.NO_PAREN_FUNCTIONS.items() if k != TokenType.CURRENT_TIME } PROPERTY_PARSERS = { **parser.Parser.PROPERTY_PARSERS, "SERDEPROPERTIES": lambda self: exp.SerdeProperties( expressions=self._parse_wrapped_csv(self._parse_property) ), } ALTER_PARSERS = { **parser.Parser.ALTER_PARSERS, "CHANGE": lambda self: self._parse_alter_table_change(), } def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: if not self._match(TokenType.L_PAREN, advance=False): self._retreat(self._index - 1) return None args = self._parse_wrapped_csv(self._parse_lambda) row_format_before = self._parse_row_format(match_row=True) record_writer = None if self._match_text_seq("RECORDWRITER"): record_writer = self._parse_string() if not self._match(TokenType.USING): return exp.Transform.from_arg_list(args) command_script = self._parse_string() self._match(TokenType.ALIAS) schema = self._parse_schema() row_format_after = self._parse_row_format(match_row=True) record_reader = None if 
self._match_text_seq("RECORDREADER"): record_reader = self._parse_string() return self.expression( exp.QueryTransform( expressions=args, command_script=command_script, schema=schema, row_format_before=row_format_before, record_writer=record_writer, row_format_after=row_format_after, record_reader=record_reader, ) ) def _parse_quantile_function(self, func: t.Type[F]) -> F: if self._match(TokenType.DISTINCT): first_arg: t.Optional[exp.Expr] = self.expression( exp.Distinct(expressions=[self._parse_lambda()]) ) else: self._match(TokenType.ALL) first_arg = self._parse_lambda() args = [first_arg] if self._match(TokenType.COMMA): args.extend(self._parse_function_args()) return func.from_arg_list(args) def _parse_types( self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True ) -> t.Optional[exp.Expr]: """ Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: spark-sql (default)> select cast(1234 as varchar(2)); 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support char/varchar type and simply treats them as string type. Please use string type directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 1234 Time taken: 4.265 seconds, Fetched 1 row(s) This shows that Spark doesn't truncate the value into '12', which is inconsistent with what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 
def _parse_alter_table_change(self) -> t.Optional[exp.Expr]:
    """Parse the body of `ALTER TABLE ... CHANGE [COLUMN] ...`.

    Two forms are handled:
      * `<column> TYPE <type>`, only when `CHANGE_COLUMN_ALTER_SYNTAX` is set.
      * `<old_name> <new_name> <type> [COMMENT <string>]`.

    Returns:
        An `exp.AlterColumn` node. An error is reported through `raise_error`
        when the second form is missing a name or the data type.
    """
    # The COLUMN keyword is optional.
    self._match(TokenType.COLUMN)
    old_column = self._parse_field(any_token=True)

    if self.CHANGE_COLUMN_ALTER_SYNTAX and self._match_text_seq("TYPE"):
        new_type = self._parse_types(schema=True)
        return self.expression(exp.AlterColumn(this=old_column, dtype=new_type))

    new_column = self._parse_field(any_token=True)
    new_type = self._parse_types(schema=True)
    comment = self._match(TokenType.COMMENT) and self._parse_string()

    if not (old_column and new_column and new_type):
        self.raise_error(
            "Expected 'CHANGE COLUMN' to be followed by 'column_name' 'column_name' 'data_type'"
        )

    return self.expression(
        exp.AlterColumn(
            this=old_column,
            rename_to=new_column,
            dtype=new_type,
            comment=comment,
        )
    )
class MaterializeParser(PostgresParser):
    """Postgres-based parser extended with `MAP` constructors and `=>` entries."""

    TYPED_LAMBDA_ARGS = True

    NO_PAREN_FUNCTION_PARSERS = {
        **PostgresParser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    LAMBDAS = {
        **PostgresParser.LAMBDAS,
        # `key => value` is parsed into a Kwarg pairing the first collected
        # expression with the assignment that follows the arrow.
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg(this=seq_get(expressions, 0), expression=self._parse_assignment())
        ),
    }

    def _parse_lambda_arg(self) -> t.Optional[exp.Expr]:
        """Parse a lambda argument as a field."""
        return self._parse_field()

    def _parse_map(self) -> exp.ToMap:
        """Parse what follows the `MAP` keyword.

        Two forms are accepted:
          * `(<query>)`: the parenthesized query is wrapped in `exp.ToMap`.
          * `[<entry>, ...]`: each entry is converted to an `exp.PropertyEQ`
            and the entries are collected in an `exp.Struct` wrapped in
            `exp.ToMap`.
        """
        if self._match(TokenType.L_PAREN):
            from_query = self.expression(exp.ToMap(this=self._parse_select()))
            self._match_r_paren()
            return from_query

        if not self._match(TokenType.L_BRACKET):
            self.raise_error("Expecting [")

        entries = []
        for pair in self._parse_csv(self._parse_lambda):
            entries.append(exp.PropertyEQ(this=pair.this, expression=pair.expression))

        if not self._match(TokenType.R_BRACKET):
            self.raise_error("Expecting ]")

        struct = self.expression(exp.Struct(expressions=entries))
        return self.expression(exp.ToMap(this=struct))
class MySQLParser(parser.Parser):
    """MySQL-specific parser.

    Extends the lookup tables of the base parser (functions, operators, constraints,
    SHOW / SET sub-commands) and adds the methods that parse MySQL-only syntax such as
    index constraints, SHOW statements and RANGE / LIST partition definitions.
    """

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.LOCALTIME: exp.Localtime,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
    }

    # STRAIGHT_JOIN is removed from the tokens that may be used as an identifier.
    ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.STRAIGHT_JOIN}

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.DATABASE,
        TokenType.MOD,
        TokenType.SCHEMA,
        TokenType.VALUES,
        TokenType.CHARACTER_SET,
    }

    # The DAMP and XOR tokens build And / Xor nodes.
    CONJUNCTION = {
        **parser.Parser.CONJUNCTION,
        TokenType.DAMP: exp.And,
        TokenType.XOR: exp.Xor,
    }

    # The DPIPE token builds an Or node.
    DISJUNCTION = {
        **parser.Parser.DISJUNCTION,
        TokenType.DPIPE: exp.Or,
    }

    # ANTI and SEMI are allowed as table aliases; index-hint tokens and STRAIGHT_JOIN are not.
    TABLE_ALIAS_TOKENS = (
        (parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.ANTI, TokenType.SEMI})
        - parser.Parser.TABLE_INDEX_HINT_TOKENS
        - {TokenType.STRAIGHT_JOIN}
    )

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        # `a SOUNDS LIKE b` is parsed as SOUNDEX(a) = SOUNDEX(b)
        TokenType.SOUNDS_LIKE: lambda self, this: self.expression(
            exp.EQ(
                this=self.expression(exp.Soundex(this=this)),
                expression=self.expression(exp.Soundex(this=self._parse_term())),
            )
        ),
        # `value MEMBER OF (json_array)` is parsed as JSONArrayContains(value, json_array)
        TokenType.MEMBER_OF: lambda self, this: self.expression(
            exp.JSONArrayContains(this=this, expression=self._parse_wrapped(self._parse_expression))
        ),
    }

    # Function name -> builder that receives the already-parsed argument list.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
        "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
        "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
        "BIT_COUNT": exp.BitwiseCount.from_arg_list,
        # CONVERT_TZ(timestamp, source_tz, target_tz)
        "CONVERT_TZ": lambda args: exp.ConvertTimezone(
            source_tz=seq_get(args, 1), target_tz=seq_get(args, 2), timestamp=seq_get(args, 0)
        ),
        "CURDATE": exp.CurrentDate.from_arg_list,
        "CURTIME": exp.CurrentTime.from_arg_list,
        "DATE": lambda args: exp.TsOrDsToDate(this=seq_get(args, 0)),
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_FORMAT": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToTimestamp(this=seq_get(args, 0)),
            format=Dialect["mysql"].format_time(seq_get(args, 1)),
        ),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        # The date-part extractors below wrap their argument in TsOrDsToDate.
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFMONTH": lambda args: exp.DayOfMonth(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFWEEK": lambda args: exp.DayOfWeek(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFYEAR": lambda args: exp.DayOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FORMAT": exp.NumberToStr.from_arg_list,
        "FROM_UNIXTIME": build_formatted_time(exp.UnixToTime, "mysql"),
        "ISNULL": isnull_to_is_null,
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "MAKETIME": exp.TimeFromParts.from_arg_list,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        # MONTHNAME(x) is represented as formatting x with the "%B" specifier
        "MONTHNAME": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            format=exp.Literal.string("%B"),
        ),
        # SCHEMA() and DATABASE() both map to CurrentSchema
        "SCHEMA": exp.CurrentSchema.from_arg_list,
        "DATABASE": exp.CurrentSchema.from_arg_list,
        "STR_TO_DATE": _str_to_date,
        "TIMESTAMPDIFF": build_date_delta(exp.TimestampDiff),
        # TO_DAYS(x) is expressed as (DATEDIFF(x, '0000-01-01', DAY) + 1)
        "TO_DAYS": lambda args: exp.paren(
            exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=exp.Literal.string("0000-01-01")),
                unit=exp.var("DAY"),
            )
            + 1
        ),
        "VERSION": exp.CurrentVersion.from_arg_list,
        "WEEK": lambda args: exp.Week(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)), mode=seq_get(args, 1)
        ),
        "WEEKOFYEAR": lambda args: exp.WeekOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
    }

    # Function name -> parser callback that consumes the arguments itself.
    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "GROUP_CONCAT": lambda self: self._parse_group_concat(),
        # https://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values
        "VALUES": lambda self: self.expression(
            exp.Anonymous(this="VALUES", expressions=[self._parse_id_var()])
        ),
        "JSON_VALUE": lambda self: self._parse_json_value(),
        "SUBSTR": lambda self: self._parse_substring(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    # Keyword sequence following SHOW -> parser. Several spellings share one canonical
    # name (e.g. "MASTER LOGS" -> "BINARY LOGS", "SCHEMAS" -> "DATABASES").
    # `target=True` parses an identifier right after the keywords, `target="FROM"` /
    # `target="FOR"` first consumes that keyword (see `_parse_show_mysql`).
    SHOW_PARSERS = {
        "BINARY LOGS": _show_parser("BINARY LOGS"),
        "MASTER LOGS": _show_parser("BINARY LOGS"),
        "BINLOG EVENTS": _show_parser("BINLOG EVENTS"),
        "CHARACTER SET": _show_parser("CHARACTER SET"),
        "CHARSET": _show_parser("CHARACTER SET"),
        "COLLATION": _show_parser("COLLATION"),
        "FULL COLUMNS": _show_parser("COLUMNS", target="FROM", full=True),
        "COLUMNS": _show_parser("COLUMNS", target="FROM"),
        "CREATE DATABASE": _show_parser("CREATE DATABASE", target=True),
        "CREATE EVENT": _show_parser("CREATE EVENT", target=True),
        "CREATE FUNCTION": _show_parser("CREATE FUNCTION", target=True),
        "CREATE PROCEDURE": _show_parser("CREATE PROCEDURE", target=True),
        "CREATE TABLE": _show_parser("CREATE TABLE", target=True),
        "CREATE TRIGGER": _show_parser("CREATE TRIGGER", target=True),
        "CREATE VIEW": _show_parser("CREATE VIEW", target=True),
        "DATABASES": _show_parser("DATABASES"),
        "SCHEMAS": _show_parser("DATABASES"),
        "ENGINE": _show_parser("ENGINE", target=True),
        "STORAGE ENGINES": _show_parser("ENGINES"),
        "ENGINES": _show_parser("ENGINES"),
        "ERRORS": _show_parser("ERRORS"),
        "EVENTS": _show_parser("EVENTS"),
        "FUNCTION CODE": _show_parser("FUNCTION CODE", target=True),
        "FUNCTION STATUS": _show_parser("FUNCTION STATUS"),
        "GRANTS": _show_parser("GRANTS", target="FOR"),
        "INDEX": _show_parser("INDEX", target="FROM"),
        "MASTER STATUS": _show_parser("MASTER STATUS"),
        "OPEN TABLES": _show_parser("OPEN TABLES"),
        "PLUGINS": _show_parser("PLUGINS"),
        "PROCEDURE CODE": _show_parser("PROCEDURE CODE", target=True),
        "PROCEDURE STATUS": _show_parser("PROCEDURE STATUS"),
        "PRIVILEGES": _show_parser("PRIVILEGES"),
        "FULL PROCESSLIST": _show_parser("PROCESSLIST", full=True),
        "PROCESSLIST": _show_parser("PROCESSLIST"),
        "PROFILE": _show_parser("PROFILE"),
        "PROFILES": _show_parser("PROFILES"),
        "RELAYLOG EVENTS": _show_parser("RELAYLOG EVENTS"),
        "REPLICAS": _show_parser("REPLICAS"),
        "SLAVE HOSTS": _show_parser("REPLICAS"),
        "REPLICA STATUS": _show_parser("REPLICA STATUS"),
        "SLAVE STATUS": _show_parser("REPLICA STATUS"),
        "GLOBAL STATUS": _show_parser("STATUS", global_=True),
        "SESSION STATUS": _show_parser("STATUS"),
        "STATUS": _show_parser("STATUS"),
        "TABLE STATUS": _show_parser("TABLE STATUS"),
        "FULL TABLES": _show_parser("TABLES", full=True),
        "TABLES": _show_parser("TABLES"),
        "TRIGGERS": _show_parser("TRIGGERS"),
        "GLOBAL VARIABLES": _show_parser("VARIABLES", global_=True),
        "SESSION VARIABLES": _show_parser("VARIABLES"),
        "VARIABLES": _show_parser("VARIABLES"),
        "WARNINGS": _show_parser("WARNINGS"),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCK": lambda self: self._parse_property_assignment(exp.LockProperty),
        "PARTITION BY": lambda self: self._parse_partition_property(),
    }

    # "CHARSET" is recorded under the same kind ("CHARACTER SET") as the long spelling.
    SET_PARSERS = {
        **parser.Parser.SET_PARSERS,
        "PERSIST": lambda self: self._parse_set_item_assignment("PERSIST"),
        "PERSIST_ONLY": lambda self: self._parse_set_item_assignment("PERSIST_ONLY"),
        "CHARACTER SET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
        "CHARSET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
        "NAMES": lambda self: self._parse_set_item_names(),
    }

    # Tries built from the space-separated words of the keys above.
    SHOW_TRIE = new_trie(key.split(" ") for key in SHOW_PARSERS)
    SET_TRIE = new_trie(key.split(" ") for key in SET_PARSERS)

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "FULLTEXT": lambda self: self._parse_index_constraint(kind="FULLTEXT"),
        "INDEX": lambda self: self._parse_index_constraint(),
        "KEY": lambda self: self._parse_index_constraint(),
        "SPATIAL": lambda self: self._parse_index_constraint(kind="SPATIAL"),
        "ZEROFILL": lambda self: self.expression(exp.ZeroFillColumnConstraint()),
    }

    # ALTER TABLE ... MODIFY reuses the ALTER TABLE ... ALTER parser.
    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "MODIFY": lambda self: self._parse_alter_table_alter(),
    }

    ALTER_ALTER_PARSERS = {
        **parser.Parser.ALTER_ALTER_PARSERS,
        "INDEX": lambda self: self._parse_alter_table_alter_index(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "FULLTEXT",
        "INDEX",
        "KEY",
        "SPATIAL",
    }

    # Options accepted by SHOW PROFILE: single-word types map to an empty tuple,
    # two-word types map to the word that must follow (BLOCK IO, CONTEXT SWITCHES, PAGE FAULTS).
    PROFILE_TYPES: parser.OPTIONS_TYPE = {
        **dict.fromkeys(("ALL", "CPU", "IPC", "MEMORY", "SOURCE", "SWAPS"), tuple()),
        "BLOCK": ("IO",),
        "CONTEXT": ("SWITCHES",),
        "PAGE": ("FAULTS",),
    }

    # SET is accepted both as a type token and as an enum-like type token.
    TYPE_TOKENS = {
        *parser.Parser.TYPE_TOKENS,
        TokenType.SET,
    }

    ENUM_TYPE_TOKENS = {
        *parser.Parser.ENUM_TYPE_TOKENS,
        TokenType.SET,
    }

    # SELECT [ ALL | DISTINCT | DISTINCTROW ] [ <operation modifier> ... ]
    OPERATION_MODIFIERS = {
        "HIGH_PRIORITY",
        "STRAIGHT_JOIN",
        "SQL_SMALL_RESULT",
        "SQL_BIG_RESULT",
        "SQL_BUFFER_RESULT",
        "SQL_NO_CACHE",
        "SQL_CALC_FOUND_ROWS",
    }

    # Dialect behavior switches; NOTE(review): presumably consumed by the base parser,
    # whose definition is outside this file.
    LOG_DEFAULTS_TO_LN = True
    STRING_ALIASES = True
    VALUES_FOLLOWED_BY_PAREN = False
    SUPPORTS_PARTITION_SELECTION = True

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse a generated-column clause, then an optional trailing STORED / VIRTUAL.

        When the keyword is present, `persisted` is True for STORED and False for VIRTUAL.
        It is set on an existing ComputedColumnConstraint, or an identity constraint is
        replaced by a ComputedColumnConstraint built from its `expression`.
        """
        this = super()._parse_generated_as_identity()

        if self._match_texts(("STORED", "VIRTUAL")):
            persisted = self._prev.text.upper() == "STORED"

            if isinstance(this, exp.ComputedColumnConstraint):
                this.set("persisted", persisted)
            elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
                this = self.expression(
                    exp.ComputedColumnConstraint(this=this.expression, persisted=persisted)
                )

        return this

    def _parse_primary_key_part(self) -> t.Optional[exp.Expr]:
        """Parse one key part: an identifier, optionally followed by `(<number>)`.

        Returns the bare identifier, or a ColumnPrefix when a parenthesized number follows.
        """
        this = self._parse_id_var()
        if not self._match(TokenType.L_PAREN):
            return this

        expression = self._parse_number()
        self._match_r_paren()
        return self.expression(exp.ColumnPrefix(this=this, expression=expression))

    def _parse_index_constraint(self, kind: t.Optional[str] = None) -> exp.IndexColumnConstraint:
        """Parse an index definition that appears inside a schema.

        Args:
            kind: "FULLTEXT" / "SPATIAL" when the definition was introduced by that
                keyword; in that case an optional INDEX / KEY keyword is consumed first.

        Returns:
            An IndexColumnConstraint carrying the optional name, the optional index type
            given by USING, the wrapped column list and any trailing index options.
        """
        if kind:
            self._match_texts(("INDEX", "KEY"))

        this = self._parse_id_var(any_token=False)
        # Yields the text of the token after USING, or a falsy value when USING is absent.
        index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
        expressions = self._parse_wrapped_csv(self._parse_ordered)

        # Collect trailing options until a token that starts none of them is reached.
        options = []
        while True:
            if self._match_text_seq("KEY_BLOCK_SIZE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(key_block_size=self._parse_number())
            elif self._match(TokenType.USING):
                opt = exp.IndexConstraintOption(using=self._advance_any() and self._prev.text)
            elif self._match_text_seq("WITH", "PARSER"):
                opt = exp.IndexConstraintOption(parser=self._parse_var(any_token=True))
            elif self._match(TokenType.COMMENT):
                opt = exp.IndexConstraintOption(comment=self._parse_string())
            elif self._match_text_seq("VISIBLE"):
                opt = exp.IndexConstraintOption(visible=True)
            elif self._match_text_seq("INVISIBLE"):
                opt = exp.IndexConstraintOption(visible=False)
            elif self._match_text_seq("ENGINE_ATTRIBUTE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(engine_attr=self._parse_string())
            elif self._match_text_seq("SECONDARY_ENGINE_ATTRIBUTE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(secondary_engine_attr=self._parse_string())
            else:
                opt = None

            if not opt:
                break

            options.append(opt)

        return self.expression(
            exp.IndexColumnConstraint(
                this=this,
                expressions=expressions,
                kind=kind,
                index_type=index_type,
                options=options,
            )
        )

    def _parse_show_mysql(
        self,
        this: str,
        target: bool | str = False,
        full: t.Optional[bool] = None,
        global_: t.Optional[bool] = None,
    ) -> exp.Show:
        """Parse the remainder of a SHOW statement whose leading keywords were already matched.

        Args:
            this: Canonical name of the SHOW variant (a value used in SHOW_PARSERS).
            target: Falsy when no target identifier is expected; True to parse an
                identifier directly; a string to first consume those keyword(s).
            full: Stored as-is on the resulting node.
            global_: Stored as-is on the resulting node.

        Returns:
            A Show expression populated with every optional clause that was found.
        """
        json = self._match_text_seq("JSON")

        if target:
            if isinstance(target, str):
                self._match_text_seq(*target.split(" "))
            target_id = self._parse_id_var()
        else:
            target_id = None

        log = self._parse_string() if self._match_text_seq("IN") else None

        if this in ("BINLOG EVENTS", "RELAYLOG EVENTS"):
            # For the event-log variants FROM introduces a numeric position, not a database.
            position = self._parse_number() if self._match_text_seq("FROM") else None
            db = None
        else:
            position = None
            db = None

            if self._match(TokenType.FROM):
                db = self._parse_id_var()
            elif self._match(TokenType.DOT):
                # `db.target`: what was parsed as the target is actually the database.
                db = target_id
                target_id = self._parse_id_var()

        channel = self._parse_id_var() if self._match_text_seq("FOR", "CHANNEL") else None

        like = self._parse_string() if self._match_text_seq("LIKE") else None
        where = self._parse_where()

        if this == "PROFILE":
            types = self._parse_csv(lambda: self._parse_var_from_options(self.PROFILE_TYPES))
            query = self._parse_number() if self._match_text_seq("FOR", "QUERY") else None
            offset = self._parse_number() if self._match_text_seq("OFFSET") else None
            limit = self._parse_number() if self._match_text_seq("LIMIT") else None
        else:
            types, query = None, None
            offset, limit = self._parse_oldstyle_limit()

        # MUTEX -> True, STATUS -> False, neither -> None
        mutex = True if self._match_text_seq("MUTEX") else None
        mutex = False if self._match_text_seq("STATUS") else mutex

        for_table = self._parse_id_var() if self._match_text_seq("FOR", "TABLE") else None
        for_group = self._parse_string() if self._match_text_seq("FOR", "GROUP") else None
        for_user = self._parse_string() if self._match_text_seq("FOR", "USER") else None
        for_role = self._parse_string() if self._match_text_seq("FOR", "ROLE") else None
        into_outfile = self._parse_string() if self._match_text_seq("INTO", "OUTFILE") else None

        return self.expression(
            exp.Show(
                this=this,
                target=target_id,
                full=full,
                log=log,
                position=position,
                db=db,
                channel=channel,
                like=like,
                where=where,
                types=types,
                query=query,
                offset=offset,
                limit=limit,
                mutex=mutex,
                for_table=for_table,
                for_group=for_group,
                for_user=for_user,
                for_role=for_role,
                into_outfile=into_outfile,
                json=json,
                global_=global_,
            )
        )

    def _parse_oldstyle_limit(
        self,
    ) -> t.Tuple[t.Optional[exp.Expr], t.Optional[exp.Expr]]:
        """Parse `LIMIT <count>` or `LIMIT <offset>, <count>`.

        Returns:
            An `(offset, limit)` tuple; both are None when LIMIT is absent or when the
            list does not contain exactly one or two numbers.
        """
        limit = None
        offset = None
        if self._match_text_seq("LIMIT"):
            parts = self._parse_csv(self._parse_number)
            if len(parts) == 1:
                limit = parts[0]
            elif len(parts) == 2:
                # In the two-number form the first number is the offset.
                limit = parts[1]
                offset = parts[0]

        return offset, limit

    def _parse_set_item_charset(self, kind: str) -> exp.Expr:
        """Parse the value of a SET charset item (a string or an unquoted field)."""
        this = self._parse_string() or self._parse_unquoted_field()
        return self.expression(exp.SetItem(this=this, kind=kind))

    def _parse_set_item_names(self) -> exp.Expr:
        """Parse `SET NAMES <charset> [COLLATE <collation>]` into a SetItem of kind "NAMES"."""
        charset = self._parse_string() or self._parse_unquoted_field()
        if self._match_text_seq("COLLATE"):
            collate = self._parse_string() or self._parse_unquoted_field()
        else:
            collate = None

        return self.expression(exp.SetItem(this=charset, collate=collate, kind="NAMES"))

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expr]:
        """Parse a type expression, treating a leading BINARY as a cast of the next column.

        Falls back to the base implementation when the current token is not BINARY or
        when what follows does not parse as a DataType.
        """
        # mysql binary is special and can work anywhere, even in order by operations
        # it operates like a no paren func
        if self._match(TokenType.BINARY, advance=False):
            data_type = self._parse_types(check_func=True, allow_identifiers=False)

            if isinstance(data_type, exp.DataType):
                return self.expression(exp.Cast(this=self._parse_column(), to=data_type))

        return super()._parse_type(
            parse_interval=parse_interval, fallback_to_identifier=fallback_to_identifier
        )

    def _parse_alter_table_alter_index(self) -> exp.AlterIndex:
        """Parse `ALTER INDEX <name> [VISIBLE | INVISIBLE]`; `visible` is None if neither is given."""
        index = self._parse_field(any_token=True)

        if self._match_text_seq("VISIBLE"):
            visible = True
        elif self._match_text_seq("INVISIBLE"):
            visible = False
        else:
            visible = None

        return self.expression(exp.AlterIndex(this=index, visible=visible))

    def _parse_partition_property(
        self,
    ) -> t.Optional[exp.Expr] | t.List[exp.Expr]:
        """Parse `PARTITION BY RANGE | LIST (<exprs>) [(<partition definitions>)]`.

        Returns:
            None when neither RANGE nor LIST follows; the bare list of partition
            expressions when no `( PARTITION ...` definition list follows; otherwise a
            PartitionByRangeProperty / PartitionByListProperty.
        """
        partition_cls: t.Optional[t.Type[exp.Expr]] = None
        value_parser = None

        if self._match_text_seq("RANGE"):
            partition_cls = exp.PartitionByRangeProperty
            value_parser = self._parse_partition_range_value
        elif self._match_text_seq("LIST"):
            partition_cls = exp.PartitionByListProperty
            value_parser = self._parse_partition_list_value

        if not partition_cls or not value_parser:
            return None

        partition_expressions = self._parse_wrapped_csv(self._parse_assignment)

        # For Doris and Starrocks
        if not self._match_text_seq("(", "PARTITION", advance=False):
            return partition_expressions

        create_expressions = self._parse_wrapped_csv(value_parser)

        return self.expression(
            partition_cls(
                partition_expressions=partition_expressions, create_expressions=create_expressions
            )
        )

    def _parse_partition_range_value(self) -> t.Optional[exp.Expr]:
        """Parse `PARTITION <name> [VALUES LESS THAN (<values>)]`.

        Returns the bare name when no VALUES LESS THAN clause follows, otherwise a
        Partition wrapping a PartitionRange.
        """
        self._match_text_seq("PARTITION")
        name = self._parse_id_var()

        if not self._match_text_seq("VALUES", "LESS", "THAN"):
            return name

        values = self._parse_wrapped_csv(self._parse_expression)

        # A lone MAXVALUE is parsed as a column; replace it with a plain variable.
        if (
            len(values) == 1
            and isinstance(values[0], exp.Column)
            and values[0].name.upper() == "MAXVALUE"
        ):
            values = [exp.var("MAXVALUE")]

        part_range = self.expression(exp.PartitionRange(this=name, expressions=values))
        return self.expression(exp.Partition(expressions=[part_range]))

    def _parse_partition_list_value(self) -> exp.Partition:
        """Parse `PARTITION <name> VALUES IN (<values>)` into a Partition wrapping a PartitionList."""
        self._match_text_seq("PARTITION")
        name = self._parse_id_var()
        self._match_text_seq("VALUES", "IN")
        values = self._parse_wrapped_csv(self._parse_expression)
        part_list = self.expression(exp.PartitionList(this=name, expressions=values))
        return self.expression(exp.Partition(expressions=[part_list]))

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Delegate to the base parser, always passing `named_primary_key=True`.

        The `named_primary_key` argument received here is deliberately not forwarded.
        """
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional, in_props=in_props, named_primary_key=True
        )
class OracleParser(parser.Parser):
    """Oracle-specific parser.

    Overrides the base lookup tables and adds parsing for Oracle-only constructs such as
    DBMS_RANDOM.VALUE, JSON_ARRAY / JSON_ARRAYAGG / JSON_EXISTS, SELECT ... INTO,
    query restrictions (WITH READ ONLY / WITH CHECK OPTION) and aliased INSERT targets.
    """

    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER, TokenType.KEEP}
    VALUES_FOLLOWED_BY_PAREN = False

    # The base TO_BOOLEAN builder is dropped; everything else is inherited or overridden.
    FUNCTIONS = {
        **{k: v for k, v in parser.Parser.FUNCTIONS.items() if k != "TO_BOOLEAN"},
        "CONVERT": exp.ConvertToCharset.from_arg_list,
        "L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
        "NVL": lambda args: build_coalesce(args, is_nvl=True),
        # SQUARE(x) is represented as POW(x, 2)
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TO_CHAR": build_timetostr_or_tochar,
        "TO_TIMESTAMP": _build_to_timestamp,
        "TO_DATE": build_formatted_time(exp.StrToDate, "oracle"),
        "TRUNC": lambda args, dialect: build_trunc(
            args, dialect, date_trunc_unabbreviate=False, default_date_trunc_unit="DD"
        ),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "NEXT": lambda self: self._parse_next_value_for(),
        "PRIOR": lambda self: self.expression(exp.Prior(this=self._parse_bitwise())),
        # SYSDATE is a CurrentTimestamp flagged with sysdate=True
        "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp(sysdate=True)),
        "DBMS_RANDOM": lambda self: self._parse_dbms_random(),
    }

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
        TokenType.SYSTIMESTAMP: exp.Systimestamp,
    }

    # The base CONVERT function parser is dropped; CONVERT is handled through FUNCTIONS above.
    FUNCTION_PARSERS = {
        **{k: v for k, v in parser.Parser.FUNCTION_PARSERS.items() if k != "CONVERT"},
        "JSON_ARRAY": lambda self: self._parse_oracle_json_array(),
        "JSON_ARRAYAGG": lambda self: self._parse_oracle_json_arrayagg(),
        "JSON_EXISTS": lambda self: self._parse_json_exists(),
    }

    # GLOBAL / PRIVATE only yield a property when immediately followed by TEMPORARY;
    # otherwise the lambda returns the falsy result of the failed match.
    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "GLOBAL": lambda self: (
            self._match_text_seq("TEMPORARY")
            and self.expression(exp.TemporaryProperty(this="GLOBAL"))
        ),
        "PRIVATE": lambda self: (
            self._match_text_seq("TEMPORARY")
            and self.expression(exp.TemporaryProperty(this="PRIVATE"))
        ),
        "FORCE": lambda self: self.expression(exp.ForceProperty()),
    }

    # Each parser returns a (modifier key, parsed value) pair.
    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.ORDER_SIBLINGS_BY: lambda self: ("order", self._parse_order()),
        TokenType.WITH: lambda self: ("options", [self._parse_query_restrictions()]),
    }

    TYPE_LITERAL_PARSERS = {
        exp.DType.DATE: lambda self, this, _: self.expression(exp.DateStrToDate(this=this)),
        # https://docs.oracle.com/en/database/oracle/oracle-database/19/refrn/NLS_TIMESTAMP_FORMAT.html
        exp.DType.TIMESTAMP: lambda self, this, _: _build_to_timestamp(
            [this, '"%Y-%m-%d %H:%M:%S.%f"']
        ),
    }

    # SELECT UNIQUE .. is old-style Oracle syntax for SELECT DISTINCT ..
    # Reference: https://stackoverflow.com/a/336455
    DISTINCT_TOKENS = {TokenType.DISTINCT, TokenType.UNIQUE}

    # Keyword sequences accepted after WITH by `_parse_query_restrictions`.
    QUERY_RESTRICTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "WITH": (
            ("READ", "ONLY"),
            ("CHECK", "OPTION"),
        ),
    }

    def _parse_dbms_random(self) -> t.Optional[exp.Expr]:
        """Parse `DBMS_RANDOM.VALUE[(lower, upper)]` into a Rand expression.

        The bounds are only set when exactly two arguments are given. When `.VALUE` does
        not follow, the parser retreats one token and None is returned.
        """
        if self._match_text_seq(".", "VALUE"):
            lower, upper = None, None
            if self._match(TokenType.L_PAREN, advance=False):
                lower_upper = self._parse_wrapped_csv(self._parse_bitwise)
                if len(lower_upper) == 2:
                    lower, upper = lower_upper

            return exp.Rand(lower=lower, upper=upper)

        # Step back one token; NOTE(review): presumably to un-consume the DBMS_RANDOM
        # keyword matched by the caller — confirm against the base parser.
        self._retreat(self._index - 1)
        return None

    def _parse_oracle_json_array(self) -> exp.JSONArray:
        """Parse the arguments of JSON_ARRAY: a CSV of values, each with optional FORMAT JSON."""
        return self._parse_json_array(
            exp.JSONArray,
            expressions=self._parse_csv(lambda: self._parse_format_json(self._parse_bitwise())),
        )

    def _parse_oracle_json_arrayagg(self) -> exp.JSONArrayAgg:
        """Parse the arguments of JSON_ARRAYAGG: one value (optional FORMAT JSON) and an ORDER BY."""
        return self._parse_json_array(
            exp.JSONArrayAgg,
            this=self._parse_format_json(self._parse_bitwise()),
            order=self._parse_order(),
        )

    def _parse_json_array(self, expr_type: t.Type[E], **kwargs) -> E:
        """Parse the trailing clauses shared by the JSON array constructors.

        Args:
            expr_type: Expression class to instantiate.
            **kwargs: Already-parsed leading arguments; they are evaluated by the caller
                before this method consumes the NULL-handling, RETURNING and STRICT clauses.
        """
        return self.expression(
            expr_type(
                null_handling=self._parse_on_handling("NULL", "NULL", "ABSENT"),
                return_type=self._match_text_seq("RETURNING") and self._parse_type(),
                strict=self._match_text_seq("STRICT"),
                **kwargs,
            )
        )

    def _parse_hint_function_call(self) -> t.Optional[exp.Expr]:
        """Parse a `name(arg ...)` hint into an Anonymous function, or return None.

        None is returned, without consuming anything, unless the current token is
        immediately followed by a left parenthesis.
        """
        if not self._curr or not self._next or self._next.token_type != TokenType.L_PAREN:
            return None

        name = self._curr.text

        # Skip the name and the opening parenthesis.
        self._advance(2)
        args = self._parse_hint_args()
        this = self.expression(exp.Anonymous(this=name, expressions=args))
        self._match_r_paren(this)
        return this

    def _parse_hint_args(self):
        """Collect consecutive variables until `_parse_var` returns a falsy value."""
        args = []
        result = self._parse_var()

        while result:
            args.append(result)
            result = self._parse_var()

        return args

    def _parse_query_restrictions(self) -> t.Optional[exp.Expr]:
        """Parse a query restriction listed in QUERY_RESTRICTIONS, with optional CONSTRAINT <name>.

        Returns None when no known option matches.
        """
        kind = self._parse_var_from_options(self.QUERY_RESTRICTIONS, raise_unmatched=False)

        if not kind:
            return None

        return self.expression(
            exp.QueryOption(
                this=kind, expression=self._match(TokenType.CONSTRAINT) and self._parse_field()
            )
        )

    def _parse_json_exists(self) -> exp.JSONExists:
        """Parse the arguments of JSON_EXISTS: value, path, optional PASSING list and ON condition."""
        this = self._parse_format_json(self._parse_bitwise())
        self._match(TokenType.COMMA)
        return self.expression(
            exp.JSONExists(
                this=this,
                path=self.dialect.to_json_path(self._parse_bitwise()),
                passing=self._match_text_seq("PASSING")
                and self._parse_csv(lambda: self._parse_alias(self._parse_bitwise())),
                on_condition=self._parse_on_condition(),
            )
        )

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `[BULK COLLECT] INTO <target>`.

        A single target is re-parsed as a table (after an optional TABLE keyword);
        several targets are kept as a list of expressions. Returns None when neither
        INTO form is present.
        """
        # https://docs.oracle.com/en/database/oracle/oracle-database/19/lnpls/SELECT-INTO-statement.html
        bulk_collect = self._match(TokenType.BULK_COLLECT_INTO)
        if not bulk_collect and not self._match(TokenType.INTO):
            return None

        # Remember the position so a single target can be re-parsed as a table.
        index = self._index

        expressions = self._parse_expressions()
        if len(expressions) == 1:
            self._retreat(index)
            self._match(TokenType.TABLE)
            return self.expression(
                exp.Into(this=self._parse_table(schema=True), bulk_collect=bulk_collect)
            )

        return self.expression(exp.Into(bulk_collect=bulk_collect, expressions=expressions))

    def _parse_connect_with_prior(self):
        """Parse the CONNECT BY condition as a plain assignment expression."""
        return self._parse_assignment()

    def _parse_column_ops(self, this: t.Optional[exp.Expr]) -> t.Optional[exp.Expr]:
        """Parse column operators, then try to read a trailing interval span.

        The interval result is only kept when its `unit` is an IntervalSpan; otherwise
        the parser is rewound to where it was after the base column-operator parsing.
        """
        this = super()._parse_column_ops(this)

        if not this:
            return this

        index = self._index

        # https://docs.oracle.com/en/database/oracle/oracle-database/26/sqlrf/Interval-Exprs.html
        interval_span = self._try_parse(lambda: self._parse_interval_span(this))
        if interval_span and isinstance(interval_span.args.get("unit"), exp.IntervalSpan):
            return interval_span

        self._retreat(index)
        return this

    def _parse_insert_table(self) -> t.Optional[exp.Expr]:
        """Parse the target of INSERT INTO, including an alias written without AS."""
        # Oracle does not use AS for INSERT INTO alias
        # https://docs.oracle.com/en/database/oracle/oracle-database/18/sqlrf/INSERT.html
        # Parse table parts without schema to avoid parsing the alias with its columns
        this = self._parse_table_parts(schema=True)

        if isinstance(this, exp.Table):
            alias_name = self._parse_id_var(any_token=False)
            if alias_name:
                this.set("alias", exp.TableAlias(this=alias_name))

            this.set("partition", self._parse_partition())

            # Now parse the schema (column list) if present
            return self._parse_schema(this=this)

        return this
https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE return exp.UnixToTime.from_arg_list(args) # https://www.postgresql.org/docs/current/functions-formatting.html return build_formatted_time(exp.StrToTime, "postgres")(args) def _build_regexp_replace(args: t.List, dialect: DialectType = None) -> exp.RegexpReplace: # The signature of REGEXP_REPLACE is: # regexp_replace(source, pattern, replacement [, start [, N ]] [, flags ]) # # Any one of `start`, `N` and `flags` can be column references, meaning that # unless we can statically see that the last argument is a non-integer string # (eg. not '0'), then it's not possible to construct the correct AST regexp_replace = None if len(args) > 3: last = args[-1] if not is_int(last.name): if not last.type or last.is_type(exp.DType.UNKNOWN, exp.DType.NULL): from sqlglot.optimizer.annotate_types import annotate_types last = annotate_types(last, dialect=dialect) if last.is_type(*exp.DataType.TEXT_TYPES): regexp_replace = exp.RegexpReplace.from_arg_list(args[:-1]) regexp_replace.set("modifiers", last) regexp_replace = regexp_replace or exp.RegexpReplace.from_arg_list(args) regexp_replace.set("single_replace", True) return regexp_replace def _build_levenshtein_less_equal(args: t.List) -> exp.Levenshtein: # Postgres has two signatures for levenshtein_less_equal function, but in both cases # max_dist is the last argument # levenshtein_less_equal(source, target, ins_cost, del_cost, sub_cost, max_d) # levenshtein_less_equal(source, target, max_d) max_dist = args.pop() return exp.Levenshtein( this=seq_get(args, 0), expression=seq_get(args, 1), ins_cost=seq_get(args, 2), del_cost=seq_get(args, 3), sub_cost=seq_get(args, 4), max_dist=max_dist, ) class PostgresParser(parser.Parser): SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = True PROPERTY_PARSERS = { **{k: v for k, v in parser.Parser.PROPERTY_PARSERS.items() if k != "INPUT"}, "SET": lambda self: self.expression(exp.SetConfigProperty(this=self._parse_set())), } 
PLACEHOLDER_PARSERS = { **parser.Parser.PLACEHOLDER_PARSERS, TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder(jdbc=True)), TokenType.MOD: lambda self: self._parse_query_parameter(), } FUNCTIONS = { **parser.Parser.FUNCTIONS, "ARRAY_PREPEND": lambda args: exp.ArrayPrepend( this=seq_get(args, 1), expression=seq_get(args, 0) ), "BIT_AND": exp.BitwiseAndAgg.from_arg_list, "BIT_OR": exp.BitwiseOrAgg.from_arg_list, "BIT_XOR": exp.BitwiseXorAgg.from_arg_list, "VERSION": exp.CurrentVersion.from_arg_list, "DATE_TRUNC": build_timestamp_trunc, "DIV": lambda args: exp.cast(binary_from_function(exp.IntDiv)(args), exp.DType.DECIMAL), "GENERATE_SERIES": _build_generate_series, "GET_BIT": lambda args: exp.Getbit( this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True ), "JSON_EXTRACT_PATH": build_json_extract_path(exp.JSONExtract), "JSON_EXTRACT_PATH_TEXT": build_json_extract_path(exp.JSONExtractScalar), "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), encoding=seq_get(args, 1)), "MAKE_TIME": exp.TimeFromParts.from_arg_list, "MAKE_TIMESTAMP": exp.TimestampFromParts.from_arg_list, "NOW": exp.CurrentTimestamp.from_arg_list, "REGEXP_REPLACE": _build_regexp_replace, "TO_CHAR": build_formatted_time(exp.TimeToStr, "postgres"), "TO_DATE": build_formatted_time(exp.StrToDate, "postgres"), "TO_TIMESTAMP": _build_to_timestamp, "UNNEST": exp.Explode.from_arg_list, "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), "SHA384": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(384)), "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), "LEVENSHTEIN_LESS_EQUAL": _build_levenshtein_less_equal, "JSON_OBJECT_AGG": lambda args: exp.JSONObjectAgg(expressions=args), "JSONB_OBJECT_AGG": exp.JSONBObjectAgg.from_arg_list, "WIDTH_BUCKET": lambda args: ( exp.WidthBucket(this=seq_get(args, 0), threshold=seq_get(args, 1)) if len(args) == 2 else 
exp.WidthBucket.from_arg_list(args) ), } NO_PAREN_FUNCTION_PARSERS = { **parser.Parser.NO_PAREN_FUNCTION_PARSERS, "VARIADIC": lambda self: self.expression(exp.Variadic(this=self._parse_bitwise())), } NO_PAREN_FUNCTIONS = { **parser.Parser.NO_PAREN_FUNCTIONS, TokenType.LOCALTIME: exp.Localtime, TokenType.LOCALTIMESTAMP: exp.Localtimestamp, TokenType.CURRENT_CATALOG: exp.CurrentCatalog, TokenType.SESSION_USER: exp.SessionUser, TokenType.CURRENT_SCHEMA: exp.CurrentSchema, } FUNCTION_PARSERS = { **parser.Parser.FUNCTION_PARSERS, "DATE_PART": lambda self: self._parse_date_part(), "JSON_AGG": lambda self: self.expression( exp.JSONArrayAgg(this=self._parse_lambda(), order=self._parse_order()) ), "JSONB_EXISTS": lambda self: self._parse_jsonb_exists(), } BITWISE = { **parser.Parser.BITWISE, TokenType.HASH: exp.BitwiseXor, } EXPONENT = { TokenType.CARET: exp.Pow, } RANGE_PARSERS = { **parser.Parser.RANGE_PARSERS, TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), TokenType.DAT: lambda self, this: self.expression( exp.MatchAgainst(this=self._parse_bitwise(), expressions=[this]) ), } STATEMENT_PARSERS = { **parser.Parser.STATEMENT_PARSERS, TokenType.END: lambda self: self._parse_commit_or_rollback(), } UNARY_PARSERS = { **parser.Parser.UNARY_PARSERS, # The `~` token is remapped from TILDE to RLIKE in Postgres due to the binary REGEXP LIKE operator TokenType.RLIKE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), } JSON_ARROWS_REQUIRE_JSON_TYPE = True COLUMN_OPERATORS = { **parser.Parser.COLUMN_OPERATORS, TokenType.ARROW: lambda self, this, path: self.validate_expression( build_json_extract_path( exp.JSONExtract, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE )([this, path]) ), TokenType.DARROW: lambda self, this, path: self.validate_expression( build_json_extract_path( exp.JSONExtractScalar, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE )([this, path]) ), } ARG_MODE_TOKENS: t.ClassVar = {TokenType.IN, TokenType.OUT, 
TokenType.INOUT, TokenType.VARIADIC} def _parse_parameter_mode(self) -> t.Optional[TokenType]: """ Parse PostgreSQL function parameter mode (IN, OUT, INOUT, VARIADIC). Disambiguates between mode keywords and identifiers with the same name: - MODE TYPE -> keyword is identifier (e.g., "out INT") - MODE NAME TYPE -> keyword is mode (e.g., "OUT x INT") Returns: Mode token type if current token is a mode keyword, None otherwise. """ if not self._match_set(self.ARG_MODE_TOKENS, advance=False) or not self._next: return None mode_token = self._curr # Check Pattern 1: MODE TYPE # Try parsing next token as a built-in type (not UDT) # If successful, the keyword is an identifier, not a mode is_followed_by_builtin_type = self._try_parse( lambda: ( self._advance() # type: ignore or self._parse_types(check_func=False, allow_identifiers=False) ), retreat=True, ) if is_followed_by_builtin_type: return None # Pattern: "out INT" -> out is parameter name # Check Pattern 2: MODE NAME TYPE # If next token is an identifier, check if there's a type after it # The type can be built-in or user-defined (allow_identifiers=True) if self._next.token_type not in self.ID_VAR_TOKENS: return None is_followed_by_any_type = self._try_parse( lambda: ( self._advance(2) # type: ignore or self._parse_types(check_func=False, allow_identifiers=True) ), retreat=True, ) if is_followed_by_any_type: return mode_token.token_type # Pattern: "OUT x INT" -> OUT is mode return None def _create_mode_constraint(self, param_mode: TokenType) -> exp.InOutColumnConstraint: """ Create parameter mode constraint for function parameters. Args: param_mode: The parameter mode token (IN, OUT, INOUT, or VARIADIC). Returns: InOutColumnConstraint expression representing the parameter mode. 
""" return self.expression( exp.InOutColumnConstraint( input_=(param_mode in {TokenType.IN, TokenType.INOUT}), output=(param_mode in {TokenType.OUT, TokenType.INOUT}), variadic=(param_mode == TokenType.VARIADIC), ) ) def _parse_function_parameter(self) -> t.Optional[exp.Expr]: param_mode = self._parse_parameter_mode() if param_mode: self._advance() # Parse parameter name and type param_name = self._parse_id_var() column_def = self._parse_column_def(this=param_name, computed_column=False) # Attach mode as constraint if param_mode and column_def: constraint = self._create_mode_constraint(param_mode) if not column_def.args.get("constraints"): column_def.set("constraints", []) column_def.args["constraints"].insert(0, constraint) return column_def def _parse_query_parameter(self) -> t.Optional[exp.Expr]: this = ( self._parse_wrapped(self._parse_id_var) if self._match(TokenType.L_PAREN, advance=False) else None ) self._match_text_seq("S") return self.expression(exp.Placeholder(this=this)) def _parse_date_part(self) -> exp.Expr: part = self._parse_type() self._match(TokenType.COMMA) value = self._parse_bitwise() if part and isinstance(part, (exp.Column, exp.Literal)): part = exp.var(part.name) return self.expression(exp.Extract(this=part, expression=value)) def _parse_unique_key(self) -> t.Optional[exp.Expr]: return None def _parse_jsonb_exists(self) -> exp.JSONBExists: return self.expression( exp.JSONBExists( this=self._parse_bitwise(), path=self._match(TokenType.COMMA) and self.dialect.to_json_path(self._parse_bitwise()), ) ) def _parse_generated_as_identity( self, ) -> ( exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint | exp.GeneratedAsRowColumnConstraint ): this = super()._parse_generated_as_identity() if self._match_text_seq("STORED"): this = self.expression(exp.ComputedColumnConstraint(this=this.expression)) return this def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expr]: udt_type: exp.Identifier | exp.Dot = 
def _build_to_char(args: t.List) -> exp.TimeToStr:
    """
    Build a TimeToStr node for Presto's TO_CHAR using the Teradata format mapping.

    A literal format argument (second positional argument) is upper-cased in
    place before the formatted-time builder runs.
    """
    format_arg = seq_get(args, 1)

    if isinstance(format_arg, exp.Literal):
        # Teradata's format mapping keys are upper-case, so normalize the literal first
        upper_format = format_arg.this.upper()
        format_arg.set("this", upper_format)

    # "teradata" is deliberate: Presto's own time formats differ from the ones TO_CHAR takes.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    build_time_to_str = build_formatted_time(exp.TimeToStr, "teradata")
    return build_time_to_str(args)
def _resolve_projection(s: exp.Expr, projections: t.Dict[str, exp.Expr]) -> exp.Expr:
    """
    Substitute a column reference with a copy of the projection it names.

    Anything that is not a column, or a column whose name has no entry in
    `projections`, is returned untouched.
    """
    if not isinstance(s, exp.Column):
        return s

    if s.name not in projections:
        return s

    # Hand back a copy so the stored projection is never shared between call sites
    return projections[s.name].copy()
def _parse_equality(self) -> t.Optional[exp.Expr]:
    """
    Parse a chain of equality operators, rewriting comparisons against NULL.

    `x == null` / `null == x` become `x IS NULL`; the `!=` forms become
    `NOT x IS NULL`. Any other result is returned as parsed.
    """
    result = self._parse_comparison()

    while self._match_set(self.EQUALITY):
        comments = self._prev_comments
        node_type = self.EQUALITY[self._prev.token_type]
        result = self.expression(
            node_type(this=result, expression=self._parse_comparison()),
            comments=comments,
        )

    if not isinstance(result, (exp.EQ, exp.NEQ)):
        return result

    # https://prql-lang.org/book/reference/spec/null.html
    left, right = result.this, result.expression
    if isinstance(right, exp.Null):
        operand, null = left, right
    elif isinstance(left, exp.Null):
        operand, null = right, left
    else:
        return result

    is_null = exp.Is(this=operand, expression=null)
    if isinstance(result, exp.EQ):
        return is_null
    return exp.Not(this=is_null)
exp.select("*").from_(from_, copy=False) while self._match_texts(self.TRANSFORM_PARSERS): query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query) return query def _parse_selection( self, query: exp.Query, parse_method: t.Optional[t.Callable] = None, append: bool = True, ) -> exp.Query: parse_method = parse_method if parse_method else self._parse_expression if self._match(TokenType.L_BRACE): selects = self._parse_csv(parse_method) if not self._match(TokenType.R_BRACE, expression=query): self.raise_error("Expecting }") else: expression = parse_method() selects = [expression] if expression else [] projections = { select.alias_or_name: select.this if isinstance(select, exp.Alias) else select for select in query.selects } resolved = [ select.transform(_resolve_projection, projections=projections, copy=False) for select in selects ] return query.select(*resolved, append=append, copy=False) def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]: num = self._parse_number() # TODO: TAKE for ranges a..b return query.limit(num) if num else None def _parse_ordered( self, parse_method: t.Optional[t.Callable] = None ) -> t.Optional[exp.Ordered]: asc = self._match(TokenType.PLUS) desc = self._match(TokenType.DASH) or (asc and False) term = term = super()._parse_ordered(parse_method=parse_method) if term and desc: term.set("desc", True) term.set("nulls_first", False) return term def _parse_order_by(self, query: exp.Select) -> t.Optional[exp.Query]: l_brace = self._match(TokenType.L_BRACE) expressions = self._parse_csv(self._parse_ordered) if l_brace and not self._match(TokenType.R_BRACE): self.raise_error("Expecting }") return query.order_by(self.expression(exp.Order(expressions=expressions)), copy=False) def _parse_aggregate(self) -> t.Optional[exp.Expr]: alias = None if self._next.token_type == TokenType.ALIAS: alias = self._parse_id_var(any_token=True) self._match(TokenType.ALIAS) name = self._curr.text.upper() func_builder = self.FUNCTIONS.get(name) if 
func_builder: self._advance() args = self._parse_column() func = func_builder([args]) else: self.raise_error(f"Unsupported aggregation function {name}") if alias: return self.expression(exp.Alias(this=func, alias=alias)) return func def _parse_expression(self) -> t.Optional[exp.Expr]: if self._next.token_type == TokenType.ALIAS: alias = self._parse_id_var(True) self._match(TokenType.ALIAS) return self.expression(exp.Alias(this=self._parse_assignment(), alias=alias)) return self._parse_assignment() def _parse_table( self, schema: bool = False, joins: bool = False, alias_tokens: t.Optional[Collection[TokenType]] = None, parse_bracket: bool = False, is_db_reference: bool = False, parse_partition: bool = False, consume_pipe: bool = False, ) -> t.Optional[exp.Expr]: return self._parse_table_parts() def _parse_from( self, joins: bool = False, skip_from_token: bool = False, consume_pipe: bool = False, ) -> t.Optional[exp.From]: if not skip_from_token and not self._match(TokenType.FROM): return None comments = self._prev_comments return self.expression( exp.From(this=self._parse_table(joins=joins)), comments=comments, ) ================================================ FILE: sqlglot/parsers/redshift.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp from sqlglot.helper import seq_get from sqlglot.parsers.postgres import PostgresParser from sqlglot.parser import build_convert_timezone from sqlglot.tokens import TokenType from sqlglot.dialects.dialect import map_date_part if t.TYPE_CHECKING: from sqlglot._typing import E from collections.abc import Collection def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: def _builder(args: t.List) -> E: expr = expr_type( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), ) if expr_type is exp.TsOrDsAdd: expr.set("return_type", exp.DataType.build("TIMESTAMP")) return expr return _builder class 
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
    consume_pipe: bool = False,
) -> t.Optional[exp.Expr]:
    """
    Parse a table reference, allowing a leading UNPIVOT keyword.

    Every argument is forwarded unchanged to the parent implementation,
    including `parse_partition` and `consume_pipe`, which used to be
    accepted but dropped.

    Returns:
        The parsed table, wrapped in `exp.Pivot(unpivot=True)` when the
        UNPIVOT keyword was consumed.
    """
    # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
    unpivot = self._match(TokenType.UNPIVOT)

    table = super()._parse_table(
        schema=schema,
        joins=joins,
        alias_tokens=alias_tokens,
        parse_bracket=parse_bracket,
        is_db_reference=is_db_reference,
        parse_partition=parse_partition,
        consume_pipe=consume_pipe,
    )

    return self.expression(exp.Pivot(this=table, unpivot=True)) if unpivot else table
def _parse_encode_property(self, key: t.Optional[bool] = None) -> exp.EncodeProperty:
    """
    Parse an `ENCODE <name> [( ...properties... )]` clause.

    Args:
        key: Passed through as the `key` arg of the resulting EncodeProperty.

    Returns:
        EncodeProperty carrying the encoding name and, when a parenthesized
        list follows, its wrapped properties.
    """
    self._match_text_seq("ENCODE")
    encoding = self._parse_var_or_string()

    # Only peek at the parenthesis; the wrapped-properties parser consumes it
    has_options = self._match(TokenType.L_PAREN, advance=False)
    properties = (
        self.expression(exp.Properties(expressions=self._parse_wrapped_properties()))
        if has_options
        else None
    )

    return self.expression(exp.EncodeProperty(this=encoding, properties=properties, key=key))
def cast_to_time6(
    expression: t.Optional[exp.Expr], time_type: exp.DType = exp.DType.TIME
) -> exp.Cast:
    """
    Wrap `expression` in a cast to `time_type` with a single type parameter of 6.

    Args:
        expression: The value to cast.
        time_type: The target type; defaults to TIME.
    """
    six = exp.DataTypeParam(this=exp.Literal.number(6))
    target_type = exp.DataType.build(time_type, expressions=[six])
    return exp.Cast(this=expression, to=target_type)
build_formatted_time(exp.UnixToTime, "mysql"), "TIME_BUCKET": lambda args: exp.DateBin( this=seq_get(args, 0), expression=seq_get(args, 1), origin=seq_get(args, 2), ), "BSON_EXTRACT_BSON": build_json_extract_path(exp.JSONBExtract), "BSON_EXTRACT_STRING": build_json_extract_path(exp.JSONBExtractScalar, json_type="STRING"), "BSON_EXTRACT_DOUBLE": build_json_extract_path(exp.JSONBExtractScalar, json_type="DOUBLE"), "BSON_EXTRACT_BIGINT": build_json_extract_path(exp.JSONBExtractScalar, json_type="BIGINT"), "JSON_EXTRACT_JSON": build_json_extract_path(exp.JSONExtract), "JSON_EXTRACT_STRING": build_json_extract_path(exp.JSONExtractScalar, json_type="STRING"), "JSON_EXTRACT_DOUBLE": build_json_extract_path(exp.JSONExtractScalar, json_type="DOUBLE"), "JSON_EXTRACT_BIGINT": build_json_extract_path(exp.JSONExtractScalar, json_type="BIGINT"), "JSON_ARRAY_CONTAINS_STRING": lambda args: exp.JSONArrayContains( this=seq_get(args, 1), expression=seq_get(args, 0), json_type="STRING", ), "JSON_ARRAY_CONTAINS_DOUBLE": lambda args: exp.JSONArrayContains( this=seq_get(args, 1), expression=seq_get(args, 0), json_type="DOUBLE", ), "JSON_ARRAY_CONTAINS_JSON": lambda args: exp.JSONArrayContains( this=seq_get(args, 1), expression=seq_get(args, 0), json_type="JSON", ), "JSON_KEYS": lambda args: exp.JSONKeys( this=seq_get(args, 0), expressions=args[1:], ), "JSON_PRETTY": exp.JSONFormat.from_arg_list, "JSON_BUILD_ARRAY": lambda args: exp.JSONArray(expressions=args), "JSON_BUILD_OBJECT": lambda args: exp.JSONObject(expressions=args), "DATE": exp.Date.from_arg_list, "DAYNAME": lambda args: exp.TimeToStr( this=seq_get(args, 0), format=MySQL.format_time(exp.Literal.string("%W")), ), "TIMESTAMPDIFF": lambda args: exp.TimestampDiff( this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0), ), "APPROX_COUNT_DISTINCT": exp.Hll.from_arg_list, "APPROX_PERCENTILE": lambda args, dialect: exp.ApproxQuantile( this=seq_get(args, 0), quantile=seq_get(args, 1), error_tolerance=seq_get(args, 
# Step 1: start from MySQL's column operators and add the SingleStore-specific ones.
COLUMN_OPERATORS = {
    **MySQLParser.COLUMN_OPERATORS,
    # COLON_GT / NCOLON_GT build Cast / TryCast of the left operand to the parsed type
    TokenType.COLON_GT: lambda self, this, to: self.expression(exp.Cast(this=this, to=to)),
    TokenType.NCOLON_GT: lambda self, this, to: self.expression(exp.TryCast(this=this, to=to)),
    # The DCOLON family passes the right operand's name as a string-literal path
    # to the shared JSON-extract builder
    TokenType.DCOLON: lambda self, this, path: build_json_extract_path(exp.JSONExtract)(
        [this, exp.Literal.string(path.name)]
    ),
    TokenType.DCOLONDOLLAR: lambda self, this, path: build_json_extract_path(
        exp.JSONExtractScalar, json_type="STRING"
    )([this, exp.Literal.string(path.name)]),
    TokenType.DCOLONPERCENT: lambda self, this, path: build_json_extract_path(
        exp.JSONExtractScalar, json_type="DOUBLE"
    )([this, exp.Literal.string(path.name)]),
    # DCOLONQMARK builds an existence check and flags which operator produced it
    TokenType.DCOLONQMARK: lambda self, this, path: self.expression(
        exp.JSONExists(this=this, path=path.name, from_dcolonqmark=True)
    ),
}
# Step 2: rebuild the mapping without the arrow and placeholder operators, so any
# entries for them in the merged mapping are not available in this parser.
COLUMN_OPERATORS = {
    k: v
    for k, v in COLUMN_OPERATORS.items()
    if k
    not in (
        TokenType.ARROW,
        TokenType.DARROW,
        TokenType.HASH_ARROW,
        TokenType.DHASH_ARROW,
        TokenType.PLACEHOLDER,
    )
}
EXTRACTOR POOL": _show_parser("CDC EXTRACTOR POOL"), "CREATE AGGREGATE": _show_parser("CREATE AGGREGATE", target=True), "CREATE PIPELINE": _show_parser("CREATE PIPELINE", target=True), "CREATE PROJECTION": _show_parser("CREATE PROJECTION", target=True), "DATABASE STATUS": _show_parser("DATABASE STATUS"), "DISTRIBUTED_PLANCACHE STATUS": _show_parser("DISTRIBUTED_PLANCACHE STATUS"), "FULLTEXT SERVICE METRICS LOCAL": _show_parser("FULLTEXT SERVICE METRICS LOCAL"), "FULLTEXT SERVICE METRICS FOR NODE": _show_parser( "FULLTEXT SERVICE METRICS FOR NODE", target=True ), "FULLTEXT SERVICE STATUS": _show_parser("FULLTEXT SERVICE STATUS"), "FUNCTIONS": _show_parser("FUNCTIONS"), "GROUPS": _show_parser("GROUPS"), "GROUPS FOR ROLE": _show_parser("GROUPS FOR ROLE", target=True), "GROUPS FOR USER": _show_parser("GROUPS FOR USER", target=True), "INDEXES": _show_parser("INDEX", target="FROM"), "KEYS": _show_parser("INDEX", target="FROM"), "LINKS": _show_parser("LINKS", target="ON"), "LOAD ERRORS": _show_parser("LOAD ERRORS"), "LOAD WARNINGS": _show_parser("LOAD WARNINGS"), "PARTITIONS": _show_parser("PARTITIONS", target="ON"), "PIPELINES": _show_parser("PIPELINES"), "PLAN": _show_parser("PLAN", target=True), "PLANCACHE": _show_parser("PLANCACHE"), "PROCEDURES": _show_parser("PROCEDURES"), "PROJECTIONS": _show_parser("PROJECTIONS", target="ON TABLE"), "REPLICATION STATUS": _show_parser("REPLICATION STATUS"), "REPRODUCTION": _show_parser("REPRODUCTION"), "RESOURCE POOLS": _show_parser("RESOURCE POOLS"), "ROLES": _show_parser("ROLES"), "ROLES FOR USER": _show_parser("ROLES FOR USER", target=True), "ROLES FOR GROUP": _show_parser("ROLES FOR GROUP", target=True), "STATUS EXTENDED": _show_parser("STATUS EXTENDED"), "USERS": _show_parser("USERS"), "USERS FOR ROLE": _show_parser("USERS FOR ROLE", target=True), "USERS FOR GROUP": _show_parser("USERS FOR GROUP", target=True), } SHOW_TRIE = new_trie(key.split(" ") for key in SHOW_PARSERS) ALTER_PARSERS = { **MySQLParser.ALTER_PARSERS, 
"CHANGE": lambda self: self.expression( exp.RenameColumn(this=self._parse_column(), to=self._parse_column()) ), } def _parse_vector_expressions(self, expressions: t.List[exp.Expr]) -> t.List[exp.Expr]: type_name = expressions[1].name.upper() if type_name in self.dialect.VECTOR_TYPE_ALIASES: type_name = self.dialect.VECTOR_TYPE_ALIASES[type_name] return [exp.DataType.build(type_name, dialect=self.dialect), expressions[0]] ================================================ FILE: sqlglot/parsers/snowflake.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, parser from sqlglot.trie import new_trie from sqlglot.dialects.dialect import ( Dialect, build_default_decimal_type, build_formatted_time, build_like, build_replace_with_optional_replacement, build_timetostr_or_tochar, build_trunc, binary_from_function, date_trunc_to_time, map_date_part, ) from sqlglot.helper import is_date_unit, is_int, seq_get from sqlglot.tokens import TokenType if t.TYPE_CHECKING: from sqlglot._typing import B, E from collections.abc import Collection def _build_approx_top_k(args: t.List) -> exp.ApproxTopK: """ Normalizes APPROX_TOP_K arguments to match Snowflake semantics. 
def _build_date_from_parts(args: t.List) -> exp.DateFromParts:
    """
    Build DateFromParts from up to three positional arguments (year, month, day).

    Missing arguments become None, and the node is always flagged with
    `allow_overflow=True`.
    """
    year, month, day = (seq_get(args, position) for position in range(3))
    return exp.DateFromParts(year=year, month=month, day=day, allow_overflow=True)
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    """
    Create a builder for a Snowflake bitwise function.

    Args:
        expr_type: Expression class to build for the two-operand form.
        name: Function name used for the Anonymous fallback.

    Returns:
        A builder that handles three-argument calls (third argument stored as
        `padside` for AND/OR/XOR, Anonymous for any other type) and otherwise
        defers to `binary_from_function`.
    """
    accepts_padside = expr_type in (exp.BitwiseAnd, exp.BitwiseOr, exp.BitwiseXor)
    is_shift = expr_type in (exp.BitwiseLeftShift, exp.BitwiseRightShift)

    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) != 3:
            node = binary_from_function(expr_type)(args)
            if is_shift:
                # Snowflake specifies INT128 for bitwise shifts
                node.set("requires_int128", True)
            return node

        # Three arguments: only AND/OR/XOR understand the extra padside argument
        if not accepts_padside:
            return exp.Anonymous(this=name, expressions=args)

        left, right, padside = args
        return expr_type(this=left, expression=right, padside=padside)

    return _builder
# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """Build `IF(<arg> = 0, NULL, <arg>)` from the first positional argument."""
    value = seq_get(args, 0)
    is_zero = exp.EQ(this=value, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=value)
is_time_input) or ( isinstance(trunc, exp.DateTrunc) and not is_date_unit(unit) ): trunc.set("input_type_preserved", True) return trunc def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: def _builder(args: t.List, dialect: Dialect) -> E: return expr_type( this=seq_get(args, 0), expression=seq_get(args, 1), position=seq_get(args, 2), occurrence=seq_get(args, 3), parameters=seq_get(args, 4), group=seq_get(args, 5) or exp.Literal.number(0), **( {"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL} if expr_type is exp.RegexpExtract else {} ), ) return _builder def _build_timestamp_from_parts(args: t.List) -> exp.Func: """Build TimestampFromParts with support for both syntaxes: 1. TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second [, nanosecond] [, time_zone]) 2. TIMESTAMP_FROM_PARTS(date_expr, time_expr) - Snowflake specific """ if len(args) == 2: return exp.TimestampFromParts(this=seq_get(args, 0), expression=seq_get(args, 1)) return exp.TimestampFromParts.from_arg_list(args) def _build_round(args: t.List) -> exp.Round: """ Build Round expression, unwrapping Snowflake's named parameters. Maps EXPR => this, SCALE => decimals, ROUNDING_MODE => truncate. Note: Snowflake does not support mixing named and positional arguments. Arguments are either all named or all positional. 
""" kwarg_map = {"EXPR": "this", "SCALE": "decimals", "ROUNDING_MODE": "truncate"} round_args = {} positional_keys = ["this", "decimals", "truncate"] positional_idx = 0 for arg in args: if isinstance(arg, exp.Kwarg): key = arg.this.name.upper() round_key = kwarg_map.get(key) if round_key: round_args[round_key] = arg.expression else: if positional_idx < len(positional_keys): round_args[positional_keys[positional_idx]] = arg positional_idx += 1 expression = exp.Round(**round_args) expression.set("casts_non_integer_decimals", True) return expression def _build_array_sort(args: t.List) -> exp.SortArray: asc = seq_get(args, 1) nulls_first = seq_get(args, 2) if nulls_first is None and isinstance(asc, exp.Boolean): nulls_first = exp.Boolean(this=not asc.this) return exp.SortArray(this=seq_get(args, 0), asc=asc, nulls_first=nulls_first) def _build_generator(args: t.List) -> exp.Generator: """ Build Generator expression, unwrapping Snowflake's named parameters. Maps ROWCOUNT => rowcount, TIMELIMIT => timelimit. 
""" kwarg_map = {"ROWCOUNT": "rowcount", "TIMELIMIT": "timelimit"} gen_args = {} positional_keys = ("rowcount", "timelimit") for i, arg in enumerate(args): if isinstance(arg, exp.Kwarg): key = arg.this.name.upper() gen_key = kwarg_map.get(key) if gen_key: gen_args[gen_key] = arg.expression elif i < len(positional_keys): gen_args[positional_keys[i]] = arg return exp.Generator(**gen_args) def _build_try_to_number(args: t.List[exp.Expr]) -> exp.Expr: return exp.ToNumber( this=seq_get(args, 0), format=seq_get(args, 1), precision=seq_get(args, 2), scale=seq_get(args, 3), safe=True, ) def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[SnowflakeParser], exp.Show]: def _parse(self: SnowflakeParser) -> exp.Show: return self._parse_show_snowflake(*args, **kwargs) return _parse class SnowflakeParser(parser.Parser): IDENTIFY_PIVOT_STRINGS = True TYPED_LAMBDA_ARGS = True DEFAULT_SAMPLING_METHOD = "BERNOULLI" COLON_IS_VARIANT_EXTRACT = True JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True TYPE_TOKENS = {*parser.Parser.TYPE_TOKENS, TokenType.FILE} STRUCT_TYPE_TOKENS = {*parser.Parser.STRUCT_TYPE_TOKENS, TokenType.FILE} NESTED_TYPE_TOKENS = {*parser.Parser.NESTED_TYPE_TOKENS, TokenType.FILE} ID_VAR_TOKENS = { *parser.Parser.ID_VAR_TOKENS, TokenType.EXCEPT, TokenType.INTEGRATION, TokenType.MATCH_CONDITION, TokenType.PACKAGE, TokenType.POLICY, TokenType.POOL, TokenType.ROLE, TokenType.RULE, TokenType.VOLUME, } ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS | { TokenType.INTEGRATION, TokenType.PACKAGE, TokenType.POLICY, TokenType.POOL, TokenType.ROLE, TokenType.RULE, TokenType.VOLUME, } TABLE_ALIAS_TOKENS = ( parser.Parser.TABLE_ALIAS_TOKENS | { TokenType.ANTI, TokenType.INTEGRATION, TokenType.PACKAGE, TokenType.POLICY, TokenType.POOL, TokenType.ROLE, TokenType.RULE, TokenType.SEMI, TokenType.VOLUME, TokenType.WINDOW, } ) - {TokenType.MATCH_CONDITION} COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} NO_PAREN_FUNCTIONS = { **parser.Parser.NO_PAREN_FUNCTIONS, 
TokenType.LOCALTIME: exp.Localtime, TokenType.LOCALTIMESTAMP: exp.Localtimestamp, TokenType.CURRENT_TIME: exp.Localtime, } FUNCTIONS = { **parser.Parser.FUNCTIONS, "CHARINDEX": lambda args: exp.StrPosition( this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2), clamp_position=True, ), "ADD_MONTHS": lambda args: exp.AddMonths( this=seq_get(args, 0), expression=seq_get(args, 1), preserve_end_of_month=True, ), "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, "CURRENT_TIME": lambda args: exp.Localtime(this=seq_get(args, 0)), "APPROX_TOP_K": _build_approx_top_k, "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), "ARRAY_CONTAINS": lambda args: exp.ArrayContains( this=seq_get(args, 1), expression=seq_get(args, 0), ensure_variant=False, check_null=True, ), "ARRAY_DISTINCT": lambda args: exp.ArrayDistinct( this=seq_get(args, 0), check_null=True, ), "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( # Snowflake has exclusive end semantics start=seq_get(args, 0), end=seq_get(args, 1), step=seq_get(args, 2), is_end_exclusive=True, ), "ARRAY_EXCEPT": lambda args: exp.ArrayExcept( this=seq_get(args, 0), expression=seq_get(args, 1), is_multiset=True, ), "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect( expressions=args, is_multiset=True, ), "ARRAY_POSITION": lambda args: exp.ArrayPosition( this=seq_get(args, 1), expression=seq_get(args, 0), zero_based=True, ), "ARRAY_SLICE": lambda args: exp.ArraySlice( this=seq_get(args, 0), start=seq_get(args, 1), end=seq_get(args, 2), zero_based=True, ), "ARRAY_SORT": _build_array_sort, "ARRAY_FLATTEN": exp.Flatten.from_arg_list, "ARRAY_TO_STRING": lambda args: exp.ArrayToString( this=seq_get(args, 0), expression=seq_get(args, 1), null_is_empty=True, null_delim_is_null=True, ), "ARRAYS_OVERLAP": lambda args: exp.ArrayOverlaps( this=seq_get(args, 0), expression=seq_get(args, 1), null_safe=True ), "BITAND": _build_bitwise(exp.BitwiseAnd, "BITAND"), "BIT_AND": _build_bitwise(exp.BitwiseAnd, 
"BITAND"), "BITNOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, "BITORAGG": exp.BitwiseOrAgg.from_arg_list, "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, "BITMAP_OR_AGG": exp.BitmapOrAgg.from_arg_list, "BOOLAND": lambda args: exp.Booland( this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True ), "BOOLOR": lambda args: exp.Boolor( this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True ), "BOOLNOT": lambda args: exp.Boolnot(this=seq_get(args, 0), round_input=True), "BOOLXOR": lambda args: exp.Xor( this=seq_get(args, 0), expression=seq_get(args, 1), round_input=True ), "CORR": lambda args: exp.Corr( this=seq_get(args, 0), expression=seq_get(args, 1), null_on_zero_variance=True, ), "DATE": _build_datetime("DATE", exp.DType.DATE), "DATEFROMPARTS": _build_date_from_parts, "DATE_FROM_PARTS": _build_date_from_parts, "DATE_TRUNC": _date_trunc_to_time, "DATEADD": lambda args: exp.DateAdd( this=seq_get(args, 2), expression=seq_get(args, 
1), unit=map_date_part(seq_get(args, 0)), ), "DATEDIFF": lambda args: exp.DateDiff( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), date_part_boundary=True, ), "DAYNAME": lambda args: exp.Dayname(this=seq_get(args, 0), abbreviated=True), "DAYOFWEEKISO": exp.DayOfWeekIso.from_arg_list, "DIV0": _build_if_from_div0, "DIV0NULL": _build_if_from_div0null, "EDITDISTANCE": lambda args: exp.Levenshtein( this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) ), "FLATTEN": exp.Explode.from_arg_list, "GENERATOR": _build_generator, "GET": exp.GetExtract.from_arg_list, "GETDATE": exp.CurrentTimestamp.from_arg_list, "GET_PATH": lambda args, dialect: exp.JSONExtract( this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)), requires_json=True, ), "GREATEST_IGNORE_NULLS": lambda args: exp.Greatest( this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True ), "LEAST_IGNORE_NULLS": lambda args: exp.Least( this=seq_get(args, 0), expressions=args[1:], ignore_nulls=True ), "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, "IFF": exp.If.from_arg_list, "JAROWINKLER_SIMILARITY": lambda args: exp.JarowinklerSimilarity( this=seq_get(args, 0), expression=seq_get(args, 1), case_insensitive=True, ), "MD5_HEX": exp.MD5.from_arg_list, "MD5_BINARY": exp.MD5Digest.from_arg_list, "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, "MONTHNAME": lambda args: exp.Monthname(this=seq_get(args, 0), abbreviated=True), "LAST_DAY": lambda args: exp.LastDay( this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) ), "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), "LOCALTIMESTAMP": exp.CurrentTimestamp.from_arg_list, "NULLIFZERO": _build_if_from_nullifzero, "OBJECT_CONSTRUCT": lambda args: build_object_construct(args), "OBJECT_KEYS": exp.JSONKeys.from_arg_list, 
"OCTET_LENGTH": exp.ByteLength.from_arg_list, "PARSE_URL": lambda args: exp.ParseUrl(this=seq_get(args, 0), permissive=seq_get(args, 1)), "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), "REGEXP_LIKE": _build_regexp_like, "REGEXP_REPLACE": _build_regexp_replace, "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), "RANDOM": lambda args: exp.Rand( this=seq_get(args, 0), lower=exp.Literal.number(-9223372036854775808.0), # -2^63 as float to avoid overflow upper=exp.Literal.number(9223372036854775807.0), # 2^63-1 as float ), "REPLACE": build_replace_with_optional_replacement, "RLIKE": _build_regexp_like, "ROUND": _build_round, "SHA1_BINARY": exp.SHA1Digest.from_arg_list, "SHA1_HEX": exp.SHA.from_arg_list, "SHA2_BINARY": exp.SHA2Digest.from_arg_list, "SHA2_HEX": exp.SHA2.from_arg_list, "SPLIT": lambda args: exp.Split( this=seq_get(args, 0), expression=seq_get(args, 1), null_returns_null=True, empty_delimiter_returns_whole=True, ), "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), "STDDEV_SAMP": exp.Stddev.from_arg_list, "SYSDATE": lambda args: exp.CurrentTimestamp(this=seq_get(args, 0), sysdate=True), "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), "TIMEADD": lambda args: exp.TimeAdd( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), ), "TIMEDIFF": lambda args: exp.DateDiff( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), date_part_boundary=True, ), "TIME_FROM_PARTS": lambda args: exp.TimeFromParts( hour=seq_get(args, 0), min=seq_get(args, 1), sec=seq_get(args, 2), nano=seq_get(args, 3), overflow=True, ), "TIMESTAMPADD": lambda args: exp.DateAdd( this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)), ), "TIMESTAMPDIFF": lambda args: exp.DateDiff( this=seq_get(args, 2), expression=seq_get(args, 1), 
unit=map_date_part(seq_get(args, 0)), date_part_boundary=True, ), "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, "TIMESTAMPNTZFROMPARTS": _build_timestamp_from_parts, "TIMESTAMP_NTZ_FROM_PARTS": _build_timestamp_from_parts, "TRUNC": lambda args, dialect: build_trunc(args, dialect, date_trunc_requires_part=False), "TRUNCATE": lambda args, dialect: build_trunc( args, dialect, date_trunc_requires_part=False ), "TRY_DECRYPT": lambda args: exp.Decrypt( this=seq_get(args, 0), passphrase=seq_get(args, 1), aad=seq_get(args, 2), encryption_method=seq_get(args, 3), safe=True, ), "TRY_DECRYPT_RAW": lambda args: exp.DecryptRaw( this=seq_get(args, 0), key=seq_get(args, 1), iv=seq_get(args, 2), aad=seq_get(args, 3), encryption_method=seq_get(args, 4), aead=seq_get(args, 5), safe=True, ), "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), "TRY_TO_BINARY": lambda args: exp.ToBinary( this=seq_get(args, 0), format=seq_get(args, 1), safe=True ), "TRY_TO_BOOLEAN": lambda args: exp.ToBoolean(this=seq_get(args, 0), safe=True), "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DType.DATE, safe=True), **dict.fromkeys( ("TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"), _build_try_to_number ), "TRY_TO_DOUBLE": lambda args: exp.ToDouble( this=seq_get(args, 0), format=seq_get(args, 1), safe=True ), "TRY_TO_FILE": lambda args: exp.ToFile( this=seq_get(args, 0), path=seq_get(args, 1), safe=True ), "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DType.TIME, safe=True), "TRY_TO_TIMESTAMP": _build_datetime("TRY_TO_TIMESTAMP", exp.DType.TIMESTAMP, safe=True), "TRY_TO_TIMESTAMP_LTZ": _build_datetime( "TRY_TO_TIMESTAMP_LTZ", exp.DType.TIMESTAMPLTZ, safe=True ), "TRY_TO_TIMESTAMP_NTZ": _build_datetime( "TRY_TO_TIMESTAMP_NTZ", exp.DType.TIMESTAMPNTZ, safe=True ), "TRY_TO_TIMESTAMP_TZ": _build_datetime( "TRY_TO_TIMESTAMP_TZ", exp.DType.TIMESTAMPTZ, safe=True ), "TO_CHAR": build_timetostr_or_tochar, "TO_DATE": 
_build_datetime("TO_DATE", exp.DType.DATE), **dict.fromkeys( ("TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"), lambda args: exp.ToNumber( this=seq_get(args, 0), format=seq_get(args, 1), precision=seq_get(args, 2), scale=seq_get(args, 3), ), ), "TO_TIME": _build_datetime("TO_TIME", exp.DType.TIME), "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DType.TIMESTAMP), "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DType.TIMESTAMPLTZ), "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DType.TIMESTAMPNTZ), "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DType.TIMESTAMPTZ), "TO_GEOGRAPHY": lambda args: ( exp.cast(args[0], exp.DType.GEOGRAPHY) if len(args) == 1 else exp.Anonymous(this="TO_GEOGRAPHY", expressions=args) ), "TO_GEOMETRY": lambda args: ( exp.cast(args[0], exp.DType.GEOMETRY) if len(args) == 1 else exp.Anonymous(this="TO_GEOMETRY", expressions=args) ), "TO_VARCHAR": build_timetostr_or_tochar, "TO_JSON": exp.JSONFormat.from_arg_list, "VECTOR_COSINE_SIMILARITY": exp.CosineDistance.from_arg_list, "VECTOR_INNER_PRODUCT": exp.DotProduct.from_arg_list, "VECTOR_L1_DISTANCE": exp.ManhattanDistance.from_arg_list, "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, "ZEROIFNULL": _build_if_from_zeroifnull, "LIKE": build_like(exp.Like), "ILIKE": build_like(exp.ILike), "SEARCH": _build_search, "SKEW": exp.Skewness.from_arg_list, "SPLIT_PART": lambda args: exp.SplitPart( this=seq_get(args, 0), delimiter=seq_get(args, 1), part_index=seq_get(args, 2), part_index_zero_as_one=True, empty_delimiter_returns_whole=True, ), "STRTOK": lambda args: exp.Strtok( this=seq_get(args, 0), delimiter=seq_get(args, 1) or exp.Literal.string(" "), part_index=seq_get(args, 2) or exp.Literal.number("1"), ), "SYSTIMESTAMP": exp.CurrentTimestamp.from_arg_list, "WEEKISO": exp.WeekOfYear.from_arg_list, "WEEKOFYEAR": exp.Week.from_arg_list, } FUNCTIONS = {k: v for k, v in FUNCTIONS.items() if k != "PREDICT"} FUNCTION_PARSERS = { **parser.Parser.FUNCTION_PARSERS, 
"DATE_PART": lambda self: self._parse_date_part(), "DIRECTORY": lambda self: self._parse_directory(), "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), "LISTAGG": lambda self: self._parse_string_agg(), "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), } FUNCTION_PARSERS = {k: v for k, v in FUNCTION_PARSERS.items() if k != "TRIM"} TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} ALTER_PARSERS = { **parser.Parser.ALTER_PARSERS, "MODIFY": lambda self: self._parse_alter_table_alter(), "SESSION": lambda self: self._parse_alter_session(), "UNSET": lambda self: self.expression( exp.Set( tag=self._match_text_seq("TAG"), expressions=self._parse_csv(self._parse_id_var), unset=True, ) ), } STATEMENT_PARSERS = { **parser.Parser.STATEMENT_PARSERS, TokenType.GET: lambda self: self._parse_get(), TokenType.PUT: lambda self: self._parse_put(), TokenType.SHOW: lambda self: self._parse_show(), } PROPERTY_PARSERS = { **parser.Parser.PROPERTY_PARSERS, "CREDENTIALS": lambda self: self._parse_credentials_property(), "FILE_FORMAT": lambda self: self._parse_file_format_property(), "LOCATION": lambda self: self._parse_location_property(), "TAG": lambda self: self._parse_tag(), "USING": lambda self: ( self._match_text_seq("TEMPLATE") and self.expression(exp.UsingTemplateProperty(this=self._parse_statement())) ), } DESCRIBE_QUALIFIER_PARSERS: t.ClassVar[t.Dict[str, t.Callable]] = { "API": lambda self: self.expression(exp.ApiProperty()), "APPLICATION": lambda self: self.expression(exp.ApplicationProperty()), "CATALOG": lambda self: self.expression(exp.CatalogProperty()), "COMPUTE": lambda self: self.expression(exp.ComputeProperty()), "DATABASE": lambda self: ( self.expression(exp.DatabaseProperty()) if self._curr and self._curr.text.upper() == "ROLE" else None ), "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()), "HYBRID": lambda self: self.expression(exp.HybridProperty()), 
"ICEBERG": lambda self: self.expression(exp.IcebergProperty()), "MASKING": lambda self: self.expression(exp.MaskingProperty()), "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), "NETWORK": lambda self: self.expression(exp.NetworkProperty()), "ROW": lambda self: ( self.expression(exp.RowAccessProperty()) if self._match_text_seq("ACCESS") else None ), "SECURITY": lambda self: ( self.expression(exp.SecurityIntegrationProperty()) if self._curr and self._curr.text.upper() == "INTEGRATION" else None ), } TYPE_CONVERTERS = { # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number exp.DType.DECIMAL: build_default_decimal_type(precision=38, scale=0), } SHOW_PARSERS = { "DATABASES": _show_parser("DATABASES"), "TERSE DATABASES": _show_parser("DATABASES"), "SCHEMAS": _show_parser("SCHEMAS"), "TERSE SCHEMAS": _show_parser("SCHEMAS"), "OBJECTS": _show_parser("OBJECTS"), "TERSE OBJECTS": _show_parser("OBJECTS"), "TABLES": _show_parser("TABLES"), "TERSE TABLES": _show_parser("TABLES"), "VIEWS": _show_parser("VIEWS"), "TERSE VIEWS": _show_parser("VIEWS"), "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), "SEQUENCES": _show_parser("SEQUENCES"), "TERSE SEQUENCES": _show_parser("SEQUENCES"), "STAGES": _show_parser("STAGES"), "COLUMNS": _show_parser("COLUMNS"), "USERS": _show_parser("USERS"), "TERSE USERS": _show_parser("USERS"), "FILE FORMATS": _show_parser("FILE FORMATS"), "FUNCTIONS": _show_parser("FUNCTIONS"), "PROCEDURES": _show_parser("PROCEDURES"), "WAREHOUSES": _show_parser("WAREHOUSES"), } SHOW_TRIE = new_trie(key.split(" ") for key in SHOW_PARSERS) CONSTRAINT_PARSERS = { **parser.Parser.CONSTRAINT_PARSERS, "WITH": lambda self: self._parse_with_constraint(), "MASKING": lambda self: 
self._parse_with_constraint(), "PROJECTION": lambda self: self._parse_with_constraint(), "TAG": lambda self: self._parse_with_constraint(), } STAGED_FILE_SINGLE_TOKENS = { TokenType.DOT, TokenType.MOD, TokenType.SLASH, } FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} CREATABLES = { *parser.Parser.CREATABLES, TokenType.INTEGRATION, TokenType.PACKAGE, TokenType.POLICY, TokenType.POOL, TokenType.ROLE, TokenType.RULE, TokenType.VOLUME, } LAMBDAS = { **parser.Parser.LAMBDAS, TokenType.ARROW: lambda self, expressions: self.expression( exp.Lambda( this=self._replace_lambda( self._parse_assignment(), expressions, ), expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], ) ), } COLUMN_OPERATORS = { **parser.Parser.COLUMN_OPERATORS, TokenType.EXCLAMATION: lambda self, this, attr: self.expression( exp.ModelAttribute(this=this, expression=attr) ), } def _parse_directory(self) -> exp.DirectoryStage: table = self._parse_table_parts() this = table.this if isinstance(table, exp.Table) else table return self.expression(exp.DirectoryStage(this=this)) def _parse_describe(self) -> exp.Describe: index = self._index if self._match_texts(self.DESCRIBE_QUALIFIER_PARSERS): qualifier = self.DESCRIBE_QUALIFIER_PARSERS[self._prev.text.upper()](self) if qualifier: kind = self._match_set(self.CREATABLES) and self._prev.text.upper() if kind: this = self._parse_table(schema=True) properties = self.expression(exp.Properties(expressions=[qualifier])) post_props = self._parse_properties() expressions = post_props.expressions if post_props else None return self.expression( exp.Describe( this=this, kind=kind, properties=properties, expressions=expressions, ) ) self._retreat(index) return super()._parse_describe() def _parse_use(self) -> exp.Use: if self._match_text_seq("SECONDARY", "ROLES"): 
this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) return self.expression(exp.Use(kind="SECONDARY ROLES", this=this, expressions=roles)) return super()._parse_use() def _negate_range(self, this: t.Optional[exp.Expr] = None) -> t.Optional[exp.Expr]: if not this: return this query = this.args.get("query") if isinstance(this, exp.In) and isinstance(query, exp.Query): # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` # which can produce different results (most likely a SnowFlake bug). # # https://docs.snowflake.com/en/sql-reference/functions/in # Context: https://github.com/tobymao/sqlglot/issues/3890 return self.expression(exp.NEQ(this=this.this, expression=exp.All(this=query.unnest()))) return self.expression(exp.Not(this=this)) def _parse_tag(self) -> exp.Tags: return self.expression(exp.Tags(expressions=self._parse_wrapped_csv(self._parse_property))) def _parse_with_constraint(self) -> t.Optional[exp.Expr]: if self._prev.token_type != TokenType.WITH: self._retreat(self._index - 1) if self._match_text_seq("MASKING", "POLICY"): policy = self._parse_column() return self.expression( exp.MaskingPolicyColumnConstraint( this=policy.to_dot() if isinstance(policy, exp.Column) else policy, expressions=self._match(TokenType.USING) and self._parse_wrapped_csv(self._parse_id_var), ) ) if self._match_text_seq("PROJECTION", "POLICY"): policy = self._parse_column() return self.expression( exp.ProjectionPolicyColumnConstraint( this=policy.to_dot() if isinstance(policy, exp.Column) else policy ) ) if self._match(TokenType.TAG): return self._parse_tag() return None def _parse_with_property(self) -> t.Optional[exp.Expr] | t.List[exp.Expr]: if self._match(TokenType.TAG): return self._parse_tag() return super()._parse_with_property() def _parse_create(self) -> exp.Create 
| exp.Command: expression = super()._parse_create() if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: # Replace the Table node with the enclosed Identifier expression.this.replace(expression.this.this) return expression # https://docs.snowflake.com/en/sql-reference/functions/date_part.html # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts def _parse_date_part(self) -> t.Optional[exp.Expr]: this = self._parse_var() or self._parse_type() if not this: return None # Handle both syntaxes: DATE_PART(part, expr) and DATE_PART(part FROM expr) expression = self._match_set((TokenType.FROM, TokenType.COMMA)) and self._parse_bitwise() return self.expression( exp.Extract(this=map_date_part(this, self.dialect), expression=expression) ) def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expr]: if is_map: # Keys are strings in Snowflake's objects, see also: # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured # - https://docs.snowflake.com/en/sql-reference/functions/object_construct return self._parse_slice(self._parse_string()) or self._parse_assignment() return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) def _parse_lateral(self) -> t.Optional[exp.Lateral]: lateral = super()._parse_lateral() if not lateral: return lateral if isinstance(lateral.this, exp.Explode): table_alias = lateral.args.get("alias") columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] if table_alias and not table_alias.args.get("columns"): table_alias.set("columns", columns) elif not table_alias: exp.alias_(lateral, "_flattened", table=columns, copy=False) return lateral def _parse_table_parts( self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False, fast: bool = False, ) -> t.Optional[exp.Table | exp.Dot]: # https://docs.snowflake.com/en/user-guide/querying-stage if self._match(TokenType.STRING, 
advance=False): table = self._parse_string() elif self._match_text_seq("@", advance=False): table = self._parse_location_path() else: table = None if table: file_format = None pattern = None wrapped = self._match(TokenType.L_PAREN) while self._curr and wrapped and not self._match(TokenType.R_PAREN): if self._match_text_seq("FILE_FORMAT", "=>"): file_format = self._parse_string() or super()._parse_table_parts( is_db_reference=is_db_reference ) elif self._match_text_seq("PATTERN", "=>"): pattern = self._parse_string() else: break self._match(TokenType.COMMA) table = self.expression(exp.Table(this=table, format=file_format, pattern=pattern)) else: table = super()._parse_table_parts( schema=schema, is_db_reference=is_db_reference, fast=fast, ) return table def _parse_table( self, schema: bool = False, joins: bool = False, alias_tokens: t.Optional[Collection[TokenType]] = None, parse_bracket: bool = False, is_db_reference: bool = False, parse_partition: bool = False, consume_pipe: bool = False, ) -> t.Optional[exp.Expr]: table = super()._parse_table( schema=schema, joins=joins, alias_tokens=alias_tokens, parse_bracket=parse_bracket, is_db_reference=is_db_reference, parse_partition=parse_partition, ) if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): table_from_rows = table.this for arg in exp.TableFromRows.arg_types: if arg != "this": table_from_rows.set(arg, table.args.get(arg)) table = table_from_rows return table def _parse_id_var( self, any_token: bool = True, tokens: t.Optional[Collection[TokenType]] = None, ) -> t.Optional[exp.Expr]: if self._match_text_seq("IDENTIFIER", "("): identifier = ( super()._parse_id_var(any_token=any_token, tokens=tokens) or self._parse_string() ) self._match_r_paren() return self.expression(exp.Anonymous(this="IDENTIFIER", expressions=[identifier])) return super()._parse_id_var(any_token=any_token, tokens=tokens) def _parse_show_snowflake(self, this: str) -> exp.Show: scope = None scope_kind = None # will 
identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS # which is syntactically valid but has no effect on the output terse = self._tokens[self._index - 2].text.upper() == "TERSE" history = self._match_text_seq("HISTORY") like = self._parse_string() if self._match(TokenType.LIKE) else None if self._match(TokenType.IN): if self._match_text_seq("ACCOUNT"): scope_kind = "ACCOUNT" elif self._match_text_seq("CLASS"): scope_kind = "CLASS" scope = self._parse_table_parts() elif self._match_text_seq("APPLICATION"): scope_kind = "APPLICATION" if self._match_text_seq("PACKAGE"): scope_kind += " PACKAGE" scope = self._parse_table_parts() elif self._match_set(self.DB_CREATABLES): scope_kind = self._prev.text.upper() if self._curr: scope = self._parse_table_parts() elif self._curr: scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" scope = self._parse_table_parts() return self.expression( exp.Show( terse=terse, this=this, history=history, like=like, scope=scope, scope_kind=scope_kind, starts_with=self._match_text_seq("STARTS", "WITH") and self._parse_string(), limit=self._parse_limit(), from_=self._parse_string() if self._match(TokenType.FROM) else None, privileges=self._match_text_seq("WITH", "PRIVILEGES") and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), ) ) def _parse_put(self) -> exp.Put | exp.Command: if self._curr.token_type != TokenType.STRING: return self._parse_as_command(self._prev) return self.expression( exp.Put( this=self._parse_string(), target=self._parse_location_path(), properties=self._parse_properties(), ) ) def _parse_get(self) -> t.Optional[exp.Expr]: start = self._prev # If we detect GET( then we need to parse a function, not a statement if self._match(TokenType.L_PAREN): self._retreat(self._index - 2) return self._parse_expression() target = self._parse_location_path() # Parse as command if unquoted file path if self._curr.token_type == TokenType.URI_START: return self._parse_as_command(start) return 
self.expression( exp.Get(this=self._parse_string(), target=target, properties=self._parse_properties()) ) def _parse_location_property(self) -> exp.LocationProperty: self._match(TokenType.EQ) return self.expression(exp.LocationProperty(this=self._parse_location_path())) def _parse_file_location(self) -> t.Optional[exp.Expr]: # Parse either a subquery or a staged file return ( self._parse_select(table=True, parse_subquery_alias=False) if self._match(TokenType.L_PAREN, advance=False) else self._parse_table_parts() ) def _parse_location_path(self) -> exp.Var: start = self._curr self._advance_any(ignore_reserved=True) # We avoid consuming a comma token because external tables like @foo and @bar # can be joined in a query with a comma separator, as well as closing paren # in case of subqueries while self._is_connected() and not self._match_set( (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False ): self._advance_any(ignore_reserved=True) return exp.var(self._find_sql(start, self._prev)) def _parse_lambda_arg(self) -> t.Optional[exp.Expr]: this = super()._parse_lambda_arg() if not this: return this typ = self._parse_types() if typ: return self.expression(exp.Cast(this=this, to=typ)) return this def _parse_foreign_key(self) -> exp.ForeignKey: # inlineFK, the REFERENCES columns are implied if self._match(TokenType.REFERENCES, advance=False): return self.expression(exp.ForeignKey()) # outoflineFK, explicitly names the columns return super()._parse_foreign_key() def _parse_file_format_property(self) -> exp.FileFormatProperty: self._match(TokenType.EQ) if self._match(TokenType.L_PAREN, advance=False): expressions = self._parse_wrapped_options() else: expressions = [self._parse_format_name()] return self.expression(exp.FileFormatProperty(expressions=expressions)) def _parse_credentials_property(self) -> exp.CredentialsProperty: return self.expression(exp.CredentialsProperty(expressions=self._parse_wrapped_options())) def _parse_semantic_view(self) -> 
def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse SET with the base parser, then mark every SetItem as a VARIABLE.

    When the base parser does not produce an ``exp.Set`` its result is
    returned untouched.
    """
    parsed = super()._parse_set(unset=unset, tag=tag)
    if not isinstance(parsed, exp.Set):
        return parsed

    for item in parsed.expressions:
        if isinstance(item, exp.SetItem):
            item.set("kind", "VARIABLE")

    return parsed
class SolrParser(parser.Parser):
    """Parser overrides for the Solr dialect."""

    # ANTI and SEMI are accepted as table aliases in addition to the base
    # parser's table alias tokens.
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {
        TokenType.ANTI,
        TokenType.SEMI,
    }

    # On top of the base disjunction operators, the DPIPE token builds an exp.Or node.
    DISJUNCTION = {
        **parser.Parser.DISJUNCTION,
        TokenType.DPIPE: exp.Or,
    }
def _build_dateadd(args: t.List) -> exp.Expr:
    """Build the node for DATE_ADD / DATEADD / TIMESTAMPADD from positional args.

    Exactly two arguments produce a day-based ``exp.TsOrDsAdd``; any other
    argument count is treated as the ``(unit, value, expr)`` form and produces
    an ``exp.TimestampAdd``.
    """
    amount = seq_get(args, 1)

    if len(args) != 2:
        # DATE_ADD / DATEADD / TIMESTAMPADD(unit, value integer, expr)
        # https://docs.databricks.com/en/sql/language-manual/functions/date_add3.html
        return exp.TimestampAdd(this=seq_get(args, 2), expression=amount, unit=seq_get(args, 0))

    # DATE_ADD(startDate, numDays INTEGER)
    # https://docs.databricks.com/en/sql/language-manual/functions/date_add.html
    return exp.TsOrDsAdd(
        this=seq_get(args, 0),
        expression=amount,
        unit=exp.Literal.string("DAY"),
    )
def _parse_generated_as_identity(
    self,
) -> (
    exp.GeneratedAsIdentityColumnConstraint
    | exp.ComputedColumnConstraint
    | exp.GeneratedAsRowColumnConstraint
):
    """Parse GENERATED ... AS, turning an expression-carrying result into a computed column.

    The base parser's constraint is returned as is unless it holds an
    ``expression``, in which case that expression is wrapped in an
    ``exp.ComputedColumnConstraint`` instead.
    """
    constraint = super()._parse_generated_as_identity()

    computed = constraint.expression
    if not computed:
        return constraint

    return self.expression(exp.ComputedColumnConstraint(this=computed))
def build_as_cast(to_type: str) -> t.Callable[[t.List], exp.Expr]:
    """Return a function builder that casts its first argument to ``to_type``."""

    def _cast_first_arg(args: t.List) -> exp.Expr:
        # Only the first positional argument is used; a missing one yields None.
        return exp.Cast(this=seq_get(args, 0), to=exp.DataType.build(to_type))

    return _cast_first_arg
def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse ``DROP COLUMNS <schema>`` into an ``exp.Drop``, or return None if absent."""
    if not self._match_text_seq("DROP", "COLUMNS"):
        return None

    columns = self._parse_schema()
    return self.expression(exp.Drop(this=columns, kind="COLUMNS"))
class SQLiteParser(parser.Parser):
    """Parser overrides for the SQLite dialect."""

    STRING_ALIASES = True
    ALTER_RENAME_REQUIRES_COLUMN = False
    JOINS_HAVE_EQUAL_PRECEDENCE = True
    ADD_JOIN_ON_TRUE = True

    # ANTI and SEMI are usable as table aliases on top of the base alias tokens.
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.ANTI, TokenType.SEMI}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        # DATETIME and TIME are kept as anonymous function calls.
        "DATETIME": lambda args: exp.Anonymous(this="DATETIME", expressions=args),
        "EDITDIST3": exp.Levenshtein.from_arg_list,
        "JSON_GROUP_ARRAY": exp.JSONArrayAgg.from_arg_list,
        "JSON_GROUP_OBJECT": lambda args: exp.JSONObjectAgg(expressions=args),
        "STRFTIME": _build_strftime,
        "SQLITE_VERSION": exp.CurrentVersion.from_arg_list,
        "TIME": lambda args: exp.Anonymous(this="TIME", expressions=args),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ATTACH: lambda self: self._parse_attach_detach(),
        TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
    }

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        # https://www.sqlite.org/lang_expr.html
        TokenType.MATCH: binary_range_parser(exp.Match),
    }

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE, leaving a following constraint keyword unconsumed."""
        # When UNIQUE is a standalone constraint, the next token starts another
        # constraint and must be left for its own parser, e.g:
        # CREATE TABLE foo (bar TEXT UNIQUE REFERENCES baz ...)
        upcoming_keyword = self._curr.text.upper()
        if upcoming_keyword not in self.CONSTRAINT_PARSERS:
            return super()._parse_unique()

        return self.expression(exp.UniqueColumnConstraint())

    def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
        """Parse the operand of ATTACH (default) or DETACH after an optional DATABASE token."""
        self._match(TokenType.DATABASE)
        operand = self._parse_expression()

        node = exp.Attach(this=operand) if is_attach else exp.Detach(this=operand)
        return self.expression(node)
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
    """Parse UNNEST and fill in the default "unnest" table/column alias where missing."""
    unnest = super()._parse_unnest(with_alias=with_alias)
    if not unnest:
        return unnest

    alias = unnest.args.get("alias")
    if alias:
        # An explicit table alias without column names: Starrocks defaults to
        # naming the UNNEST column "unnest".
        if not alias.args.get("columns"):
            alias.set("columns", [exp.to_identifier("unnest")])
        return unnest

    # No alias at all: Starrocks defaults to naming the table alias "unnest" too.
    default_alias = exp.TableAlias(
        this=exp.to_identifier("unnest"), columns=[exp.to_identifier("unnest")]
    )
    unnest.set("alias", default_alias)
    return unnest
def _parse_refresh_property(self) -> exp.RefreshTriggerProperty:
    """
    Parse the clause that follows the REFRESH property keyword:

    REFRESH [DEFERRED | IMMEDIATE]
            [ASYNC | ASYNC [START (<start_time>)] EVERY (INTERVAL <n> <unit>) | MANUAL]

    Every part is optional. A part that is absent leaves the falsy result of
    its failed match (or None for `every` / `unit`) in the returned node.
    """
    # Each `match and value` chain yields the upper-cased keyword (or the parsed
    # value) on success, and the falsy match result otherwise.
    method = self._match_texts(("DEFERRED", "IMMEDIATE")) and self._prev.text.upper()
    kind = self._match_texts(("ASYNC", "MANUAL")) and self._prev.text.upper()
    # START is followed by a parenthesized string
    start = self._match_text_seq("START") and self._parse_wrapped(self._parse_string)

    if self._match_text_seq("EVERY"):
        # EVERY (INTERVAL <n> <unit>): the parens and the INTERVAL keyword are
        # matched leniently, i.e. their absence is not reported here.
        self._match_l_paren()
        self._match_text_seq("INTERVAL")
        every = self._parse_number()
        unit = self._parse_var(any_token=True)
        self._match_r_paren()
    else:
        every = None
        unit = None

    return self.expression(
        exp.RefreshTriggerProperty(
            method=method, kind=kind, starts=start, every=every, unit=unit
        )
    )
substr=seq_get(args, 1), occurrence=seq_get(args, 2) ), } NO_PAREN_IF_COMMANDS = False ================================================ FILE: sqlglot/parsers/teradata.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp, parser from sqlglot.helper import seq_get from sqlglot.tokens import TokenType from sqlglot.trie import new_trie class TeradataParser(parser.Parser): TABLESAMPLE_CSV = True VALUES_FOLLOWED_BY_PAREN = False TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | { TokenType.ANTI, TokenType.SEMI, } CHARSET_TRANSLATORS = { "GRAPHIC_TO_KANJISJIS", "GRAPHIC_TO_LATIN", "GRAPHIC_TO_UNICODE", "GRAPHIC_TO_UNICODE_PadSpace", "KANJI1_KanjiEBCDIC_TO_UNICODE", "KANJI1_KanjiEUC_TO_UNICODE", "KANJI1_KANJISJIS_TO_UNICODE", "KANJI1_SBC_TO_UNICODE", "KANJISJIS_TO_GRAPHIC", "KANJISJIS_TO_LATIN", "KANJISJIS_TO_UNICODE", "LATIN_TO_GRAPHIC", "LATIN_TO_KANJISJIS", "LATIN_TO_UNICODE", "LOCALE_TO_UNICODE", "UNICODE_TO_GRAPHIC", "UNICODE_TO_GRAPHIC_PadGraphic", "UNICODE_TO_GRAPHIC_VarGraphic", "UNICODE_TO_KANJI1_KanjiEBCDIC", "UNICODE_TO_KANJI1_KanjiEUC", "UNICODE_TO_KANJI1_KANJISJIS", "UNICODE_TO_KANJI1_SBC", "UNICODE_TO_KANJISJIS", "UNICODE_TO_LATIN", "UNICODE_TO_LOCALE", "UNICODE_TO_UNICODE_FoldSpace", "UNICODE_TO_UNICODE_Fullwidth", "UNICODE_TO_UNICODE_Halfwidth", "UNICODE_TO_UNICODE_NFC", "UNICODE_TO_UNICODE_NFD", "UNICODE_TO_UNICODE_NFKC", "UNICODE_TO_UNICODE_NFKD", } FUNC_TOKENS = parser.Parser.FUNC_TOKENS - {TokenType.REPLACE} STATEMENT_PARSERS = { **parser.Parser.STATEMENT_PARSERS, TokenType.DATABASE: lambda self: self.expression( exp.Use(this=self._parse_table(schema=False)) ), TokenType.REPLACE: lambda self: self._parse_create(), TokenType.LOCK: lambda self: self._parse_locking_statement(), } def _parse_locking_statement(self) -> exp.LockingStatement: # Reuse exp.LockingProperty parsing for the lock kind, type etc locking_property = self._parse_locking() wrapped_query = 
# FROM before SET in Teradata UPDATE syntax
# https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/Teradata-VantageTM-SQL-Data-Manipulation-Language-17.20/Statement-Syntax/UPDATE/UPDATE-Syntax-Basic-Form-FROM-Clause
def _parse_update(self) -> exp.Update:
    """Parse UPDATE <table> [FROM ...] [SET ...] [WHERE ...] in that clause order."""
    # The clauses are parsed one after another in the order they appear in the
    # statement, so FROM is consumed before SET.
    target = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
    source = self._parse_from(joins=True)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    condition = self._parse_where()

    return self.expression(
        exp.Update(this=target, from_=source, expressions=assignments, where=condition)
    )
def _parse_query_band(self) -> exp.QueryBand:
    """Parse the remainder of a SET QUERY_BAND statement.

    Accepted shapes:
        SET QUERY_BAND = 'key=value;key2=value2;' FOR SESSION|TRANSACTION
        SET QUERY_BAND = 'key=value;' UPDATE FOR SESSION|TRANSACTION
        SET QUERY_BAND = NONE FOR SESSION|TRANSACTION
    """
    self._match(TokenType.EQ)

    # The band is either the NONE keyword or a string literal
    band: t.Optional[exp.Expr]
    if not self._match_text_seq("NONE"):
        band = self._parse_string()
    else:
        band = exp.Var(this="NONE")

    is_update = self._match_text_seq("UPDATE")
    self._match_text_seq("FOR")

    # Scope is SESSION VOLATILE, SESSION or TRANSACTION; anything else leaves it unset.
    # The two-word form is tried first so that SESSION alone does not shadow it.
    scope = None
    if self._match_text_seq("SESSION", "VOLATILE"):
        scope = "SESSION VOLATILE"
    elif self._match_texts(("SESSION", "TRANSACTION")):
        scope = self._prev.text.upper()

    return self.expression(exp.QueryBand(this=band, scope=scope, update=is_update))
class TrinoParser(PrestoParser):
    """Parser overrides for the Trino dialect, layered on top of the Presto parser."""

    NO_PAREN_FUNCTIONS = {
        **PrestoParser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_CATALOG: exp.CurrentCatalog,
    }

    FUNCTIONS = {
        **PrestoParser.FUNCTIONS,
        "VERSION": exp.CurrentVersion.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **PrestoParser.FUNCTION_PARSERS,
        "TRIM": lambda self: self._parse_trim(),
        "JSON_QUERY": lambda self: self._parse_json_query(),
        "JSON_VALUE": lambda self: self._parse_json_value(),
        "LISTAGG": lambda self: self._parse_string_agg(),
    }

    # Wrapper behaviour of JSON_QUERY:
    #   { WITH | WITHOUT } [ CONDITIONAL | UNCONDITIONAL ] [ ARRAY ] WRAPPER
    # Every continuation must end in WRAPPER. The three-word CONDITIONAL form
    # used to be misspelled "WRAPPED", which made `CONDITIONAL ARRAY WRAPPER`
    # impossible to match.
    JSON_QUERY_OPTIONS: parser.OPTIONS_TYPE = {
        **dict.fromkeys(
            ("WITH", "WITHOUT"),
            (
                "WRAPPER",
                ("ARRAY", "WRAPPER"),
                ("CONDITIONAL", "WRAPPER"),
                ("CONDITIONAL", "ARRAY", "WRAPPER"),
                ("UNCONDITIONAL", "WRAPPER"),
                ("UNCONDITIONAL", "ARRAY", "WRAPPER"),
            ),
        ),
    }

    def _parse_json_query_quote(self) -> t.Optional[exp.JSONExtractQuote]:
        """Parse an optional ``{KEEP | OMIT} QUOTES [ON SCALAR STRING]`` clause.

        Returns:
            The parsed ``exp.JSONExtractQuote``, or None when neither
            ``KEEP QUOTES`` nor ``OMIT QUOTES`` comes next.
        """
        if not (self._match_text_seq("KEEP", "QUOTES") or self._match_text_seq("OMIT", "QUOTES")):
            return None

        return self.expression(
            exp.JSONExtractQuote(
                # Two tokens were just consumed, so the token two positions back
                # is the KEEP / OMIT keyword.
                option=self._tokens[self._index - 2].text.upper(),
                scalar=self._match_text_seq("ON", "SCALAR", "STRING"),
            )
        )

    def _parse_json_query(self) -> exp.JSONExtract:
        """Parse the arguments of JSON_QUERY into an ``exp.JSONExtract`` flagged ``json_query``.

        The keyword arguments below are evaluated in order, which is also the
        order in which the clauses are consumed: input, optional path after a
        comma, wrapper option, quote clause and ON-condition.
        """
        return self.expression(
            exp.JSONExtract(
                this=self._parse_bitwise(),
                expression=self._match(TokenType.COMMA) and self._parse_bitwise(),
                # An unrecognized wrapper option is tolerated instead of raising
                option=self._parse_var_from_options(self.JSON_QUERY_OPTIONS, raise_unmatched=False),
                json_query=True,
                quote=self._parse_json_query_quote(),
                on_condition=self._parse_on_condition(),
            )
        )
parser.OPTIONS_TYPE = { **dict.fromkeys( ( "DISABLE_OPTIMIZED_PLAN_FORCING", "FAST", "IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX", "LABEL", "MAXDOP", "MAXRECURSION", "MAX_GRANT_PERCENT", "MIN_GRANT_PERCENT", "NO_PERFORMANCE_SPOOL", "QUERYTRACEON", "RECOMPILE", ), tuple(), ), "CONCAT": ("UNION",), "DISABLE": ("EXTERNALPUSHDOWN", "SCALEOUTEXECUTION"), "EXPAND": ("VIEWS",), "FORCE": ("EXTERNALPUSHDOWN", "ORDER", "SCALEOUTEXECUTION"), "HASH": ("GROUP", "JOIN", "UNION"), "KEEP": ("PLAN",), "KEEPFIXED": ("PLAN",), "LOOP": ("JOIN",), "MERGE": ("JOIN", "UNION"), "OPTIMIZE": (("FOR", "UNKNOWN"),), "ORDER": ("GROUP",), "PARAMETERIZATION": ("FORCED", "SIMPLE"), "ROBUST": ("PLAN",), "USE": ("PLAN",), } XML_OPTIONS: parser.OPTIONS_TYPE = { **dict.fromkeys( ( "AUTO", "EXPLICIT", "TYPE", ), tuple(), ), "ELEMENTS": ( "XSINIL", "ABSENT", ), "BINARY": ("BASE64",), } OPTIONS_THAT_REQUIRE_EQUAL = ("MAX_GRANT_PERCENT", "MIN_GRANT_PERCENT", "LABEL") def _build_formatted_time( exp_class: t.Type[E], full_format_mapping: t.Optional[bool] = None ) -> t.Callable[[t.List], E]: def _builder(args: t.List) -> E: fmt = seq_get(args, 0) if isinstance(fmt, exp.Expr): from sqlglot.dialects.tsql import TSQL fmt = exp.Literal.string( format_time( fmt.name.lower(), ( {**TSQL.TIME_MAPPING, **FULL_FORMAT_TIME_MAPPING} if full_format_mapping else TSQL.TIME_MAPPING ), ) ) this = seq_get(args, 1) if isinstance(this, exp.Expr): this = exp.cast(this, exp.DType.DATETIME2) return exp_class(this=this, format=fmt) return _builder def _build_format(args: t.List) -> exp.NumberToStr | exp.TimeToStr: this = seq_get(args, 0) fmt = seq_get(args, 1) culture = seq_get(args, 2) number_fmt = fmt and (fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.name)) if number_fmt: return exp.NumberToStr(this=this, format=fmt, culture=culture) if fmt: from sqlglot.dialects.tsql import TSQL fmt = exp.Literal.string( format_time(fmt.name, TSQL.FORMAT_TIME_MAPPING) if len(fmt.name) == 1 else format_time(fmt.name, 
def _build_date_delta(
    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None, big_int: bool = False
) -> t.Callable[[t.List], E]:
    """Return a builder for date-delta functions called as ``(unit, start, end)``.

    Args:
        exp_class: Expression class to instantiate.
        unit_mapping: Optional mapping from lower-cased unit names to the unit
            name to use; units missing from it are kept unchanged.
        big_int: Forwarded verbatim to ``exp_class``.

    Returns:
        A function that turns the positional argument list into ``exp_class``.
    """

    def _builder(args: t.List) -> E:
        unit = seq_get(args, 0)
        if unit and unit_mapping:
            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name))

        start_date = seq_get(args, 1)
        if start_date and start_date.is_number:
            # Numeric types are valid DATETIME values
            if start_date.is_int:
                # An integer is a day offset from DEFAULT_START_DATE. isoformat() always
                # yields YYYY-MM-DD, whereas the "%F" strftime directive is only
                # available where the platform's C library implements it.
                adds = DEFAULT_START_DATE + datetime.timedelta(days=start_date.to_py())
                start_date = exp.Literal.string(adds.isoformat())
            else:
                # We currently don't handle float values, i.e. they're not converted to equivalent DATETIMEs.
                # This is not a problem when generating T-SQL code, it is when transpiling to other dialects.
                return exp_class(
                    this=seq_get(args, 2), expression=start_date, unit=unit, big_int=big_int
                )

        # Both endpoints are wrapped in TimeStrToTime
        return exp_class(
            this=exp.TimeStrToTime(this=seq_get(args, 2)),
            expression=exp.TimeStrToTime(this=start_date),
            unit=unit,
            big_int=big_int,
        )

    return _builder
will be stored into exp.SplitPart if: # - All args are literals # - The part index (2nd arg) is <= 4 (max valid value, otherwise TSQL returns NULL) if len(args) == 2 and all(isinstance(arg, exp.Literal) for arg in args): this = args[0] part_index = args[1] split_count = len(this.name.split(".")) if split_count <= 4: return exp.SplitPart( this=this, delimiter=exp.Literal.string("."), part_index=exp.Literal.number(split_count + 1 - part_index.to_py()), ) return exp.Anonymous(this="PARSENAME", expressions=args) def _build_json_query(args: t.List, dialect: Dialect) -> exp.JSONExtract: if len(args) == 1: # The default value for path is '$'. As a result, if you don't provide a # value for path, JSON_QUERY returns the input expression. args.append(exp.Literal.string("$")) return parser.build_extract_json_with_path(exp.JSONExtract)(args, dialect) def _build_datetrunc(args: t.List) -> exp.TimestampTrunc: unit = seq_get(args, 0) this = seq_get(args, 1) if this and this.is_string: this = exp.cast(this, exp.DType.DATETIME2) return exp.TimestampTrunc(this=this, unit=unit) class TSQLParser(parser.Parser): SET_REQUIRES_ASSIGNMENT_DELIMITER = False LOG_DEFAULTS_TO_LN = True STRING_ALIASES = True NO_PAREN_IF_COMMANDS = False NO_PAREN_FUNCTIONS = { **parser.Parser.NO_PAREN_FUNCTIONS, TokenType.SESSION_USER: exp.SessionUser, } QUERY_MODIFIER_PARSERS = { **parser.Parser.QUERY_MODIFIER_PARSERS, TokenType.OPTION: lambda self: ("options", self._parse_options()), TokenType.FOR: lambda self: ("for_", self._parse_for()), } # T-SQL does not allow BEGIN to be used as an identifier ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.BEGIN} ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.BEGIN} TABLE_ALIAS_TOKENS = (parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.ANTI, TokenType.SEMI}) - { TokenType.BEGIN } COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {TokenType.BEGIN} UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.BEGIN} FUNCTIONS = { 
**parser.Parser.FUNCTIONS, "ATN2": exp.Atan2.from_arg_list, "CHARINDEX": lambda args: exp.StrPosition( this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2), ), "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=False), "COUNT_BIG": lambda args: exp.Count( this=seq_get(args, 0), expressions=args[1:], big_int=True ), "DATEADD": build_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL), "DATEDIFF": _build_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL), "DATEDIFF_BIG": _build_date_delta( exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL, big_int=True ), "DATENAME": _build_formatted_time(exp.TimeToStr, full_format_mapping=True), "DATETIMEFROMPARTS": _build_datetimefromparts, "EOMONTH": _build_eomonth, "FORMAT": _build_format, "GETDATE": exp.CurrentTimestamp.from_arg_list, "HASHBYTES": _build_hashbytes, "ISNULL": lambda args: build_coalesce(args=args, is_null=True), "JSON_QUERY": _build_json_query, "JSON_VALUE": parser.build_extract_json_with_path(exp.JSONExtractScalar), "LEN": _build_with_arg_as_text(exp.Length), "LEFT": _build_with_arg_as_text(exp.Left), "NEWID": exp.Uuid.from_arg_list, "RIGHT": _build_with_arg_as_text(exp.Right), "PARSENAME": _build_parsename, "REPLICATE": exp.Repeat.from_arg_list, "SCHEMA_NAME": exp.CurrentSchema.from_arg_list, "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), "SYSDATETIME": exp.CurrentTimestamp.from_arg_list, "SUSER_NAME": exp.CurrentUser.from_arg_list, "SUSER_SNAME": exp.CurrentUser.from_arg_list, "SYSDATETIMEOFFSET": exp.CurrentTimestampLTZ.from_arg_list, "SYSTEM_USER": exp.CurrentUser.from_arg_list, "TIMEFROMPARTS": _build_timefromparts, "DATETRUNC": _build_datetrunc, } JOIN_HINTS = {"LOOP", "HASH", "MERGE", "REMOTE"} PROCEDURE_OPTIONS = dict.fromkeys( ("ENCRYPTION", "RECOMPILE", "SCHEMABINDING", "NATIVE_COMPILATION", "EXECUTE"), tuple() ) COLUMN_DEFINITION_MODES = {"OUT", "OUTPUT", "READONLY"} RETURNS_TABLE_TOKENS = 
def _parse_execute(self) -> exp.Execute:
    """
    Parse the target and the comma-separated arguments of an EXECUTE statement.

    Returns:
        An `exp.Execute` node, or an `exp.ExecuteSql` node carrying the same args
        when the executed object is named `sp_executesql` (case-insensitive).
    """
    target = self._parse_table(schema=True)
    arguments = self._parse_csv(self._parse_expression)
    node = self.expression(exp.Execute(this=target, expressions=arguments))

    if node.name.lower() != "sp_executesql":
        return node

    # sp_executesql gets its own node type, built from the already parsed args
    return self.expression(exp.ExecuteSql(**node.args))
def _parse_options(self) -> t.Optional[t.List[exp.Expr]]:
    """
    Parse an `OPTION ( <option> [= <value>] [, ...] )` query modifier.

    Returns:
        The parsed `exp.QueryOption` nodes, or None when the next token is not OPTION.
    """

    def _parse_option() -> t.Optional[exp.Expr]:
        # A single entry of the wrapped list: an option name from OPTIONS,
        # an optional `=`, then the option value.
        name = self._parse_var_from_options(OPTIONS)
        if name:
            self._match(TokenType.EQ)
            value = self._parse_primary_or_var()
            return self.expression(exp.QueryOption(this=name, expression=value))

        return None

    if self._match(TokenType.OPTION):
        return self._parse_wrapped_csv(_parse_option)

    return None
def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Applies to SQL Server and Azure SQL Database

    COMMIT [ { TRAN | TRANSACTION }
        [ transaction_name | @tran_name_variable ] ]
        [ WITH ( DELAYED_DURABILITY = { OFF | ON } ) ]

    ROLLBACK { TRAN | TRANSACTION }
        [ transaction_name | @tran_name_variable
        | savepoint_name | @savepoint_variable ]

    The statement kind is taken from the previously consumed token.
    """
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    # The TRAN / TRANSACTION keyword is consumed if present, it is not required
    self._match_texts(("TRAN", "TRANSACTION"))
    name = self._parse_id_var()

    if is_rollback:
        return self.expression(exp.Rollback(this=name))

    durability = None
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        self._match_text_seq("DELAYED_DURABILITY")
        self._match(TokenType.EQ)

        # Anything other than OFF is treated as ON
        durability = not self._match_text_seq("OFF")
        if durability:
            self._match(TokenType.ON)

        self._match_r_paren()

    return self.expression(exp.Commit(this=name, durability=durability))
def _parse_into(self) -> t.Optional[exp.Into]:
    """
    Parse an INTO clause and flag it as temporary when its table identifier is.

    Returns:
        Whatever the base parser returned; if that is an `exp.Into` whose first
        `exp.Table` has an identifier with the "temporary" arg set, the Into
        node additionally gets `temporary=True`.
    """
    into = super()._parse_into()

    if not isinstance(into, exp.Into):
        return into

    table = into.find(exp.Table)
    if isinstance(table, exp.Table) and table.this.args.get("temporary"):
        # Promote the temporary property from the Identifier to the Into expression
        into.set("temporary", True)

    return into
def _parse_partition(self) -> t.Optional[exp.Partition]:
    """
    Parse a `WITH ( PARTITIONS ( <n> [TO <m>] [, ...] ) )` clause.

    Returns:
        An `exp.Partition` whose expressions are single values or
        `exp.PartitionRange` nodes, or None if the clause is not present.
    """

    def parse_range():
        # One list entry: `<low>` or `<low> TO <high>`
        lower = self._parse_bitwise()
        if not self._match_text_seq("TO"):
            return lower

        upper = self._parse_bitwise()
        if not upper:
            return lower

        return self.expression(exp.PartitionRange(this=lower, expression=upper))

    if not self._match_text_seq("WITH", "(", "PARTITIONS"):
        return None

    ranges = self._parse_wrapped_csv(parse_range)
    partition = self.expression(exp.Partition(expressions=ranges))

    # Closes the parenthesis opened right after WITH
    self._match_r_paren()

    return partition
class Plan:
    """A copy of a SQL expression together with the Step DAG built from it."""

    def __init__(self, expression: exp.Expr) -> None:
        # Work on a copy: Step.from_expression replaces nodes of the tree it is given
        self.expression = expression.copy()
        self.root = Step.from_expression(self.expression)
        self._dag: dict[Step, set[Step]] = {}

    @property
    def dag(self) -> dict[Step, set[Step]]:
        """
        Maps every step reachable from the root to the set of its direct dependencies.

        The mapping is computed on first access and cached afterwards.
        """
        if not self._dag:
            dag: dict[Step, set[Step]] = {}
            nodes = {self.root}

            while nodes:
                node = nodes.pop()
                dag[node] = set()

                for dep in node.dependencies:
                    dag[node].add(dep)
                    nodes.add(dep)

            self._dag = dag

        return self._dag

    @property
    def leaves(self) -> Iterator[Step]:
        """Yields the steps of the DAG that have no dependencies."""
        return (node for node, deps in self.dag.items() if not deps)

    def __repr__(self) -> str:
        return f"Plan\n----\n{repr(self.root)}"


class Step:
    """Base node of the plan DAG; subclasses add the data of a specific operation."""

    @classmethod
    def from_expression(
        cls, expression: exp.Expr, ctes: t.Optional[t.Dict[str, Step]] = None
    ) -> Step:
        """
        Builds a DAG of Steps from a SQL expression so that it's easier to execute in an engine.
        Note: the expression's tables and subqueries must be aliased for this method to work. For
        example, given the following expression:

        SELECT
          x.a,
          SUM(x.b)
        FROM x AS x
        JOIN y AS y
          ON x.a = y.a
        GROUP BY x.a

        the following DAG is produced (the expression IDs might differ per execution):

        - Aggregate: x (4347984624)
            Context:
              Aggregations:
                - SUM(x.b)
              Group:
                - x.a
            Projections:
              - x.a
              - "x".""
            Dependencies:
            - Join: x (4347985296)
              Context:
                y:
                On: x.a = y.a
              Projections:
              Dependencies:
              - Scan: x (4347983136)
                Context:
                  Source: x AS x
                Projections:
              - Scan: y (4343416624)
                Context:
                  Source: y AS y
                Projections:

        Note that nodes of `expression` are replaced in place while the DAG is built.

        Args:
            expression: the expression to build the DAG from.
            ctes: a dictionary that maps CTEs to their corresponding Step DAG by name.

        Returns:
            A Step DAG corresponding to `expression`.
        """
        ctes = ctes or {}
        expression = expression.unnest()
        with_ = expression.args.get("with_")

        # CTEs break the mold of scope and introduce themselves to all in the context.
        if with_:
            ctes = ctes.copy()
            for cte in with_.expressions:
                step = Step.from_expression(cte.this, ctes)
                step.name = cte.alias
                ctes[step.name] = step  # type: ignore

        from_ = expression.args.get("from_")

        if isinstance(expression, exp.Select) and from_:
            step = Scan.from_expression(from_.this, ctes)
        elif isinstance(expression, exp.SetOperation):
            step = SetOperation.from_expression(expression, ctes)
        else:
            step = Scan()

        joins = expression.args.get("joins")

        if joins:
            join = Join.from_joins(joins, ctes)
            join.name = step.name
            join.source_name = step.name
            join.add_dependency(step)
            step = join

        # final selects in this chain of steps representing a select
        projections: t.List[exp.Expr] = []
        operands = {}  # intermediate computations of agg funcs eg x + 1 in SUM(x + 1)
        aggregations = {}
        next_operand_name = name_sequence("_a_")

        def extract_agg_operands(expression):
            # Registers `expression` as an aggregation if it contains aggregate functions
            # and swaps their non-column operands for generated operand columns.
            agg_funcs = tuple(expression.find_all(exp.AggFunc))
            if agg_funcs:
                aggregations[expression] = None

            for agg in agg_funcs:
                for operand in agg.unnest_operands():
                    if isinstance(operand, exp.Column):
                        continue
                    if operand not in operands:
                        operands[operand] = next_operand_name()

                    operand.replace(exp.column(operands[operand], quoted=True))

            return bool(agg_funcs)

        def set_ops_and_aggs(step):
            step.operands = tuple(alias(operand, alias_) for operand, alias_ in operands.items())
            step.aggregations = list(aggregations)

        for e in expression.expressions:
            if e.find(exp.AggFunc):
                projections.append(exp.column(e.alias_or_name, step.name, quoted=True))
                extract_agg_operands(e)
            else:
                projections.append(e)

        where = expression.args.get("where")

        if where:
            step.condition = where.this

        group = expression.args.get("group")

        if group or aggregations:
            aggregate = Aggregate()
            aggregate.source = step.name
            aggregate.name = step.name

            having = expression.args.get("having")

            if having:
                if extract_agg_operands(exp.alias_(having.this, "_h", quoted=True)):
                    aggregate.condition = exp.column("_h", step.name, quoted=True)
                else:
                    aggregate.condition = having.this

            set_ops_and_aggs(aggregate)

            # give aggregates names and replace projections with references to them
            aggregate.group = {
                f"_g{i}": e for i, e in enumerate(group.expressions if group else [])
            }

            intermediate: t.Dict[str | exp.Expr, str] = {}
            for k, v in aggregate.group.items():
                intermediate[v] = k
                if isinstance(v, exp.Column):
                    intermediate[v.name] = k

            for projection in projections:
                for node in projection.walk():
                    name = intermediate.get(node)
                    if name:
                        node.replace(exp.column(name, step.name))

            if aggregate.condition:
                for node in aggregate.condition.walk():
                    name = intermediate.get(node) or intermediate.get(node.name)
                    if name:
                        node.replace(exp.column(name, step.name))

            aggregate.add_dependency(step)
            step = aggregate
        else:
            aggregate = None

        order = expression.args.get("order")

        if order:
            if aggregate and isinstance(step, Aggregate):
                for i, ordered in enumerate(order.expressions):
                    if extract_agg_operands(exp.alias_(ordered.this, f"_o_{i}", quoted=True)):
                        ordered.this.replace(exp.column(f"_o_{i}", step.name, quoted=True))

                set_ops_and_aggs(aggregate)

            sort = Sort()
            sort.name = step.name
            sort.key = order.expressions
            sort.add_dependency(step)
            step = sort

        step.projections = projections

        if isinstance(expression, exp.Select) and expression.args.get("distinct"):
            # DISTINCT is modelled as an Aggregate grouping by every projection
            distinct = Aggregate()
            distinct.source = step.name
            distinct.name = step.name
            distinct.group = {
                e.alias_or_name: exp.column(col=e.alias_or_name, table=step.name)
                for e in projections or expression.expressions
            }
            distinct.add_dependency(step)
            step = distinct

        limit = expression.args.get("limit")

        if limit:
            step.limit = int(limit.text("expression"))

        return step

    def __init__(self) -> None:
        self.name: t.Optional[str] = None
        self.dependencies: set[Step] = set()
        self.dependents: set[Step] = set()
        self.projections: Sequence[exp.Expr] = []
        # math.inf stands for "no limit"
        self.limit: float = math.inf
        self.condition: t.Optional[exp.Expr] = None

    def add_dependency(self, dependency: Step) -> None:
        """Links `dependency` to this step in both directions."""
        self.dependencies.add(dependency)
        dependency.dependents.add(self)

    def __repr__(self) -> str:
        return self.to_s()

    def to_s(self, level: int = 0) -> str:
        """
        Renders this step and, recursively, its dependencies as indented text.

        Args:
            level: the nesting level, which controls the indentation.

        Returns:
            The multi-line textual representation.
        """
        indent = " " * level
        nested = f"{indent} "

        context = self._to_s(f"{nested} ")

        if context:
            context = [f"{nested}Context:"] + context

        lines = [
            f"{indent}- {self.id}",
            *context,
            f"{nested}Projections:",
        ]

        for expression in self.projections:
            lines.append(f"{nested} - {expression.sql()}")

        if self.condition:
            lines.append(f"{nested}Condition: {self.condition.sql()}")

        # Compare by value: an infinite limit is not necessarily the `math.inf` object itself
        if self.limit != math.inf:
            lines.append(f"{nested}Limit: {self.limit}")

        if self.dependencies:
            lines.append(f"{nested}Dependencies:")
            for dependency in self.dependencies:
                lines.append(" " + dependency.to_s(level + 1))

        return "\n".join(lines)

    @property
    def type_name(self) -> str:
        """The label used for this step in `id`; the class name by default."""
        return self.__class__.__name__

    @property
    def id(self) -> str:
        """Label made of the type name, the step name (if any) and the object's `id()`."""
        name = self.name
        name = f" {name}" if name else ""
        return f"{self.type_name}:{name} ({id(self)})"

    def _to_s(self, _indent: str) -> t.List[str]:
        """Hook for subclasses: the lines shown under "Context:" in `to_s`."""
        return []


class Scan(Step):
    """Step that reads from a source expression."""

    @classmethod
    def from_expression(
        cls, expression: exp.Expr, ctes: t.Optional[t.Dict[str, Step]] = None
    ) -> Step:
        """
        Builds the step for a source of a query.

        Args:
            expression: the source expression. A subquery is expanded into the
                Step DAG of its inner query, named after the subquery's alias.
            ctes: a dictionary that maps CTE names to their Step DAG.

        Returns:
            The DAG of the inner query for subqueries, otherwise a Scan that depends
            on the matching CTE step when the source's name is found in `ctes`.
        """
        table = expression
        alias_ = expression.alias_or_name

        if isinstance(expression, exp.Subquery):
            table = expression.this
            step = Step.from_expression(table, ctes)
            step.name = alias_
            return step

        step = Scan()
        step.name = alias_
        step.source = expression
        if ctes and table.name in ctes:
            step.add_dependency(ctes[table.name])

        return step

    def __init__(self) -> None:
        super().__init__()
        self.source: t.Optional[exp.Expr] = None

    def _to_s(self, indent: str) -> t.List[str]:
        return [f"{indent}Source: {self.source.sql() if self.source else '-static-'}"]  # type: ignore


class Join(Step):
    """Step that joins a source with one or more other sources."""

    @classmethod
    def from_joins(
        cls, joins: Iterable[exp.Join], ctes: t.Optional[dict[str, Step]] = None
    ) -> Join:
        """
        Builds a single Join step out of the given join expressions.

        Every join is stored under its alias or name and its source is added as a dependency.
        """
        step = Join()

        for join in joins:
            source_key, join_key, condition = join_condition(join)
            step.joins[join.alias_or_name] = {
                "side": join.side,  # type: ignore
                "join_key": join_key,
                "source_key": source_key,
                "condition": condition,
            }

            step.add_dependency(Scan.from_expression(join.this, ctes))

        return step

    def __init__(self) -> None:
        super().__init__()
        self.source_name: t.Optional[str] = None
        self.joins: t.Dict[str, t.Dict[str, t.List[str] | exp.Expr]] = {}

    def _to_s(self, indent: str) -> t.List[str]:
        lines = [f"{indent}Source: {self.source_name or self.name}"]
        for name, join in self.joins.items():
            # A join without a side is displayed as INNER
            lines.append(f"{indent}{name}: {join['side'] or 'INNER'}")
            join_key = ", ".join(str(key) for key in t.cast(list, join.get("join_key") or []))
            if join_key:
                lines.append(f"{indent}Key: {join_key}")
            if join.get("condition"):
                lines.append(f"{indent}On: {join['condition'].sql()}")  # type: ignore
        return lines


class Aggregate(Step):
    """Step that groups and aggregates the output of the step it depends on."""

    def __init__(self) -> None:
        super().__init__()
        self.aggregations: t.List[exp.Expr] = []
        self.operands: t.Tuple[exp.Expr, ...] = ()
        self.group: t.Dict[str, exp.Expr] = {}
        self.source: t.Optional[str] = None

    def _to_s(self, indent: str) -> t.List[str]:
        lines = [f"{indent}Aggregations:"]

        for expression in self.aggregations:
            lines.append(f"{indent} - {expression.sql()}")

        if self.group:
            lines.append(f"{indent}Group:")
            for expression in self.group.values():
                lines.append(f"{indent} - {expression.sql()}")
        if self.condition:
            lines.append(f"{indent}Having:")
            lines.append(f"{indent} - {self.condition.sql()}")
        if self.operands:
            lines.append(f"{indent}Operands:")
            for expression in self.operands:
                lines.append(f"{indent} - {expression.sql()}")

        return lines


class Sort(Step):
    """Step that orders the output of the step it depends on."""

    def __init__(self) -> None:
        super().__init__()
        # The ordering expressions; None until they are assigned
        self.key = None

    def _to_s(self, indent: str) -> t.List[str]:
        lines = [f"{indent}Key:"]

        # `key` is None on a fresh instance; render that as an empty key list
        for expression in self.key or ():
            lines.append(f"{indent} - {expression.sql()}")

        return lines


class SetOperation(Step):
    """Step that combines the outputs of two steps with a set operation."""

    def __init__(
        self,
        op: t.Type[exp.Expr],
        left: str | None,
        right: str | None,
        distinct: bool = False,
    ) -> None:
        super().__init__()
        self.op = op
        self.left = left
        self.right = right
        self.distinct = distinct

    @classmethod
    def from_expression(
        cls, expression: exp.Expr, ctes: t.Optional[t.Dict[str, Step]] = None
    ) -> SetOperation:
        """
        Builds the step for a set operation, with both of its sides as dependencies.

        Args:
            expression: the set operation; must be an `exp.SetOperation`.
            ctes: a dictionary that maps CTE names to their Step DAG.

        Returns:
            The step for `expression`, including its LIMIT if there is one.
        """
        assert isinstance(expression, exp.SetOperation)

        left = Step.from_expression(expression.left, ctes)
        # SELECT 1 UNION SELECT 2  <-- these subqueries don't have names
        left.name = left.name or "left"
        right = Step.from_expression(expression.right, ctes)
        right.name = right.name or "right"
        step = cls(
            op=expression.__class__,
            left=left.name,
            right=right.name,
            distinct=bool(expression.args.get("distinct")),
        )

        step.add_dependency(left)
        step.add_dependency(right)

        limit = expression.args.get("limit")

        if limit:
            step.limit = int(limit.text("expression"))

        return step

    def _to_s(self, indent: str) -> t.List[str]:
        lines = []
        if self.distinct:
            lines.append(f"{indent}Distinct: {self.distinct}")
        return lines

    @property
    def type_name(self) -> str:
        # Shown as the concrete operation class rather than "SetOperation"
        return self.op.__name__
================================================ FILE: sqlglot/py.typed ================================================ ================================================ FILE: sqlglot/schema.py ================================================ from __future__ import annotations import abc import typing as t from sqlglot import expressions as exp from sqlglot.dialects.dialect import Dialect from sqlglot.errors import SchemaError from sqlglot.helper import dict_depth, first from sqlglot.trie import TrieResult, in_trie, new_trie from sqlglot.helper import trait if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType from collections.abc import Sequence ColumnMapping = t.Union[dict, str, list] @trait class Schema(abc.ABC): """Abstract base class for database schemas""" @property def dialect(self) -> t.Optional[Dialect]: """ Returns None by default. Subclasses that require dialect-specific behavior should override this property. """ return None @abc.abstractmethod def add_table( self, table: exp.Table | str, column_mapping: t.Optional[ColumnMapping] = None, dialect: DialectType = None, normalize: t.Optional[bool] = None, match_depth: bool = True, ) -> None: """ Register or update a table. Some implementing classes may require column information to also be provided. The added table must have the necessary number of qualifiers in its path to match the schema's nesting level. Args: table: the `Table` expression instance or string representing the table. column_mapping: a column mapping that describes the structure of the table. dialect: the SQL dialect that will be used to parse `table` if it's a string. normalize: whether to normalize identifiers according to the dialect of interest. match_depth: whether to enforce that the table must match the schema's depth or not. 
def has_column(
    self,
    table: exp.Table | str,
    column: exp.Column | str,
    dialect: DialectType = None,
    normalize: t.Optional[bool] = None,
) -> bool:
    """
    Tells whether `table` has a column with the name of `column`.

    The check is a membership test against `self.column_names(...)`.

    Args:
        table: the source table.
        column: the target column, either its name or an object exposing `.name`.
        dialect: the SQL dialect that will be used to parse `table` if it's a string.
        normalize: whether to normalize identifiers according to the dialect of interest.

    Returns:
        True if the column appears in the schema, False otherwise.
    """
    if isinstance(column, str):
        wanted = column
    else:
        wanted = column.name

    known = self.column_names(table, dialect=dialect, normalize=normalize)
    return wanted in known
class AbstractMappingSchema:
    """Lookup of table schemas and UDF return types stored in nested mappings."""

    def __init__(
        self,
        mapping: t.Optional[t.Dict] = None,
        udf_mapping: t.Optional[t.Dict] = None,
    ) -> None:
        # Both tries are keyed by the reversed path of each flattened mapping entry
        self.mapping = mapping or {}
        table_paths = flatten_schema(self.mapping, depth=self.depth())
        self.mapping_trie = new_trie(tuple(reversed(path)) for path in table_paths)

        self.udf_mapping = udf_mapping or {}
        udf_paths = flatten_schema(self.udf_mapping, depth=self.udf_depth())
        self.udf_trie = new_trie(tuple(reversed(path)) for path in udf_paths)

        # Filled in lazily by `supported_table_args`
        self._supported_table_args: t.Tuple[str, ...] = tuple()

    @property
    def empty(self) -> bool:
        """Whether the table mapping has no entries."""
        return not self.mapping

    def depth(self) -> int:
        """Nesting depth of the table mapping, as computed by `dict_depth`."""
        return dict_depth(self.mapping)

    def udf_depth(self) -> int:
        """Nesting depth of the UDF mapping, as computed by `dict_depth`."""
        return dict_depth(self.udf_mapping)

    @property
    def supported_table_args(self) -> t.Tuple[str, ...]:
        """
        The table parts covered by the mapping: the first `depth` items of `exp.TABLE_PARTS`.

        Computed once the mapping is non-empty and cached afterwards.

        Raises:
            SchemaError: if the mapping's depth is not between 1 and 3.
        """
        if self._supported_table_args or not self.mapping:
            return self._supported_table_args

        depth = self.depth()

        if not depth:  # None
            self._supported_table_args = tuple()
        elif depth < 1 or depth > 3:
            raise SchemaError(f"Invalid mapping shape. Depth: {depth}")
        else:
            self._supported_table_args = exp.TABLE_PARTS[:depth]

        return self._supported_table_args

    def table_parts(self, table: exp.Table) -> t.List[str]:
        """Names of the parts of `table`, in reverse order of `table.parts`."""
        names = [part.name for part in table.parts]
        return names[::-1]

    def udf_parts(self, udf: exp.Anonymous) -> t.List[str]:
        """Names that identify `udf`, innermost first, cut to the depth of the UDF mapping."""
        # a.b.c(...) is represented as Dot(Dot(a, b), Anonymous(c, ...))
        parent = udf.parent
        if isinstance(parent, exp.Dot):
            names = [part.name for part in parent.flatten()]
        else:
            names = [udf.name]

        names.reverse()
        return names[0 : self.udf_depth()]

    def _find_in_trie(
        self,
        parts: t.List[str],
        trie: t.Dict,
        raise_on_missing: bool,
    ) -> t.Optional[t.List[str]]:
        """
        Resolves `parts` against `trie`.

        Returns:
            `parts` (extended in place when it is a prefix of exactly one entry),
            or None when there is no match or the prefix is ambiguous.

        Raises:
            SchemaError: if the prefix is ambiguous and `raise_on_missing` is set.
        """
        outcome, subtrie = in_trie(trie, parts)

        if outcome == TrieResult.FAILED:
            return None
        if outcome != TrieResult.PREFIX:
            return parts

        possibilities = flatten_schema(subtrie)
        if len(possibilities) == 1:
            parts.extend(possibilities[0])
            return parts

        if not raise_on_missing:
            return None

        joined_parts = ".".join(parts)
        message = ", ".join(".".join(p) for p in possibilities)
        raise SchemaError(f"Ambiguous mapping for {joined_parts}: {message}.")

    def find(
        self, table: exp.Table, raise_on_missing: bool = True, ensure_data_types: bool = False
    ) -> t.Optional[t.Any]:
        """
        Returns the schema of a given table.

        Args:
            table: the target table.
            raise_on_missing: whether to raise in case the schema is not found.
            ensure_data_types: whether to convert `str` types to their `DataType` equivalents.
                Not used by this implementation.

        Returns:
            The schema of the target table.
        """
        wanted = self.table_parts(table)[0 : len(self.supported_table_args)]
        resolved = self._find_in_trie(wanted, self.mapping_trie, raise_on_missing)

        if resolved is not None:
            return self.nested_get(resolved, raise_on_missing=raise_on_missing)

        return None

    def find_udf(self, udf: exp.Anonymous, raise_on_missing: bool = False) -> t.Optional[t.Any]:
        """
        Returns the return type of a given UDF.

        Args:
            udf: the target UDF expression.
            raise_on_missing: whether to raise if the UDF is not found.

        Returns:
            The return type of the UDF, or None if not found.
        """
        resolved = self._find_in_trie(self.udf_parts(udf), self.udf_trie, raise_on_missing)

        if resolved is not None:
            return nested_get(
                self.udf_mapping,
                *zip(resolved, reversed(resolved)),
                raise_on_missing=raise_on_missing,
            )

        return None

    def nested_get(
        self, parts: Sequence[str], d: t.Optional[dict] = None, raise_on_missing=True
    ) -> t.Optional[t.Any]:
        """
        Looks up `parts` in `d`, pairing the supported table args with the reversed parts.

        A falsy `d` (None or an empty dict) means the table mapping is searched instead.
        """
        source = d or self.mapping
        path = zip(self.supported_table_args, reversed(parts))
        return nested_get(source, *path, raise_on_missing=raise_on_missing)
""" def __init__( self, schema: t.Optional[t.Dict] = None, visible: t.Optional[t.Dict] = None, dialect: DialectType = None, normalize: bool = True, udf_mapping: t.Optional[t.Dict] = None, ) -> None: self.visible = {} if visible is None else visible self.normalize = normalize self._dialect = Dialect.get_or_raise(dialect) self._type_mapping_cache: t.Dict[str, exp.DataType] = {} self._normalized_table_cache: t.Dict[t.Tuple[exp.Table, DialectType, bool], exp.Table] = {} self._normalized_name_cache: t.Dict[t.Tuple[str, DialectType, bool, bool], str] = {} self._depth = 0 schema = {} if schema is None else schema udf_mapping = {} if udf_mapping is None else udf_mapping super().__init__( self._normalize(schema) if self.normalize else schema, self._normalize_udfs(udf_mapping) if self.normalize else udf_mapping, ) @property def dialect(self) -> Dialect: """Returns the dialect for this mapping schema.""" return self._dialect @classmethod def from_mapping_schema(cls, mapping_schema: MappingSchema) -> MappingSchema: return MappingSchema( schema=mapping_schema.mapping, visible=mapping_schema.visible, dialect=mapping_schema.dialect, normalize=mapping_schema.normalize, udf_mapping=mapping_schema.udf_mapping, ) def find( self, table: exp.Table, raise_on_missing: bool = True, ensure_data_types: bool = False ) -> t.Optional[t.Any]: schema = super().find( table, raise_on_missing=raise_on_missing, ensure_data_types=ensure_data_types ) if ensure_data_types and isinstance(schema, dict): schema = { col: self._to_data_type(dtype) if isinstance(dtype, str) else dtype for col, dtype in schema.items() } return schema def copy(self, **kwargs) -> MappingSchema: return MappingSchema( **{ # type: ignore "schema": self.mapping.copy(), "visible": self.visible.copy(), "dialect": self.dialect, "normalize": self.normalize, "udf_mapping": self.udf_mapping.copy(), **kwargs, } ) def add_table( self, table: exp.Table | str, column_mapping: t.Optional[ColumnMapping] = None, dialect: DialectType = None, 
normalize: t.Optional[bool] = None, match_depth: bool = True, ) -> None: """ Register or update a table. Updates are only performed if a new column mapping is provided. The added table must have the necessary number of qualifiers in its path to match the schema's nesting level. Args: table: the `Table` expression instance or string representing the table. column_mapping: a column mapping that describes the structure of the table. dialect: the SQL dialect that will be used to parse `table` if it's a string. normalize: whether to normalize identifiers according to the dialect of interest. match_depth: whether to enforce that the table must match the schema's depth or not. """ normalized_table = self._normalize_table(table, dialect=dialect, normalize=normalize) if match_depth and not self.empty and len(normalized_table.parts) != self.depth(): raise SchemaError( f"Table {normalized_table.sql(dialect=self.dialect)} must match the " f"schema's nesting level: {self.depth()}." ) normalized_column_mapping = { self._normalize_name(key, dialect=dialect, normalize=normalize): value for key, value in ensure_column_mapping(column_mapping).items() } schema = self.find(normalized_table, raise_on_missing=False) if schema and not normalized_column_mapping: return parts = self.table_parts(normalized_table) nested_set(self.mapping, tuple(reversed(parts)), normalized_column_mapping) new_trie([parts], self.mapping_trie) def column_names( self, table: exp.Table | str, only_visible: bool = False, dialect: DialectType = None, normalize: t.Optional[bool] = None, ) -> t.List[str]: normalized_table = self._normalize_table(table, dialect=dialect, normalize=normalize) schema = self.find(normalized_table) if schema is None: return [] if not only_visible or not self.visible: return list(schema) visible = self.nested_get(self.table_parts(normalized_table), self.visible) or [] return [col for col in schema if col in visible] def get_column_type( self, table: exp.Table | str, column: exp.Column | 
str, dialect: DialectType = None, normalize: t.Optional[bool] = None, ) -> exp.DataType: normalized_table = self._normalize_table(table, dialect=dialect, normalize=normalize) normalized_column_name = self._normalize_name( column if isinstance(column, str) else column.this, dialect=dialect, normalize=normalize ) table_schema = self.find(normalized_table, raise_on_missing=False) if table_schema: column_type = table_schema.get(normalized_column_name) if isinstance(column_type, exp.DataType): return column_type elif isinstance(column_type, str): return self._to_data_type(column_type, dialect=dialect) return exp.DataType.build("unknown") def get_udf_type( self, udf: exp.Anonymous | str, dialect: DialectType = None, normalize: t.Optional[bool] = None, ) -> exp.DataType: """ Get the return type of a UDF. Args: udf: the UDF expression or string (e.g., "db.my_func()"). dialect: the SQL dialect for parsing string arguments. normalize: whether to normalize identifiers. Returns: The return type as a DataType, or UNKNOWN if not found. 
""" parts = self._normalize_udf(udf, dialect=dialect, normalize=normalize) resolved_parts = self._find_in_trie(parts, self.udf_trie, raise_on_missing=False) if resolved_parts is None: return exp.DataType.build("unknown") udf_type = nested_get( self.udf_mapping, *zip(resolved_parts, reversed(resolved_parts)), raise_on_missing=False, ) if isinstance(udf_type, exp.DataType): return udf_type elif isinstance(udf_type, str): return self._to_data_type(udf_type, dialect=dialect) return exp.DataType.build("unknown") def has_column( self, table: exp.Table | str, column: exp.Column | str, dialect: DialectType = None, normalize: t.Optional[bool] = None, ) -> bool: normalized_table = self._normalize_table(table, dialect=dialect, normalize=normalize) normalized_column_name = self._normalize_name( column if isinstance(column, str) else column.this, dialect=dialect, normalize=normalize ) table_schema = self.find(normalized_table, raise_on_missing=False) return normalized_column_name in table_schema if table_schema else False def _normalize(self, schema: t.Dict) -> t.Dict: """ Normalizes all identifiers in the schema. Args: schema: the schema to normalize. Returns: The normalized schema mapping. """ normalized_mapping: t.Dict = {} flattened_schema = flatten_schema(schema) error_msg = "Table {} must match the schema's nesting level: {}." 
for keys in flattened_schema: columns = nested_get(schema, *zip(keys, keys)) if not isinstance(columns, dict): raise SchemaError(error_msg.format(".".join(keys[:-1]), len(flattened_schema[0]))) if not columns: raise SchemaError(f"Table {'.'.join(keys[:-1])} must have at least one column") if isinstance(first(columns.values()), dict): raise SchemaError( error_msg.format( ".".join(keys + flatten_schema(columns)[0]), len(flattened_schema[0]) ), ) normalized_keys = [self._normalize_name(key, is_table=True) for key in keys] for column_name, column_type in columns.items(): nested_set( normalized_mapping, normalized_keys + [self._normalize_name(column_name)], column_type, ) return normalized_mapping def _normalize_udfs(self, udfs: t.Dict) -> t.Dict: """ Normalizes all identifiers in the UDF mapping. Args: udfs: the UDF mapping to normalize. Returns: The normalized UDF mapping. """ normalized_mapping: t.Dict = {} for keys in flatten_schema(udfs, depth=dict_depth(udfs)): udf_type = nested_get(udfs, *zip(keys, keys)) normalized_keys = [self._normalize_name(key, is_table=True) for key in keys] nested_set(normalized_mapping, normalized_keys, udf_type) return normalized_mapping def _normalize_udf( self, udf: exp.Anonymous | str, dialect: DialectType = None, normalize: t.Optional[bool] = None, ) -> t.List[str]: """ Extract and normalize UDF parts for lookup. Args: udf: the UDF expression or qualified string (e.g., "db.my_func()"). dialect: the SQL dialect for parsing. normalize: whether to normalize identifiers. Returns: A list of normalized UDF parts (reversed for trie lookup). 
    def _normalize_table(
        self,
        table: exp.Table | str,
        dialect: DialectType = None,
        normalize: t.Optional[bool] = None,
    ) -> exp.Table:
        """
        Parse `table` into an `exp.Table` and, if requested, normalize its identifier parts.

        Args:
            table: the `Table` expression instance or string representing the table.
            dialect: the SQL dialect used for parsing and normalization; defaults to the
                schema's dialect.
            normalize: whether to normalize identifiers; defaults to the schema's setting.

        Returns:
            The resulting table. When `normalize` is falsy, `copy=False` is passed to
            `exp.maybe_parse`, so an `exp.Table` input is presumably returned as-is
            rather than copied - TODO confirm against `maybe_parse`.
        """
        dialect = dialect or self.dialect
        normalize = self.normalize if normalize is None else normalize

        # Reuse a previously stored result for exp.Table inputs.
        # NOTE(review): the lookup key uses the input `table`, whereas the entry below is
        # stored under `normalized_table` - confirm this is the intended cache key.
        if isinstance(table, exp.Table) and (
            cached := self._normalized_table_cache.get((table, dialect, normalize))
        ):
            return cached

        normalized_table = exp.maybe_parse(table, into=exp.Table, dialect=dialect, copy=normalize)

        if normalize:
            # Only Identifier parts are replaced by their normalized counterpart.
            for part in normalized_table.parts:
                if isinstance(part, exp.Identifier):
                    part.replace(
                        normalize_name(part, dialect=dialect, is_table=True, normalize=normalize)
                    )

        self._normalized_table_cache[(normalized_table, dialect, normalize)] = normalized_table
        return normalized_table
def ensure_column_mapping(mapping: t.Optional[ColumnMapping]) -> t.Dict:
    """
    Convert the supported column mapping representations into a dictionary.

    Args:
        mapping: one of the following:
            - None, which yields an empty dictionary.
            - A dictionary, which is returned as-is (not copied).
            - A string of comma-separated `name: type` entries, e.g. `"a: int, b: text"`.
            - A list of column names, which are mapped to None.

    Returns:
        A dictionary from column name to column type.

    Raises:
        IndexError: if a non-blank entry of a string mapping contains no `:`.
        ValueError: if `mapping` is of an unsupported type.
    """
    if mapping is None:
        return {}
    if isinstance(mapping, dict):
        return mapping
    if isinstance(mapping, str):
        column_types = {}
        for entry in mapping.split(","):
            entry = entry.strip()
            if not entry:
                # Blank entries come from an empty string or a stray/trailing comma.
                continue

            # Split on the first colon only, so that a type which itself contains
            # a colon (e.g. `struct<b: int>`) is not truncated.
            pieces = entry.split(":", 1)
            column_types[pieces[0].strip()] = pieces[1].strip()
        return column_types
    if isinstance(mapping, list):
        return {x.strip(): None for x in mapping}

    raise ValueError(f"Invalid mapping provided: {type(mapping)}")
def dump(expression: exp.Expr) -> t.List[t.Dict[str, t.Any]]:
    """
    Dump an Expr into a JSON serializable List.

    The tree is walked iteratively with an explicit stack. Every visited node or value
    produces exactly one payload dictionary, and a child payload refers to its parent
    through the parent's position in the returned list.

    Args:
        expression: the root expression to dump.

    Returns:
        The list of payload dictionaries, with the root's payload first.
    """
    # `i` is the position in `payloads` of the payload currently being built.
    i = 0
    payloads = []
    # Stack entries are (node, parent payload index, arg key in the parent, is list item).
    stack: t.List[t.Tuple[t.Any, t.Optional[int], t.Optional[str], bool]] = [
        (expression, None, None, False)
    ]

    while stack:
        node, index, arg_key, is_array = stack.pop()

        payload: t.Dict[str, t.Any] = {}

        # The root has neither a parent index nor an arg key.
        if index is not None:
            payload[INDEX] = index
        if arg_key is not None:
            payload[ARG_KEY] = arg_key
        if is_array:
            payload[IS_ARRAY] = is_array

        payloads.append(payload)

        # Objects with a `parent` attribute are handled as expression nodes.
        if hasattr(node, "parent"):
            klass = node.__class__.__qualname__

            # Classes defined outside of the `exp` module are stored with their module path.
            if node.__class__.__module__ != exp.__name__:
                klass = f"{node.__module__}.{klass}"

            payload[CLASS] = klass

            if node.type:
                payload[TYPE] = dump(node.type)
            if node.comments:
                payload[COMMENTS] = node.comments
            if node._meta is not None:
                payload[META] = node._meta
            if node.args:
                # Children are pushed in reverse so that they are popped, and therefore
                # dumped, in their original order.
                for k, vs in reversed(node.args.items()):
                    if type(vs) is list:
                        for v in reversed(vs):
                            stack.append((v, i, k, True))
                    elif vs is not None:
                        stack.append((vs, i, k, False))
        elif type(node) is exp.DType:
            payload[CLASS] = DATA_TYPE
            payload[VALUE] = node.value
        else:
            # Any other value is stored directly.
            payload[VALUE] = node

        i += 1

    return payloads
def _load(payload: t.Dict[str, t.Any]) -> exp.Expr | exp.DType:
    """
    Instantiate the object described by a single payload.

    The class is resolved from `exp` when the stored class name is unqualified, and
    from the module named by the qualified name's prefix otherwise. The new instance
    gets its type, comments and meta from the payload; its args are not set here.

    NOTE(review): a qualified class name makes this import the named module and call
    the named attribute, so payloads should only come from trusted sources.
    """
    qualified_name = payload[CLASS]

    if qualified_name == DATA_TYPE:
        return exp.DType(payload[VALUE])

    if "." not in qualified_name:
        module, name = exp, qualified_name
    else:
        module_path, name = qualified_name.rsplit(".", maxsplit=1)
        module = __import__(module_path, fromlist=[name])

    klass = getattr(module, name)
    node = klass()
    node._type = load(payload.get(TYPE))
    node.comments = payload.get(COMMENTS)
    node._meta = payload.get(META)
    return node
""" if not string: return None start = 0 end = 1 size = len(string) trie = trie or new_trie(mapping) current = trie chunks = [] sym = None while end <= size: chars = string[start:end] result, current = in_trie(current, chars[-1]) if result == TrieResult.FAILED: if sym: end -= 1 chars = sym sym = None else: chars = chars[0] end = start + 1 start += len(chars) chunks.append(chars) current = trie elif result == TrieResult.EXISTS: sym = chars end += 1 if result != TrieResult.FAILED and end > size: chunks.append(chars) return "".join(mapping.get(chars, chars) for chars in chunks) TIMEZONES = { tz.lower() for tz in ( "Africa/Abidjan", "Africa/Accra", "Africa/Addis_Ababa", "Africa/Algiers", "Africa/Asmara", "Africa/Asmera", "Africa/Bamako", "Africa/Bangui", "Africa/Banjul", "Africa/Bissau", "Africa/Blantyre", "Africa/Brazzaville", "Africa/Bujumbura", "Africa/Cairo", "Africa/Casablanca", "Africa/Ceuta", "Africa/Conakry", "Africa/Dakar", "Africa/Dar_es_Salaam", "Africa/Djibouti", "Africa/Douala", "Africa/El_Aaiun", "Africa/Freetown", "Africa/Gaborone", "Africa/Harare", "Africa/Johannesburg", "Africa/Juba", "Africa/Kampala", "Africa/Khartoum", "Africa/Kigali", "Africa/Kinshasa", "Africa/Lagos", "Africa/Libreville", "Africa/Lome", "Africa/Luanda", "Africa/Lubumbashi", "Africa/Lusaka", "Africa/Malabo", "Africa/Maputo", "Africa/Maseru", "Africa/Mbabane", "Africa/Mogadishu", "Africa/Monrovia", "Africa/Nairobi", "Africa/Ndjamena", "Africa/Niamey", "Africa/Nouakchott", "Africa/Ouagadougou", "Africa/Porto-Novo", "Africa/Sao_Tome", "Africa/Timbuktu", "Africa/Tripoli", "Africa/Tunis", "Africa/Windhoek", "America/Adak", "America/Anchorage", "America/Anguilla", "America/Antigua", "America/Araguaina", "America/Argentina/Buenos_Aires", "America/Argentina/Catamarca", "America/Argentina/ComodRivadavia", "America/Argentina/Cordoba", "America/Argentina/Jujuy", "America/Argentina/La_Rioja", "America/Argentina/Mendoza", "America/Argentina/Rio_Gallegos", "America/Argentina/Salta", 
"America/Argentina/San_Juan", "America/Argentina/San_Luis", "America/Argentina/Tucuman", "America/Argentina/Ushuaia", "America/Aruba", "America/Asuncion", "America/Atikokan", "America/Atka", "America/Bahia", "America/Bahia_Banderas", "America/Barbados", "America/Belem", "America/Belize", "America/Blanc-Sablon", "America/Boa_Vista", "America/Bogota", "America/Boise", "America/Buenos_Aires", "America/Cambridge_Bay", "America/Campo_Grande", "America/Cancun", "America/Caracas", "America/Catamarca", "America/Cayenne", "America/Cayman", "America/Chicago", "America/Chihuahua", "America/Ciudad_Juarez", "America/Coral_Harbour", "America/Cordoba", "America/Costa_Rica", "America/Creston", "America/Cuiaba", "America/Curacao", "America/Danmarkshavn", "America/Dawson", "America/Dawson_Creek", "America/Denver", "America/Detroit", "America/Dominica", "America/Edmonton", "America/Eirunepe", "America/El_Salvador", "America/Ensenada", "America/Fort_Nelson", "America/Fort_Wayne", "America/Fortaleza", "America/Glace_Bay", "America/Godthab", "America/Goose_Bay", "America/Grand_Turk", "America/Grenada", "America/Guadeloupe", "America/Guatemala", "America/Guayaquil", "America/Guyana", "America/Halifax", "America/Havana", "America/Hermosillo", "America/Indiana/Indianapolis", "America/Indiana/Knox", "America/Indiana/Marengo", "America/Indiana/Petersburg", "America/Indiana/Tell_City", "America/Indiana/Vevay", "America/Indiana/Vincennes", "America/Indiana/Winamac", "America/Indianapolis", "America/Inuvik", "America/Iqaluit", "America/Jamaica", "America/Jujuy", "America/Juneau", "America/Kentucky/Louisville", "America/Kentucky/Monticello", "America/Knox_IN", "America/Kralendijk", "America/La_Paz", "America/Lima", "America/Los_Angeles", "America/Louisville", "America/Lower_Princes", "America/Maceio", "America/Managua", "America/Manaus", "America/Marigot", "America/Martinique", "America/Matamoros", "America/Mazatlan", "America/Mendoza", "America/Menominee", "America/Merida", 
"America/Metlakatla", "America/Mexico_City", "America/Miquelon", "America/Moncton", "America/Monterrey", "America/Montevideo", "America/Montreal", "America/Montserrat", "America/Nassau", "America/New_York", "America/Nipigon", "America/Nome", "America/Noronha", "America/North_Dakota/Beulah", "America/North_Dakota/Center", "America/North_Dakota/New_Salem", "America/Nuuk", "America/Ojinaga", "America/Panama", "America/Pangnirtung", "America/Paramaribo", "America/Phoenix", "America/Port-au-Prince", "America/Port_of_Spain", "America/Porto_Acre", "America/Porto_Velho", "America/Puerto_Rico", "America/Punta_Arenas", "America/Rainy_River", "America/Rankin_Inlet", "America/Recife", "America/Regina", "America/Resolute", "America/Rio_Branco", "America/Rosario", "America/Santa_Isabel", "America/Santarem", "America/Santiago", "America/Santo_Domingo", "America/Sao_Paulo", "America/Scoresbysund", "America/Shiprock", "America/Sitka", "America/St_Barthelemy", "America/St_Johns", "America/St_Kitts", "America/St_Lucia", "America/St_Thomas", "America/St_Vincent", "America/Swift_Current", "America/Tegucigalpa", "America/Thule", "America/Thunder_Bay", "America/Tijuana", "America/Toronto", "America/Tortola", "America/Vancouver", "America/Virgin", "America/Whitehorse", "America/Winnipeg", "America/Yakutat", "America/Yellowknife", "Antarctica/Casey", "Antarctica/Davis", "Antarctica/DumontDUrville", "Antarctica/Macquarie", "Antarctica/Mawson", "Antarctica/McMurdo", "Antarctica/Palmer", "Antarctica/Rothera", "Antarctica/South_Pole", "Antarctica/Syowa", "Antarctica/Troll", "Antarctica/Vostok", "Arctic/Longyearbyen", "Asia/Aden", "Asia/Almaty", "Asia/Amman", "Asia/Anadyr", "Asia/Aqtau", "Asia/Aqtobe", "Asia/Ashgabat", "Asia/Ashkhabad", "Asia/Atyrau", "Asia/Baghdad", "Asia/Bahrain", "Asia/Baku", "Asia/Bangkok", "Asia/Barnaul", "Asia/Beirut", "Asia/Bishkek", "Asia/Brunei", "Asia/Calcutta", "Asia/Chita", "Asia/Choibalsan", "Asia/Chongqing", "Asia/Chungking", "Asia/Colombo", "Asia/Dacca", 
"Asia/Damascus", "Asia/Dhaka", "Asia/Dili", "Asia/Dubai", "Asia/Dushanbe", "Asia/Famagusta", "Asia/Gaza", "Asia/Harbin", "Asia/Hebron", "Asia/Ho_Chi_Minh", "Asia/Hong_Kong", "Asia/Hovd", "Asia/Irkutsk", "Asia/Istanbul", "Asia/Jakarta", "Asia/Jayapura", "Asia/Jerusalem", "Asia/Kabul", "Asia/Kamchatka", "Asia/Karachi", "Asia/Kashgar", "Asia/Kathmandu", "Asia/Katmandu", "Asia/Khandyga", "Asia/Kolkata", "Asia/Krasnoyarsk", "Asia/Kuala_Lumpur", "Asia/Kuching", "Asia/Kuwait", "Asia/Macao", "Asia/Macau", "Asia/Magadan", "Asia/Makassar", "Asia/Manila", "Asia/Muscat", "Asia/Nicosia", "Asia/Novokuznetsk", "Asia/Novosibirsk", "Asia/Omsk", "Asia/Oral", "Asia/Phnom_Penh", "Asia/Pontianak", "Asia/Pyongyang", "Asia/Qatar", "Asia/Qostanay", "Asia/Qyzylorda", "Asia/Rangoon", "Asia/Riyadh", "Asia/Saigon", "Asia/Sakhalin", "Asia/Samarkand", "Asia/Seoul", "Asia/Shanghai", "Asia/Singapore", "Asia/Srednekolymsk", "Asia/Taipei", "Asia/Tashkent", "Asia/Tbilisi", "Asia/Tehran", "Asia/Tel_Aviv", "Asia/Thimbu", "Asia/Thimphu", "Asia/Tokyo", "Asia/Tomsk", "Asia/Ujung_Pandang", "Asia/Ulaanbaatar", "Asia/Ulan_Bator", "Asia/Urumqi", "Asia/Ust-Nera", "Asia/Vientiane", "Asia/Vladivostok", "Asia/Yakutsk", "Asia/Yangon", "Asia/Yekaterinburg", "Asia/Yerevan", "Atlantic/Azores", "Atlantic/Bermuda", "Atlantic/Canary", "Atlantic/Cape_Verde", "Atlantic/Faeroe", "Atlantic/Faroe", "Atlantic/Jan_Mayen", "Atlantic/Madeira", "Atlantic/Reykjavik", "Atlantic/South_Georgia", "Atlantic/St_Helena", "Atlantic/Stanley", "Australia/ACT", "Australia/Adelaide", "Australia/Brisbane", "Australia/Broken_Hill", "Australia/Canberra", "Australia/Currie", "Australia/Darwin", "Australia/Eucla", "Australia/Hobart", "Australia/LHI", "Australia/Lindeman", "Australia/Lord_Howe", "Australia/Melbourne", "Australia/NSW", "Australia/North", "Australia/Perth", "Australia/Queensland", "Australia/South", "Australia/Sydney", "Australia/Tasmania", "Australia/Victoria", "Australia/West", "Australia/Yancowinna", "Brazil/Acre", 
"Brazil/DeNoronha", "Brazil/East", "Brazil/West", "CET", "CST6CDT", "Canada/Atlantic", "Canada/Central", "Canada/Eastern", "Canada/Mountain", "Canada/Newfoundland", "Canada/Pacific", "Canada/Saskatchewan", "Canada/Yukon", "Chile/Continental", "Chile/EasterIsland", "Cuba", "EET", "EST", "EST5EDT", "Egypt", "Eire", "Etc/GMT", "Etc/GMT+0", "Etc/GMT+1", "Etc/GMT+10", "Etc/GMT+11", "Etc/GMT+12", "Etc/GMT+2", "Etc/GMT+3", "Etc/GMT+4", "Etc/GMT+5", "Etc/GMT+6", "Etc/GMT+7", "Etc/GMT+8", "Etc/GMT+9", "Etc/GMT-0", "Etc/GMT-1", "Etc/GMT-10", "Etc/GMT-11", "Etc/GMT-12", "Etc/GMT-13", "Etc/GMT-14", "Etc/GMT-2", "Etc/GMT-3", "Etc/GMT-4", "Etc/GMT-5", "Etc/GMT-6", "Etc/GMT-7", "Etc/GMT-8", "Etc/GMT-9", "Etc/GMT0", "Etc/Greenwich", "Etc/UCT", "Etc/UTC", "Etc/Universal", "Etc/Zulu", "Europe/Amsterdam", "Europe/Andorra", "Europe/Astrakhan", "Europe/Athens", "Europe/Belfast", "Europe/Belgrade", "Europe/Berlin", "Europe/Bratislava", "Europe/Brussels", "Europe/Bucharest", "Europe/Budapest", "Europe/Busingen", "Europe/Chisinau", "Europe/Copenhagen", "Europe/Dublin", "Europe/Gibraltar", "Europe/Guernsey", "Europe/Helsinki", "Europe/Isle_of_Man", "Europe/Istanbul", "Europe/Jersey", "Europe/Kaliningrad", "Europe/Kiev", "Europe/Kirov", "Europe/Kyiv", "Europe/Lisbon", "Europe/Ljubljana", "Europe/London", "Europe/Luxembourg", "Europe/Madrid", "Europe/Malta", "Europe/Mariehamn", "Europe/Minsk", "Europe/Monaco", "Europe/Moscow", "Europe/Nicosia", "Europe/Oslo", "Europe/Paris", "Europe/Podgorica", "Europe/Prague", "Europe/Riga", "Europe/Rome", "Europe/Samara", "Europe/San_Marino", "Europe/Sarajevo", "Europe/Saratov", "Europe/Simferopol", "Europe/Skopje", "Europe/Sofia", "Europe/Stockholm", "Europe/Tallinn", "Europe/Tirane", "Europe/Tiraspol", "Europe/Ulyanovsk", "Europe/Uzhgorod", "Europe/Vaduz", "Europe/Vatican", "Europe/Vienna", "Europe/Vilnius", "Europe/Volgograd", "Europe/Warsaw", "Europe/Zagreb", "Europe/Zaporozhye", "Europe/Zurich", "GB", "GB-Eire", "GMT", "GMT+0", "GMT-0", "GMT0", 
def subsecond_precision(timestamp_literal: str) -> int:
    """
    Given an ISO-8601 timestamp literal, e.g. '2023-01-01 12:13:14.123456+00:00',
    figure out its subsecond precision so we can construct types like DATETIME(6).

    Note that in practice, this is either 3 or 6 digits (3 = millisecond precision,
    6 = microsecond precision):
        - 6 is the maximum because strftime's '%f' formats to microseconds and almost
          every database supports microsecond precision in timestamps.
        - Except Presto/Trino, which in most cases only supports millisecond precision but
          will still honour '%f' and format to microseconds (replacing the remaining
          3 digits with 0's).
        - Python prior to 3.11 only supports 0, 3 or 6 digits in a timestamp literal. Any
          other amounts will throw a 'ValueError: Invalid isoformat string:' error.

    Args:
        timestamp_literal: the timestamp literal to inspect.

    Returns:
        0 if the literal has no fractional seconds or cannot be parsed, 3 if the
        fraction fits in milliseconds, 6 otherwise.
    """
    try:
        parsed = datetime.datetime.fromisoformat(timestamp_literal)
    except ValueError:
        return 0

    # Zero-pad to six digits before stripping trailing zeros. Without the padding the
    # leading zeros of the fraction are lost, e.g. '.000123' has microsecond == 123 and
    # would be mistaken for a 3-digit (millisecond) fraction.
    subsecond_digit_count = len(f"{parsed.microsecond:06d}".rstrip("0"))

    if subsecond_digit_count > 3:
        return 6
    if subsecond_digit_count > 0:
        return 3
    return 0
= auto() SESSION = auto() SESSION_PARAMETER = auto() SESSION_USER = auto() DAMP = auto() AMP_LT = auto() AMP_GT = auto() ADJACENT = auto() XOR = auto() DSTAR = auto() QMARK_AMP = auto() QMARK_PIPE = auto() HASH_DASH = auto() EXCLAMATION = auto() URI_START = auto() BLOCK_START = auto() BLOCK_END = auto() SPACE = auto() BREAK = auto() STRING = auto() NUMBER = auto() IDENTIFIER = auto() DATABASE = auto() COLUMN = auto() COLUMN_DEF = auto() SCHEMA = auto() TABLE = auto() WAREHOUSE = auto() STAGE = auto() STREAM = auto() STREAMLIT = auto() VAR = auto() BIT_STRING = auto() HEX_STRING = auto() BYTE_STRING = auto() NATIONAL_STRING = auto() RAW_STRING = auto() HEREDOC_STRING = auto() UNICODE_STRING = auto() # types BIT = auto() BOOLEAN = auto() TINYINT = auto() UTINYINT = auto() SMALLINT = auto() USMALLINT = auto() MEDIUMINT = auto() UMEDIUMINT = auto() INT = auto() UINT = auto() BIGINT = auto() UBIGINT = auto() BIGNUM = auto() INT128 = auto() UINT128 = auto() INT256 = auto() UINT256 = auto() FLOAT = auto() DOUBLE = auto() UDOUBLE = auto() DECIMAL = auto() DECIMAL32 = auto() DECIMAL64 = auto() DECIMAL128 = auto() DECIMAL256 = auto() DECFLOAT = auto() UDECIMAL = auto() BIGDECIMAL = auto() CHAR = auto() NCHAR = auto() VARCHAR = auto() NVARCHAR = auto() BPCHAR = auto() TEXT = auto() MEDIUMTEXT = auto() LONGTEXT = auto() BLOB = auto() MEDIUMBLOB = auto() LONGBLOB = auto() TINYBLOB = auto() TINYTEXT = auto() NAME = auto() BINARY = auto() VARBINARY = auto() JSON = auto() JSONB = auto() TIME = auto() TIMETZ = auto() TIME_NS = auto() TIMESTAMP = auto() TIMESTAMPTZ = auto() TIMESTAMPLTZ = auto() TIMESTAMPNTZ = auto() TIMESTAMP_S = auto() TIMESTAMP_MS = auto() TIMESTAMP_NS = auto() DATETIME = auto() DATETIME2 = auto() DATETIME64 = auto() SMALLDATETIME = auto() DATE = auto() DATE32 = auto() INT4RANGE = auto() INT4MULTIRANGE = auto() INT8RANGE = auto() INT8MULTIRANGE = auto() NUMRANGE = auto() NUMMULTIRANGE = auto() TSRANGE = auto() TSMULTIRANGE = auto() TSTZRANGE = auto() 
TSTZMULTIRANGE = auto() DATERANGE = auto() DATEMULTIRANGE = auto() UUID = auto() GEOGRAPHY = auto() GEOGRAPHYPOINT = auto() NULLABLE = auto() GEOMETRY = auto() POINT = auto() RING = auto() LINESTRING = auto() LOCALTIME = auto() LOCALTIMESTAMP = auto() SYSTIMESTAMP = auto() MULTILINESTRING = auto() POLYGON = auto() MULTIPOLYGON = auto() HLLSKETCH = auto() HSTORE = auto() SUPER = auto() SERIAL = auto() SMALLSERIAL = auto() BIGSERIAL = auto() XML = auto() YEAR = auto() USERDEFINED = auto() MONEY = auto() SMALLMONEY = auto() ROWVERSION = auto() IMAGE = auto() VARIANT = auto() OBJECT = auto() INET = auto() IPADDRESS = auto() IPPREFIX = auto() IPV4 = auto() IPV6 = auto() ENUM = auto() ENUM8 = auto() ENUM16 = auto() FIXEDSTRING = auto() LOWCARDINALITY = auto() NESTED = auto() AGGREGATEFUNCTION = auto() SIMPLEAGGREGATEFUNCTION = auto() TDIGEST = auto() UNKNOWN = auto() VECTOR = auto() DYNAMIC = auto() VOID = auto() # keywords ALIAS = auto() ALTER = auto() ALL = auto() ANTI = auto() ANY = auto() APPLY = auto() ARRAY = auto() ASC = auto() ASOF = auto() ATTACH = auto() AUTO_INCREMENT = auto() BEGIN = auto() BETWEEN = auto() BULK_COLLECT_INTO = auto() CACHE = auto() CASE = auto() CHARACTER_SET = auto() CLUSTER_BY = auto() COLLATE = auto() COMMAND = auto() COMMENT = auto() COMMIT = auto() CONNECT_BY = auto() CONSTRAINT = auto() COPY = auto() CREATE = auto() CROSS = auto() CUBE = auto() CURRENT_DATE = auto() CURRENT_DATETIME = auto() CURRENT_SCHEMA = auto() CURRENT_TIME = auto() CURRENT_TIMESTAMP = auto() CURRENT_USER = auto() CURRENT_ROLE = auto() CURRENT_CATALOG = auto() DECLARE = auto() DEFAULT = auto() DELETE = auto() DESC = auto() DESCRIBE = auto() DETACH = auto() DICTIONARY = auto() DISTINCT = auto() DISTRIBUTE_BY = auto() DIV = auto() DROP = auto() ELSE = auto() END = auto() ESCAPE = auto() EXCEPT = auto() EXECUTE = auto() EXISTS = auto() FALSE = auto() FETCH = auto() FILE = auto() FILE_FORMAT = auto() FILTER = auto() FINAL = auto() FIRST = auto() FOR = auto() FORCE = 
auto() FOREIGN_KEY = auto() FORMAT = auto() FROM = auto() FULL = auto() FUNCTION = auto() GET = auto() GLOB = auto() GLOBAL = auto() GRANT = auto() GROUP_BY = auto() GROUPING_SETS = auto() HAVING = auto() HINT = auto() IGNORE = auto() ILIKE = auto() IN = auto() INDEX = auto() INDEXED_BY = auto() INNER = auto() INSERT = auto() INSTALL = auto() INTEGRATION = auto() INTERSECT = auto() INTERVAL = auto() INTO = auto() INTRODUCER = auto() IRLIKE = auto() IS = auto() ISNULL = auto() JOIN = auto() JOIN_MARKER = auto() KEEP = auto() KEY = auto() KILL = auto() LANGUAGE = auto() LATERAL = auto() LEFT = auto() LIKE = auto() LIMIT = auto() LIST = auto() LOAD = auto() LOCK = auto() MAP = auto() MATCH = auto() MATCH_CONDITION = auto() MATCH_RECOGNIZE = auto() MEMBER_OF = auto() MERGE = auto() MOD = auto() MODEL = auto() NATURAL = auto() NEXT = auto() NOTHING = auto() NOTNULL = auto() NULL = auto() OBJECT_IDENTIFIER = auto() OFFSET = auto() ON = auto() ONLY = auto() OPERATOR = auto() ORDER_BY = auto() ORDER_SIBLINGS_BY = auto() ORDERED = auto() ORDINALITY = auto() OUT = auto() INOUT = auto() OUTER = auto() OVER = auto() OVERLAPS = auto() OVERWRITE = auto() PACKAGE = auto() PARTITION = auto() PARTITION_BY = auto() PERCENT = auto() PIVOT = auto() PLACEHOLDER = auto() POLICY = auto() POOL = auto() POSITIONAL = auto() PRAGMA = auto() PREWHERE = auto() PRIMARY_KEY = auto() PROCEDURE = auto() PROPERTIES = auto() PSEUDO_TYPE = auto() PUT = auto() QUALIFY = auto() QUOTE = auto() QDCOLON = auto() RANGE = auto() RECURSIVE = auto() REFRESH = auto() RENAME = auto() REPLACE = auto() RETURNING = auto() REVOKE = auto() REFERENCES = auto() RIGHT = auto() RLIKE = auto() ROLE = auto() ROLLBACK = auto() ROLLUP = auto() ROW = auto() ROWS = auto() RULE = auto() SELECT = auto() SEMI = auto() SEPARATOR = auto() SEQUENCE = auto() SERDE_PROPERTIES = auto() SET = auto() SETTINGS = auto() SHOW = auto() SIMILAR_TO = auto() SOME = auto() SORT_BY = auto() SOUNDS_LIKE = auto() SQL_SECURITY = auto() START_WITH = 
class Token:
    """A lexical token: its type, its text, where it was found, and attached comments.

    Instances are falsy only when their type is `TokenType.SENTINEL`, which lets a
    sentinel token stand in for "no token" in boolean checks.
    """

    # mypyc doesn't expose slots
    _attrs: t.ClassVar[t.Tuple[str, ...]] = (
        "token_type",
        "text",
        "line",
        "col",
        "start",
        "end",
        "comments",
    )
    __slots__ = _attrs

    @classmethod
    def number(cls, number: int) -> Token:
        """Returns a NUMBER token with `number` as its text."""
        return cls(TokenType.NUMBER, str(number))

    @classmethod
    def string(cls, string: str) -> Token:
        """Returns a STRING token with `string` as its text."""
        return cls(TokenType.STRING, string)

    @classmethod
    def identifier(cls, identifier: str) -> Token:
        """Returns an IDENTIFIER token with `identifier` as its text."""
        return cls(TokenType.IDENTIFIER, identifier)

    @classmethod
    def var(cls, var: str) -> Token:
        """Returns a VAR token with `var` as its text."""
        return cls(TokenType.VAR, var)

    def __init__(
        self,
        token_type: TokenType,
        text: str,
        line: int = 1,
        col: int = 1,
        start: int = 0,
        end: int = 0,
        comments: t.Optional[t.List[str]] = None,
    ) -> None:
        """Create a token.

        Args:
            token_type: The type of the token.
            text: The text of the token.
            line: The line on which the token was found.
            col: The column associated with the token.
            start: The start index of the token in the tokenized string.
            end: The end index of the token in the tokenized string.
            comments: Comments to attach to the token. A fresh list is created when
                omitted, so tokens never share a default list.
        """
        self.token_type = token_type
        self.text = text
        self.line = line
        self.col = col
        self.start = start
        self.end = end
        self.comments = [] if comments is None else comments

    def __bool__(self) -> bool:
        """Return False only for the SENTINEL token type."""
        return self.token_type != TokenType.SENTINEL

    def __repr__(self) -> str:
        """Return a `<Token attr: value, ...>` string listing every slot in `_attrs`."""
        attributes = ", ".join(
            # The type is rendered by name so the output doesn't depend on the enum's __str__
            f"{k}: TokenType.{self.token_type.name}"
            if k == "token_type"
            else f"{k}: {getattr(self, k)}"
            for k in self._attrs
        )
        return f"<Token {attributes}>"
self.identifiers = identifiers self.comments = comments self.string_escapes = string_escapes self.byte_string_escapes = byte_string_escapes self.identifier_escapes = identifier_escapes self.escape_follow_chars = escape_follow_chars self.commands = commands self.command_prefix_tokens = command_prefix_tokens self.nested_comments = nested_comments self.hint_start = hint_start self.tokens_preceding_hint = tokens_preceding_hint self.bit_strings = bit_strings self.hex_strings = hex_strings self.numeric_literals = numeric_literals self.var_single_tokens = var_single_tokens self.string_escapes_allowed_in_raw_strings = string_escapes_allowed_in_raw_strings self.heredoc_tag_is_identifier = heredoc_tag_is_identifier self.heredoc_string_alternative = heredoc_string_alternative self.keyword_trie = keyword_trie self.numbers_can_be_underscore_separated = numbers_can_be_underscore_separated self.identifiers_can_start_with_digit = identifiers_can_start_with_digit self.unescaped_sequences = unescaped_sequences self.sql = "" self.size = 0 self.tokens: t.List[Token] = [] self._start = 0 self._current = 0 self._line = 1 self._col = 0 self._comments: t.List[str] = [] self._char = "" self._end = False self._peek = "" self._prev_token_line = -1 def reset(self) -> None: self.sql = "" self.size = 0 self.tokens = [] self._start = 0 self._current = 0 self._line = 1 self._col = 0 self._comments = [] self._char = "" self._end = False self._peek = "" self._prev_token_line = -1 def tokenize(self, sql: str) -> t.List[Token]: """Returns a list of tokens corresponding to the SQL string `sql`.""" self.reset() self.sql = sql self.size = len(sql) try: self._scan() except Exception as e: start = max(self._current - 50, 0) end = min(self._current + 50, self.size - 1) context = self.sql[start:end] raise TokenError(f"Error tokenizing '{context}'") from e return self.tokens def _scan(self, check_semicolon: bool = False) -> None: identifiers = self.identifiers digit_chars = _DIGIT_CHARS while self.size and 
not self._end: current = self._current # Skip spaces here rather than iteratively calling advance() for performance reasons while current < self.size: char = self.sql[current] if char == " " or char == "\t": current += 1 else: break offset = current - self._current if current > self._current else 1 self._start = current self._advance(offset) if not self._char.isspace(): if self._char in digit_chars: self._scan_number() elif self._char in identifiers: self._scan_identifier(identifiers[self._char]) else: self._scan_keywords() if check_semicolon and self._peek == ";": break if self.tokens and self._comments: self.tokens[-1].comments.extend(self._comments) def _chars(self, size: int) -> str: if size == 1: return self._char start = self._current - 1 end = start + size return self.sql[start:end] if end <= self.size else "" def _advance(self, i: int = 1, alnum: bool = False) -> None: char = self._char if char == "\n" or char == "\r": # Ensures we don't count an extra line if we get a \r\n line break sequence if not (char == "\r" and self._peek == "\n"): self._col = i self._line += 1 else: self._col += i self._current += i sql = self.sql size = self.size self._end = self._current >= size self._char = sql[self._current - 1] self._peek = "" if self._end else sql[self._current] if alnum and self._char.isalnum(): # Cache to local variables instead of attributes for better performance _col = self._col _current = self._current _end = self._end _peek = self._peek while _peek.isalnum(): _col += 1 _current += 1 _end = _current >= size _peek = "" if _end else sql[_current] self._col = _col self._current = _current self._end = _end self._peek = _peek self._char = sql[_current - 1] @property def _text(self) -> str: return self.sql[self._start : self._current] def _add(self, token_type: TokenType, text: t.Optional[str] = None) -> None: self._prev_token_line = self._line if self._comments and token_type == TokenType.SEMICOLON and self.tokens: 
    def _scan_comment(self, comment_start: str) -> bool:
        """Consume a comment that opens with `comment_start`, if it is a comment delimiter.

        The comment text (without its delimiters) is appended to `self._comments`, or
        attached directly to the previous token when the comment started on that
        token's line.

        Args:
            comment_start: The candidate opening delimiter.

        Returns:
            False if `comment_start` is not a key of `self.comments` (nothing is
            consumed in that case), True once the comment has been scanned.
        """
        if comment_start not in self.comments:
            return False

        comment_start_line = self._line
        comment_start_size = len(comment_start)
        # A falsy end delimiter means the comment has no closing delimiter and runs to
        # the end of the line
        comment_end = self.comments[comment_start]

        if comment_end:
            # Skip the comment's start delimiter
            self._advance(comment_start_size)

            # Nesting depth; the loop ends when the outermost comment is closed
            comment_count = 1
            comment_end_size = len(comment_end)
            nested_comments = self.nested_comments

            while not self._end:
                if self._chars(comment_end_size) == comment_end:
                    comment_count -= 1
                    if not comment_count:
                        break

                self._advance(alnum=True)

                # Nested comments are allowed by some dialects, e.g. databricks, duckdb, postgres
                # NOTE(review): the slice compared against `comment_start` is
                # `comment_end_size` characters long, so this can only match when both
                # delimiters have the same length; confirm that is intended
                if (
                    nested_comments
                    and not self._end
                    and self._chars(comment_end_size) == comment_start
                ):
                    self._advance(comment_start_size)
                    comment_count += 1

            # At this point only the first character of the closing delimiter has been
            # consumed, so the slice drops the start delimiter and that one character;
            # the rest of the closing delimiter is consumed by the advance below
            self._comments.append(self._text[comment_start_size : -comment_end_size + 1])
            self._advance(comment_end_size - 1)
        else:
            # Single-line comment: consume up to, but not including, the line break
            _peek = self._peek
            while not self._end and _peek != "\n" and _peek != "\r":
                self._advance(alnum=True)
                _peek = self._peek
            self._comments.append(self._text[comment_start_size:])

        # A hint-style comment right after one of the configured tokens also yields a
        # HINT token, in addition to being recorded as a comment
        if (
            comment_start == self.hint_start
            and self.tokens
            and self.tokens[-1].token_type in self.tokens_preceding_hint
        ):
            self._add(TokenType.HINT)

        # Leading comment is attached to the succeeding token, whilst trailing comment to the preceding.
        # Multiple consecutive comments are preserved by appending them to the current comments list.
        if comment_start_line == self._prev_token_line:
            self.tokens[-1].comments.extend(self._comments)
            self._comments = []
            # Updated so a comment on the line where this one ended is also treated as trailing
            self._prev_token_line = self._line

        return True
and not decimal: if self.tokens and self.tokens[-1].token_type == TokenType.PARAMETER: break decimal = True self._advance() elif self._peek in ("-", "+") and scientific == 1: # Only consume +/- if followed by a digit if self._current + 1 < self.size and self.sql[self._current + 1] in _DIGIT_CHARS: scientific += 1 self._advance() else: break elif _CHAR_UPPER.get(self._peek, self._peek) == "E" and not scientific: scientific += 1 self._advance() elif self._peek == "_" and numbers_can_be_underscore_separated: is_underscore_separated = True self._advance() elif self._peek.isidentifier(): number_text = self._text while self._peek and not self._peek.isspace() and self._peek not in single_tokens: numeric_literal += self._peek self._advance() numeric_type = keywords.get(numeric_literals.get(numeric_literal.upper(), "")) if numeric_type: break elif identifiers_can_start_with_digit: return self._add(TokenType.VAR) self._advance(-len(numeric_literal)) break else: break number_text = number_text or self.sql[self._start : self._current] # Normalize inputs such as 100_000 to 100000 if is_underscore_separated: number_text = number_text.replace("_", "") self._add(TokenType.NUMBER, number_text) # Normalize inputs such as 123L to 123::BIGINT so that they're parsed as casts if numeric_type: self._add(TokenType.DCOLON, "::") self._add(numeric_type, numeric_literal) def _scan_bits(self) -> None: self._advance() value = self._extract_value() try: # If `value` can't be converted to a binary, fallback to tokenizing it as an identifier int(value, 2) self._add(TokenType.BIT_STRING, value[2:]) # Drop the 0b except ValueError: self._add(TokenType.IDENTIFIER) def _scan_hex(self) -> None: self._advance() value = self._extract_value() try: # If `value` can't be converted to a hex, fallback to tokenizing it as an identifier int(value, 16) self._add(TokenType.HEX_STRING, value[2:]) # Drop the 0x except ValueError: self._add(TokenType.IDENTIFIER) def _extract_value(self) -> str: single_tokens = 
self.single_tokens while True: char = self._peek.strip() if char and char not in single_tokens: self._advance(alnum=True) else: break return self._text def _scan_string(self, start: str) -> bool: base = None token_type = TokenType.STRING if start in self.quotes: end = self.quotes[start] elif start in self.format_strings: end, token_type = self.format_strings[start] if token_type == TokenType.HEX_STRING: base = 16 elif token_type == TokenType.BIT_STRING: base = 2 elif token_type == TokenType.HEREDOC_STRING: self._advance() if self._char == end: tag = "" else: tag = self._extract_string( end, raw_string=True, raise_unmatched=not self.heredoc_tag_is_identifier, ) if ( tag and self.heredoc_tag_is_identifier and (self._end or tag.isdigit() or any(c.isspace() for c in tag)) ): if not self._end: self._advance(-1) self._advance(-len(tag)) self._add(self.heredoc_string_alternative) return True end = f"{start}{tag}{end}" else: return False self._advance(len(start)) text = self._extract_string( end, escapes=( self.byte_string_escapes if token_type == TokenType.BYTE_STRING else self.string_escapes ), raw_string=token_type == TokenType.RAW_STRING, ) if base and text: try: int(text, base) except Exception: raise TokenError( f"Numeric string contains invalid characters from {self._line}:{self._start}" ) self._add(token_type, text) return True def _scan_identifier(self, identifier_end: str) -> None: self._advance() text = self._extract_string( identifier_end, escapes=self.identifier_escapes | {identifier_end} ) self._add(TokenType.IDENTIFIER, text) def _scan_var(self) -> None: var_single_tokens = self.var_single_tokens single_tokens = self.single_tokens while True: peek = self._peek if not peek or peek.isspace(): break if peek not in var_single_tokens and peek in single_tokens: break self._advance(alnum=True) self._add( TokenType.VAR if self.tokens and self.tokens[-1].token_type == TokenType.PARAMETER else self.keywords.get(self.sql[self._start : self._current].upper(), 
    def _extract_string(
        self,
        delimiter: str,
        escapes: t.Optional[t.Set[str]] = None,
        raw_string: bool = False,
        raise_unmatched: bool = True,
    ) -> str:
        """Read from the current character up to the closing `delimiter`.

        On a successful return the cursor sits on the last character of the closing
        delimiter, and the returned text excludes the delimiter.

        Args:
            delimiter: The closing delimiter to look for.
            escapes: The characters that act as escapes. Defaults to `self.string_escapes`.
            raw_string: Whether this is a raw string. Unescaped-sequence substitution is
                skipped for raw strings, and escape characters are only honoured in
                them if `self.string_escapes_allowed_in_raw_strings` is set.
            raise_unmatched: Whether to raise when the input ends before the delimiter
                is found. If False, the text read so far plus the current character is
                returned instead.

        Returns:
            The string body with escapes processed.

        Raises:
            TokenError: If the closing delimiter is missing (see `raise_unmatched`), or
                the input ends right after an escape sequence.
        """
        text = ""
        delim_size = len(delimiter)
        escapes = self.string_escapes if escapes is None else escapes
        # Attributes are cached in locals because they're read repeatedly in the loop below
        unescaped_sequences = self.unescaped_sequences
        escape_follow_chars = self.escape_follow_chars
        string_escapes_allowed_in_raw_strings = self.string_escapes_allowed_in_raw_strings
        quotes = self.quotes
        sql = self.sql

        # use str.find() when the string is simple... no \ or other escapes
        if delim_size == 1:
            # Index of the current character, i.e. the first character of the string body
            pos = self._current - 1
            end = sql.find(delimiter, pos)
            if (
                # the closing delimiter was found
                end != -1
                # there's no doubled delimiter (e.g. '' escape), or the delimiter isn't an escape char
                and (end + 1 >= self.size or sql[end + 1] != delimiter or delimiter not in escapes)
                # no backslash in the string that would need escape processing
                and (not (unescaped_sequences or "\\" in escapes) or sql.find("\\", pos, end) == -1)
            ):
                # Update the position state by hand, since _advance() is bypassed here
                newlines = sql.count("\n", pos, end)
                if newlines:
                    self._line += newlines
                    # Column is the distance from the last newline inside the string
                    self._col = end - sql.rfind("\n", pos, end)
                else:
                    self._col += end - pos

                # Leave the cursor on the closing delimiter
                self._current = end + 1
                self._end = self._current >= self.size
                self._char = sql[end]
                self._peek = "" if self._end else sql[self._current]
                return sql[pos:end]

        # Slow path: walk the string, handling escapes as they come up
        while True:
            # Replace a two-character sequence that has a configured unescaped form
            if not raw_string and unescaped_sequences and self._peek and self._char in escapes:
                unescaped_sequence = unescaped_sequences.get(self._char + self._peek)
                if unescaped_sequence:
                    self._advance(2)
                    text += unescaped_sequence
                    continue

            # Only relevant when escape_follow_chars is non-empty: a backslash followed
            # by a character that is not one of them
            is_valid_custom_escape = (
                escape_follow_chars
                and self._char == "\\"
                and self._peek not in escape_follow_chars
            )

            if (
                (string_escapes_allowed_in_raw_strings or not raw_string)
                and self._char in escapes
                and (self._peek == delimiter or self._peek in escapes or is_valid_custom_escape)
                # A quote character only escapes when it is doubled
                and (self._char not in quotes or self._char == self._peek)
            ):
                if self._peek == delimiter:
                    # Escaped delimiter: keep the delimiter, drop the escape character
                    text += self._peek
                elif is_valid_custom_escape and self._char != self._peek:
                    # Custom escape: keep only the escaped character
                    text += self._peek
                else:
                    # Otherwise both characters are kept verbatim
                    text += self._char + self._peek

                if self._current + 1 < self.size:
                    self._advance(2)
                else:
                    # The input ends right after the escape, so the string can't be closed
                    raise TokenError(f"Missing {delimiter} from {self._line}:{self._current}")
            else:
                if self._chars(delim_size) == delimiter:
                    # Consume the rest of a multi-character delimiter
                    if delim_size > 1:
                        self._advance(delim_size - 1)
                    break

                if self._end:
                    if not raise_unmatched:
                        return text + self._char

                    raise TokenError(f"Missing {delimiter} from {self._line}:{self._start}")

                # _advance(alnum=True) may consume a run of characters at once, so the
                # text is taken from a slice rather than from a single character
                current = self._current - 1
                self._advance(alnum=True)
                text += sql[current : self._current - 1]

        return text
" "Please use sqlglotc instead for faster parsing: pip install sqlglot[c]", ) except ImportError: pass if t.TYPE_CHECKING: from sqlglot.dialects.dialect import DialectType def _convert_quotes(arr: t.List[str | t.Tuple[str, str]]) -> t.Dict[str, str]: return dict((item, item) if isinstance(item, str) else (item[0], item[1]) for item in arr) def _quotes_to_format( token_type: TokenType, arr: t.List[str | t.Tuple[str, str]] ) -> t.Dict[str, t.Tuple[str, TokenType]]: return {k: (v, token_type) for k, v in _convert_quotes(arr).items()} class _TokenizerBase: QUOTES: t.ClassVar[t.List[t.Tuple[str, str] | str]] IDENTIFIERS: t.ClassVar[t.List[str | t.Tuple[str, str]]] BIT_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] BYTE_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] HEX_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] RAW_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] HEREDOC_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] UNICODE_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] STRING_ESCAPES: t.ClassVar[t.List[str]] BYTE_STRING_ESCAPES: t.ClassVar[t.List[str]] ESCAPE_FOLLOW_CHARS: t.ClassVar[t.List[str]] IDENTIFIER_ESCAPES: t.ClassVar[t.List[str]] HINT_START: t.ClassVar[str] KEYWORDS: t.ClassVar[t.Dict[str, TokenType]] SINGLE_TOKENS: t.ClassVar[t.Dict[str, TokenType]] NUMERIC_LITERALS: t.ClassVar[t.Dict[str, str]] VAR_SINGLE_TOKENS: t.ClassVar[t.Set[str]] COMMANDS: t.ClassVar[t.Set[TokenType]] COMMAND_PREFIX_TOKENS: t.ClassVar[t.Set[TokenType]] HEREDOC_TAG_IS_IDENTIFIER: t.ClassVar[bool] STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS: t.ClassVar[bool] NESTED_COMMENTS: t.ClassVar[bool] TOKENS_PRECEDING_HINT: t.ClassVar[t.Set[TokenType]] HEREDOC_STRING_ALTERNATIVE: t.ClassVar[TokenType] COMMENTS: t.ClassVar[t.List[str | t.Tuple[str, str]]] _QUOTES: t.ClassVar[t.Dict[str, str]] _IDENTIFIERS: t.ClassVar[t.Dict[str, str]] _FORMAT_STRINGS: t.ClassVar[t.Dict[str, t.Tuple[str, TokenType]]] _STRING_ESCAPES: t.ClassVar[t.Set[str]] _BYTE_STRING_ESCAPES: 
t.ClassVar[t.Set[str]] _ESCAPE_FOLLOW_CHARS: t.ClassVar[t.Set[str]] _IDENTIFIER_ESCAPES: t.ClassVar[t.Set[str]] _COMMENTS: t.ClassVar[t.Dict[str, t.Optional[str]]] _KEYWORD_TRIE: t.ClassVar[t.Dict] @classmethod def __init_subclass__(cls, **kwargs: t.Any) -> None: super().__init_subclass__(**kwargs) cls._QUOTES = _convert_quotes(cls.QUOTES) cls._IDENTIFIERS = _convert_quotes(cls.IDENTIFIERS) cls._FORMAT_STRINGS = { **{ p + s: (e, TokenType.NATIONAL_STRING) for s, e in cls._QUOTES.items() for p in ("n", "N") }, **_quotes_to_format(TokenType.BIT_STRING, cls.BIT_STRINGS), **_quotes_to_format(TokenType.BYTE_STRING, cls.BYTE_STRINGS), **_quotes_to_format(TokenType.HEX_STRING, cls.HEX_STRINGS), **_quotes_to_format(TokenType.RAW_STRING, cls.RAW_STRINGS), **_quotes_to_format(TokenType.HEREDOC_STRING, cls.HEREDOC_STRINGS), **_quotes_to_format(TokenType.UNICODE_STRING, cls.UNICODE_STRINGS), } if "BYTE_STRING_ESCAPES" not in cls.__dict__: cls.BYTE_STRING_ESCAPES = cls.STRING_ESCAPES.copy() cls._STRING_ESCAPES = set(cls.STRING_ESCAPES) cls._BYTE_STRING_ESCAPES = set(cls.BYTE_STRING_ESCAPES) cls._ESCAPE_FOLLOW_CHARS = set(cls.ESCAPE_FOLLOW_CHARS) cls._IDENTIFIER_ESCAPES = set(cls.IDENTIFIER_ESCAPES) cls._COMMENTS = { **{c: None for c in cls.COMMENTS if isinstance(c, str)}, **{c[0]: c[1] for c in cls.COMMENTS if not isinstance(c, str)}, "{#": "#}", # Ensure Jinja comments are tokenized correctly in all dialects } if cls.HINT_START in cls.KEYWORDS: cls._COMMENTS[cls.HINT_START] = "*/" cls._KEYWORD_TRIE = new_trie( key.upper() for key in ( *cls.KEYWORDS, *cls._COMMENTS, *cls._QUOTES, *cls._FORMAT_STRINGS, ) if " " in key or any(single in key for single in cls.SINGLE_TOKENS) ) class Tokenizer(_TokenizerBase): SINGLE_TOKENS = { "(": TokenType.L_PAREN, ")": TokenType.R_PAREN, "[": TokenType.L_BRACKET, "]": TokenType.R_BRACKET, "{": TokenType.L_BRACE, "}": TokenType.R_BRACE, "&": TokenType.AMP, "^": TokenType.CARET, ":": TokenType.COLON, ",": TokenType.COMMA, ".": TokenType.DOT, "-": 
TokenType.DASH, "=": TokenType.EQ, ">": TokenType.GT, "<": TokenType.LT, "%": TokenType.MOD, "!": TokenType.NOT, "|": TokenType.PIPE, "+": TokenType.PLUS, ";": TokenType.SEMICOLON, "/": TokenType.SLASH, "\\": TokenType.BACKSLASH, "*": TokenType.STAR, "~": TokenType.TILDE, "?": TokenType.PLACEHOLDER, "@": TokenType.PARAMETER, "#": TokenType.HASH, # Used for breaking a var like x'y' but nothing else the token type doesn't matter "'": TokenType.UNKNOWN, "`": TokenType.UNKNOWN, '"': TokenType.UNKNOWN, } BIT_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] BYTE_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] HEX_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] RAW_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] HEREDOC_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] UNICODE_STRINGS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = [] IDENTIFIERS: t.ClassVar[t.List[str | t.Tuple[str, str]]] = ['"'] QUOTES: t.ClassVar[t.List[t.Tuple[str, str] | str]] = ["'"] STRING_ESCAPES = ["'"] BYTE_STRING_ESCAPES: t.ClassVar[t.List[str]] = [] VAR_SINGLE_TOKENS: t.ClassVar[t.Set[str]] = set() ESCAPE_FOLLOW_CHARS: t.ClassVar[t.List[str]] = [] # The strings in this list can always be used as escapes, regardless of the surrounding # identifier delimiters. By default, the closing delimiter is assumed to also act as an # identifier escape, e.g. 
if we use double-quotes, then they also act as escapes: "x""" IDENTIFIER_ESCAPES: t.ClassVar[t.List[str]] = [] # Whether the heredoc tags follow the same lexical rules as unquoted identifiers HEREDOC_TAG_IS_IDENTIFIER = False # Token that we'll generate as a fallback if the heredoc prefix doesn't correspond to a heredoc HEREDOC_STRING_ALTERNATIVE = TokenType.VAR # Whether string escape characters function as such when placed within raw strings STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS = True NESTED_COMMENTS = True HINT_START = "/*+" TOKENS_PRECEDING_HINT = {TokenType.SELECT, TokenType.INSERT, TokenType.UPDATE, TokenType.DELETE} # Autofilled _COMMENTS: t.ClassVar[t.Dict[str, t.Optional[str]]] = {} _FORMAT_STRINGS: t.ClassVar[t.Dict[str, t.Tuple[str, TokenType]]] = {} _IDENTIFIERS: t.ClassVar[t.Dict[str, str]] = {} _IDENTIFIER_ESCAPES: t.ClassVar[t.Set[str]] = set() _QUOTES: t.ClassVar[t.Dict[str, str]] = {} _STRING_ESCAPES: t.ClassVar[t.Set[str]] = set() _BYTE_STRING_ESCAPES: t.ClassVar[t.Set[str]] = set() _KEYWORD_TRIE: t.ClassVar[t.Dict] = {} _ESCAPE_FOLLOW_CHARS: t.ClassVar[t.Set[str]] = set() KEYWORDS: t.ClassVar[t.Dict[str, TokenType]] = { **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")}, **{f"{prefix}%}}": TokenType.BLOCK_END for prefix in ("", "+", "-")}, **{f"{{{{{postfix}": TokenType.BLOCK_START for postfix in ("+", "-")}, **{f"{prefix}}}}}": TokenType.BLOCK_END for prefix in ("+", "-")}, HINT_START: TokenType.HINT, "&<": TokenType.AMP_LT, "&>": TokenType.AMP_GT, "==": TokenType.EQ, "::": TokenType.DCOLON, "?::": TokenType.QDCOLON, "||": TokenType.DPIPE, "|>": TokenType.PIPE_GT, ">=": TokenType.GTE, "<=": TokenType.LTE, "<>": TokenType.NEQ, "!=": TokenType.NEQ, ":=": TokenType.COLON_EQ, "<=>": TokenType.NULLSAFE_EQ, "->": TokenType.ARROW, "->>": TokenType.DARROW, "=>": TokenType.FARROW, "#>": TokenType.HASH_ARROW, "#>>": TokenType.DHASH_ARROW, "<->": TokenType.LR_ARROW, "&&": TokenType.DAMP, "??": TokenType.DQMARK, "~~~": TokenType.GLOB, 
"~~": TokenType.LIKE, "~~*": TokenType.ILIKE, "~*": TokenType.IRLIKE, "-|-": TokenType.ADJACENT, "ALL": TokenType.ALL, "AND": TokenType.AND, "ANTI": TokenType.ANTI, "ANY": TokenType.ANY, "ASC": TokenType.ASC, "AS": TokenType.ALIAS, "ASOF": TokenType.ASOF, "AUTOINCREMENT": TokenType.AUTO_INCREMENT, "AUTO_INCREMENT": TokenType.AUTO_INCREMENT, "BEGIN": TokenType.BEGIN, "BETWEEN": TokenType.BETWEEN, "CACHE": TokenType.CACHE, "UNCACHE": TokenType.UNCACHE, "CASE": TokenType.CASE, "CHARACTER SET": TokenType.CHARACTER_SET, "CLUSTER BY": TokenType.CLUSTER_BY, "COLLATE": TokenType.COLLATE, "COLUMN": TokenType.COLUMN, "COMMIT": TokenType.COMMIT, "CONNECT BY": TokenType.CONNECT_BY, "CONSTRAINT": TokenType.CONSTRAINT, "COPY": TokenType.COPY, "CREATE": TokenType.CREATE, "CROSS": TokenType.CROSS, "CUBE": TokenType.CUBE, "CURRENT_DATE": TokenType.CURRENT_DATE, "CURRENT_SCHEMA": TokenType.CURRENT_SCHEMA, "CURRENT_TIME": TokenType.CURRENT_TIME, "CURRENT_TIMESTAMP": TokenType.CURRENT_TIMESTAMP, "CURRENT_USER": TokenType.CURRENT_USER, "CURRENT_CATALOG": TokenType.CURRENT_CATALOG, "DATABASE": TokenType.DATABASE, "DEFAULT": TokenType.DEFAULT, "DELETE": TokenType.DELETE, "DESC": TokenType.DESC, "DESCRIBE": TokenType.DESCRIBE, "DISTINCT": TokenType.DISTINCT, "DISTRIBUTE BY": TokenType.DISTRIBUTE_BY, "DIV": TokenType.DIV, "DROP": TokenType.DROP, "ELSE": TokenType.ELSE, "END": TokenType.END, "ENUM": TokenType.ENUM, "ESCAPE": TokenType.ESCAPE, "EXCEPT": TokenType.EXCEPT, "EXECUTE": TokenType.EXECUTE, "EXISTS": TokenType.EXISTS, "FALSE": TokenType.FALSE, "FETCH": TokenType.FETCH, "FILTER": TokenType.FILTER, "FILE": TokenType.FILE, "FIRST": TokenType.FIRST, "FULL": TokenType.FULL, "FUNCTION": TokenType.FUNCTION, "FOR": TokenType.FOR, "FOREIGN KEY": TokenType.FOREIGN_KEY, "FORMAT": TokenType.FORMAT, "FROM": TokenType.FROM, "GEOGRAPHY": TokenType.GEOGRAPHY, "GEOMETRY": TokenType.GEOMETRY, "GLOB": TokenType.GLOB, "GROUP BY": TokenType.GROUP_BY, "GROUPING SETS": TokenType.GROUPING_SETS, "HAVING": 
TokenType.HAVING, "ILIKE": TokenType.ILIKE, "IN": TokenType.IN, "INDEX": TokenType.INDEX, "INET": TokenType.INET, "INNER": TokenType.INNER, "INSERT": TokenType.INSERT, "INTERVAL": TokenType.INTERVAL, "INTERSECT": TokenType.INTERSECT, "INTO": TokenType.INTO, "IS": TokenType.IS, "ISNULL": TokenType.ISNULL, "JOIN": TokenType.JOIN, "KEEP": TokenType.KEEP, "KILL": TokenType.KILL, "LATERAL": TokenType.LATERAL, "LEFT": TokenType.LEFT, "LIKE": TokenType.LIKE, "LIMIT": TokenType.LIMIT, "LOAD": TokenType.LOAD, "LOCALTIME": TokenType.LOCALTIME, "LOCALTIMESTAMP": TokenType.LOCALTIMESTAMP, "LOCK": TokenType.LOCK, "MERGE": TokenType.MERGE, "NAMESPACE": TokenType.NAMESPACE, "NATURAL": TokenType.NATURAL, "NEXT": TokenType.NEXT, "NOT": TokenType.NOT, "NOTNULL": TokenType.NOTNULL, "NULL": TokenType.NULL, "OBJECT": TokenType.OBJECT, "OFFSET": TokenType.OFFSET, "ON": TokenType.ON, "OR": TokenType.OR, "XOR": TokenType.XOR, "ORDER BY": TokenType.ORDER_BY, "ORDINALITY": TokenType.ORDINALITY, "OUT": TokenType.OUT, "OUTER": TokenType.OUTER, "OVER": TokenType.OVER, "OVERLAPS": TokenType.OVERLAPS, "OVERWRITE": TokenType.OVERWRITE, "PARTITION": TokenType.PARTITION, "PARTITION BY": TokenType.PARTITION_BY, "PARTITIONED BY": TokenType.PARTITION_BY, "PARTITIONED_BY": TokenType.PARTITION_BY, "PERCENT": TokenType.PERCENT, "PIVOT": TokenType.PIVOT, "PRAGMA": TokenType.PRAGMA, "PRIMARY KEY": TokenType.PRIMARY_KEY, "PROCEDURE": TokenType.PROCEDURE, "OPERATOR": TokenType.OPERATOR, "QUALIFY": TokenType.QUALIFY, "RANGE": TokenType.RANGE, "RECURSIVE": TokenType.RECURSIVE, "REGEXP": TokenType.RLIKE, "RENAME": TokenType.RENAME, "REPLACE": TokenType.REPLACE, "RETURNING": TokenType.RETURNING, "REFERENCES": TokenType.REFERENCES, "RIGHT": TokenType.RIGHT, "RLIKE": TokenType.RLIKE, "ROLLBACK": TokenType.ROLLBACK, "ROLLUP": TokenType.ROLLUP, "ROW": TokenType.ROW, "ROWS": TokenType.ROWS, "SCHEMA": TokenType.SCHEMA, "SELECT": TokenType.SELECT, "SEMI": TokenType.SEMI, "SESSION": TokenType.SESSION, "SESSION_USER": 
TokenType.SESSION_USER, "SET": TokenType.SET, "SETTINGS": TokenType.SETTINGS, "SHOW": TokenType.SHOW, "SIMILAR TO": TokenType.SIMILAR_TO, "SOME": TokenType.SOME, "SORT BY": TokenType.SORT_BY, "SQL SECURITY": TokenType.SQL_SECURITY, "START WITH": TokenType.START_WITH, "STRAIGHT_JOIN": TokenType.STRAIGHT_JOIN, "TABLE": TokenType.TABLE, "TABLESAMPLE": TokenType.TABLE_SAMPLE, "TEMP": TokenType.TEMPORARY, "TEMPORARY": TokenType.TEMPORARY, "THEN": TokenType.THEN, "TRUE": TokenType.TRUE, "TRUNCATE": TokenType.TRUNCATE, "TRIGGER": TokenType.TRIGGER, "UNION": TokenType.UNION, "UNKNOWN": TokenType.UNKNOWN, "UNNEST": TokenType.UNNEST, "UNPIVOT": TokenType.UNPIVOT, "UPDATE": TokenType.UPDATE, "USE": TokenType.USE, "USING": TokenType.USING, "UUID": TokenType.UUID, "VALUES": TokenType.VALUES, "VIEW": TokenType.VIEW, "VOLATILE": TokenType.VOLATILE, "WHEN": TokenType.WHEN, "WHERE": TokenType.WHERE, "WINDOW": TokenType.WINDOW, "WITH": TokenType.WITH, "APPLY": TokenType.APPLY, "ARRAY": TokenType.ARRAY, "BIT": TokenType.BIT, "BOOL": TokenType.BOOLEAN, "BOOLEAN": TokenType.BOOLEAN, "BYTE": TokenType.TINYINT, "MEDIUMINT": TokenType.MEDIUMINT, "INT1": TokenType.TINYINT, "TINYINT": TokenType.TINYINT, "INT16": TokenType.SMALLINT, "SHORT": TokenType.SMALLINT, "SMALLINT": TokenType.SMALLINT, "HUGEINT": TokenType.INT128, "UHUGEINT": TokenType.UINT128, "INT2": TokenType.SMALLINT, "INTEGER": TokenType.INT, "INT": TokenType.INT, "INT4": TokenType.INT, "INT32": TokenType.INT, "INT64": TokenType.BIGINT, "INT128": TokenType.INT128, "INT256": TokenType.INT256, "LONG": TokenType.BIGINT, "BIGINT": TokenType.BIGINT, "INT8": TokenType.TINYINT, "UINT": TokenType.UINT, "UINT128": TokenType.UINT128, "UINT256": TokenType.UINT256, "DEC": TokenType.DECIMAL, "DECIMAL": TokenType.DECIMAL, "DECIMAL32": TokenType.DECIMAL32, "DECIMAL64": TokenType.DECIMAL64, "DECIMAL128": TokenType.DECIMAL128, "DECIMAL256": TokenType.DECIMAL256, "DECFLOAT": TokenType.DECFLOAT, "BIGDECIMAL": TokenType.BIGDECIMAL, "BIGNUMERIC": 
TokenType.BIGDECIMAL, "BIGNUM": TokenType.BIGNUM, "LIST": TokenType.LIST, "MAP": TokenType.MAP, "NULLABLE": TokenType.NULLABLE, "NUMBER": TokenType.DECIMAL, "NUMERIC": TokenType.DECIMAL, "FIXED": TokenType.DECIMAL, "REAL": TokenType.FLOAT, "FLOAT": TokenType.FLOAT, "FLOAT4": TokenType.FLOAT, "FLOAT8": TokenType.DOUBLE, "DOUBLE": TokenType.DOUBLE, "DOUBLE PRECISION": TokenType.DOUBLE, "JSON": TokenType.JSON, "JSONB": TokenType.JSONB, "CHAR": TokenType.CHAR, "CHARACTER": TokenType.CHAR, "CHAR VARYING": TokenType.VARCHAR, "CHARACTER VARYING": TokenType.VARCHAR, "NCHAR": TokenType.NCHAR, "VARCHAR": TokenType.VARCHAR, "VARCHAR2": TokenType.VARCHAR, "NVARCHAR": TokenType.NVARCHAR, "NVARCHAR2": TokenType.NVARCHAR, "BPCHAR": TokenType.BPCHAR, "STR": TokenType.TEXT, "STRING": TokenType.TEXT, "TEXT": TokenType.TEXT, "LONGTEXT": TokenType.LONGTEXT, "MEDIUMTEXT": TokenType.MEDIUMTEXT, "TINYTEXT": TokenType.TINYTEXT, "CLOB": TokenType.TEXT, "LONGVARCHAR": TokenType.TEXT, "BINARY": TokenType.BINARY, "BLOB": TokenType.VARBINARY, "LONGBLOB": TokenType.LONGBLOB, "MEDIUMBLOB": TokenType.MEDIUMBLOB, "TINYBLOB": TokenType.TINYBLOB, "BYTEA": TokenType.VARBINARY, "VARBINARY": TokenType.VARBINARY, "TIME": TokenType.TIME, "TIMETZ": TokenType.TIMETZ, "TIME_NS": TokenType.TIME_NS, "TIMESTAMP": TokenType.TIMESTAMP, "TIMESTAMPTZ": TokenType.TIMESTAMPTZ, "TIMESTAMPLTZ": TokenType.TIMESTAMPLTZ, "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, "TIMESTAMPNTZ": TokenType.TIMESTAMPNTZ, "TIMESTAMP_NTZ": TokenType.TIMESTAMPNTZ, "DATE": TokenType.DATE, "DATETIME": TokenType.DATETIME, "INT4RANGE": TokenType.INT4RANGE, "INT4MULTIRANGE": TokenType.INT4MULTIRANGE, "INT8RANGE": TokenType.INT8RANGE, "INT8MULTIRANGE": TokenType.INT8MULTIRANGE, "NUMRANGE": TokenType.NUMRANGE, "NUMMULTIRANGE": TokenType.NUMMULTIRANGE, "TSRANGE": TokenType.TSRANGE, "TSMULTIRANGE": TokenType.TSMULTIRANGE, "TSTZRANGE": TokenType.TSTZRANGE, "TSTZMULTIRANGE": TokenType.TSTZMULTIRANGE, "DATERANGE": TokenType.DATERANGE, "DATEMULTIRANGE": 
TokenType.DATEMULTIRANGE, "UNIQUE": TokenType.UNIQUE, "VECTOR": TokenType.VECTOR, "STRUCT": TokenType.STRUCT, "SEQUENCE": TokenType.SEQUENCE, "VARIANT": TokenType.VARIANT, "ALTER": TokenType.ALTER, "ANALYZE": TokenType.ANALYZE, "CALL": TokenType.COMMAND, "COMMENT": TokenType.COMMENT, "EXPLAIN": TokenType.COMMAND, "GRANT": TokenType.GRANT, "REVOKE": TokenType.REVOKE, "OPTIMIZE": TokenType.COMMAND, "PREPARE": TokenType.COMMAND, "VACUUM": TokenType.COMMAND, "USER-DEFINED": TokenType.USERDEFINED, "FOR VERSION": TokenType.VERSION_SNAPSHOT, "FOR TIMESTAMP": TokenType.TIMESTAMP_SNAPSHOT, } COMMANDS = { TokenType.COMMAND, TokenType.EXECUTE, TokenType.FETCH, TokenType.SHOW, TokenType.RENAME, } COMMAND_PREFIX_TOKENS = {TokenType.SEMICOLON, TokenType.BEGIN} # Handle numeric literals like in hive (3L = BIGINT) NUMERIC_LITERALS: t.ClassVar[t.Dict[str, str]] = {} COMMENTS = ["--", ("/*", "*/")] __slots__ = ( "dialect", "_core", ) def __init__( self, dialect: DialectType = None, **opts: t.Any, ) -> None: from sqlglot.dialects import Dialect from sqlglot.tokenizer_core import TokenizerCore as _TokenizerCore self.dialect = Dialect.get_or_raise(dialect) self._core = _TokenizerCore( single_tokens=self.SINGLE_TOKENS, keywords=self.KEYWORDS, quotes=self._QUOTES, format_strings=self._FORMAT_STRINGS, identifiers=self._IDENTIFIERS, comments=self._COMMENTS, string_escapes=self._STRING_ESCAPES, byte_string_escapes=self._BYTE_STRING_ESCAPES, identifier_escapes=self._IDENTIFIER_ESCAPES, escape_follow_chars=self._ESCAPE_FOLLOW_CHARS, commands=self.COMMANDS, command_prefix_tokens=self.COMMAND_PREFIX_TOKENS, nested_comments=self.NESTED_COMMENTS, hint_start=self.HINT_START, tokens_preceding_hint=self.TOKENS_PRECEDING_HINT, bit_strings=list(self.BIT_STRINGS), hex_strings=list(self.HEX_STRINGS), numeric_literals=self.NUMERIC_LITERALS, var_single_tokens=self.VAR_SINGLE_TOKENS, string_escapes_allowed_in_raw_strings=self.STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS, 
def preprocess(
    transforms: t.List[t.Callable[[exp.Expr], exp.Expr]],
    generator: t.Optional[t.Callable[[Generator, exp.Expr], str]] = None,
) -> t.Callable[[Generator, exp.Expr], str]:
    """
    Creates a new transform by chaining a sequence of transformations and converts the resulting
    expression to SQL, using either the "_sql" method corresponding to the resulting expression,
    or the appropriate `Generator.TRANSFORMS` function (when applicable -- see below).

    Args:
        transforms: sequence of transform functions. These will be called in order. An empty
            sequence is accepted, in which case the expression is generated as-is.
        generator: optional function used to produce the SQL of the transformed expression.
            When provided, it takes precedence over the "_sql" method / `TRANSFORMS` lookup.

    Returns:
        Function that can be used as a generator transform.

    Raises:
        ValueError: raised by the returned function when no SQL handler can be found for the
            transformed expression, or when the only available handler is the `TRANSFORMS`
            entry of an expression whose type did not change (which would recurse forever).
    """

    def _to_sql(self, expression: exp.Expr) -> str:
        # Remember the input type so that we can tell whether the transforms changed it
        expression_type = type(expression)

        try:
            for transform in transforms:
                expression = transform(expression)
        except UnsupportedError as unsupported_error:
            # Report the problem through the generator and carry on with the expression
            # produced by the transforms that did succeed
            self.unsupported(str(unsupported_error))

        if generator:
            return generator(self, expression)

        _sql_handler = getattr(self, expression.key + "_sql", None)
        if _sql_handler:
            return _sql_handler(expression)

        transforms_handler = self.TRANSFORMS.get(type(expression))
        if transforms_handler:
            if expression_type is type(expression):
                if isinstance(expression, exp.Func):
                    return self.function_fallback_sql(expression)

                # Ensures we don't enter an infinite loop. This can happen when the original expression
                # has the same type as the final expression and there's no _sql method available for it,
                # because then it'd re-enter _to_sql.
                raise ValueError(
                    f"Expr type {expression.__class__.__name__} requires a _sql method in order to be transformed."
                )

            return transforms_handler(self, expression)

        raise ValueError(f"Unsupported expression type {expression.__class__.__name__}.")

    return _to_sql
def eliminate_distinct_on(expression: exp.Expr) -> exp.Expr:
    """
    Rewrite SELECT DISTINCT ON (...) as a filtered subquery over a ROW_NUMBER() window.

    The DISTINCT ON columns become the window's PARTITION BY. The query's ORDER BY (if any) is
    moved into the window, otherwise the window is ordered by the DISTINCT ON columns. The outer
    query then keeps only the rows whose row number equals 1.

    Args:
        expression: the expression that will be transformed.

    Returns:
        The transformed expression, or the input itself when it isn't a SELECT DISTINCT ON.
    """
    distinct = expression.args.get("distinct") if isinstance(expression, exp.Select) else None
    if not distinct or not isinstance(distinct.args.get("on"), exp.Tuple):
        return expression

    rn_alias = find_new_name(expression.named_selects, "_row_number")

    # Detach DISTINCT from the query and reuse its ON columns as the partitioning key
    partition_cols = distinct.pop().args["on"].expressions
    row_number = exp.Window(this=exp.RowNumber(), partition_by=partition_cols)

    existing_order = expression.args.get("order")
    if existing_order:
        window_order = existing_order.pop()
    else:
        window_order = exp.Order(expressions=[col.copy() for col in partition_cols])
    row_number.set("order", window_order)

    expression.select(exp.alias_(row_number, rn_alias), copy=False)

    # Every projection gets an alias, so the outer query can reference it by name.
    # The last projection is the row number that was just added, hence the [:-1].
    outer_projections = []
    reserved = {rn_alias}
    for projection in expression.selects[:-1]:
        if projection.is_star:
            outer_projections = [exp.Star()]
            break

        if not isinstance(projection, exp.Alias):
            fresh = find_new_name(reserved, projection.output_name or "_col")
            is_quoted = (
                projection.this.args.get("quoted") if isinstance(projection, exp.Column) else None
            )
            projection = projection.replace(exp.alias_(projection, fresh, quoted=is_quoted))

        reserved.add(projection.output_name)
        outer_projections.append(projection.args["alias"])

    outer = exp.select(*outer_projections, copy=False)
    outer = outer.from_(expression.subquery("_t", copy=False), copy=False)
    return outer.where(exp.column(rn_alias).eq(1), copy=False)
Also, if a column is referenced in the QUALIFY clause but is not selected, we need to include it too, otherwise we won't be able to refer to it in the outer query's WHERE clause. Finally, if a newly aliased projection is referenced in the QUALIFY clause, it will be replaced by the corresponding expression to avoid creating invalid column references. """ if isinstance(expression, exp.Select) and expression.args.get("qualify"): taken = set(expression.named_selects) for select in expression.selects: if not select.alias_or_name: alias = find_new_name(taken, "_c") select.replace(exp.alias_(select, alias)) taken.add(alias) def _select_alias_or_name(select: exp.Expr) -> str | exp.Column: alias_or_name = select.alias_or_name identifier = select.args.get("alias") or select.this if isinstance(identifier, exp.Identifier): return exp.column(alias_or_name, quoted=identifier.args.get("quoted")) return alias_or_name outer_selects = exp.select(*list(map(_select_alias_or_name, expression.selects))) qualify_filters = expression.args["qualify"].pop().this expression_by_alias = { select.alias: select.this for select in expression.selects if isinstance(select, exp.Alias) } select_candidates = exp.Window if expression.is_star else (exp.Window, exp.Column) for select_candidate in list(qualify_filters.find_all(select_candidates)): if isinstance(select_candidate, exp.Window): if expression_by_alias: for column in select_candidate.find_all(exp.Column): expr = expression_by_alias.get(column.name) if expr: column.replace(expr) alias = find_new_name(expression.named_selects, "_w") expression.select(exp.alias_(select_candidate, alias), copy=False) column = exp.column(alias) if isinstance(select_candidate.parent, exp.Qualify): qualify_filters = column else: select_candidate.replace(column) elif select_candidate.name not in expression.named_selects: expression.select(select_candidate.copy(), copy=False) return outer_selects.from_(expression.subquery(alias="_t", copy=False), copy=False).where( 
def unqualify_unnest(expression: exp.Expr) -> exp.Expr:
    """
    Strip UNNEST table aliases from column references in a SELECT.

    Such qualifiers are added by the optimizer's qualify_columns step. Only aliases of UNNESTs
    that sit directly under a FROM or JOIN in the current scope are considered.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if not isinstance(expression, exp.Select):
        return expression

    aliases = set()
    for node in find_all_in_scope(expression, exp.Unnest):
        if isinstance(node.parent, (exp.From, exp.Join)):
            aliases.add(node.alias)

    if not aliases:
        return expression

    for column in expression.find_all(exp.Column):
        qualifier = column.parts[0]

        # When the leftmost part is the column name itself, there's no qualifier to drop
        if qualifier.arg_key == "this":
            continue

        if qualifier.this in aliases:
            qualifier.pop()

    return expression
exp.Unnest): unnest = from_.this alias = unnest.args.get("alias") exprs = unnest.expressions has_multi_expr = len(exprs) > 1 this, *_ = _unnest_zip_exprs(unnest, exprs, has_multi_expr) columns = alias.columns if alias else [] offset = unnest.args.get("offset") if offset: columns.insert( 0, offset if isinstance(offset, exp.Identifier) else exp.to_identifier("pos") ) unnest.replace( exp.Table( this=_udtf_type(unnest, has_multi_expr)(this=this), alias=exp.TableAlias(this=alias.this, columns=columns) if alias else None, ) ) joins = expression.args.get("joins") or [] for join in list(joins): join_expr = join.this is_lateral = isinstance(join_expr, exp.Lateral) unnest = join_expr.this if is_lateral else join_expr if isinstance(unnest, exp.Unnest): if is_lateral: alias = join_expr.args.get("alias") else: alias = unnest.args.get("alias") exprs = unnest.expressions # The number of unnest.expressions will be changed by _unnest_zip_exprs, we need to record it here has_multi_expr = len(exprs) > 1 exprs = _unnest_zip_exprs(unnest, exprs, has_multi_expr) joins.remove(join) alias_cols = alias.columns if alias else [] # # Handle UNNEST to LATERAL VIEW EXPLODE: Exception is raised when there are 0 or > 2 aliases # Spark LATERAL VIEW EXPLODE requires single alias for array/struct and two for Map type column unlike unnest in trino/presto which can take an arbitrary amount. 
def explode_projection_to_unnest(
    index_offset: int = 0,
) -> t.Callable[[exp.Expr], exp.Expr]:
    """
    Convert explode/posexplode projections into unnests.

    Args:
        index_offset: first value of the generated position series. It is also used to adjust
            the array sizes the series positions are compared against (see the `!= 1` checks).

    Returns:
        A transform function. It rewrites `exp.Select` expressions in place and returns them;
        any other expression is returned untouched.
    """

    def _explode_projection_to_unnest(expression: exp.Expr) -> exp.Expr:
        if isinstance(expression, exp.Select):
            from sqlglot.optimizer.scope import Scope

            # Names that are already in use, so that generated aliases don't clash with them
            taken_select_names = set(expression.named_selects)
            taken_source_names = {name for name, _ in Scope(expression).references}

            def new_name(names: t.Set[str], name: str) -> str:
                # Picks a name based on `name` via find_new_name and reserves it in `names`
                name = find_new_name(names, name)
                names.add(name)
                return name

            # One ARRAY_SIZE expression per exploded array, used to compute the series' end
            arrays: t.List[exp.Condition] = []

            # UNNEST(GENERATE_SERIES(start=index_offset)) aliased as a table with a single
            # position column. Its "end" argument is only filled in at the bottom, once all
            # exploded arrays are known.
            series_alias = new_name(taken_select_names, "pos")
            series = exp.alias_(
                exp.Unnest(
                    expressions=[exp.GenerateSeries(start=exp.Literal.number(index_offset))]
                ),
                new_name(taken_source_names, "_u"),
                table=[series_alias],
            )

            # we use list here because expression.selects is mutated inside the loop
            for select in list(expression.selects):
                explode = select.find(exp.Explode)

                if explode:
                    pos_alias: t.Any = ""
                    explode_alias: t.Any = ""

                    # Make sure the projection is an Alias node (`alias`), remembering any
                    # user-provided names for the exploded value and its position
                    if isinstance(select, exp.Alias):
                        explode_alias = select.args["alias"]
                        alias = select
                    elif isinstance(select, exp.Aliases):
                        pos_alias = select.aliases[0]
                        explode_alias = select.aliases[1]
                        alias = select.replace(exp.alias_(select.this, "", copy=False))
                    else:
                        alias = select.replace(exp.alias_(select, ""))
                        # NOTE(review): the explode is looked up again here, presumably because
                        # alias_ copies `select` by default, so the new tree holds a different
                        # Explode node -- confirm against exp.alias_
                        explode = alias.find(exp.Explode)
                        assert explode

                    is_posexplode = isinstance(explode, exp.Posexplode)
                    explode_arg = explode.this

                    if isinstance(explode, exp.ExplodeOuter):
                        # NOTE(review): indexing an expression presumably builds a subscript
                        # node for element 0 -- confirm against Expr.__getitem__
                        bracket = explode_arg[0]
                        bracket.set("safe", True)
                        bracket.set("offset", True)

                        # IF(ARRAY_SIZE(COALESCE(arg, ARRAY())) = 0, ARRAY(arg[0]), arg)
                        explode_arg = exp.func(
                            "IF",
                            exp.func(
                                "ARRAY_SIZE", exp.func("COALESCE", explode_arg, exp.Array())
                            ).eq(0),
                            exp.array(bracket, copy=False),
                            explode_arg,
                        )

                    # This ensures that we won't use [POS]EXPLODE's argument as a new selection
                    if isinstance(explode_arg, exp.Column):
                        taken_select_names.add(explode_arg.output_name)

                    unnest_source_alias = new_name(taken_source_names, "_u")

                    # Generate whichever of the value / position names the user didn't provide
                    if not explode_alias:
                        explode_alias = new_name(taken_select_names, "col")

                        if is_posexplode:
                            pos_alias = new_name(taken_select_names, "pos")

                    if not pos_alias:
                        pos_alias = new_name(taken_select_names, "pos")

                    alias.set("alias", exp.to_identifier(explode_alias))

                    # The explode call becomes IF(<series pos> = <unnest pos>, <unnest value>)
                    series_table_alias = series.args["alias"].this
                    column = exp.If(
                        this=exp.column(series_alias, table=series_table_alias).eq(
                            exp.column(pos_alias, table=unnest_source_alias)
                        ),
                        true=exp.column(explode_alias, table=unnest_source_alias),
                    )

                    explode.replace(column)

                    if is_posexplode:
                        # POSEXPLODE also yields the position: insert it as an extra projection
                        # right after the exploded value
                        expressions = expression.expressions
                        expressions.insert(
                            expressions.index(alias) + 1,
                            exp.If(
                                this=exp.column(series_alias, table=series_table_alias).eq(
                                    exp.column(pos_alias, table=unnest_source_alias)
                                ),
                                true=exp.column(pos_alias, table=unnest_source_alias),
                            ).as_(pos_alias),
                        )
                        expression.set("expressions", expressions)

                    # The series source is attached only once, when the first explode is found
                    if not arrays:
                        if expression.args.get("from_"):
                            expression.join(series, copy=False, join_type="CROSS")
                        else:
                            expression.from_(series, copy=False)

                    size: exp.Condition = exp.ArraySize(this=explode_arg.copy())
                    arrays.append(size)

                    # trino doesn't support left join unnest with on conditions
                    # if it did, this would be much simpler
                    expression.join(
                        exp.alias_(
                            exp.Unnest(
                                expressions=[explode_arg.copy()],
                                offset=exp.to_identifier(pos_alias),
                            ),
                            unnest_source_alias,
                            table=[explode_alias],
                        ),
                        join_type="CROSS",
                        copy=False,
                    )

                    # Note that `arrays` keeps the unadjusted size; only the filter below uses
                    # the decremented one
                    if index_offset != 1:
                        size = size - 1

                    # Keep the rows where the series position matches the unnest position, plus
                    # the rows where the series position is past `size` and the unnest position
                    # equals `size`
                    expression.where(
                        exp.column(series_alias, table=series_table_alias)
                        .eq(exp.column(pos_alias, table=unnest_source_alias))
                        .or_(
                            (exp.column(series_alias, table=series_table_alias) > size).and_(
                                exp.column(pos_alias, table=unnest_source_alias).eq(size)
                            )
                        ),
                        copy=False,
                    )

            if arrays:
                # The series must span the largest of the exploded arrays
                end: exp.Condition = exp.Greatest(this=arrays[0], expressions=arrays[1:])

                if index_offset != 1:
                    end = end - (1 - index_offset)
                series.expressions[0].set("end", end)

        return expression

    return _explode_projection_to_unnest
def eliminate_semi_and_anti_joins(expression: exp.Expr) -> exp.Expr:
    """
    Convert SEMI and ANTI joins into equivalent forms that use EXISTS instead.

    Each SEMI / ANTI join that has an ON condition is removed from the SELECT and replaced by
    a `[NOT] EXISTS (SELECT 1 FROM <joined source> WHERE <on>)` predicate in its WHERE clause.
    Joins of other kinds, or without an ON condition, are left untouched.

    Args:
        expression: the expression to transform. Only `exp.Select` nodes are rewritten.

    Returns:
        The same expression instance, rewritten in place when applicable.
    """
    if isinstance(expression, exp.Select):
        # Iterate over a snapshot of the joins: `join.pop()` detaches the join from the query,
        # and removing items from a list while iterating over it makes the iterator skip the
        # element that follows each removal (e.g. the second of two consecutive SEMI joins).
        for join in list(expression.args.get("joins") or []):
            on = join.args.get("on")
            if on and join.kind in ("SEMI", "ANTI"):
                subquery = exp.select("1").from_(join.this).where(on)
                exists: exp.Exists | exp.Not = exp.Exists(this=subquery)
                if join.kind == "ANTI":
                    exists = exists.not_(copy=False)

                join.pop()
                expression.where(exists, copy=False)

    return expression
""" if isinstance(expression, exp.Select): full_outer_joins = [ (index, join) for index, join in enumerate(expression.args.get("joins") or []) if join.side == "FULL" ] if len(full_outer_joins) == 1: expression_copy = expression.copy() expression.set("limit", None) index, full_outer_join = full_outer_joins[0] tables = (expression.args["from_"].alias_or_name, full_outer_join.alias_or_name) join_conditions = full_outer_join.args.get("on") or exp.and_( *[ exp.column(col, tables[0]).eq(exp.column(col, tables[1])) for col in full_outer_join.args.get("using") ] ) full_outer_join.set("side", "left") anti_join_clause = ( exp.select("1").from_(expression.args["from_"]).where(join_conditions) ) expression_copy.args["joins"][index].set("side", "right") expression_copy = expression_copy.where(exp.Exists(this=anti_join_clause).not_()) expression_copy.set("with_", None) # remove CTEs from RIGHT side expression.set("order", None) # remove order by from LEFT side return exp.union(expression, expression_copy, copy=False, distinct=False) return expression def move_ctes_to_top_level(expression: E) -> E: """ Some dialects (e.g. Hive, T-SQL, Spark prior to version 3) only allow CTEs to be defined at the top-level, so for example queries like: SELECT * FROM (WITH t(c) AS (SELECT 1) SELECT * FROM t) AS subq are invalid in those dialects. This transformation can be used to ensure all CTEs are moved to the top level so that the final SQL code is valid from a syntax standpoint. TODO: handle name clashes whilst moving CTEs (it can get quite tricky & costly). 
""" top_level_with = expression.args.get("with_") for inner_with in expression.find_all(exp.With): if inner_with.parent is expression: continue if not top_level_with: top_level_with = inner_with.pop() expression.set("with_", top_level_with) else: if inner_with.recursive: top_level_with.set("recursive", True) parent_cte = inner_with.find_ancestor(exp.CTE) inner_with.pop() if parent_cte: i = top_level_with.expressions.index(parent_cte) top_level_with.expressions[i:i] = inner_with.expressions top_level_with.set("expressions", top_level_with.expressions) else: top_level_with.set( "expressions", top_level_with.expressions + inner_with.expressions ) return expression def ensure_bools(expression: exp.Expr) -> exp.Expr: """Converts numeric values used in conditions into explicit boolean expressions.""" from sqlglot.optimizer.canonicalize import ensure_bools def _ensure_bool(node: exp.Expr) -> None: if ( node.is_number or ( not isinstance(node, exp.SubqueryPredicate) and node.is_type(exp.DType.UNKNOWN, *exp.DataType.NUMERIC_TYPES) ) or (isinstance(node, exp.Column) and not node.type) ): node.replace(node.neq(0)) for node in expression.walk(): ensure_bools(node, _ensure_bool) return expression def unqualify_columns(expression: exp.Expr) -> exp.Expr: for column in expression.find_all(exp.Column): # We only wanna pop off the table, db, catalog args for part in column.parts[:-1]: part.pop() return expression def remove_unique_constraints(expression: exp.Expr) -> exp.Expr: assert isinstance(expression, exp.Create) for constraint in expression.find_all(exp.UniqueColumnConstraint): if constraint.parent: constraint.parent.pop() return expression def ctas_with_tmp_tables_to_create_tmp_view( expression: exp.Expr, tmp_storage_provider: t.Callable[[exp.Expr], exp.Expr] = lambda e: e, ) -> exp.Expr: assert isinstance(expression, exp.Create) properties = expression.args.get("properties") temporary = any( isinstance(prop, exp.TemporaryProperty) for prop in (properties.expressions if 
def move_schema_columns_to_partitioned_by(expression: exp.Expr) -> exp.Expr:
    """
    Move partition columns out of the table schema and into the PARTITIONED BY property.

    In Hive, the PARTITIONED BY property acts as an extension of a table's schema. When the
    PARTITIONED BY value is an array of column names, they are transformed into a schema.
    The corresponding columns are removed from the create statement.

    Args:
        expression: The `exp.Create` expression to rewrite in place.

    Returns:
        The same expression instance that was passed in.

    Raises:
        AssertionError: If `expression` is not an `exp.Create`.
    """
    assert isinstance(expression, exp.Create)

    table_schema = expression.this
    if not isinstance(table_schema, exp.Schema) or expression.kind not in {"TABLE", "VIEW"}:
        return expression

    partitioned_by = expression.find(exp.PartitionedByProperty)
    if (
        not partitioned_by
        or not partitioned_by.this
        or isinstance(partitioned_by.this, exp.Schema)
    ):
        # Nothing to do: no property, an empty one, or one that already holds a schema
        return expression

    # Column names are matched case-insensitively
    partition_names = {item.name.upper() for item in partitioned_by.this.expressions}

    partition_defs = []
    for column_def in table_schema.expressions:
        if column_def.name.upper() in partition_names:
            partition_defs.append(column_def)

    remaining_defs = [
        column_def for column_def in table_schema.expressions if column_def not in partition_defs
    ]
    table_schema.set("expressions", remaining_defs)

    partitioned_by.replace(
        exp.PartitionedByProperty(this=exp.Schema(expressions=partition_defs))
    )
    expression.set("this", table_schema)

    return expression
def eliminate_join_marks(expression: exp.Expr) -> exp.Expr:
    """
    Rewrite Oracle-style `(+)` join marks in WHERE clauses into explicit LEFT JOINs.

    Reference: https://docs.oracle.com/cd/B19306_01/server.102/b14200/queries006.htm#sthref3178

    1. You cannot specify the (+) operator in a query block that also contains FROM clause
       join syntax.

    2. The (+) operator can appear only in the WHERE clause or, in the context of
       left-correlation (that is, when specifying the TABLE clause) in the FROM clause, and
       can be applied only to a column of a table or view.

    The (+) operator does not produce an outer join if you specify one table in the outer
    query and the other table in an inner query.

    You cannot use the (+) operator to outer-join a table to itself, although self joins
    are valid.

    The (+) operator can be applied only to a column, not to an arbitrary expression.
    However, an arbitrary expression can contain one or more columns marked with the (+)
    operator.

    A WHERE condition containing the (+) operator cannot be combined with another condition
    using the OR logical operator.

    A WHERE condition cannot use the IN comparison condition to compare a column marked
    with the (+) operator with an expression.

    A WHERE condition cannot compare any column marked with the (+) operator with a
    subquery.

    -- example with WHERE
    SELECT d.department_name, sum(e.salary) as total_salary
    FROM departments d, employees e
    WHERE e.department_id(+) = d.department_id
    group by department_name

    -- example of left correlation in select
    SELECT d.department_name, (
        SELECT SUM(e.salary)
            FROM employees e
            WHERE e.department_id(+) = d.department_id) AS total_salary
    FROM departments d;

    -- example of left correlation in from
    SELECT d.department_name, t.total_salary
    FROM departments d, (
            SELECT SUM(e.salary) AS total_salary
            FROM employees e
            WHERE e.department_id(+) = d.department_id
        ) t

    Args:
        expression: The expression tree to rewrite in place.

    Returns:
        The same expression instance that was passed in.

    Raises:
        AssertionError: If a join mark is used in a correlated subquery, if the WHERE
            predicates cannot be normalized, if a single predicate marks columns of
            different tables, or if no table is left to serve as the new FROM clause.
    """
    from sqlglot.optimizer.scope import traverse_scope
    from sqlglot.optimizer.normalize import normalize, normalized
    from collections import defaultdict

    # we go in reverse to check the main query for left correlation
    for scope in reversed(traverse_scope(expression)):
        query = scope.expression

        where = query.args.get("where")
        # The "joins" arg may be present but set to None, so don't rely on the default
        joins = query.args.get("joins") or []

        if not where or not any(c.args.get("join_mark") for c in where.find_all(exp.Column)):
            continue

        # knockout: we do not support left correlation (see point 2)
        assert not scope.is_correlated_subquery, "Correlated queries are not supported"

        # make sure we have AND of ORs to have clear join terms
        where = normalize(where.this)
        assert normalized(where), "Cannot normalize JOIN predicates"

        joins_ons = defaultdict(list)  # dict of {name: list of join AND conditions}
        for cond in [where] if not isinstance(where, exp.And) else where.flatten():
            join_cols = [col for col in cond.find_all(exp.Column) if col.args.get("join_mark")]

            left_join_table = {col.table for col in join_cols}
            if not left_join_table:
                continue

            assert not (len(left_join_table) > 1), (
                "Cannot combine JOIN predicates from different tables"
            )

            for col in join_cols:
                col.set("join_mark", False)

            joins_ons[left_join_table.pop()].append(cond)

        old_joins = {join.alias_or_name: join for join in joins}
        new_joins = {}
        query_from = query.args["from_"]

        for table, predicates in joins_ons.items():
            # The marked table is either one of the comma joins or the FROM table itself
            join_what = old_joins.get(table, query_from).this.copy()
            new_joins[join_what.alias_or_name] = exp.Join(
                this=join_what, on=exp.and_(*predicates), kind="LEFT"
            )

            for p in predicates:
                # Unwrap parentheses so the predicate hangs directly off its connector
                while isinstance(p.parent, exp.Paren):
                    p.parent.replace(p)

                parent = p.parent
                p.pop()
                if isinstance(parent, exp.Binary):
                    # Collapse the connector into whichever operand is still attached
                    left = parent.args.get("this")
                    parent.replace(parent.right if left is None else left)
                elif isinstance(parent, exp.Where):
                    parent.pop()

        if query_from.alias_or_name in new_joins:
            # The FROM table is now the joined side, so another table must become FROM.
            # Pick the first unconsumed join in source order to keep the output stable
            # (picking from a set would depend on string hash ordering).
            from_candidates = [name for name in old_joins if name not in new_joins]
            assert len(from_candidates) >= 1, (
                "Cannot determine which table to use in the new FROM clause"
            )

            new_from_name = from_candidates[0]
            query.set("from_", exp.From(this=old_joins[new_from_name].this))

        if new_joins:
            # `old_joins` is keyed by alias_or_name, so compare against the same identity;
            # using the bare table name would re-add an aliased FROM table as a CROSS JOIN.
            from_name = query.args["from_"].alias_or_name

            for n, j in old_joins.items():  # preserve any other joins
                if n not in new_joins and n != from_name:
                    if not j.kind:
                        j.set("kind", "CROSS")
                    new_joins[n] = j
            query.set("joins", list(new_joins.values()))

    return expression
from sqlglot.optimizer.scope import find_all_in_scope windows = expression.args["windows"] expression.set("windows", None) window_expression: t.Dict[str, exp.Expr] = {} def _inline_inherited_window(window: exp.Expr) -> None: inherited_window = window_expression.get(window.alias.lower()) if not inherited_window: return window.set("alias", None) for key in ("partition_by", "order", "spec"): arg = inherited_window.args.get(key) if arg: window.set(key, arg.copy()) for window in windows: _inline_inherited_window(window) window_expression[window.name.lower()] = window for window in find_all_in_scope(expression, exp.Window): _inline_inherited_window(window) return expression def inherit_struct_field_names(expression: exp.Expr) -> exp.Expr: """ Inherit field names from the first struct in an array. BigQuery supports implicitly inheriting names from the first STRUCT in an array: Example: ARRAY[ STRUCT('Alice' AS name, 85 AS score), -- defines names STRUCT('Bob', 92), -- inherits names STRUCT('Diana', 95) -- inherits names ] This transformation makes the field names explicit on all structs by adding PropertyEQ nodes, in order to facilitate transpilation to other dialects. 
import typing as t
from enum import Enum, auto
from collections.abc import Sequence, Iterable

# A trie key is any sequence of hashable elements (e.g. a string or a tuple of tokens).
key = Sequence[t.Hashable]


class TrieResult(Enum):
    """Outcome of looking a key up in a trie with `in_trie`."""

    FAILED = auto()
    PREFIX = auto()
    EXISTS = auto()


def new_trie(keywords: Iterable[key], trie: t.Optional[dict] = None) -> dict:
    """
    Creates a new trie out of a collection of keywords.

    The trie is represented as a sequence of nested dictionaries keyed by either single
    character strings, or by 0, which is used to designate that a keyword is in the trie.

    Example:
        >>> new_trie(["bla", "foo", "blab"])
        {'b': {'l': {'a': {0: True, 'b': {0: True}}}}, 'f': {'o': {'o': {0: True}}}}

    Args:
        keywords: the keywords to create the trie from.
        trie: a trie to mutate instead of creating a new one

    Returns:
        The trie corresponding to `keywords`.
    """
    trie = {} if trie is None else trie

    # `keyword` rather than `key`, so the module-level `key` type alias is not shadowed
    for keyword in keywords:
        current = trie
        for char in keyword:
            current = current.setdefault(char, {})

        # The 0 entry marks the end of a complete keyword
        current[0] = True

    return trie


def in_trie(trie: t.Dict, key: key) -> t.Tuple[TrieResult, t.Dict]:
    """
    Checks whether a key is in a trie.

    Examples:
        >>> in_trie(new_trie(["cat"]), "bob")
        (<TrieResult.FAILED: 1>, {'c': {'a': {'t': {0: True}}}})

        >>> in_trie(new_trie(["cat"]), "ca")
        (<TrieResult.PREFIX: 2>, {'t': {0: True}})

        >>> in_trie(new_trie(["cat"]), "cat")
        (<TrieResult.EXISTS: 3>, {0: True})

    Args:
        trie: The trie to be searched.
        key: The target key.

    Returns:
        A pair `(value, subtrie)`, where `subtrie` is the sub-trie we get at the point
        where the search stops, and `value` is a TrieResult value that can be one of:

        - TrieResult.FAILED: the search was unsuccessful (this includes an empty `key`)
        - TrieResult.PREFIX: `key` is a prefix of a keyword in `trie`
        - TrieResult.EXISTS: `key` exists in `trie`
    """
    if not key:
        return (TrieResult.FAILED, trie)

    current = trie
    for char in key:
        if char not in current:
            return (TrieResult.FAILED, current)
        current = current[char]

    if 0 in current:
        return (TrieResult.EXISTS, current)

    return (TrieResult.PREFIX, current)
expr_type in { exp.ApproxDistinct, exp.ArraySize, exp.CountIf, exp.Int64, exp.UnixSeconds, exp.UnixMicros, exp.UnixMillis, } }, **{ expr_type: {"returns": exp.DType.BINARY} for expr_type in { exp.FromBase32, exp.FromBase64, } }, **{ expr_type: {"returns": exp.DType.BOOLEAN} for expr_type in { exp.All, exp.Any, exp.ArrayContains, exp.Between, exp.Boolean, exp.Contains, exp.EndsWith, exp.Exists, exp.In, exp.IsInf, exp.IsNan, exp.LogicalAnd, exp.LogicalOr, exp.RegexpLike, exp.StartsWith, } }, **{ expr_type: {"returns": exp.DType.DATE} for expr_type in { exp.CurrentDate, exp.Date, exp.DateFromParts, exp.DateStrToDate, exp.DiToDate, exp.LastDay, exp.StrToDate, exp.TimeStrToDate, exp.TsOrDsToDate, } }, **{ expr_type: {"returns": exp.DType.DATETIME} for expr_type in { exp.CurrentDatetime, exp.Datetime, exp.DatetimeAdd, exp.DatetimeSub, } }, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.Asin, exp.Asinh, exp.Acos, exp.Acosh, exp.ApproxQuantile, exp.Atan, exp.Atanh, exp.Avg, exp.Cbrt, exp.Cos, exp.Cosh, exp.Cot, exp.Degrees, exp.Exp, exp.Kurtosis, exp.Ln, exp.Log, exp.Pi, exp.Pow, exp.Quantile, exp.Radians, exp.Round, exp.SafeDivide, exp.Sin, exp.Sinh, exp.Sqrt, exp.Stddev, exp.StddevPop, exp.StddevSamp, exp.Rand, exp.Tan, exp.Tanh, exp.ToDouble, exp.Variance, exp.VariancePop, exp.Skewness, } }, **{ expr_type: {"returns": exp.DType.INT} for expr_type in { exp.Ascii, exp.BitLength, exp.Ceil, exp.DatetimeDiff, exp.DayOfMonth, exp.DayOfWeek, exp.DayOfYear, exp.Getbit, exp.Hour, exp.TimestampDiff, exp.TimeDiff, exp.Unicode, exp.DateToDi, exp.Levenshtein, exp.Length, exp.Sign, exp.StrPosition, exp.TsOrDiToDi, exp.Quarter, exp.UnixDate, } }, **{ expr_type: {"returns": exp.DType.INTERVAL} for expr_type in { exp.Interval, exp.JustifyDays, exp.JustifyHours, exp.JustifyInterval, exp.MakeInterval, } }, **{ expr_type: {"returns": exp.DType.JSON} for expr_type in { exp.ParseJSON, } }, **{ expr_type: {"returns": exp.DType.TIME} for expr_type in { exp.CurrentTime, 
exp.Localtime, exp.Time, exp.TimeAdd, exp.TimeSub, } }, **{ expr_type: {"returns": exp.DType.TIMESTAMPLTZ} for expr_type in { exp.TimestampLtzFromParts, } }, **{ expr_type: {"returns": exp.DType.TIMESTAMPTZ} for expr_type in { exp.CurrentTimestampLTZ, exp.TimestampTzFromParts, } }, **{expr_type: {"returns": exp.DType.TIMESTAMP} for expr_type in TIMESTAMP_EXPRESSIONS}, **{ expr_type: {"returns": exp.DType.TINYINT} for expr_type in { exp.Day, exp.DayOfWeekIso, exp.Month, exp.Week, exp.WeekOfYear, exp.Year, exp.YearOfWeek, exp.YearOfWeekIso, } }, **{ expr_type: {"returns": exp.DType.VARCHAR} for expr_type in { exp.ArrayToString, exp.Concat, exp.ConcatWs, exp.Chr, exp.CurrentCatalog, exp.CurrentSchema, exp.CurrentVersion, exp.CurrentUser, exp.Dayname, exp.DateToDateStr, exp.DPipe, exp.GroupConcat, exp.Initcap, exp.Lower, exp.MD5, exp.Monthname, exp.SHA, exp.SHA2, exp.Substring, exp.String, exp.TimeToStr, exp.TimeToTimeStr, exp.Trim, exp.ToBase32, exp.ToBase64, exp.Translate, exp.TsOrDsToDateStr, exp.UnixToStr, exp.UnixToTimeStr, exp.Upper, exp.RawString, exp.SessionUser, exp.Space, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")} for expr_type in { exp.Abs, exp.AnyValue, exp.ArrayConcatAgg, exp.ArrayReverse, exp.ArraySlice, exp.Filter, exp.HavingMax, exp.LastValue, exp.Limit, exp.Order, exp.SortArray, exp.Window, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "expressions")} for expr_type in { exp.ArrayConcat, exp.Coalesce, exp.Greatest, exp.Least, exp.Max, exp.Min, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_by_array_element(e)} for expr_type in { exp.ArrayFirst, exp.ArrayLast, } }, exp.Anonymous: {"annotator": lambda self, e: self._set_type(e, self.schema.get_udf_type(e))}, **{ expr_type: {"annotator": lambda self, e: self._annotate_timeunit(e)} for expr_type in { exp.DateAdd, exp.DateSub, exp.DateTrunc, } }, **{ expr_type: {"annotator": lambda self, e: self._set_type(e, 
e.args["to"])} for expr_type in { exp.Cast, exp.TryCast, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_map(e)} for expr_type in { exp.Map, exp.VarMap, } }, exp.Array: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions", array=True)}, exp.ArrayAgg: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)}, exp.Bracket: {"annotator": lambda self, e: self._annotate_bracket(e)}, exp.Case: { "annotator": lambda self, e: self._annotate_by_args( e, *[if_expr.args["true"] for if_expr in e.args["ifs"]], "default" ) }, exp.Count: { "annotator": lambda self, e: self._set_type( e, exp.DType.BIGINT if e.args.get("big_int") else exp.DType.INT ) }, exp.DateDiff: { "annotator": lambda self, e: self._set_type( e, exp.DType.BIGINT if e.args.get("big_int") else exp.DType.INT ) }, exp.DataType: {"annotator": lambda self, e: self._set_type(e, e.copy())}, exp.Div: {"annotator": lambda self, e: self._annotate_div(e)}, exp.Distinct: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")}, exp.Dot: {"annotator": lambda self, e: self._annotate_dot(e)}, exp.Explode: {"annotator": lambda self, e: self._annotate_explode(e)}, exp.Extract: {"annotator": lambda self, e: self._annotate_extract(e)}, exp.HexString: { "annotator": lambda self, e: self._set_type( e, exp.DType.BIGINT if e.args.get("is_integer") else exp.DType.BINARY, ) }, exp.GenerateSeries: { "annotator": lambda self, e: self._annotate_by_args(e, "start", "end", "step", array=True) }, exp.GenerateDateArray: { "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY")) }, exp.GenerateTimestampArray: { "annotator": lambda self, e: self._set_type(e, exp.DataType.build("ARRAY")) }, exp.If: {"annotator": lambda self, e: self._annotate_by_args(e, "true", "false")}, exp.Literal: {"annotator": lambda self, e: self._annotate_literal(e)}, exp.Null: {"returns": exp.DType.NULL}, exp.Nullif: {"annotator": lambda self, e: self._annotate_by_args(e, "this", 
from __future__ import annotations

import typing as t

from sqlglot import exp
from sqlglot.typing import EXPRESSION_METADATA, TIMESTAMP_EXPRESSIONS

if t.TYPE_CHECKING:
    from sqlglot.optimizer.annotate_types import TypeAnnotator


def _annotate_math_functions(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """
    Annotate a single-argument math function from the type of its operand.

    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:

    +---------+---------+---------+------------+---------+
    |  INPUT  | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    operand: exp.Expr = expression.this

    # Integer inputs widen to DOUBLE; every other input type is passed through unchanged
    if operand.is_type(*exp.DataType.INTEGER_TYPES):
        result_type = exp.DType.DOUBLE
    else:
        result_type = operand.type

    self._set_type(expression, result_type)
    return expression


def _annotate_safe_divide(self: TypeAnnotator, expression: exp.SafeDivide) -> exp.Expr:
    """
    Annotate SAFE_DIVIDE: two integer operands yield DOUBLE, anything else is coerced.

    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | FLOAT64    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    """
    integer_types = exp.DataType.INTEGER_TYPES

    dividend_is_integer = expression.this.is_type(*integer_types)
    if dividend_is_integer and expression.expression.is_type(*integer_types):
        return self._set_type(expression, exp.DType.DOUBLE)

    return _annotate_by_args_with_coerce(self, expression)


def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """
    Annotate a two-operand function with the coerced type of both operands.

    +------------+------------+------------+-------------+---------+
    | INPUT      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    | INT64      | INT64      | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | NUMERIC    | NUMERIC    | NUMERIC    | BIGNUMERIC  | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC  | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64     | FLOAT64 |
    +------------+------------+------------+-------------+---------+
    """
    left_type = expression.this.type
    right_type = expression.expression.type

    self._set_type(expression, self._maybe_coerce(left_type, right_type))
    return expression


def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
    """Annotate the expression as an ARRAY of STRUCTs pairing the operand's type with a BIGINT."""
    entry_type = exp.DataType(
        this=exp.DType.STRUCT,
        expressions=[expression.this.type, exp.DataType(this=exp.DType.BIGINT)],
        nested=True,
    )
    result_type = exp.DataType(this=exp.DType.ARRAY, expressions=[entry_type], nested=True)

    self._set_type(expression, result_type)
    return expression


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    """Annotate CONCAT from its arguments, falling back to VARCHAR unless BINARY or UNKNOWN."""
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if annotated.is_type(exp.DType.BINARY, exp.DType.UNKNOWN):
        return annotated

    self._set_type(annotated, exp.DType.VARCHAR)
    return annotated
expression.expressions # BigQuery behaves as follows: # # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY # ARRAY(SELECT ... UNION ALL SELECT ...) -- ARRAY if len(array_args) == 1: unnested = array_args[0].unnest() projection_type: t.Optional[exp.DataType | exp.DType] = None # Handle ARRAY(SELECT ...) - single SELECT query if isinstance(unnested, exp.Select): if ( (query_type := unnested.meta.get("query_type")) is not None and query_type.is_type(exp.DType.STRUCT) and len(query_type.expressions) == 1 and isinstance(col_def := query_type.expressions[0], exp.ColumnDef) and (col_type := col_def.kind) is not None and not col_type.is_type(exp.DType.UNKNOWN) ): projection_type = col_type # Handle ARRAY(SELECT ... UNION ALL SELECT ...) - set operations elif isinstance(unnested, exp.SetOperation): # Get all column types for the SetOperation col_types = self._get_setop_column_types(unnested) # For ARRAY constructor, there should only be one projection # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/array_functions#array if col_types and unnested.left.selects: first_col_name = unnested.left.selects[0].alias_or_name projection_type = col_types.get(first_col_name) # If we successfully determine a projection type and it's not UNKNOWN, wrap it in ARRAY if projection_type and not ( ( isinstance(projection_type, exp.DataType) and projection_type.is_type(exp.DType.UNKNOWN) ) or projection_type == exp.DType.UNKNOWN ): element_type = ( projection_type.copy() if isinstance(projection_type, exp.DataType) else exp.DataType(this=projection_type) ) array_type = exp.DataType( this=exp.DType.ARRAY, expressions=[element_type], nested=True, ) return self._set_type(expression, array_type) return self._annotate_by_args(expression, "expressions", array=True) EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ expr_type: {"annotator": lambda self, e: _annotate_math_functions(self, e)} for 
expr_type in { exp.Avg, exp.Ceil, exp.Exp, exp.Floor, exp.Ln, exp.Log, exp.Round, exp.Sqrt, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")} for expr_type in { exp.ArgMax, exp.ArgMin, exp.DateAdd, exp.DateTrunc, exp.DatetimeTrunc, exp.FirstValue, exp.GroupConcat, exp.IgnoreNulls, exp.JSONExtract, exp.Lead, exp.Left, exp.Lower, exp.NetFunc, exp.NthValue, exp.Pad, exp.PercentileDisc, exp.RegexpExtract, exp.RegexpReplace, exp.Repeat, exp.Replace, exp.RespectNulls, exp.Reverse, exp.Right, exp.SafeFunc, exp.SafeNegate, exp.Sign, exp.Substring, exp.TimestampTrunc, exp.Translate, exp.Trim, exp.Upper, } }, **{ expr_type: {"returns": exp.DType.BIGINT} for expr_type in { exp.BitwiseAndAgg, exp.BitwiseCount, exp.BitwiseOrAgg, exp.BitwiseXorAgg, exp.ByteLength, exp.DenseRank, exp.FarmFingerprint, exp.Grouping, exp.LaxInt64, exp.Length, exp.Ntile, exp.Rank, exp.RangeBucket, exp.RegexpInstr, exp.RowNumber, exp.UnixDate, } }, **{ expr_type: {"returns": exp.DType.BINARY} for expr_type in { exp.ByteString, exp.CodePointsToBytes, exp.MD5Digest, exp.SHA, exp.SHA2, exp.SHA1Digest, exp.SHA2Digest, exp.Unhex, } }, **{ expr_type: {"returns": exp.DType.BOOLEAN} for expr_type in { exp.JSONBool, exp.LaxBool, } }, **{ expr_type: {"returns": exp.DType.DATETIME} for expr_type in { exp.ParseDatetime, exp.TimestampFromParts, } }, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.Atan2, exp.Corr, exp.CosineDistance, exp.Coth, exp.CovarPop, exp.CovarSamp, exp.Csc, exp.Csch, exp.CumeDist, exp.EuclideanDistance, exp.Float64, exp.LaxFloat64, exp.PercentRank, exp.Sec, exp.Sech, } }, **{ expr_type: {"returns": exp.DType.JSON} for expr_type in { exp.JSONArray, exp.JSONArrayAppend, exp.JSONArrayInsert, exp.JSONObject, exp.JSONRemove, exp.JSONSet, exp.JSONStripNulls, } }, **{ expr_type: {"returns": exp.DType.TIME} for expr_type in { exp.ParseTime, exp.TimeFromParts, exp.TimeTrunc, exp.TsOrDsToTime, } }, **{ expr_type: {"returns": exp.DType.VARCHAR} for 
expr_type in { exp.CodePointsToString, exp.Format, exp.Host, exp.JSONExtractScalar, exp.JSONType, exp.LaxString, exp.LowerHex, exp.Normalize, exp.RegDomain, exp.SafeConvertBytesToString, exp.Soundex, exp.Uuid, } }, **{ expr_type: {"annotator": lambda self, e: _annotate_by_args_with_coerce(self, e)} for expr_type in { exp.PercentileCont, exp.SafeAdd, exp.SafeDivide, exp.SafeMultiply, exp.SafeSubtract, } }, **{ expr_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this", array=True)} for expr_type in { exp.ApproxQuantiles, exp.JSONExtractArray, exp.RegexpExtractAll, exp.Split, } }, **{expr_type: {"returns": exp.DType.TIMESTAMPTZ} for expr_type in TIMESTAMP_EXPRESSIONS}, exp.ApproxTopK: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)}, exp.ApproxTopSum: {"annotator": lambda self, e: _annotate_by_args_approx_top(self, e)}, exp.Array: {"annotator": _annotate_array}, exp.Concat: {"annotator": _annotate_concat}, exp.DateFromUnixDate: {"returns": exp.DType.DATE}, exp.GenerateTimestampArray: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("ARRAY", dialect="bigquery") ) }, exp.JSONFormat: { "annotator": lambda self, e: self._set_type( e, exp.DType.JSON if e.args.get("to_json") else exp.DType.VARCHAR ) }, exp.JSONKeysAtDepth: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("ARRAY", dialect="bigquery") ) }, exp.JSONValueArray: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("ARRAY", dialect="bigquery") ) }, exp.Lag: {"annotator": lambda self, e: self._annotate_by_args(e, "this", "default")}, exp.ParseBignumeric: {"returns": exp.DType.BIGDECIMAL}, exp.ParseNumeric: {"returns": exp.DType.DECIMAL}, exp.SafeDivide: {"annotator": lambda self, e: _annotate_safe_divide(self, e)}, exp.ToCodePoints: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("ARRAY", dialect="bigquery") ) }, } ================================================ FILE: sqlglot/typing/clickhouse.py 
================================================ from __future__ import annotations from sqlglot import exp from sqlglot.typing import EXPRESSION_METADATA EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ expr_type: {"returns": exp.DType.UBIGINT} for expr_type in { exp.CountIf, } }, } ================================================ FILE: sqlglot/typing/duckdb.py ================================================ from __future__ import annotations from sqlglot import exp from sqlglot.typing import EXPRESSION_METADATA EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ expr_type: {"returns": exp.DType.BIGINT} for expr_type in { exp.BitLength, exp.DateDiff, exp.Day, exp.DayOfMonth, exp.DayOfWeek, exp.DayOfWeekIso, exp.DayOfYear, exp.Extract, exp.Hour, exp.Length, exp.Minute, exp.Month, exp.Quarter, exp.Second, exp.Week, exp.Year, } }, **{ expr_type: {"returns": exp.DType.INT128} for expr_type in { exp.CountIf, exp.Factorial, } }, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.Atan2, exp.JarowinklerSimilarity, exp.TimeToUnix, } }, **{ expr_type: {"returns": exp.DType.VARCHAR} for expr_type in { exp.Format, exp.Reverse, } }, exp.DateBin: {"annotator": lambda self, e: self._annotate_by_args(e, "expression")}, exp.Localtimestamp: {"returns": exp.DType.TIMESTAMP}, exp.ToDays: {"returns": exp.DType.INTERVAL}, exp.TimeFromParts: {"returns": exp.DType.TIME}, } ================================================ FILE: sqlglot/typing/hive.py ================================================ from __future__ import annotations from sqlglot import exp from sqlglot.typing import EXPRESSION_METADATA EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ expr_type: {"returns": exp.DType.BINARY} for expr_type in { exp.Encode, exp.Unhex, } }, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.Corr, exp.MonthsBetween, } }, **{ expr_type: {"returns": exp.DType.VARCHAR} for expr_type in { exp.AddMonths, exp.CurrentDatabase, exp.CurrentUser, exp.CurrentSchema, 
from __future__ import annotations

from sqlglot import exp
from sqlglot.typing import EXPRESSION_METADATA

# MySQL return-type overrides layered on top of the shared annotation metadata.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # DOUBLE
    exp.Atan2: {"returns": exp.DType.DOUBLE},
    # DATETIME
    exp.CurrentTimestamp: {"returns": exp.DType.DATETIME},
    exp.Localtime: {"returns": exp.DType.DATETIME},
    # VARCHAR
    exp.Elt: {"returns": exp.DType.VARCHAR},
    # INT
    exp.Month: {"returns": exp.DType.INT},
    exp.Second: {"returns": exp.DType.INT},
    exp.Week: {"returns": exp.DType.INT},
}
def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    """Type REVERSE from its argument, substituting VARCHAR when that yields NULL."""
    annotated = self._annotate_by_args(expression, "this")
    if not annotated.is_type(exp.DType.NULL):
        return annotated

    # Snowflake treats REVERSE(NULL) as a VARCHAR
    self._set_type(annotated, exp.DType.VARCHAR)
    return annotated
def _annotate_date_or_time_add(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """Annotate a date/time addition.

    - DATE operand with a unit outside DATE_PARTS -> TIMESTAMPNTZ
    - Anything else -> type derived from the `this` argument
    """
    # The unit is only inspected when the operand is a DATE.
    promotes_to_timestamp = expression.this.is_type(exp.DType.DATE) and (
        expression.text("unit").upper() not in DATE_PARTS
    )
    if not promotes_to_timestamp:
        self._annotate_by_args(expression, "this")
        return expression

    self._set_type(expression, exp.DType.TIMESTAMPNTZ)
    return expression
def _annotate_arg_max_min(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """Annotate ArgMax / ArgMin with the correct return type.

    - With a truthy `count` argument -> ARRAY
    - Otherwise -> the type of the first argument (`this`)
    """
    self._set_type(
        expression,
        exp.DType.ARRAY if expression.args.get("count") else expression.this.type,
    )
    return expression
def _annotate_variance(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """Annotate variance functions (VAR_POP, VAR_SAMP, VARIANCE, VARIANCE_POP).

    The result type is chosen from the type of the `this` argument:
        - DECFLOAT -> DECFLOAT
        - FLOAT/DOUBLE -> DOUBLE
        - no scale or scale 0 (INT, NUMBER(p, 0)) -> NUMBER(38, 6)
        - NUMBER(p, s) -> NUMBER(38, max(12, s))
    """
    # The argument has to be annotated before its type can be inspected.
    expression = self._annotate_by_args(expression, "this")
    arg_type = expression.this.type

    if arg_type.is_type(exp.DType.DECFLOAT):
        self._set_type(expression, exp.DataType.build("DECFLOAT", dialect="snowflake"))
        return expression

    if arg_type.is_type(exp.DType.FLOAT, exp.DType.DOUBLE):
        self._set_type(expression, exp.DType.DOUBLE)
        return expression

    # Everything else is handled as NUMBER(p, s); the scale is the second type
    # parameter and counts as 0 when it is absent.
    scale_param = seq_get(arg_type.expressions, 1)
    input_scale = scale_param.this.to_py() if scale_param else 0

    # Snowflake appears to assign the scale through MAX(12, s), except that a
    # zero scale yields 6.
    result_scale = max(12, input_scale) if input_scale != 0 else 6

    self._set_type(
        expression,
        exp.DataType.build(f"NUMBER({MAX_PRECISION}, {result_scale})", dialect="snowflake"),
    )
    return expression
def _annotate_math_with_float_decfloat(self: TypeAnnotator, expression: exp.Expr) -> exp.Expr:
    """Annotate math functions that keep DECFLOAT and otherwise return DOUBLE.

    Only the `this` argument is inspected:
        - DECFLOAT input -> the input's own type
        - any other input (integers, NUMBER, DECIMAL, DOUBLE, ...) -> DOUBLE
    """
    expression = self._annotate_by_args(expression, "this")
    operand = expression.this
    self._set_type(
        expression,
        operand.type if operand.is_type(exp.DType.DECFLOAT) else exp.DType.DOUBLE,
    )
    return expression
expr_type: {"returns": exp.DType.ARRAY} for expr_type in ( exp.ApproxTopK, exp.ApproxTopKEstimate, exp.Array, exp.ArrayAgg, exp.ArrayAppend, exp.ArrayCompact, exp.ArrayConcat, exp.ArrayConstructCompact, exp.ArrayPrepend, exp.ArrayRemove, exp.ArraysZip, exp.ArrayUniqueAgg, exp.ArrayUnionAgg, exp.MapKeys, exp.RegexpExtractAll, exp.Split, exp.StringToArray, ) }, **{ expr_type: {"returns": exp.DType.BIGINT} for expr_type in { exp.BitmapBitPosition, exp.BitmapBucketNumber, exp.BitmapCount, exp.Factorial, exp.GroupingId, exp.MD5NumberLower64, exp.MD5NumberUpper64, exp.Rand, exp.Seq8, exp.Zipf, } }, **{ expr_type: {"returns": exp.DType.BINARY} for expr_type in { exp.Base64DecodeBinary, exp.BitmapConstructAgg, exp.BitmapOrAgg, exp.Compress, exp.DecompressBinary, exp.Decrypt, exp.DecryptRaw, exp.Encrypt, exp.EncryptRaw, exp.HexString, exp.MD5Digest, exp.SHA1Digest, exp.SHA2Digest, exp.ToBinary, exp.TryBase64DecodeBinary, exp.TryHexDecodeBinary, exp.Unhex, } }, **{ expr_type: {"returns": exp.DType.BOOLEAN} for expr_type in { exp.Booland, exp.Boolnot, exp.Boolor, exp.BoolxorAgg, exp.EqualNull, exp.IsNullValue, exp.MapContainsKey, exp.Search, exp.SearchIp, exp.ToBoolean, } }, **{ expr_type: {"returns": exp.DType.DATE} for expr_type in { exp.NextDay, exp.PreviousDay, } }, **{ expr_type: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("NUMBER", dialect="snowflake") ) } for expr_type in ( exp.BitwiseAndAgg, exp.BitwiseOrAgg, exp.BitwiseXorAgg, exp.RegexpCount, exp.RegexpInstr, exp.ToNumber, ) }, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.ApproxPercentileEstimate, exp.ApproximateSimilarity, exp.CosineDistance, exp.CovarPop, exp.CovarSamp, exp.DotProduct, exp.EuclideanDistance, exp.ManhattanDistance, exp.MonthsBetween, exp.Normal, } }, exp.Kurtosis: {"annotator": _annotate_kurtosis}, **{ expr_type: {"returns": exp.DType.DECFLOAT} for expr_type in { exp.ToDecfloat, exp.TryToDecfloat, } }, **{ expr_type: {"annotator": 
_annotate_math_with_float_decfloat} for expr_type in { exp.Acos, exp.Asin, exp.Atan, exp.Atan2, exp.Cbrt, exp.Cos, exp.Cot, exp.Degrees, exp.Exp, exp.Ln, exp.Log, exp.Pow, exp.Radians, exp.RegrAvgx, exp.RegrAvgy, exp.RegrCount, exp.RegrIntercept, exp.RegrR2, exp.RegrSlope, exp.RegrSxx, exp.RegrSxy, exp.RegrSyy, exp.RegrValx, exp.RegrValy, exp.Sin, exp.Sqrt, exp.Tan, exp.Tanh, } }, **{ expr_type: {"returns": exp.DType.INT} for expr_type in { exp.ByteLength, exp.Grouping, exp.JarowinklerSimilarity, exp.MapSize, exp.Minute, exp.RtrimmedLength, exp.Second, exp.Seq1, exp.Seq2, exp.Seq4, exp.WidthBucket, } }, **{ expr_type: {"returns": exp.DType.OBJECT} for expr_type in { exp.ApproxPercentileAccumulate, exp.ApproxPercentileCombine, exp.ApproxTopKAccumulate, exp.ApproxTopKCombine, exp.ObjectAgg, exp.ParseIp, exp.ParseUrl, exp.XMLGet, } }, **{ expr_type: {"returns": exp.DType.MAP} for expr_type in { exp.MapCat, exp.MapDelete, exp.MapInsert, exp.MapPick, } }, **{ expr_type: {"returns": exp.DType.FILE} for expr_type in { exp.ToFile, } }, **{ expr_type: {"returns": exp.DType.TIME} for expr_type in { exp.TimeFromParts, exp.TsOrDsToTime, } }, **{ expr_type: {"returns": exp.DType.TIMESTAMPLTZ} for expr_type in { exp.CurrentTimestamp, exp.Localtimestamp, } }, **{ expr_type: {"returns": exp.DType.TINYINT} for expr_type in { exp.DayOfMonth, exp.DayOfWeek, exp.DayOfYear, exp.Quarter, } }, **{ expr_type: {"returns": exp.DType.VARCHAR} for expr_type in { exp.AIAgg, exp.AIClassify, exp.AISummarizeAgg, exp.Base64DecodeString, exp.Base64Encode, exp.CheckJson, exp.CheckXml, exp.Collate, exp.Collation, exp.CurrentAccount, exp.CurrentAccountName, exp.CurrentAvailableRoles, exp.CurrentClient, exp.CurrentDatabase, exp.CurrentIpAddress, exp.CurrentSchemas, exp.CurrentSecondaryRoles, exp.CurrentSession, exp.CurrentStatement, exp.CurrentTransaction, exp.CurrentWarehouse, exp.CurrentOrganizationUser, exp.CurrentRegion, exp.CurrentRole, exp.CurrentRoleType, exp.CurrentOrganizationName, 
exp.DecompressString, exp.HexDecodeString, exp.HexEncode, exp.Randstr, exp.RegexpExtract, exp.RegexpReplace, exp.Repeat, exp.Replace, exp.Soundex, exp.SoundexP123, exp.SplitPart, exp.Strtok, exp.TryBase64DecodeString, exp.TryHexDecodeString, exp.Uuid, } }, **{ expr_type: {"returns": exp.DType.VARIANT} for expr_type in { exp.Minhash, exp.MinhashCombine, } }, **{ expr_type: {"annotator": _annotate_variance} for expr_type in ( exp.Variance, exp.VariancePop, ) }, exp.ArgMax: {"annotator": _annotate_arg_max_min}, exp.ArgMin: {"annotator": _annotate_arg_max_min}, exp.ConcatWs: {"annotator": lambda self, e: self._annotate_by_args(e, "expressions")}, exp.ConvertTimezone: { "annotator": lambda self, e: self._set_type( e, exp.DType.TIMESTAMPNTZ if e.args.get("source_tz") else exp.DType.TIMESTAMPTZ, ) }, exp.DateAdd: {"annotator": _annotate_date_or_time_add}, exp.DecodeCase: {"annotator": _annotate_decode_case}, exp.HashAgg: { "annotator": lambda self, e: self._set_type( e, exp.DataType.build("NUMBER(19, 0)", dialect="snowflake") ) }, exp.Median: {"annotator": _annotate_median}, exp.Reverse: {"annotator": _annotate_reverse}, exp.StrToTime: {"annotator": _annotate_str_to_time}, exp.TimeAdd: {"annotator": _annotate_date_or_time_add}, exp.TimestampFromParts: {"annotator": _annotate_timestamp_from_parts}, exp.WithinGroup: {"annotator": _annotate_within_group}, } ================================================ FILE: sqlglot/typing/spark.py ================================================ from __future__ import annotations from sqlglot import exp from sqlglot.typing.spark2 import EXPRESSION_METADATA EXPRESSION_METADATA = { **EXPRESSION_METADATA, **{ exp_type: {"returns": exp.DType.DOUBLE} for exp_type in { exp.Sec, } }, **{ exp_type: {"returns": exp.DType.INT} for exp_type in { exp.ArraySize, } }, **{ exp_type: {"returns": exp.DType.VARCHAR} for exp_type in { exp.Collation, exp.CurrentTimezone, exp.Randstr, } }, **{ exp_type: {"annotator": lambda self, e: self._annotate_by_args(e, 
"this")} for exp_type in { exp.ArrayCompact, exp.ArrayInsert, exp.BitwiseAndAgg, exp.BitwiseOrAgg, exp.BitwiseXorAgg, exp.Overlay, } }, exp.BitmapCount: {"returns": exp.DType.BIGINT}, exp.Localtimestamp: {"returns": exp.DType.TIMESTAMPNTZ}, exp.ToBinary: {"returns": exp.DType.BINARY}, exp.DateFromUnixDate: {"returns": exp.DType.DATE}, } ================================================ FILE: sqlglot/typing/spark2.py ================================================ from __future__ import annotations import typing as t from sqlglot import exp from sqlglot.helper import ensure_list from sqlglot.typing.hive import EXPRESSION_METADATA as HIVE_EXPRESSION_METADATA if t.TYPE_CHECKING: from sqlglot._typing import E from sqlglot.optimizer.annotate_types import TypeAnnotator from sqlglot.typing import ExprMetadataType def _annotate_by_similar_args( self: TypeAnnotator, expression: E, *args: str, target_type: exp.DataType | exp.DType ) -> E: """ Infers the type of the expression according to the following rules: - If all args are of the same type OR any arg is of target_type, the expr is inferred as such - If any arg is of UNKNOWN type and none of target_type, the expr is inferred as UNKNOWN """ expressions: t.List[exp.Expr] = [] for arg in args: arg_expr = expression.args.get(arg) expressions.extend(expr for expr in ensure_list(arg_expr) if expr) last_datatype = None has_unknown = False for expr in expressions: if expr.is_type(exp.DType.UNKNOWN): has_unknown = True elif expr.is_type(target_type): has_unknown = False last_datatype = target_type break else: last_datatype = expr.type self._set_type(expression, exp.DType.UNKNOWN if has_unknown else last_datatype) return expression EXPRESSION_METADATA: ExprMetadataType = { **HIVE_EXPRESSION_METADATA, **{ expr_type: {"returns": exp.DType.DOUBLE} for expr_type in { exp.Atan2, exp.Randn, } }, **{ exp_type: {"returns": exp.DType.VARCHAR} for exp_type in { exp.Format, exp.Right, } }, **{ expr_type: {"annotator": lambda self, e: 
# T-SQL overrides layered over the shared defaults; entries listed here replace
# any entry for the same expression class coming from the base mapping.
EXPRESSION_METADATA = {
    **EXPRESSION_METADATA,
    # Fixed return types.
    **{
        node_type: {"returns": return_type}
        for return_type, node_types in (
            (
                exp.DType.FLOAT,
                (
                    exp.Acos,
                    exp.Asin,
                    exp.Atan,
                    exp.Atan2,
                    exp.Cos,
                    exp.Cot,
                    exp.Sin,
                    exp.Tan,
                ),
            ),
            (exp.DType.VARCHAR, (exp.Soundex, exp.Stuff)),
            (exp.DType.NVARCHAR, (exp.CurrentTimezone,)),
            (exp.DType.DATETIME, (exp.CurrentTimestamp,)),
        )
        for node_type in node_types
    },
    # Return type follows the `this` argument.
    **{
        node_type: {"annotator": lambda self, e: self._annotate_by_args(e, "this")}
        for node_type in (exp.Degrees, exp.Radians)
    },
}
"setuptools_scm", "sqlglot-mypy>=1.19.1.post1"] [project.urls] Homepage = "https://sqlglot.com/" Repository = "https://github.com/tobymao/sqlglot" [build-system] requires = ["setuptools >= 61.0", "setuptools_scm", "sqlglot-mypy>=1.19.1.post1", "types-python-dateutil", "sqlglot"] build-backend = "setuptools.build_meta" [tool.setuptools] include-package-data = false [tool.setuptools_scm] root = ".." fallback_version = "0.0.0" local_scheme = "no-local-version" [[tool.mypy.overrides]] module = ["sqlglot._version", "sqlglotc"] ignore_missing_imports = true ================================================ FILE: sqlglotc/setup.py ================================================ import os import shutil from setuptools import setup from setuptools.command.build_ext import build_ext as _build_ext from setuptools.command.sdist import sdist as _sdist from mypyc.build import mypycify SQLGLOT_SRC = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "sqlglot") def _find_sqlglot_dir(): """Find the sqlglot source directory: repo source, or installed package. When the installed package is in site-packages, copy it to a clean temp directory so mypy doesn't discover unrelated modules (mypy_extensions, typing_extensions) that cause shadowing errors. """ if os.path.isdir(SQLGLOT_SRC): return SQLGLOT_SRC # Fall back to the installed sqlglot package (build dependency). import sqlglot import tempfile installed = os.path.dirname(sqlglot.__file__) tmp = tempfile.mkdtemp(prefix="sqlglotc_build_") dst = os.path.join(tmp, "sqlglot") shutil.copytree(installed, dst) return dst def _subpkg_files(src_dir, subpkg, files=None): """List source files from a sqlglot subpackage. 
Compiles all .py files if `files` is None.""" if files is None: files = sorted( f for f in os.listdir(os.path.join(src_dir, subpkg)) if f.endswith(".py") and f != "__init__.py" ) return [os.path.join(subpkg, f) for f in files] def _source_files(src_dir): return [ "errors.py", "helper.py", "parser.py", "schema.py", "serde.py", "time.py", "tokenizer_core.py", "trie.py", *_subpkg_files(src_dir, "expressions"), *_subpkg_files( src_dir, "optimizer", [ "scope.py", "resolver.py", "isolate_table_selects.py", "normalize_identifiers.py", "qualify.py", "qualify_tables.py", "qualify_columns.py", ], ), *_subpkg_files(src_dir, "parsers"), *_subpkg_files(src_dir, "executor", ["table.py"]), ] SRC_DIR = _find_sqlglot_dir() SOURCE_FILES = _source_files(SRC_DIR) # Set MYPYPATH to the parent of the sqlglot source so mypy resolves # `import sqlglot` from there — not from site-packages where # mypy_extensions.py / typing_extensions.py can cause shadowing errors. os.environ["MYPYPATH"] = os.path.dirname(SRC_DIR) def _source_paths(): return [os.path.join(SRC_DIR, f) for f in SOURCE_FILES] class build_ext(_build_ext): def copy_extensions_to_source(self): """For editable installs, put sqlglot.* .so files in the sqlglot source dir.""" for ext in self.extensions: fullname = self.get_ext_fullname(ext.name) filename = self.get_ext_filename(fullname) src = os.path.join(self.build_lib, filename) parts = fullname.split(".") if parts[0] == "sqlglot" and os.path.isdir(SQLGLOT_SRC): # Place compiled sqlglot.* / sqlglot.sub.* modules in the sqlglot source tree. sub_module = ".".join(parts[1:]) dst = os.path.join(SQLGLOT_SRC, self.get_ext_filename(sub_module)) else: # Place the mypyc runtime helper (e.g., HASH__mypyc) inside sqlglot/. # sqlglot/__init__.py bootstraps it into sys.modules for editable installs. 
class sdist(_sdist):
    """Bundle sqlglot source files into the sdist as a fallback."""

    def run(self):
        """Stage SOURCE_FILES under ./sqlglot, build the sdist, then remove the staged copy."""
        local_sqlglot = os.path.join(os.path.dirname(os.path.abspath(__file__)), "sqlglot")

        # Staging happens inside the try so that a failed copy still removes the
        # partially populated ./sqlglot directory instead of leaving it behind.
        try:
            os.makedirs(local_sqlglot, exist_ok=True)

            for fname in SOURCE_FILES:
                dst_path = os.path.join(local_sqlglot, fname)
                # Creates the subpackage directory on first use.
                os.makedirs(os.path.dirname(dst_path), exist_ok=True)
                shutil.copy2(os.path.join(SQLGLOT_SRC, fname), dst_path)

            super().run()
        finally:
            shutil.rmtree(local_sqlglot, ignore_errors=True)
"""USING EXTERNAL FUNCTION some_function(input VARBINARY) RETURNS VARCHAR LAMBDA 'some-name' SELECT some_function(1)""", check_command_warning=True, ) self.validate_identity( "/* leading comment */CREATE SCHEMA foo", "/* leading comment */ CREATE SCHEMA `foo`", identify=True, ) self.validate_identity( "/* leading comment */SELECT * FROM foo", '/* leading comment */ SELECT * FROM "foo"', identify=True, ) def test_ddl(self): # Hive-like, https://docs.aws.amazon.com/athena/latest/ug/create-table.html self.validate_identity("CREATE EXTERNAL TABLE foo (id INT) COMMENT 'test comment'") self.validate_identity( r"CREATE EXTERNAL TABLE george.t (id INT COMMENT 'foo \\ bar') LOCATION 's3://my-bucket/'" ) self.validate_identity( r"CREATE EXTERNAL TABLE my_table (id BIGINT COMMENT 'this is the row\'s id') LOCATION 's3://my-s3-bucket'" ) self.validate_identity( "CREATE EXTERNAL TABLE foo (id INT, val STRING) CLUSTERED BY (id, val) INTO 10 BUCKETS" ) self.validate_identity( "CREATE EXTERNAL TABLE foo (id INT, val STRING) STORED AS PARQUET LOCATION 's3://foo' TBLPROPERTIES ('has_encryped_data'='true', 'classification'='test')" ) self.validate_identity( "CREATE EXTERNAL TABLE IF NOT EXISTS foo (a INT, b STRING) ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' WITH SERDEPROPERTIES ('case.insensitive'='FALSE') LOCATION 's3://table/path'" ) self.validate_identity( """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""", ) self.validate_identity( """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')""" ) # Iceberg, https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html self.validate_identity( "CREATE TABLE iceberg_table (`id` BIGINT, `data` STRING, category STRING) PARTITIONED BY (category, BUCKET(16, id)) LOCATION 's3://amzn-s3-demo-bucket/your-folder/' 
def test_dml(self):
    """A STRING cast read from the default dialect is written as VARCHAR for Athena/Trino and STRING for Hive."""
    varchar_sql = "SELECT CAST(ds AS VARCHAR) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)"
    string_sql = "SELECT CAST(ds AS STRING) AS ds FROM (VALUES ('2022-01-01')) AS t(ds)"

    self.validate_all(
        varchar_sql,
        read={"": string_sql},
        write={
            "hive": string_sql,
            "trino": varchar_sql,
            "athena": varchar_sql,
        },
    )
def test_dml_quoting(self):
    """DML statements round-trip both unquoted and double-quoted; identify=True adds double quotes."""
    roundtrip_statements = (
        "SELECT a AS foo FROM tbl",
        'SELECT "a" AS "foo" FROM "tbl"',
        "INSERT INTO foo (id) VALUES (1)",
        'INSERT INTO "foo" ("id") VALUES (1)',
        "UPDATE foo SET id = 3 WHERE id = 7",
        'UPDATE "foo" SET "id" = 3 WHERE "id" = 7',
        "DELETE FROM foo WHERE id > 10",
        'DELETE FROM "foo" WHERE "id" > 10',
        "WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo",
    )
    for sql in roundtrip_statements:
        self.validate_identity(sql)

    # With identify=True every identifier, including the CTE name, gets double quotes.
    self.validate_identity(
        "WITH foo AS (SELECT a, b FROM bar) SELECT * FROM foo",
        'WITH "foo" AS (SELECT "a", "b" FROM "bar") SELECT * FROM "foo"',
        identify=True,
    )
TBLPROPERTIES ('table_type'='iceberg') # no EXTERNAL keyword and the 'table_type=iceberg' property must be set # ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning ct_iceberg = exp.Create( this=table_schema, kind="TABLE", properties=exp.Properties( expressions=[ exp.FileFormatProperty(this=exp.Literal.string("parquet")), exp.LocationProperty(this=exp.Literal.string("s3://foo")), exp.PartitionedByProperty( this=exp.Schema( expressions=[ exp.to_column("partition_col"), exp.PartitionedByBucket( this=exp.to_column("a"), expression=exp.Literal.number(4) ), ] ) ), exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")), ] ), ) self.assertEqual( ct_iceberg.sql(dialect=self.dialect, identify=True), "CREATE TABLE `foo`.`bar` (`a` INT, `b` STRING) STORED AS PARQUET LOCATION 's3://foo' PARTITIONED BY (`partition_col`, BUCKET(4, `a`)) TBLPROPERTIES ('table_type'='iceberg')", ) def test_ctas(self): # Hive tables use 'external_location' to specify the table location, Iceberg tables use 'location' to specify the table location # In addition, Hive tables used 'partitioned_by' to specify the partition fields and Iceberg tables use 'partitioning' to specify the partition fields # The 'table_type' property is used to determine if it's a Hive or an Iceberg table. 
If it's omitted, it defaults to Hive # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties ctas_hive = exp.Create( this=exp.to_table("foo.bar"), kind="TABLE", properties=exp.Properties( expressions=[ exp.FileFormatProperty(this=exp.Literal.string("parquet")), exp.LocationProperty(this=exp.Literal.string("s3://foo")), exp.PartitionedByProperty( this=exp.Schema(expressions=[exp.to_column("partition_col", quoted=True)]) ), ] ), expression=exp.select("1"), ) # Even if identify=True, the column names should not be quoted within the string literals in the partitioned_by ARRAY[] self.assertEqual( ctas_hive.sql(dialect=self.dialect, identify=True), "CREATE TABLE \"foo\".\"bar\" WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1", ) self.assertEqual( ctas_hive.sql(dialect=self.dialect, identify=False), "CREATE TABLE foo.bar WITH (format='parquet', external_location='s3://foo', partitioned_by=ARRAY['partition_col']) AS SELECT 1", ) ctas_iceberg = exp.Create( this=exp.to_table("foo.bar"), kind="TABLE", properties=exp.Properties( expressions=[ exp.Property(this=exp.var("table_type"), value=exp.Literal.string("iceberg")), exp.LocationProperty(this=exp.Literal.string("s3://foo")), exp.PartitionedByProperty( this=exp.Schema( expressions=[ exp.to_column("partition_col"), exp.PartitionedByBucket( this=exp.to_column("a", quoted=True), expression=exp.Literal.number(4), ), ] ) ), ] ), expression=exp.select("1"), ) # Even if identify=True, the column names should not be quoted within the string literals in the partitioning ARRAY[] # Technically Trino's Iceberg connector does support quoted column names in the string literals but its undocumented # so we dont do it to keep consistency with the Hive connector self.assertEqual( ctas_iceberg.sql(dialect=self.dialect, identify=True), "CREATE TABLE \"foo\".\"bar\" WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 
'BUCKET(a, 4)']) AS SELECT 1", ) self.assertEqual( ctas_iceberg.sql(dialect=self.dialect, identify=False), "CREATE TABLE foo.bar WITH (table_type='iceberg', location='s3://foo', partitioning=ARRAY['partition_col', 'BUCKET(a, 4)']) AS SELECT 1", ) def test_parse_partitioned_by_returns_iceberg_transforms(self): # check that parse_into works for PartitionedByProperty and also that correct AST nodes are emitted for Iceberg transforms parsed = self.parse_one( "(a, bucket(4, b), truncate(3, c), month(d))", into=exp.PartitionedByProperty ) assert isinstance(parsed, exp.PartitionedByProperty) assert isinstance(parsed.this, exp.Schema) assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionedByBucket)) assert next(n for n in parsed.this.expressions if isinstance(n, exp.PartitionByTruncate)) ================================================ FILE: tests/dialects/test_bigquery.py ================================================ from unittest import mock import datetime import pytz from sqlglot import ( ErrorLevel, ParseError, TokenError, UnsupportedError, exp, parse, transpile, parse_one, ) from sqlglot.helper import logger as helper_logger from sqlglot.parser import logger as parser_logger from tests.dialects.test_dialect import Validator from sqlglot.optimizer.annotate_types import annotate_types from sqlglot.optimizer.qualify import qualify class TestBigQuery(Validator): dialect = "bigquery" maxDiff = None def test_bigquery(self): for prefix in ("c.db.", "db.", ""): with self.subTest(f"Parsing {prefix}INFORMATION_SCHEMA.X into a Table"): table = self.parse_one(f"`{prefix}INFORMATION_SCHEMA.X`", into=exp.Table) this = table.this self.assertIsInstance(this, exp.Identifier) self.assertTrue(this.quoted) self.assertEqual(this.name, "INFORMATION_SCHEMA.X") table = self.parse_one("x-0._y.z", into=exp.Table) self.assertEqual(table.catalog, "x-0") self.assertEqual(table.db, "_y") self.assertEqual(table.name, "z") table = self.parse_one("x-0._y", into=exp.Table) 
self.assertEqual(table.db, "x-0") self.assertEqual(table.name, "_y") self.validate_identity("SAFE.SOME_RANDOM_FUNC(a, b, c)").assert_is(exp.SafeFunc) self.validate_identity( "SAFE.SUBSTR('foo', 0, -2)", ).assert_is(exp.SafeFunc).this.assert_is(exp.Substring) self.validate_identity("SAFE.TIMESTAMP(foo, zone)").assert_is(exp.SafeFunc).this.assert_is( exp.Timestamp ) self.validate_identity( "SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15')", "SAFE.PARSE_DATE('%F', '2024-01-15')", ).assert_is(exp.SafeFunc).this.assert_is(exp.StrToDate) self.validate_identity( "SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')", "SAFE.PARSE_DATETIME('%F %T', '2024-01-15 10:30:00')", ).assert_is(exp.SafeFunc).this.assert_is(exp.ParseDatetime) self.validate_identity( "SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00')", "SAFE.PARSE_TIMESTAMP('%F %T', '2024-01-15 10:30:00')", ).assert_is(exp.SafeFunc).this.assert_is(exp.StrToTime) self.validate_identity("TIMESTAMP(foo, zone)").assert_is(exp.Timestamp) self.validate_identity("SELECT * FROM x-0.y") self.assertEqual(exp.to_table("`a.b`.`c.d`", dialect="bigquery").sql(), '"a"."b"."c"."d"') self.assertEqual(exp.to_table("`x`.`y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql(), '"x"."y"."z"') self.assertEqual(exp.to_table("`x.y.z`", dialect="bigquery").sql("bigquery"), "`x.y.z`") self.assertEqual(exp.to_table("`x`.`y`", dialect="bigquery").sql("bigquery"), "`x`.`y`") column = self.validate_identity("SELECT `db.t`.`c` FROM `db.t`").selects[0] self.assertEqual(len(column.parts), 3) select_with_quoted_udf = self.validate_identity("SELECT `p.d.UdF`(data) FROM `p.d.t`") self.assertEqual(select_with_quoted_udf.selects[0].name, "p.d.UdF") self.validate_identity("SELECT EXP(1)") self.validate_identity("NET.HOST('http://example.com')").assert_is( exp.NetFunc ).this.assert_is(exp.Host) self.validate_identity("NET.REG_DOMAIN('http://example.com')").assert_is( exp.NetFunc 
).this.assert_is(exp.RegDomain) self.validate_identity("DATE_TRUNC(x, @foo)").unit.assert_is(exp.Parameter) self.validate_identity("ARRAY_CONCAT_AGG(x ORDER BY ARRAY_LENGTH(x) LIMIT 2)") self.validate_identity("ARRAY_CONCAT_AGG(x LIMIT 2)") self.validate_identity("ARRAY_CONCAT_AGG(x ORDER BY ARRAY_LENGTH(x))") self.validate_identity("ARRAY_CONCAT_AGG(x)") self.validate_identity("PARSE_TIMESTAMP('%FT%H:%M:%E*S%z', x)") self.validate_identity("SELECT ARRAY_CONCAT([1])") self.validate_identity("SELECT * FROM READ_CSV('bla.csv')") self.validate_identity("CAST(x AS STRUCT>)") self.validate_identity("assert.true(1 = 1)") self.validate_identity("SELECT jsondoc['some_key']") self.validate_identity("SELECT `p.d.UdF`(data).* FROM `p.d.t`") self.validate_identity("SELECT * FROM `my-project.my-dataset.my-table`") self.validate_identity("CREATE OR REPLACE TABLE `a.b.c` CLONE `a.b.d`") self.validate_identity("SELECT x, 1 AS y GROUP BY 1 ORDER BY 1") self.validate_identity("SELECT * FROM x.*") self.validate_identity("SELECT * FROM x.y*") self.validate_identity("CASE A WHEN 90 THEN 'red' WHEN 50 THEN 'blue' ELSE 'green' END") self.validate_identity("CREATE SCHEMA x DEFAULT COLLATE 'en'") self.validate_identity("CREATE TABLE x (y INT64) DEFAULT COLLATE 'en'") self.validate_identity("PARSE_JSON('{}', wide_number_mode => 'exact')") self.validate_identity("FOO(values)") self.validate_identity("STRUCT(values AS value)") self.validate_identity("SELECT SEARCH(data_to_search, 'search_query')") self.validate_identity( "SELECT SEARCH(data_to_search, 'search_query', json_scope => 'JSON_KEYS_AND_VALUES')" ) self.validate_identity( "SELECT SEARCH(data_to_search, 'search_query', analyzer => 'PATTERN_ANALYZER')" ) self.validate_identity( "SELECT SEARCH(data_to_search, 'search_query', analyzer_options => 'analyzer_options_values')" ) self.validate_identity( "SELECT SEARCH(data_to_search, 'search_query', json_scope => 'JSON_VALUES', analyzer => 'LOG_ANALYZER')" ) self.validate_identity( "SELECT 
SEARCH(data_to_search, 'search_query', analyzer => 'PATTERN_ANALYZER', analyzer_options => 'options')" ) self.validate_identity("ARRAY_AGG(x IGNORE NULLS LIMIT 1)") self.validate_identity("ARRAY_AGG(x IGNORE NULLS ORDER BY x LIMIT 1)") self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY x LIMIT 1)") self.validate_identity("ARRAY_AGG(x IGNORE NULLS)") self.validate_identity("ARRAY_AGG(DISTINCT x IGNORE NULLS HAVING MAX x ORDER BY x LIMIT 1)") self.validate_identity("SELECT * FROM dataset.my_table TABLESAMPLE SYSTEM (10 PERCENT)") self.validate_identity("TIME('2008-12-25 15:30:00+08')") self.validate_identity("TIME('2008-12-25 15:30:00+08', 'America/Los_Angeles')") self.validate_identity(r"SELECT '\n\r\a\v\f\t'") self.validate_identity("SELECT * FROM tbl FOR SYSTEM_TIME AS OF z") self.validate_identity("SELECT PARSE_TIMESTAMP('%c', 'Thu Dec 25 07:30:00 2008', 'UTC')") self.validate_identity("SELECT ANY_VALUE(fruit HAVING MAX sold) FROM fruits") self.validate_identity("SELECT ANY_VALUE(fruit HAVING MIN sold) FROM fruits") self.validate_all( "SELECT ANY_VALUE(fruit HAVING MAX sold) FROM Store", write={ "bigquery": "SELECT ANY_VALUE(fruit HAVING MAX sold) FROM Store", "duckdb": "SELECT ARG_MAX_NULL(fruit, sold) FROM Store", }, ) self.validate_all( "SELECT ANY_VALUE(fruit HAVING MIN sold) FROM Store", write={ "bigquery": "SELECT ANY_VALUE(fruit HAVING MIN sold) FROM Store", "duckdb": "SELECT ARG_MIN_NULL(fruit, sold) FROM Store", }, ) self.validate_all( "SELECT category, ANY_VALUE(product HAVING MAX price), ANY_VALUE(product HAVING MIN cost), ANY_VALUE(supplier) FROM products GROUP BY category", write={ "bigquery": "SELECT category, ANY_VALUE(product HAVING MAX price), ANY_VALUE(product HAVING MIN cost), ANY_VALUE(supplier) FROM products GROUP BY category", "duckdb": "SELECT category, ARG_MAX_NULL(product, price), ARG_MIN_NULL(product, cost), ANY_VALUE(supplier) FROM products GROUP BY category", }, ) self.validate_all( 'WITH data AS (SELECT "A" AS fruit, 
20 AS sold UNION ALL SELECT NULL AS fruit, 25 AS sold) SELECT ANY_VALUE(fruit HAVING MAX sold) FROM data', write={ "duckdb": "WITH data AS (SELECT 'A' AS fruit, 20 AS sold UNION ALL SELECT NULL AS fruit, 25 AS sold) SELECT ARG_MAX_NULL(fruit, sold) FROM data", }, ) self.validate_identity("SELECT `project-id`.udfs.func(call.dir)") self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day") self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')") self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')") self.validate_identity("DATE(2016, 12, 25)") self.validate_identity("DATE(CAST('2016-12-25 23:59:59' AS DATETIME))") self.validate_identity("SELECT foo IN UNNEST(bar) AS bla") self.validate_identity("SELECT * FROM x-0.a") self.validate_identity("SELECT * FROM pivot CROSS JOIN foo") self.validate_identity("SAFE_CAST(x AS STRING)") self.validate_identity("SELECT * FROM a-b-c.mydataset.mytable") self.validate_identity("SELECT * FROM abc-def-ghi") self.validate_identity("SELECT * FROM a-b-c") self.validate_identity("SELECT * FROM my-table") self.validate_identity("SELECT * FROM my-project.mydataset.mytable") self.validate_identity("SELECT * FROM pro-ject_id.c.d CROSS JOIN foo-bar") self.validate_identity("SELECT * FROM foo.bar.25", "SELECT * FROM foo.bar.`25`") self.validate_identity("SELECT * FROM foo.bar.25_", "SELECT * FROM foo.bar.`25_`") self.validate_identity("SELECT * FROM foo.bar.25x a", "SELECT * FROM foo.bar.`25x` AS a") self.validate_identity("SELECT * FROM foo.bar.25ab c", "SELECT * FROM foo.bar.`25ab` AS c") self.validate_identity("x <> ''") self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))") self.validate_identity("DATE_TRUNC(col, MONTH, 'UTC+8')") self.validate_identity("SELECT b'abc'") self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b") self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b") self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)") 
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))") self.validate_identity("""CREATE TABLE x (a STRUCT>)""") self.validate_identity("""CREATE TABLE x (a STRUCT)""") self.validate_identity("CAST(x AS TIMESTAMP)") self.validate_identity("BEGIN DECLARE y INT64", check_command_warning=True) self.validate_identity("LOOP SET x = x + 1", check_command_warning=True) self.validate_identity("REPEAT SET x = x + 1", check_command_warning=True) self.validate_identity("SELECT MAKE_INTERVAL(100, 11, 1, 12, 30, 10)") self.validate_identity( "WHILE i < ARRAY_LENGTH(batches) DO SET x = batches[OFFSET(i)]", check_command_warning=True, ) self.validate_identity("BEGIN TRANSACTION") self.validate_identity("COMMIT TRANSACTION") self.validate_identity("ROLLBACK TRANSACTION") self.validate_identity("CAST(x AS BIGNUMERIC)") self.validate_identity("SELECT y + 1 FROM x GROUP BY y + 1 ORDER BY 1") self.validate_identity("SELECT TIMESTAMP_SECONDS(2) AS t") self.validate_identity("SELECT TIMESTAMP_MILLIS(2) AS t") self.validate_identity("UPDATE x SET y = NULL") self.validate_identity("LOG(n, b)") self.validate_identity("SELECT COUNT(x RESPECT NULLS)") self.validate_identity("SELECT LAST_VALUE(x IGNORE NULLS) OVER y AS x") self.validate_identity("SELECT ARRAY((SELECT AS STRUCT 1 AS a, 2 AS b))") self.validate_identity("SELECT ARRAY((SELECT AS STRUCT 1 AS a, 2 AS b) LIMIT 10)") self.validate_identity("CAST(x AS CHAR)", "CAST(x AS STRING)") self.validate_identity("CAST(x AS NCHAR)", "CAST(x AS STRING)") self.validate_identity("CAST(x AS NVARCHAR)", "CAST(x AS STRING)") self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS TIMESTAMP)") self.validate_identity("CAST(x AS RECORD)", "CAST(x AS STRUCT)") self.validate_identity("SELECT * FROM x WHERE x.y >= (SELECT MAX(a) FROM b-c) - 20") self.validate_identity( """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT JSON_EXTRACT(c, '$.x-y') FROM t""" ).selects[0].expression.assert_is(exp.JSONPath) self.validate_identity( 
"SELECT FORMAT_TIMESTAMP('%F %T', CURRENT_TIMESTAMP(), 'Europe/Berlin') AS ts" ) self.validate_identity( "SELECT cars, apples FROM some_table PIVOT(SUM(total_counts) FOR products IN ('general.cars' AS cars, 'food.apples' AS apples))" ) self.validate_identity( "MERGE INTO dataset.NewArrivals USING (SELECT * FROM UNNEST([('microwave', 10, 'warehouse #1'), ('dryer', 30, 'warehouse #1'), ('oven', 20, 'warehouse #2')])) ON FALSE WHEN NOT MATCHED THEN INSERT ROW WHEN NOT MATCHED BY SOURCE THEN DELETE" ) self.validate_identity( "SELECT * FROM test QUALIFY a IS DISTINCT FROM b WINDOW c AS (PARTITION BY d)" ) self.validate_identity( "FOR record IN (SELECT word, word_count FROM bigquery-public-data.samples.shakespeare LIMIT 5) DO SELECT record.word, record.word_count" ) self.validate_identity( "DATE(CAST('2016-12-25 05:30:00+07' AS DATETIME), 'America/Los_Angeles')" ) self.validate_identity( """CREATE TABLE x (a STRING OPTIONS (description='x')) OPTIONS (table_expiration_days=1)""" ) self.validate_identity( "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))" ) self.validate_identity( "CREATE TABLE IF NOT EXISTS foo AS SELECT * FROM bla EXCEPT DISTINCT (SELECT * FROM bar) LIMIT 0" ) self.validate_identity( "SELECT ROW() OVER (y ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM x WINDOW y AS (PARTITION BY CATEGORY)" ) self.validate_identity( "SELECT item, purchases, LAST_VALUE(item) OVER (item_window ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce WINDOW item_window AS (ORDER BY purchases)" ) self.validate_identity( "SELECT LAST_VALUE(a IGNORE NULLS) OVER y FROM x WINDOW y AS (PARTITION BY CATEGORY)", ) self.validate_identity( "CREATE OR REPLACE VIEW test (tenant_id OPTIONS (description='Test description on table creation')) AS SELECT 1 AS tenant_id, 1 AS customer_id", ) self.validate_identity( "SELECT * FROM foo AS t0 FOR SYSTEM_TIME AS OF '2026-02-12T23:22:21.743416+00:00'", ) self.validate_identity( '''SELECT b"\\x0a$'x'00"''', 
"""SELECT b'\\x0a$\\'x\\'00'""", ) self.validate_identity( "--c\nARRAY_AGG(v IGNORE NULLS)", "ARRAY_AGG(v IGNORE NULLS) /* c */", ) self.validate_identity( "SELECT * FROM t1, t2", "SELECT * FROM t1 CROSS JOIN t2", ) self.validate_identity( 'SELECT r"\\t"', "SELECT '\\\\t'", ) self.validate_identity( "ARRAY(SELECT AS STRUCT e.x AS y, e.z AS bla FROM UNNEST(bob))::ARRAY>", "CAST(ARRAY(SELECT AS STRUCT e.x AS y, e.z AS bla FROM UNNEST(bob)) AS ARRAY>)", ) self.validate_identity( "SELECT * FROM `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`", "SELECT * FROM `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW` AS `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`", ) self.validate_identity( "SELECT * FROM region_or_dataset.INFORMATION_SCHEMA.TABLES", "SELECT * FROM region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES", ) self.validate_identity( "SELECT * FROM region_or_dataset.INFORMATION_SCHEMA.TABLES AS some_name", "SELECT * FROM region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS some_name", ) self.validate_identity( "SELECT * FROM proj.region_or_dataset.INFORMATION_SCHEMA.TABLES", "SELECT * FROM proj.region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES", ) self.validate_identity( "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=TIMESTAMP '2020-01-02T04:05:06.007Z') AS SELECT 1 AS c", "CREATE VIEW `d.v` OPTIONS (expiration_timestamp=CAST('2020-01-02T04:05:06.007Z' AS TIMESTAMP)) AS SELECT 1 AS c", ) self.validate_identity( "SELECT ARRAY(SELECT AS STRUCT 1 a, 2 b)", "SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b)", ) self.validate_identity( "select array_contains([1, 2, 3], 1)", "SELECT EXISTS(SELECT 1 FROM UNNEST([1, 2, 3]) AS _col WHERE _col = 1)", ) self.validate_identity( "SELECT SPLIT(foo)", "SELECT SPLIT(foo, ',')", ) self.validate_identity( "SELECT 1 AS hash", "SELECT 1 AS `hash`", ) self.validate_identity( "SELECT 1 AS at", "SELECT 1 AS `at`", ) self.validate_identity( 'x <> ""', "x <> ''", ) self.validate_identity( 'x <> """"""', "x <> ''", ) self.validate_identity( "x <> 
''''''", "x <> ''", ) self.validate_identity( "SELECT a overlaps", "SELECT a AS overlaps", ) self.validate_identity( "SELECT y + 1 z FROM x GROUP BY y + 1 ORDER BY z", "SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z", ) self.validate_identity( "SELECT y + 1 z FROM x GROUP BY y + 1", "SELECT y + 1 AS z FROM x GROUP BY y + 1", ) self.validate_identity( """SELECT JSON '"foo"' AS json_data""", """SELECT PARSE_JSON('"foo"') AS json_data""", ) self.validate_identity( "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))", "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))", ) self.validate_identity( "SELECT CAST(1 AS BYTEINT)", "SELECT CAST(1 AS INT64)", ) self.validate_identity( """CREATE TEMPORARY FUNCTION FOO() RETURNS STRING LANGUAGE js AS 'return "Hello world!"'""", pretty=True, ) self.validate_identity( "[a, a(1, 2,3,4444444444444444, tttttaoeunthaoentuhaoentuheoantu, toheuntaoheutnahoeunteoahuntaoeh), b(3, 4,5), c, d, tttttttttttttttteeeeeeeeeeeeeett, 12312312312]", """[ a, a( 1, 2, 3, 4444444444444444, tttttaoeunthaoentuhaoentuheoantu, toheuntaoheutnahoeunteoahuntaoeh ), b(3, 4, 5), c, d, tttttttttttttttteeeeeeeeeeeeeett, 12312312312 ]""", pretty=True, ) self.validate_all( "SELECT TRUE IS TRUE", write={ "bigquery": "SELECT TRUE IS TRUE", "snowflake": "SELECT TRUE", }, ) self.validate_all( "SELECT REPEAT(' ', 2)", read={ "hive": "SELECT SPACE(2)", "spark": "SELECT SPACE(2)", "databricks": "SELECT SPACE(2)", "trino": "SELECT REPEAT(' ', 2)", }, ) self.validate_all( "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", write={ "bigquery": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "clickhouse": "SELECT 
purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "databricks": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "duckdb": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "mysql": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "oracle": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "postgres": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "presto": "SELECT purchases, LAST_VALUE(item) OVER (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce", "redshift": "SELECT purchases, LAST_VALUE(item) OVER (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce", "snowflake": "SELECT purchases, LAST_VALUE(item) OVER (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce", "spark": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "trino": "SELECT 
purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases NULLS FIRST ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", "tsql": "SELECT purchases, LAST_VALUE(item) OVER item_window AS most_popular FROM Produce WINDOW item_window AS (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)", }, ) self.validate_all( "SELECT DATE(2024, 1, 15)", write={ "bigquery": "SELECT DATE(2024, 1, 15)", "duckdb": "SELECT MAKE_DATE(2024, 1, 15)", }, ) self.validate_all( "EXTRACT(HOUR FROM DATETIME(2008, 12, 25, 15, 30, 00))", write={ "bigquery": "EXTRACT(HOUR FROM DATETIME(2008, 12, 25, 15, 30, 00))", "duckdb": "EXTRACT(HOUR FROM MAKE_TIMESTAMP(2008, 12, 25, 15, 30, 00))", "snowflake": "DATE_PART(HOUR, TIMESTAMP_FROM_PARTS(2008, 12, 25, 15, 30, 00))", }, ) self.validate_all( "SELECT STRUCT(1, 2, 3), STRUCT(), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 as a, 'abc' AS b), STRUCT(str_col AS abc)", write={ "bigquery": "SELECT STRUCT(1, 2, 3), STRUCT(), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)", "duckdb": "SELECT {'_0': 1, '_1': 2, '_2': 3}, {}, {'_0': 'abc'}, {'_0': 1, 'str_col': t.str_col}, {'a': 1, 'b': 'abc'}, {'abc': str_col}", "hive": "SELECT STRUCT(1, 2, 3), STRUCT(), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1, 'abc'), STRUCT(str_col)", "spark2": "SELECT STRUCT(1, 2, 3), STRUCT(), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)", "spark": "SELECT STRUCT(1, 2, 3), STRUCT(), STRUCT('abc'), STRUCT(1, t.str_col), STRUCT(1 AS a, 'abc' AS b), STRUCT(str_col AS abc)", "snowflake": "SELECT OBJECT_CONSTRUCT('_0', 1, '_1', 2, '_2', 3), OBJECT_CONSTRUCT(), OBJECT_CONSTRUCT('_0', 'abc'), OBJECT_CONSTRUCT('_0', 1, '_1', t.str_col), OBJECT_CONSTRUCT('a', 1, 'b', 'abc'), OBJECT_CONSTRUCT('abc', str_col)", # fallback to unnamed without type inference "trino": "SELECT ROW(1, 2, 3), ROW(), ROW('abc'), 
ROW(1, t.str_col), CAST(ROW(1, 'abc') AS ROW(a INTEGER, b VARCHAR)), ROW(str_col)", }, ) self.validate_all( "PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E6S%z', x)", write={ "bigquery": "PARSE_TIMESTAMP('%FT%H:%M:%E6S%z', x)", "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S.%f%z')", }, ) self.validate_all( "SELECT DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY)", write={ "bigquery": "SELECT DATE_SUB(CURRENT_DATE, INTERVAL '2' DAY)", "databricks": "SELECT DATE_ADD(CURRENT_DATE, -2)", }, ) self.validate_all( "SELECT DATE_SUB(DATE '2008-12-25', INTERVAL 5 DAY)", write={ "bigquery": "SELECT DATE_SUB(CAST('2008-12-25' AS DATE), INTERVAL '5' DAY)", "duckdb": "SELECT CAST('2008-12-25' AS DATE) - INTERVAL '5' DAY", "snowflake": "SELECT DATEADD(DAY, '5' * -1, CAST('2008-12-25' AS DATE))", }, ) self.validate_all( "EDIT_DISTANCE(col1, col2, max_distance => 3)", write={ "bigquery": "EDIT_DISTANCE(col1, col2, max_distance => 3)", "clickhouse": UnsupportedError, "databricks": UnsupportedError, "drill": UnsupportedError, "duckdb": "CASE WHEN LEVENSHTEIN(col1, col2) IS NULL OR 3 IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(col1, col2), 3) END", "hive": UnsupportedError, "postgres": "LEVENSHTEIN_LESS_EQUAL(col1, col2, 3)", "presto": UnsupportedError, "snowflake": "EDITDISTANCE(col1, col2, 3)", "spark": UnsupportedError, "spark2": UnsupportedError, "sqlite": UnsupportedError, }, ) self.validate_all( "EDIT_DISTANCE(a, b)", write={ "bigquery": "EDIT_DISTANCE(a, b)", "duckdb": "LEVENSHTEIN(a, b)", }, ) self.validate_all( "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')", write={ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'DD MONTH YYYY')", "duckdb": "CAST(TRY_STRPTIME(some_date, '%d %B %Y') AS DATE)", }, ) self.validate_all( "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date", write={ "bigquery": "SAFE_CAST(some_date AS DATE FORMAT 'YYYY-MM-DD') AS some_date", "duckdb": "CAST(TRY_STRPTIME(some_date, '%Y-%m-%d') AS DATE) AS some_date", }, ) self.validate_all( "SAFE_CAST(x AS TIMESTAMP)", 
write={ "bigquery": "SAFE_CAST(x AS TIMESTAMP)", "snowflake": "CAST(x AS TIMESTAMPTZ)", }, ) self.validate_all( "SELECT t.c1, h.c2, s.c3 FROM t1 AS t, UNNEST(t.t2) AS h, UNNEST(h.t3) AS s", write={ "bigquery": "SELECT t.c1, h.c2, s.c3 FROM t1 AS t CROSS JOIN UNNEST(t.t2) AS h CROSS JOIN UNNEST(h.t3) AS s", "duckdb": "SELECT t.c1, h.c2, s.c3 FROM t1 AS t CROSS JOIN UNNEST(t.t2) AS _t0(h) CROSS JOIN UNNEST(h.t3) AS _t1(s)", }, ) self.validate_all( "PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E6S%z', x)", write={ "bigquery": "PARSE_TIMESTAMP('%FT%H:%M:%E6S%z', x)", "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S.%f%z')", }, ) self.validate_all( "SELECT results FROM Coordinates, Coordinates.position AS results", write={ "bigquery": "SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results", "presto": "SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS _t0(results)", }, ) self.validate_all( "SELECT results FROM Coordinates, `Coordinates.position` AS results", write={ "bigquery": "SELECT results FROM Coordinates CROSS JOIN `Coordinates.position` AS results", "presto": 'SELECT results FROM Coordinates CROSS JOIN "Coordinates"."position" AS results', }, ) self.validate_all( "SELECT results FROM Coordinates AS c, UNNEST(c.position) AS results", read={ "presto": "SELECT results FROM Coordinates AS c, UNNEST(c.position) AS _t(results)", "redshift": "SELECT results FROM Coordinates AS c, c.position AS results", }, write={ "bigquery": "SELECT results FROM Coordinates AS c CROSS JOIN UNNEST(c.position) AS results", "presto": "SELECT results FROM Coordinates AS c CROSS JOIN UNNEST(c.position) AS _t0(results)", "redshift": "SELECT results FROM Coordinates AS c CROSS JOIN c.position AS results", }, ) self.validate_all( "TIMESTAMP(x)", write={ "bigquery": "TIMESTAMP(x)", "duckdb": "CAST(x AS TIMESTAMPTZ)", "snowflake": "CAST(x AS TIMESTAMPTZ)", "presto": "CAST(x AS TIMESTAMP WITH TIME ZONE)", }, ) self.validate_all( "SELECT TIMESTAMP('2008-12-25 
15:30:00', 'America/Los_Angeles')", write={ "bigquery": "SELECT TIMESTAMP('2008-12-25 15:30:00', 'America/Los_Angeles')", "duckdb": "SELECT CAST('2008-12-25 15:30:00' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles'", "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', CAST('2008-12-25 15:30:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT SUM(x IGNORE NULLS) AS x", read={ "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", "duckdb": "SELECT SUM(x IGNORE NULLS) AS x", "spark": "SELECT SUM(x) IGNORE NULLS AS x", "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", }, write={ "bigquery": "SELECT SUM(x IGNORE NULLS) AS x", "duckdb": "SELECT SUM(x) AS x", "postgres": UnsupportedError, "spark": "SELECT SUM(x) IGNORE NULLS AS x", "snowflake": "SELECT SUM(x) IGNORE NULLS AS x", }, ) self.validate_all( "SELECT SUM(x RESPECT NULLS) AS x", read={ "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", "spark": "SELECT SUM(x) RESPECT NULLS AS x", "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", }, write={ "bigquery": "SELECT SUM(x RESPECT NULLS) AS x", "duckdb": "SELECT SUM(x) AS x", "postgres": UnsupportedError, "spark": "SELECT SUM(x) RESPECT NULLS AS x", "snowflake": "SELECT SUM(x) RESPECT NULLS AS x", }, ) self.validate_all( "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", write={ "bigquery": "SELECT PERCENTILE_CONT(x, 0.5 RESPECT NULLS) OVER ()", "duckdb": "SELECT QUANTILE_CONT(x, 0.5) OVER ()", "spark": "SELECT PERCENTILE_CONT(x, 0.5) RESPECT NULLS OVER ()", }, ) self.validate_all( "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", write={ "bigquery": "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 10) AS x", "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 10) AS x", "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 10) IGNORE NULLS AS x", }, ) self.validate_all( "SELECT ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", write={ "bigquery": "SELECT 
ARRAY_AGG(DISTINCT x IGNORE NULLS ORDER BY a, b DESC LIMIT 1, 10) AS x", "duckdb": "SELECT ARRAY_AGG(DISTINCT x ORDER BY a NULLS FIRST, b DESC LIMIT 1, 10) AS x", "spark": "SELECT COLLECT_LIST(DISTINCT x ORDER BY a, b DESC LIMIT 1, 10) IGNORE NULLS AS x", }, ) self.validate_all( "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))", read={ "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))", }, write={ "bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2'))", "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS semester_1, (Q3, Q4) AS semester_2))", }, ) self.validate_all( "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))", write={ "bigquery": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 1, (Q3, Q4) AS 2))", "spark": "SELECT * FROM Produce UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS `1`, (Q3, Q4) AS `2`))", }, ) self.validate_all( "SELECT UNIX_DATE(DATE '2008-12-25')", write={ "bigquery": "SELECT UNIX_DATE(CAST('2008-12-25' AS DATE))", "duckdb": "SELECT DATE_DIFF('DAY', CAST('1970-01-01' AS DATE), CAST('2008-12-25' AS DATE))", }, ) self.validate_all( "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", read={ "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONS)", }, write={ "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "clickhouse": "SELECT LAST_DAY(CAST('2008-11-25' AS Nullable(DATE)))", "mysql": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "oracle": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", 
"postgres": "SELECT CAST(DATE_TRUNC('MONTH', CAST('2008-11-25' AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", "redshift": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", "spark": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "tsql": "SELECT EOMONTH(CAST('2008-11-25' AS DATE))", }, ) self.validate_all( "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", read={ "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", }, write={ "bigquery": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), QUARTER)", }, ) self.validate_all( "CAST(x AS DATETIME)", read={ "": "x::timestamp", }, ) self.validate_all( "SELECT TIME(15, 30, 00)", read={ "duckdb": "SELECT MAKE_TIME(15, 30, 00)", "mysql": "SELECT MAKETIME(15, 30, 00)", "postgres": "SELECT MAKE_TIME(15, 30, 00)", "snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)", }, write={ "bigquery": "SELECT TIME(15, 30, 00)", "duckdb": "SELECT MAKE_TIME(15, 30, 00)", "mysql": "SELECT MAKETIME(15, 30, 00)", "postgres": "SELECT MAKE_TIME(15, 30, 00)", "snowflake": "SELECT TIME_FROM_PARTS(15, 30, 00)", "tsql": "SELECT TIMEFROMPARTS(15, 30, 00, 0, 0)", }, ) self.validate_all( "SELECT TIME('2008-12-25 15:30:00')", write={ "bigquery": "SELECT TIME('2008-12-25 15:30:00')", "duckdb": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", "mysql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", "postgres": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", "redshift": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", "spark": "SELECT CAST('2008-12-25 15:30:00' AS TIMESTAMP)", "tsql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)", }, ) self.validate_all( "SELECT COUNTIF(x)", read={ "clickhouse": "SELECT countIf(x)", "duckdb": "SELECT COUNT_IF(x)", }, write={ "bigquery": "SELECT COUNTIF(x)", "clickhouse": "SELECT countIf(x)", "duckdb": "SELECT 
COUNT_IF(x)", }, ) self.validate_all( "SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), minute)", write={ "bigquery": "SELECT TIMESTAMP_DIFF(TIMESTAMP_SECONDS(60), TIMESTAMP_SECONDS(0), MINUTE)", "databricks": "SELECT TIMESTAMPDIFF(MINUTE, CAST(FROM_UNIXTIME(0) AS TIMESTAMP), CAST(FROM_UNIXTIME(60) AS TIMESTAMP))", "duckdb": "SELECT DATE_DIFF('MINUTE', TO_TIMESTAMP(0), TO_TIMESTAMP(60))", "snowflake": "SELECT TIMESTAMPDIFF(MINUTE, TO_TIMESTAMP(0), TO_TIMESTAMP(60))", }, ) self.validate_all( "TIMESTAMP_DIFF(a, b, MONTH)", read={ "bigquery": "TIMESTAMP_DIFF(a, b, month)", "databricks": "TIMESTAMPDIFF(month, b, a)", "mysql": "TIMESTAMPDIFF(month, b, a)", }, write={ "databricks": "TIMESTAMPDIFF(MONTH, b, a)", "mysql": "TIMESTAMPDIFF(MONTH, b, a)", "snowflake": "TIMESTAMPDIFF(MONTH, b, a)", }, ) self.validate_all( "SELECT TIMESTAMP_MICROS(x)", read={ "duckdb": "SELECT MAKE_TIMESTAMP(x)", "spark": "SELECT TIMESTAMP_MICROS(x)", }, write={ "bigquery": "SELECT TIMESTAMP_MICROS(x)", "duckdb": "SELECT MAKE_TIMESTAMP(x)", "snowflake": "SELECT TO_TIMESTAMP(x, 6)", "spark": "SELECT TIMESTAMP_MICROS(x)", }, ) self.validate_all( "SELECT * FROM t WHERE EXISTS(SELECT * FROM unnest(nums) AS x WHERE x > 1)", write={ "bigquery": "SELECT * FROM t WHERE EXISTS(SELECT * FROM UNNEST(nums) AS x WHERE x > 1)", "duckdb": "SELECT * FROM t WHERE EXISTS(SELECT * FROM UNNEST(nums) AS _t0(x) WHERE x > 1)", }, ) self.validate_all( "NULL", read={ "duckdb": "NULL = a", "postgres": "a = NULL", }, ) self.validate_all( "SELECT '\\n'", read={ "bigquery": "SELECT '''\n'''", }, write={ "bigquery": "SELECT '\\n'", "postgres": "SELECT '\n'", }, ) self.validate_all( "TRIM(item, '*')", read={ "snowflake": "TRIM(item, '*')", "spark": "TRIM('*', item)", }, write={ "bigquery": "TRIM(item, '*')", "snowflake": "TRIM(item, '*')", "spark": "TRIM('*' FROM item)", }, ) expr = self.parse_one( "SELECT TRIM(CAST('***apple***' AS BYTES), CAST('*' AS BYTES)) AS result" ) annotated = annotate_types(expr, 
dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "SELECT CAST(TRIM(CAST(CAST('***apple***' AS BLOB) AS TEXT), CAST(CAST('*' AS BLOB) AS TEXT)) AS BLOB) AS result", ) expr = self.parse_one("SELECT TRIM('***apple***', '*') AS result") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual(annotated.sql("duckdb"), "SELECT TRIM('***apple***', '*') AS result") self.validate_all( "CREATE OR REPLACE TABLE `a.b.c` COPY `a.b.d`", write={ "bigquery": "CREATE OR REPLACE TABLE `a.b.c` COPY `a.b.d`", "snowflake": 'CREATE OR REPLACE TABLE "a"."b"."c" CLONE "a"."b"."d"', }, ) ( self.validate_all( "SELECT DATETIME_DIFF('2023-01-01T00:00:00', '2023-01-01T05:00:00', MILLISECOND)", write={ "bigquery": "SELECT DATETIME_DIFF('2023-01-01T00:00:00', '2023-01-01T05:00:00', MILLISECOND)", "databricks": "SELECT TIMESTAMPDIFF(MILLISECOND, '2023-01-01T05:00:00', '2023-01-01T00:00:00')", "snowflake": "SELECT TIMESTAMPDIFF(MILLISECOND, '2023-01-01T05:00:00', '2023-01-01T00:00:00')", "duckdb": "SELECT DATE_DIFF('MILLISECOND', CAST('2023-01-01T05:00:00' AS TIMESTAMP), CAST('2023-01-01T00:00:00' AS TIMESTAMP))", }, ), ) ( self.validate_all( "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)", write={ "bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)", "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')", "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS TIMESTAMP) + INTERVAL '1' MILLISECOND", "snowflake": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')", "spark": "SELECT '2023-01-01T00:00:00' + INTERVAL '1' MILLISECOND", }, ), ) ( self.validate_all( "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL 1 MILLISECOND)", write={ "bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)", "databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1' * -1, '2023-01-01T00:00:00')", "duckdb": "SELECT CAST('2023-01-01T00:00:00' AS TIMESTAMP) - INTERVAL '1' MILLISECOND", "spark": 
"SELECT '2023-01-01T00:00:00' - INTERVAL '1' MILLISECOND", }, ), ) ( self.validate_all( "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)", write={ "bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)", "databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')", "duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS TIMESTAMP))", }, ), ) self.validate_all("LEAST(x, y)", read={"sqlite": "MIN(x, y)"}) self.validate_all( 'SELECT TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)', write={ "bigquery": "SELECT TIMESTAMP_ADD(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)", "databricks": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))", "duckdb": "SELECT CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ) + INTERVAL '10' MINUTE", "mysql": "SELECT DATE_ADD(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)", "spark": "SELECT DATE_ADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))", "snowflake": "SELECT TIMESTAMPADD(MINUTE, '10', CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))", }, ) self.validate_all( 'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL 10 MINUTE)', write={ "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL '10' MINUTE)", "duckdb": "SELECT CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ) - INTERVAL '10' MINUTE", "mysql": "SELECT DATE_SUB(TIMESTAMP('2008-12-25 15:30:00+00'), INTERVAL '10' MINUTE)", "snowflake": "SELECT TIMESTAMPADD(MINUTE, '10' * -1, CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))", "spark": "SELECT CAST('2008-12-25 15:30:00+00' AS TIMESTAMP) - INTERVAL '10' MINUTE", }, ) self.validate_all( 'SELECT TIMESTAMP_SUB(TIMESTAMP "2008-12-25 15:30:00+00", INTERVAL col MINUTE)', write={ "bigquery": "SELECT TIMESTAMP_SUB(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP), INTERVAL col MINUTE)", "snowflake": "SELECT TIMESTAMPADD(MINUTE, col * -1, CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))", }, ) 
self.validate_all( "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", write={ "bigquery": "SELECT TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL '2' HOUR)", "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", }, ) self.validate_all( "SELECT TIME_SUB(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR)", write={ "bigquery": "SELECT TIME_SUB(CAST('09:05:03' AS TIME), INTERVAL '2' HOUR)", "duckdb": "SELECT CAST('09:05:03' AS TIME) - INTERVAL '2' HOUR", }, ) expr = self.parse_one("LOWER(CAST('HELLO' AS BYTES))") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "CAST(LOWER(CAST(CAST('HELLO' AS BLOB) AS TEXT)) AS BLOB)" ) sql = "LOWER('HELLO')" expr = self.parse_one(sql) annotated = annotate_types(expr, dialect="bigquery") self.assertEqual(annotated.sql("duckdb"), "LOWER('HELLO')") self.validate_all( "LOWER(TO_HEX(x))", write={ "": "LOWER(HEX(x))", "bigquery": "TO_HEX(x)", "clickhouse": "LOWER(HEX(x))", "duckdb": "LOWER(HEX(x))", "hive": "LOWER(HEX(x))", "mysql": "LOWER(HEX(x))", "spark": "LOWER(HEX(x))", "sqlite": "LOWER(HEX(x))", "presto": "LOWER(TO_HEX(x))", "trino": "LOWER(TO_HEX(x))", }, ) self.validate_all( "TO_HEX(x)", read={ "": "LOWER(HEX(x))", "clickhouse": "LOWER(HEX(x))", "duckdb": "LOWER(HEX(x))", "hive": "LOWER(HEX(x))", "mysql": "LOWER(HEX(x))", "spark": "LOWER(HEX(x))", "sqlite": "LOWER(HEX(x))", "presto": "LOWER(TO_HEX(x))", "trino": "LOWER(TO_HEX(x))", }, write={ "": "LOWER(HEX(x))", "bigquery": "TO_HEX(x)", "clickhouse": "LOWER(HEX(x))", "duckdb": "LOWER(HEX(x))", "hive": "LOWER(HEX(x))", "mysql": "LOWER(HEX(x))", "presto": "LOWER(TO_HEX(x))", "spark": "LOWER(HEX(x))", "sqlite": "LOWER(HEX(x))", "trino": "LOWER(TO_HEX(x))", }, ) sql = "UPPER(CAST('hello' AS BYTES))" expr = self.parse_one("UPPER(CAST('hello' AS BYTES))") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "CAST(UPPER(CAST(CAST('hello' AS BLOB) AS TEXT)) AS BLOB)" ) sql = 
"UPPER('hello')" expr = self.parse_one(sql) annotated = annotate_types(expr, dialect="bigquery") self.assertEqual(annotated.sql("duckdb"), "UPPER('hello')") self.validate_all( "UPPER(TO_HEX(x))", read={ "": "HEX(x)", "clickhouse": "HEX(x)", "duckdb": "HEX(x)", "hive": "HEX(x)", "mysql": "HEX(x)", "presto": "TO_HEX(x)", "spark": "HEX(x)", "sqlite": "HEX(x)", "trino": "TO_HEX(x)", }, write={ "": "HEX(x)", "bigquery": "UPPER(TO_HEX(x))", "clickhouse": "HEX(x)", "duckdb": "HEX(x)", "hive": "HEX(x)", "mysql": "HEX(x)", "presto": "TO_HEX(x)", "spark": "HEX(x)", "sqlite": "HEX(x)", "trino": "TO_HEX(x)", }, ) self.validate_all( "MD5(x)", read={ "clickhouse": "MD5(x)", "presto": "MD5(x)", "trino": "MD5(x)", }, write={ "": "MD5_DIGEST(x)", "bigquery": "MD5(x)", "clickhouse": "MD5(x)", "hive": "UNHEX(MD5(x))", "presto": "MD5(x)", "spark": "UNHEX(MD5(x))", "trino": "MD5(x)", }, ) self.validate_all( "SELECT TO_HEX(MD5(some_string))", read={ "duckdb": "SELECT MD5(some_string)", "spark": "SELECT MD5(some_string)", "clickhouse": "SELECT LOWER(HEX(MD5(some_string)))", "presto": "SELECT LOWER(TO_HEX(MD5(some_string)))", "trino": "SELECT LOWER(TO_HEX(MD5(some_string)))", }, write={ "": "SELECT MD5(some_string)", "bigquery": "SELECT TO_HEX(MD5(some_string))", "duckdb": "SELECT MD5(some_string)", "clickhouse": "SELECT LOWER(HEX(MD5(some_string)))", "presto": "SELECT LOWER(TO_HEX(MD5(some_string)))", "trino": "SELECT LOWER(TO_HEX(MD5(some_string)))", }, ) self.validate_all( "SHA1(x)", read={ "bigquery": "SHA1(x)", "clickhouse": "SHA1(x)", "presto": "SHA1(x)", "trino": "SHA1(x)", }, write={ "clickhouse": "SHA1(x)", "bigquery": "SHA1(x)", "presto": "SHA1(x)", "trino": "SHA1(x)", "duckdb": "UNHEX(SHA1(x))", }, ) self.validate_all( "SHA256(x)", read={ "clickhouse": "SHA256(x)", "presto": "SHA256(x)", "trino": "SHA256(x)", "postgres": "SHA256(x)", "duckdb": "SHA256(x)", }, write={ "bigquery": "SHA256(x)", "spark2": "SHA2(x, 256)", "clickhouse": "SHA256(x)", "postgres": "SHA256(x)", "presto": 
"SHA256(x)", "redshift": "SHA2(x, 256)", "trino": "SHA256(x)", "duckdb": "UNHEX(SHA256(x))", "snowflake": "SHA2_BINARY(x, 256)", }, ) self.validate_all( "SHA512(x)", read={ "clickhouse": "SHA512(x)", "presto": "SHA512(x)", "trino": "SHA512(x)", }, write={ "clickhouse": "SHA512(x)", "bigquery": "SHA512(x)", "spark2": "SHA2(x, 512)", "presto": "SHA512(x)", "trino": "SHA512(x)", }, ) self.validate_all( "SELECT CAST('20201225' AS TIMESTAMP FORMAT 'YYYYMMDD' AT TIME ZONE 'America/New_York')", write={"bigquery": "SELECT PARSE_TIMESTAMP('%Y%m%d', '20201225', 'America/New_York')"}, ) self.validate_all( "SELECT CAST('20201225' AS TIMESTAMP FORMAT 'YYYYMMDD')", write={"bigquery": "SELECT PARSE_TIMESTAMP('%Y%m%d', '20201225')"}, ) self.validate_all( "SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM') AS date_time_to_string", write={ "bigquery": "SELECT CAST(CAST('2008-12-25 00:00:00+00:00' AS TIMESTAMP) AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM') AS date_time_to_string", }, ) self.validate_all( "SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string", write={ "bigquery": "SELECT CAST(CAST('2008-12-25 00:00:00+00:00' AS TIMESTAMP) AS STRING FORMAT 'YYYY-MM-DD HH24:MI:SS TZH:TZM' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string", }, ) self.validate_all( "WITH cte AS (SELECT [1, 2, 3] AS arr) SELECT IF(pos = pos_2, col, NULL) AS col FROM cte CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(arr)) - 1)) AS pos CROSS JOIN UNNEST(arr) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(arr) - 1) AND pos_2 = (ARRAY_LENGTH(arr) - 1))", read={ "spark": "WITH cte AS (SELECT ARRAY(1, 2, 3) AS arr) SELECT EXPLODE(arr) FROM cte" }, ) self.validate_all( "SELECT IF(pos = pos_2, col, NULL) AS col FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], []))) 
- 1)) AS pos CROSS JOIN UNNEST(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1) AND pos_2 = (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1))", read={"spark": "select explode_outer([])"}, ) self.validate_all( "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2 FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], []))) - 1)) AS pos CROSS JOIN UNNEST(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1) AND pos_2 = (ARRAY_LENGTH(IF(ARRAY_LENGTH(COALESCE([], [])) = 0, [[][SAFE_ORDINAL(0)]], [])) - 1))", read={"spark": "select posexplode_outer([])"}, ) self.validate_all( "SELECT AS STRUCT ARRAY(SELECT AS STRUCT 1 AS b FROM x) AS y FROM z", write={ "": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT 1 AS b FROM x) AS y FROM z", "bigquery": "SELECT AS STRUCT ARRAY(SELECT AS STRUCT 1 AS b FROM x) AS y FROM z", "duckdb": "SELECT {'y': ARRAY(SELECT {'b': 1} FROM x)} FROM z", }, ) self.validate_all( "SELECT CAST(STRUCT(1) AS STRUCT)", write={ "bigquery": "SELECT CAST(STRUCT(1) AS STRUCT)", "snowflake": "SELECT CAST(OBJECT_CONSTRUCT('_0', 1) AS OBJECT)", }, ) self.validate_all( "cast(x as date format 'MM/DD/YYYY')", write={ "bigquery": "PARSE_DATE('%m/%d/%Y', x)", }, ) self.validate_all( "cast(x as time format 'YYYY.MM.DD HH:MI:SSTZH')", write={ "bigquery": "PARSE_TIMESTAMP('%Y.%m.%d %I:%M:%S%z', x)", }, ) self.validate_identity( "CREATE TEMP TABLE foo AS SELECT 1", "CREATE TEMPORARY TABLE foo AS SELECT 1", ) self.validate_all( "REGEXP_CONTAINS('foo', '.*')", read={ "bigquery": "REGEXP_CONTAINS('foo', '.*')", "mysql": "REGEXP_LIKE('foo', '.*')", 
"starrocks": "REGEXP('foo', '.*')", }, write={ "mysql": "REGEXP_LIKE('foo', '.*')", "starrocks": "REGEXP('foo', '.*')", }, ) self.validate_all( '"""x"""', write={ "bigquery": "'x'", "duckdb": "'x'", "presto": "'x'", "hive": "'x'", "spark": "'x'", }, ) self.validate_all( '"""x\'"""', write={ "bigquery": "'x\\''", "duckdb": "'x'''", "presto": "'x'''", "hive": "'x\\''", "spark": "'x\\''", }, ) self.validate_all( "r'x\\''", write={ "bigquery": "'x\\''", "hive": "'x\\''", }, ) self.validate_all( "r'x\\y'", write={ "bigquery": "'x\\\\y'", "hive": "'x\\\\y'", }, ) self.validate_all( "'\\\\'", write={ "bigquery": "'\\\\'", "duckdb": "'\\'", "presto": "'\\'", "hive": "'\\\\'", }, ) self.validate_all( r'r"""/\*.*\*/"""', write={ "bigquery": r"'/\\*.*\\*/'", "duckdb": r"'/\*.*\*/'", "presto": r"'/\*.*\*/'", "hive": r"'/\\*.*\\*/'", "spark": r"'/\\*.*\\*/'", }, ) self.validate_all( r'R"""/\*.*\*/"""', write={ "bigquery": r"'/\\*.*\\*/'", "duckdb": r"'/\*.*\*/'", "presto": r"'/\*.*\*/'", "hive": r"'/\\*.*\\*/'", "spark": r"'/\\*.*\\*/'", }, ) self.validate_all( 'r"""a\n"""', write={ "bigquery": "'a\\n'", "duckdb": "'a\n'", }, ) self.validate_all( '"""a\n"""', write={ "bigquery": "'a\\n'", "duckdb": "'a\n'", }, ) self.validate_all( "CAST(a AS INT64)", write={ "bigquery": "CAST(a AS INT64)", "duckdb": "CAST(a AS BIGINT)", "presto": "CAST(a AS BIGINT)", "hive": "CAST(a AS BIGINT)", "spark": "CAST(a AS BIGINT)", }, ) self.validate_all( "CAST(a AS BYTES)", write={ "bigquery": "CAST(a AS BYTES)", "duckdb": "CAST(a AS BLOB)", "presto": "CAST(a AS VARBINARY)", "hive": "CAST(a AS BINARY)", "spark": "CAST(a AS BINARY)", }, ) # Test STARTS_WITH with BYTES/BLOB handling from BigQuery to DuckDB # Requires type annotation for proper BLOB -> VARCHAR casting expr = self.parse_one("STARTS_WITH(CAST('foo' AS BYTES), CAST('f' AS BYTES))") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "STARTS_WITH(CAST(CAST('foo' AS BLOB) AS TEXT), CAST(CAST('f' AS 
BLOB) AS TEXT))", ) expr = self.parse_one("STARTS_WITH(CAST('foo' AS BYTES), b'f')") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "STARTS_WITH(CAST(CAST('foo' AS BLOB) AS TEXT), CAST(CAST(e'f' AS BLOB) AS TEXT))", ) self.validate_all( "CAST(a AS NUMERIC)", write={ "bigquery": "CAST(a AS NUMERIC)", "duckdb": "CAST(a AS DECIMAL)", "presto": "CAST(a AS DECIMAL)", "hive": "CAST(a AS DECIMAL)", "spark": "CAST(a AS DECIMAL)", }, ) self.validate_all( "[1, 2, 3]", read={ "duckdb": "[1, 2, 3]", "presto": "ARRAY[1, 2, 3]", "hive": "ARRAY(1, 2, 3)", "spark": "ARRAY(1, 2, 3)", }, write={ "bigquery": "[1, 2, 3]", "duckdb": "[1, 2, 3]", "presto": "ARRAY[1, 2, 3]", "hive": "ARRAY(1, 2, 3)", "spark": "ARRAY(1, 2, 3)", }, ) self.validate_all( "SELECT * FROM UNNEST(['7', '14']) AS x", read={ "spark": "SELECT * FROM UNNEST(ARRAY('7', '14')) AS (x)", }, write={ "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS x", "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS _t0(x)", "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS _t0(x)", }, ) self.validate_all( "SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)", write={"bigquery": "SELECT ARRAY(SELECT x FROM UNNEST([0, 1]) AS x)"}, ) self.validate_all( "SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers", write={ "bigquery": "SELECT ARRAY(SELECT DISTINCT x FROM UNNEST(some_numbers) AS x) AS unique_numbers" }, ) self.validate_all( "SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)", write={"bigquery": "SELECT ARRAY(SELECT * FROM foo JOIN bla ON x = y)"}, ) self.validate_all( "CURRENT_TIMESTAMP()", read={ "tsql": "GETDATE()", }, write={ "tsql": "GETDATE()", }, ) self.validate_all( "current_datetime", write={ "bigquery": "CURRENT_DATETIME()", "presto": "CURRENT_DATETIME()", "hive": "CURRENT_DATETIME()", "spark": "CURRENT_DATETIME()", }, ) self.validate_all( "current_time", write={ "bigquery": "CURRENT_TIME()", "duckdb": "CURRENT_TIME", "presto": "CURRENT_TIME", 
"trino": "CURRENT_TIME", "hive": "CURRENT_TIME()", "spark": "CURRENT_TIME()", }, ) self.validate_all( "CURRENT_TIMESTAMP", write={ "bigquery": "CURRENT_TIMESTAMP()", "duckdb": "CURRENT_TIMESTAMP", "postgres": "CURRENT_TIMESTAMP", "presto": "CURRENT_TIMESTAMP", "hive": "CURRENT_TIMESTAMP()", "spark": "CURRENT_TIMESTAMP()", }, ) self.validate_all( "CURRENT_TIMESTAMP()", write={ "bigquery": "CURRENT_TIMESTAMP()", "duckdb": "CURRENT_TIMESTAMP", "postgres": "CURRENT_TIMESTAMP", "presto": "CURRENT_TIMESTAMP", "hive": "CURRENT_TIMESTAMP()", "spark": "CURRENT_TIMESTAMP()", }, ) self.validate_all( "DIV(x, y)", write={ "bigquery": "DIV(x, y)", "duckdb": "x // y", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a struct)", write={ "bigquery": "CREATE TABLE db.example_table (col_a STRUCT)", "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "hive": "CREATE TABLE db.example_table (col_a STRUCT)", "spark": "CREATE TABLE db.example_table (col_a STRUCT)", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a STRUCT>)", write={ "bigquery": "CREATE TABLE db.example_table (col_a STRUCT>)", "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a BIGINT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a BIGINT, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "hive": "CREATE TABLE db.example_table (col_a STRUCT>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT>)", }, ) self.validate_all( "CREATE TABLE db.example_table (x int) PARTITION BY x cluster by x", write={ "bigquery": "CREATE TABLE db.example_table (x INT64) PARTITION BY x CLUSTER BY x", }, ) self.validate_all( "DELETE db.example_table WHERE x = 1", write={ "bigquery": "DELETE db.example_table WHERE x = 1", "presto": "DELETE FROM db.example_table WHERE x = 
1", }, ) self.validate_all( "DELETE db.example_table tb WHERE tb.x = 1", write={ "bigquery": "DELETE db.example_table AS tb WHERE tb.x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE db.example_table AS tb WHERE tb.x = 1", write={ "bigquery": "DELETE db.example_table AS tb WHERE tb.x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE FROM db.example_table WHERE x = 1", write={ "bigquery": "DELETE FROM db.example_table WHERE x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE FROM db.example_table tb WHERE tb.x = 1", write={ "bigquery": "DELETE FROM db.example_table AS tb WHERE tb.x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE FROM db.example_table AS tb WHERE tb.x = 1", write={ "bigquery": "DELETE FROM db.example_table AS tb WHERE tb.x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE FROM db.example_table AS tb WHERE example_table.x = 1", write={ "bigquery": "DELETE FROM db.example_table AS tb WHERE example_table.x = 1", "presto": "DELETE FROM db.example_table WHERE x = 1", }, ) self.validate_all( "DELETE FROM db.example_table WHERE example_table.x = 1", write={ "bigquery": "DELETE FROM db.example_table WHERE example_table.x = 1", "presto": "DELETE FROM db.example_table WHERE example_table.x = 1", }, ) self.validate_all( "DELETE FROM db.t1 AS t1 WHERE NOT t1.c IN (SELECT db.t2.c FROM db.t2)", write={ "bigquery": "DELETE FROM db.t1 AS t1 WHERE NOT t1.c IN (SELECT db.t2.c FROM db.t2)", "presto": "DELETE FROM db.t1 WHERE NOT c IN (SELECT c FROM db.t2)", }, ) self.validate_all( "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", write={ "bigquery": "SELECT * FROM a WHERE b IN UNNEST([1, 2, 3])", "presto": "SELECT * FROM a WHERE b IN (SELECT UNNEST(ARRAY[1, 2, 3]))", "hive": "SELECT * FROM a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", "spark": "SELECT * FROM 
a WHERE b IN (SELECT EXPLODE(ARRAY(1, 2, 3)))", }, ) self.validate_all( "DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)", write={ "postgres": "CURRENT_DATE - INTERVAL '1 DAY'", "bigquery": "DATE_SUB(CURRENT_DATE, INTERVAL '1' DAY)", }, ) self.validate_all( "DATE_ADD(CURRENT_DATE(), INTERVAL -1 DAY)", write={ "bigquery": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)", "duckdb": "CURRENT_DATE + INTERVAL '-1' DAY", "mysql": "DATE_ADD(CURRENT_DATE, INTERVAL '-1' DAY)", "postgres": "CURRENT_DATE + INTERVAL '-1 DAY'", "presto": "DATE_ADD('DAY', CAST('-1' AS BIGINT), CURRENT_DATE)", "hive": "DATE_ADD(CURRENT_DATE, -1)", "spark": "DATE_ADD(CURRENT_DATE, -1)", }, ) self.validate_all( "DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', DAY)", write={ "bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), DAY)", "mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", "starrocks": "DATE_DIFF('DAY', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", }, ) self.validate_all( "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), DAY)", read={ "mysql": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", "starrocks": "DATEDIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", }, ) self.validate_all( "DATE_DIFF(DATE '2010-07-07', DATE '2008-12-25', MINUTE)", write={ "bigquery": "DATE_DIFF(CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE), MINUTE)", "starrocks": "DATE_DIFF('MINUTE', CAST('2010-07-07' AS DATE), CAST('2008-12-25' AS DATE))", }, ) self.validate_all( "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", write={ "bigquery": "DATE_DIFF('2021-01-01', '2020-01-01', DAY)", "duckdb": "DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2021-01-01' AS DATE))", }, ) self.validate_all( "CURRENT_DATE('UTC')", write={ "bigquery": "CURRENT_DATE('UTC')", "duckdb": "CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)", "mysql": "CURRENT_DATE AT TIME ZONE 'UTC'", "postgres": "CURRENT_DATE AT TIME ZONE 'UTC'", 
"snowflake": "CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)", }, ) self.validate_all( "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", write={ "bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS FIRST LIMIT 10", }, ) self.validate_all( "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) AS tab", read={ "bigquery": "SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) as tab", "snowflake": "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)", "spark": "SELECT cola, colb FROM VALUES (1, 'test') AS tab(cola, colb)", }, ) self.validate_all( "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", read={ "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS _c0)]) AS t1", "postgres": "SELECT * FROM (VALUES (1)) AS t1", }, ) self.validate_all( "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", read={ "bigquery": "SELECT * FROM UNNEST([STRUCT(1 AS id)]) AS t1 CROSS JOIN UNNEST([STRUCT(1 AS id)]) AS t2", "postgres": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", }, ) self.validate_all( "SELECT * FROM UNNEST([1]) WITH OFFSET", write={"bigquery": "SELECT * FROM UNNEST([1]) WITH OFFSET AS offset"}, ) self.validate_all( "SELECT * FROM UNNEST([1]) WITH OFFSET y", write={"bigquery": "SELECT * FROM UNNEST([1]) WITH OFFSET AS y"}, ) self.validate_all( "GENERATE_ARRAY(1, 4)", read={"bigquery": "GENERATE_ARRAY(1, 4)"}, write={"duckdb": "GENERATE_SERIES(1, 4)"}, ) self.validate_all( "TO_JSON_STRING(x)", read={ "bigquery": "TO_JSON_STRING(x)", }, write={ "bigquery": "TO_JSON_STRING(x)", "duckdb": "CAST(TO_JSON(x) AS TEXT)", "presto": "JSON_FORMAT(CAST(x AS JSON))", "spark": "TO_JSON(x)", }, ) self.validate_all( """SELECT `u`.`user_email` AS `user_email`, `d`.`user_id` AS `user_id`, `account_id` AS 
`account_id` FROM `analytics_staging`.`stg_mongodb__users` AS `u`, UNNEST(`u`.`cluster_details`) AS `d`, UNNEST(`d`.`account_ids`) AS `account_id` WHERE NOT `account_id` IS NULL""", read={ "": """ SELECT "u"."user_email" AS "user_email", "_q_0"."d"."user_id" AS "user_id", "_q_1"."account_id" AS "account_id" FROM "analytics_staging"."stg_mongodb__users" AS "u", UNNEST("u"."cluster_details") AS "_q_0"("d"), UNNEST("_q_0"."d"."account_ids") AS "_q_1"("account_id") WHERE NOT "_q_1"."account_id" IS NULL """ }, pretty=True, ) self.validate_all( "SELECT MOD(x, 10)", read={"postgres": "SELECT x % 10"}, write={ "bigquery": "SELECT MOD(x, 10)", "postgres": "SELECT x % 10", }, ) self.validate_all( "SELECT CAST(x AS DATETIME)", write={ "": "SELECT CAST(x AS TIMESTAMP)", "bigquery": "SELECT CAST(x AS DATETIME)", }, ) self.validate_all( "SELECT TIME(foo, 'America/Los_Angeles')", write={ "duckdb": "SELECT CAST(CAST(foo AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIME)", "bigquery": "SELECT TIME(foo, 'America/Los_Angeles')", }, ) self.validate_all( "SELECT DATETIME('2020-01-01')", write={ "duckdb": "SELECT CAST('2020-01-01' AS TIMESTAMP)", "bigquery": "SELECT DATETIME('2020-01-01')", }, ) self.validate_all( "SELECT DATETIME('2020-01-01', TIME '23:59:59')", write={ "duckdb": "SELECT CAST(CAST('2020-01-01' AS DATE) + CAST('23:59:59' AS TIME) AS TIMESTAMP)", "bigquery": "SELECT DATETIME('2020-01-01', CAST('23:59:59' AS TIME))", }, ) self.validate_all( "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')", write={ "duckdb": "SELECT CAST(CAST('2020-01-01' AS TIMESTAMPTZ) AT TIME ZONE 'America/Los_Angeles' AS TIMESTAMP)", "bigquery": "SELECT DATETIME('2020-01-01', 'America/Los_Angeles')", }, ) self.validate_all( "SELECT LENGTH(foo)", read={ "bigquery": "SELECT LENGTH(foo)", "snowflake": "SELECT LENGTH(foo)", }, write={ "duckdb": "SELECT CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END", "snowflake": "SELECT LENGTH(foo)", "": 
"SELECT LENGTH(foo)", }, ) self.validate_all( "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", write={ "duckdb": "SELECT DATE_DIFF('MINUTE', CAST('12:30:00' AS TIME), CAST('12:00:00' AS TIME))", "bigquery": "SELECT TIME_DIFF('12:00:00', '12:30:00', MINUTE)", }, ) self.validate_all( "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", write={ "bigquery": "ARRAY_CONCAT([1, 2], [3, 4], [5, 6])", "duckdb": "LIST_CONCAT([1, 2], [3, 4], [5, 6])", "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY_CAT(ARRAY[3, 4], ARRAY[5, 6]))", "redshift": "ARRAY_CONCAT(ARRAY(1, 2), ARRAY_CONCAT(ARRAY(3, 4), ARRAY(5, 6)))", "snowflake": "ARRAY_CAT([1, 2], ARRAY_CAT([3, 4], [5, 6]))", "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", "spark2": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", "databricks": "CONCAT(ARRAY(1, 2), ARRAY(3, 4), ARRAY(5, 6))", "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", "trino": "CONCAT(ARRAY[1, 2], ARRAY[3, 4], ARRAY[5, 6])", }, ) self.validate_all( "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", write={ "duckdb": "SELECT GENERATE_SERIES(CAST('2016-10-05 00:00:00' AS TIMESTAMP), CAST('2016-10-07 00:00:00' AS TIMESTAMP), INTERVAL '1' DAY)", "bigquery": "SELECT GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-07 00:00:00', INTERVAL '1' DAY)", }, ) self.validate_all( "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", write={ "bigquery": "SELECT PARSE_DATE('%A %b %e %Y', 'Thursday Dec 25 2008')", "duckdb": "SELECT CAST(STRPTIME('Thursday Dec 25 2008', '%A %b %-d %Y') AS DATE)", }, ) self.validate_all( "SELECT PARSE_DATE('%Y%m%d', '20081225')", write={ "bigquery": "SELECT PARSE_DATE('%Y%m%d', '20081225')", "duckdb": "SELECT CAST(STRPTIME('20081225', '%Y%m%d') AS DATE)", "snowflake": "SELECT DATE('20081225', 'yyyymmDD')", }, ) self.validate_all( "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text", write={ "bigquery": "SELECT 
ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text", "duckdb": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--') AS text", }, ) self.validate_all( "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text", write={ "bigquery": "SELECT ARRAY_TO_STRING(['cake', 'pie', NULL], '--', 'MISSING') AS text", "duckdb": "SELECT ARRAY_TO_STRING(LIST_TRANSFORM(['cake', 'pie', NULL], x -> COALESCE(x, 'MISSING')), '--') AS text", }, ) self.validate_all( "STRING(a)", write={ "bigquery": "STRING(a)", "snowflake": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", }, ) self.validate_all( "STRING('2008-12-25 15:30:00', 'America/New_York')", write={ "bigquery": "STRING('2008-12-25 15:30:00', 'America/New_York')", "snowflake": "CAST(CONVERT_TIMEZONE('UTC', 'America/New_York', '2008-12-25 15:30:00') AS VARCHAR)", "duckdb": "CAST(CAST('2008-12-25 15:30:00' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'America/New_York' AS TEXT)", }, ) self.validate_identity("SELECT * FROM a-b c", "SELECT * FROM a-b AS c") self.validate_all( "SAFE_DIVIDE(x, y)", write={ "bigquery": "SAFE_DIVIDE(x, y)", "duckdb": "CASE WHEN y <> 0 THEN x / y ELSE NULL END", "presto": "IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)", "trino": "IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)", "hive": "IF(y <> 0, x / y, NULL)", "spark2": "IF(y <> 0, x / y, NULL)", "spark": "IF(y <> 0, x / y, NULL)", "databricks": "IF(y <> 0, x / y, NULL)", "snowflake": "IFF(y <> 0, x / y, NULL)", "postgres": "CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END", }, ) self.validate_all( "SAFE_DIVIDE(x + 1, 2 * y)", write={ "bigquery": "SAFE_DIVIDE(x + 1, 2 * y)", "duckdb": "CASE WHEN (2 * y) <> 0 THEN (x + 1) / (2 * y) ELSE NULL END", "presto": "IF((2 * y) <> 0, CAST((x + 1) AS DOUBLE) / (2 * y), NULL)", "trino": "IF((2 * y) <> 0, CAST((x + 1) AS DOUBLE) / (2 * y), NULL)", "hive": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)", "spark2": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)", "spark": "IF((2 * y) <> 0, (x + 1) / (2 * 
y), NULL)", "databricks": "IF((2 * y) <> 0, (x + 1) / (2 * y), NULL)", "snowflake": "IFF((2 * y) <> 0, (x + 1) / (2 * y), NULL)", "postgres": "CASE WHEN (2 * y) <> 0 THEN CAST((x + 1) AS DOUBLE PRECISION) / (2 * y) ELSE NULL END", }, ) self.validate_all( """SELECT JSON_VALUE_ARRAY('{"arr": [1, "a"]}', '$.arr')""", write={ "bigquery": """SELECT JSON_VALUE_ARRAY('{"arr": [1, "a"]}', '$.arr')""", "duckdb": """SELECT CAST('{"arr": [1, "a"]}' -> '$.arr' AS TEXT[])""", "snowflake": """SELECT TRANSFORM(GET_PATH(PARSE_JSON('{"arr": [1, "a"]}'), 'arr'), x -> CAST(x AS VARCHAR))""", }, ) self.validate_all( "SELECT INSTR('foo@example.com', '@')", write={ "bigquery": "SELECT INSTR('foo@example.com', '@')", "duckdb": "SELECT STRPOS('foo@example.com', '@')", "snowflake": "SELECT CHARINDEX('@', 'foo@example.com')", }, ) self.validate_all( "SELECT ts + MAKE_INTERVAL(1, 2, minute => 5, day => 3)", write={ "bigquery": "SELECT ts + MAKE_INTERVAL(1, 2, day => 3, minute => 5)", "duckdb": "SELECT ts + INTERVAL '1 year 2 month 5 minute 3 day'", "snowflake": "SELECT ts + INTERVAL '1 year, 2 month, 5 minute, 3 day'", }, ) self.validate_all( """SELECT INT64(JSON_QUERY(JSON '{"key": 2000}', '$.key'))""", write={ "bigquery": """SELECT INT64(JSON_QUERY(PARSE_JSON('{"key": 2000}'), '$.key'))""", "duckdb": """SELECT CAST(JSON('{"key": 2000}') -> '$.key' AS BIGINT)""", "snowflake": """SELECT CAST(GET_PATH(PARSE_JSON('{"key": 2000}'), 'key') AS BIGINT)""", }, ) self.validate_identity("CONTAINS_SUBSTR(a, b, json_scope => 'JSON_KEYS_AND_VALUES')") self.validate_all( """CONTAINS_SUBSTR(a, b)""", read={ "": "CONTAINS(a, b)", "spark": "CONTAINS(a, b)", "databricks": "CONTAINS(a, b)", "snowflake": "CONTAINS(a, b)", "duckdb": "CONTAINS(a, b)", "oracle": "CONTAINS(a, b)", }, write={ "": "CONTAINS(LOWER(a), LOWER(b))", "spark": "CONTAINS(LOWER(a), LOWER(b))", "databricks": "CONTAINS(LOWER(a), LOWER(b))", "snowflake": "CONTAINS(LOWER(a), LOWER(b))", "duckdb": "CONTAINS(LOWER(a), LOWER(b))", "oracle": 
"CONTAINS(LOWER(a), LOWER(b))", "bigquery": "CONTAINS_SUBSTR(a, b)", }, ) self.validate_identity( "EXPORT DATA OPTIONS (URI='gs://path*.csv.gz', FORMAT='CSV') AS SELECT * FROM all_rows" ) self.validate_identity( "EXPORT DATA WITH CONNECTION myproject.us.myconnection OPTIONS (URI='gs://path*.csv.gz', FORMAT='CSV') AS SELECT * FROM all_rows" ) self.validate_all( "SELECT * FROM t1, UNNEST(`t1`) AS `col`", read={ "duckdb": 'SELECT * FROM t1, UNNEST("t1") "t1" ("col")', }, write={ "bigquery": "SELECT * FROM t1 CROSS JOIN UNNEST(`t1`) AS `col`", "redshift": 'SELECT * FROM t1 CROSS JOIN "t1" AS "col"', }, ) self.validate_all( "SELECT * FROM t, UNNEST(`t2`.`t3`) AS `col`", read={ "duckdb": 'SELECT * FROM t, UNNEST("t1"."t2"."t3") "t1" ("col")', }, write={ "bigquery": "SELECT * FROM t CROSS JOIN UNNEST(`t2`.`t3`) AS `col`", "redshift": 'SELECT * FROM t CROSS JOIN "t2"."t3" AS "col"', }, ) self.validate_all( "SELECT * FROM t1, UNNEST(`t1`.`t2`.`t3`.`t4`) AS `col`", read={ "duckdb": 'SELECT * FROM t1, UNNEST("t1"."t2"."t3"."t4") "t3" ("col")', }, write={ "bigquery": "SELECT * FROM t1 CROSS JOIN UNNEST(`t1`.`t2`.`t3`.`t4`) AS `col`", "redshift": 'SELECT * FROM t1 CROSS JOIN "t1"."t2"."t3"."t4" AS "col"', }, ) self.validate_identity("ARRAY_FIRST(['a', 'b'])") self.validate_identity("ARRAY_LAST(['a', 'b'])") self.validate_identity("JSON_TYPE(PARSE_JSON('1'))") self.validate_all( "SELECT CAST(col AS STRUCT>).fld1.fld2", write={ "bigquery": "SELECT CAST(col AS STRUCT>).fld1.fld2", "snowflake": "SELECT CAST(col AS OBJECT(fld1 OBJECT(fld2 INT))):fld1.fld2", }, ) self.validate_identity( "SELECT PARSE_DATETIME('%a %b %e %I:%M:%S %Y', 'Thu Dec 25 07:30:00 2008')" ) self.validate_identity("FORMAT_TIME('%R', CAST('15:30:00' AS TIME))") self.validate_identity("PARSE_TIME('%I:%M:%S', '07:30:00')") self.validate_identity("BYTE_LENGTH('foo')") self.validate_identity("BYTE_LENGTH(b'foo')") self.validate_identity("CODE_POINTS_TO_STRING([65, 255])") self.validate_identity("APPROX_TOP_COUNT(col, 
2)") self.validate_identity("ARPOX_TOP_SUM(col, 1.5, 2)") self.validate_identity("SAFE_CONVERT_BYTES_TO_STRING(b'\xc2')") self.validate_identity("FROM_HEX('foo')") self.validate_identity("TO_CODE_POINTS('foo')") self.validate_identity("CODE_POINTS_TO_BYTES([65, 98])") self.validate_identity("PARSE_BIGNUMERIC('1.2')") self.validate_identity("PARSE_NUMERIC('1.2')") self.validate_identity("BOOL(PARSE_JSON('true'))") self.validate_identity("FLOAT64(PARSE_JSON('9.8'))") self.validate_identity("FLOAT64(PARSE_JSON('9.8'), wide_number_mode => 'round')") self.validate_identity("FLOAT64(PARSE_JSON('9.8'), wide_number_mode => 'exact')") self.validate_identity("NORMALIZE_AND_CASEFOLD('foo')") self.validate_identity("NORMALIZE_AND_CASEFOLD('foo', NFKC)") self.validate_identity( "OCTET_LENGTH('foo')", "BYTE_LENGTH('foo')", ) self.validate_identity( "OCTET_LENGTH(b'foo')", "BYTE_LENGTH(b'foo')", ) self.validate_identity( """JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', [1, 2], append_each_element => FALSE)""" ) self.validate_identity( """JSON_ARRAY_INSERT(PARSE_JSON('["a", "b", "c"]'), '$[1]', [1, 2], insert_each_element => FALSE)""" ) self.validate_identity("""JSON_KEYS(PARSE_JSON('{"a": {"b":1}}'))""") self.validate_identity("""JSON_KEYS(PARSE_JSON('{"a": {"b":1}}', 1))""") self.validate_identity("""JSON_KEYS(PARSE_JSON('{"a": {"b":1}}'), 1, mode => 'lax')""") self.validate_identity( """JSON_SET(PARSE_JSON('{"a": 1}'), '$.b', 999, create_if_missing => FALSE)""" ) self.validate_identity("""JSON_STRIP_NULLS(PARSE_JSON('[1, null, 2, null, [null]]'))""") self.validate_identity( """JSON_STRIP_NULLS(PARSE_JSON('[1, null, 2, null]'), include_arrays => FALSE)""" ) self.validate_identity( """JSON_STRIP_NULLS(PARSE_JSON('{"a": {"b": {"c": null}}, "d": [null], "e": [], "f": 1}'), include_arrays => FALSE, remove_empty => TRUE)""" ) self.validate_identity( """JSON_EXTRACT_STRING_ARRAY(PARSE_JSON('{"fruits": ["apples", "oranges", "grapes"]}'), '$.fruits')""", 
"""JSON_VALUE_ARRAY(PARSE_JSON('{"fruits": ["apples", "oranges", "grapes"]}'), '$.fruits')""", ) self.validate_identity("TO_JSON(STRUCT(1 AS id, [10, 20] AS cords))") self.validate_identity("TO_JSON(9999999999, stringify_wide_numbers => FALSE)") self.validate_identity("RANGE_BUCKET(20, [0, 10, 20, 30, 40])") self.validate_identity("SELECT TRANSLATE(MODEL, 'in', 't') FROM (SELECT 'input' AS MODEL)") self.validate_identity("SELECT GRANT FROM (SELECT 'input' AS GRANT)") self.validate_all( "SELECT 0xA", write={ "bigquery": "SELECT 0xA", "duckdb": "SELECT 10", }, ) self.validate_all( "SELECT ARRAY_CONCAT_AGG(1)", write={ "snowflake": "SELECT ARRAY_FLATTEN(ARRAY_AGG(1))", "bigquery": "SELECT ARRAY_CONCAT_AGG(1)", }, ) self.validate_all( "SELECT b'\x61'", write={ "bigquery": "SELECT b'\x61'", "duckdb": "SELECT CAST(e'\x61' AS BLOB)", "postgres": "SELECT CAST(e'\x61' AS BYTEA)", }, ) self.validate_all( "SELECT b'a'", write={ "bigquery": "SELECT b'a'", "duckdb": "SELECT CAST(e'a' AS BLOB)", "postgres": "SELECT CAST(e'a' AS BYTEA)", }, ) self.validate_all( "SELECT GENERATE_UUID()", write={ "bigquery": "SELECT GENERATE_UUID()", "duckdb": "SELECT CAST(UUID() AS TEXT)", "spark2": "SELECT CAST(UUID() AS STRING)", "spark": "SELECT CAST(UUID() AS STRING)", "presto": "SELECT CAST(UUID() AS VARCHAR)", "trino": "SELECT CAST(UUID() AS VARCHAR)", "snowflake": "SELECT UUID_STRING()", }, ) self.validate_all( "SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result", write={ "bigquery": "SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result", "duckdb": "SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result", }, ) expr = self.parse_one( "SELECT REPLACE(CAST('apple pie' AS BYTES), CAST('pie' AS BYTES), CAST('cobbler' AS BYTES)) AS result" ) annotated = annotate_types(expr, dialect="bigquery") self.assertEqual( annotated.sql("duckdb"), "SELECT CAST(REPLACE(CAST(CAST('apple pie' AS BLOB) AS TEXT), CAST(CAST('pie' AS BLOB) AS TEXT), CAST(CAST('cobbler' AS BLOB) AS TEXT)) AS BLOB) AS 
result", ) expr = self.parse_one("REPLACE('apple pie', 'pie', 'cobbler')") annotated = annotate_types(expr, dialect="bigquery") self.assertEqual(annotated.sql("duckdb"), "REPLACE('apple pie', 'pie', 'cobbler')") self.validate_all( "TIMESTAMP_TRUNC(TIMESTAMP '2024-03-15 14:35:47.123456', DAY, 'America/New_York')", write={ "bigquery": "TIMESTAMP_TRUNC(CAST('2024-03-15 14:35:47.123456' AS TIMESTAMP), DAY, 'America/New_York')", "duckdb": "DATE_TRUNC('DAY', CAST('2024-03-15 14:35:47.123456' AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York') AT TIME ZONE 'America/New_York'", }, ) self.validate_all( "TIMESTAMP_TRUNC(TIMESTAMP '2024-03-15 14:35:00', MINUTE, 'America/New_York')", write={ "bigquery": "TIMESTAMP_TRUNC(CAST('2024-03-15 14:35:00' AS TIMESTAMP), MINUTE, 'America/New_York')", "duckdb": "DATE_TRUNC('MINUTE', CAST('2024-03-15 14:35:00' AS TIMESTAMPTZ))", }, ) self.validate_all( "TIMESTAMP_TRUNC(TIMESTAMP '2024-03-15 14:35:47.123456', DAY)", write={ "bigquery": "TIMESTAMP_TRUNC(CAST('2024-03-15 14:35:47.123456' AS TIMESTAMP), DAY)", "duckdb": "DATE_TRUNC('DAY', CAST('2024-03-15 14:35:47.123456' AS TIMESTAMPTZ))", }, ) self.validate_all( "TIMESTAMP_TRUNC(TIMESTAMP '2025-01-01 14:35:47.123456', MINUTE)", write={ "bigquery": "TIMESTAMP_TRUNC(CAST('2025-01-01 14:35:47.123456' AS TIMESTAMP), MINUTE)", "duckdb": "DATE_TRUNC('MINUTE', CAST('2025-01-01 14:35:47.123456' AS TIMESTAMPTZ))", }, ) self.validate_all( "WITH sample AS (SELECT * FROM UNNEST([TIMESTAMP '2024-03-15 14:35:46', TIMESTAMP '2024-03-16 01:12:03']) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, DAY, 'America/New_York') AS truncated_ts FROM sample", write={ "bigquery": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMP), CAST('2024-03-16 01:12:03' AS TIMESTAMP)]) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, DAY, 'America/New_York') AS truncated_ts FROM sample", "duckdb": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMPTZ), CAST('2024-03-16 01:12:03' AS TIMESTAMPTZ)]) 
AS _t0(ts)) SELECT ts, DATE_TRUNC('DAY', ts AT TIME ZONE 'America/New_York') AT TIME ZONE 'America/New_York' AS truncated_ts FROM sample", }, ) self.validate_all( "WITH sample AS (SELECT ts FROM UNNEST([TIMESTAMP '2024-03-15 14:35:46', TIMESTAMP '2024-03-16 01:12:03']) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, DAY) AS truncated_ts FROM sample", write={ "bigquery": "WITH sample AS (SELECT ts FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMP), CAST('2024-03-16 01:12:03' AS TIMESTAMP)]) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, DAY) AS truncated_ts FROM sample", "duckdb": "WITH sample AS (SELECT ts FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMPTZ), CAST('2024-03-16 01:12:03' AS TIMESTAMPTZ)]) AS _t0(ts)) SELECT ts, DATE_TRUNC('DAY', ts) AS truncated_ts FROM sample", }, ) self.validate_all( "WITH sample AS (SELECT * FROM UNNEST([TIMESTAMP '2024-03-15 14:35:46', TIMESTAMP '2024-03-16 01:12:03']) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, MINUTE, 'America/New_York') AS truncated_ts FROM sample", write={ "bigquery": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMP), CAST('2024-03-16 01:12:03' AS TIMESTAMP)]) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, MINUTE, 'America/New_York') AS truncated_ts FROM sample", "duckdb": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMPTZ), CAST('2024-03-16 01:12:03' AS TIMESTAMPTZ)]) AS _t0(ts)) SELECT ts, DATE_TRUNC('MINUTE', ts) AS truncated_ts FROM sample", }, ) self.validate_all( "WITH sample AS (SELECT * FROM UNNEST([TIMESTAMP '2024-03-15 14:35:46', TIMESTAMP '2024-03-16 01:12:03']) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, MINUTE) AS truncated_ts FROM sample", write={ "bigquery": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMP), CAST('2024-03-16 01:12:03' AS TIMESTAMP)]) AS ts) SELECT ts, TIMESTAMP_TRUNC(ts, MINUTE) AS truncated_ts FROM sample", "duckdb": "WITH sample AS (SELECT * FROM UNNEST([CAST('2024-03-15 14:35:46' AS TIMESTAMPTZ), CAST('2024-03-16 
01:12:03' AS TIMESTAMPTZ)]) AS _t0(ts)) SELECT ts, DATE_TRUNC('MINUTE', ts) AS truncated_ts FROM sample", }, ) self.validate_all( "SELECT GREATEST(1, NULL, 3)", write={ "duckdb": "SELECT CASE WHEN 1 IS NULL OR NULL IS NULL OR 3 IS NULL THEN NULL ELSE GREATEST(1, NULL, 3) END", "bigquery": "SELECT GREATEST(1, NULL, 3)", }, ) self.validate_all( "SELECT LEAST(1, NULL, 3)", write={ "duckdb": "SELECT CASE WHEN 1 IS NULL OR NULL IS NULL OR 3 IS NULL THEN NULL ELSE LEAST(1, NULL, 3) END", "bigquery": "SELECT LEAST(1, NULL, 3)", }, ) def test_errors(self): with self.assertRaises(ParseError): self.parse_one("SELECT * FROM a - b.c.d2") with self.assertRaises(TokenError): transpile("'\\'", read="bigquery") # Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators with self.assertRaises(UnsupportedError): transpile( "SELECT * FROM a INTERSECT ALL SELECT * FROM b", write="bigquery", unsupported_level=ErrorLevel.RAISE, ) with self.assertRaises(UnsupportedError): transpile( "SELECT * FROM a EXCEPT ALL SELECT * FROM b", write="bigquery", unsupported_level=ErrorLevel.RAISE, ) with self.assertRaises(ParseError): transpile("SELECT * FROM UNNEST(x) AS x(y)", read="bigquery") with self.assertRaises(ParseError): transpile("DATE_ADD(x, day)", read="bigquery") def test_warnings(self): with self.assertLogs(helper_logger) as cm: self.validate_identity( "WITH cte(c) AS (SELECT * FROM t) SELECT * FROM cte", "WITH cte AS (SELECT * FROM t) SELECT * FROM cte", ) self.assertIn("Can't push down CTE column names for star queries.", cm.output[0]) self.assertIn("Named columns are not supported in table alias.", cm.output[1]) with self.assertLogs(helper_logger) as cm: self.validate_identity( "SELECT * FROM t AS t(c1, c2)", "SELECT * FROM t AS t", ) self.assertIn("Named columns are not supported in table alias.", cm.output[0]) with self.assertLogs(helper_logger) as cm: statements = parse( """ BEGIN DECLARE 1; IF from_date IS NULL THEN SET x = 1; END IF; END 
""", read="bigquery", ) for actual, expected in zip( statements, ("BEGIN DECLARE 1", "IF from_date IS NULL THEN SET x = 1", "END IF", "END"), ): self.assertEqual(actual.sql(dialect="bigquery"), expected) self.assertIn("unsupported syntax", cm.output[0]) with self.assertLogs(helper_logger) as cm: statements = parse( """ BEGIN CALL `project_id.dataset_id.stored_procedure_id`(); EXCEPTION WHEN ERROR THEN INSERT INTO `project_id.dataset_id.table_id` SELECT @@error.message, CURRENT_TIMESTAMP(); END """, read="bigquery", ) expected_statements = ( "BEGIN CALL `project_id.dataset_id.stored_procedure_id`()", "EXCEPTION WHEN ERROR THEN INSERT INTO `project_id.dataset_id.table_id` SELECT @@error.message, CURRENT_TIMESTAMP()", "END", ) for actual, expected in zip(statements, expected_statements): self.assertEqual(actual.sql(dialect="bigquery"), expected) self.assertIn("unsupported syntax", cm.output[0]) with self.assertLogs(helper_logger): statements = parse( """ BEGIN DECLARE MY_VAR INT64 DEFAULT 1; SET MY_VAR = (SELECT 0); IF MY_VAR = 1 THEN SELECT 'TRUE'; ELSEIF MY_VAR = 0 THEN SELECT 'FALSE'; ELSE SELECT 'NULL'; END IF; END """, read="bigquery", ) expected_statements = ( "BEGIN DECLARE MY_VAR INT64 DEFAULT 1", "SET MY_VAR = (SELECT 0)", "IF MY_VAR = 1 THEN SELECT 'TRUE'", "ELSEIF MY_VAR = 0 THEN SELECT 'FALSE'", "ELSE SELECT 'NULL'", "END IF", "END", ) for actual, expected in zip(statements, expected_statements): self.assertEqual(actual.sql(dialect="bigquery"), expected) with self.assertLogs(helper_logger) as cm: self.validate_identity( "SELECT * FROM t AS t(c1, c2)", "SELECT * FROM t AS t", ) self.assertIn("Named columns are not supported in table alias.", cm.output[0]) with self.assertLogs(helper_logger): self.validate_all( "SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]", write={ "duckdb": "SELECT a[2], b[2], c[1], d[2], e[1]", "bigquery": "SELECT a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]", "presto": 
"SELECT a[2], b[2], c[1], ELEMENT_AT(d, 2), ELEMENT_AT(e, 1)", }, ) self.validate_all( "a[0]", read={ "bigquery": "a[0]", "duckdb": "a[1]", "presto": "a[1]", }, ) with self.assertLogs(parser_logger) as cm: for_in_stmts = parse( "FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word; END FOR;", read="bigquery", ) self.assertEqual( [s.sql(dialect="bigquery") for s in for_in_stmts], ["FOR record IN (SELECT word FROM shakespeare) DO SELECT record.word", "END FOR"], ) self.assertIn("'END FOR'", cm.output[0]) with self.assertLogs(parser_logger) as cm: for_in_stmts = parse( 'FOR record IN (SELECT word FROM shakespeare) DO BEGIN SET x = "SELECT 1"; EXECUTE IMMEDIATE x; END; END FOR;', read="bigquery", ) self.assertEqual( [s.sql(dialect="bigquery") for s in for_in_stmts], [ 'FOR record IN (SELECT word FROM shakespeare) DO BEGIN SET x = "SELECT 1"', "EXECUTE IMMEDIATE x", "END", "END FOR", ], ) self.assertIn("FOR record", cm.output[0]) self.assertIn("EXECUTE IMMEDIATE", cm.output[1]) self.assertIn("END FOR", cm.output[2]) def test_user_defined_functions(self): self.validate_identity( "CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) RETURNS FLOAT64 NOT DETERMINISTIC LANGUAGE js AS 'return x*y;'" ) self.validate_identity("CREATE TEMPORARY FUNCTION udf(x ANY TYPE) AS (x)") self.validate_identity("CREATE TEMPORARY FUNCTION a(x FLOAT64, y FLOAT64) AS ((x + 4) / y)") self.validate_identity( "CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE AS SELECT s, t" ) self.validate_identity( '''CREATE TEMPORARY FUNCTION string_length_0(strings ARRAY) RETURNS FLOAT64 LANGUAGE js AS """'use strict'; function string_length(strings) { return _.sum(_.map(strings, ((x) => x.length))); } return string_length(strings);""" OPTIONS (library=['gs://ibis-testing-libraries/lodash.min.js'])''', "CREATE TEMPORARY FUNCTION string_length_0(strings ARRAY) RETURNS FLOAT64 LANGUAGE js OPTIONS (library=['gs://ibis-testing-libraries/lodash.min.js']) AS '\\'use strict\\'; function 
string_length(strings) { return _.sum(_.map(strings, ((x) => x.length))); } return string_length(strings);'", ) def test_remove_precision_parameterized_types(self): self.validate_identity("CREATE TABLE test (a NUMERIC(10, 2))") self.validate_identity( "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING(10)), CAST(14 AS STRING(10)))", "INSERT INTO test (cola, colb) VALUES (CAST(7 AS STRING), CAST(14 AS STRING))", ) self.validate_identity( "SELECT CAST(1 AS NUMERIC(10, 2))", "SELECT CAST(1 AS NUMERIC)", ) self.validate_identity( "SELECT CAST('1' AS STRING(10)) UNION ALL SELECT CAST('2' AS STRING(10))", "SELECT CAST('1' AS STRING) UNION ALL SELECT CAST('2' AS STRING)", ) self.validate_identity( "SELECT cola FROM (SELECT CAST('1' AS STRING(10)) AS cola UNION ALL SELECT CAST('2' AS STRING(10)) AS cola)", "SELECT cola FROM (SELECT CAST('1' AS STRING) AS cola UNION ALL SELECT CAST('2' AS STRING) AS cola)", ) def test_gap_fill(self): self.validate_identity( "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'locf')]) ORDER BY time" ) self.validate_identity( "SELECT a, b, c, d, e FROM GAP_FILL(TABLE foo, ts_column => 'b', partitioning_columns => ['a'], value_columns => [('c', 'bar'), ('d', 'baz'), ('e', 'bla')], bucket_width => INTERVAL '1' DAY)" ) self.validate_identity( "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'linear')], ignore_null_values => FALSE) ORDER BY time" ) self.validate_identity( "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE) ORDER BY time" ) self.validate_identity( "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'null')], origin => CAST('2023-11-01 09:30:01' AS DATETIME)) ORDER BY time" ) self.validate_identity( "SELECT * FROM GAP_FILL(TABLE device_data, 
ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'locf')]) ORDER BY time" ) def test_models(self): self.validate_identity( "CREATE OR REPLACE MODEL foo OPTIONS (model_type='linear_reg') AS SELECT bla FROM foo WHERE cond" ) self.validate_identity( """CREATE OR REPLACE MODEL m TRANSFORM( ML.FEATURE_CROSS(STRUCT(f1, f2)) AS cross_f, ML.QUANTILE_BUCKETIZE(f3) OVER () AS buckets, label_col ) OPTIONS ( model_type='linear_reg', input_label_cols=['label_col'] ) AS SELECT * FROM t""", pretty=True, ) self.validate_identity( """CREATE MODEL project_id.mydataset.mymodel INPUT( f1 INT64, f2 FLOAT64, f3 STRING, f4 ARRAY ) OUTPUT( out1 INT64, out2 INT64 ) REMOTE WITH CONNECTION myproject.us.test_connection OPTIONS ( ENDPOINT='https://us-central1-aiplatform.googleapis.com/v1/projects/myproject/locations/us-central1/endpoints/1234' )""", pretty=True, ) def test_ml_functions(self): ast = self.validate_identity( "SELECT * FROM ML.PREDICT(MODEL mydataset.mymodel, (SELECT label, column1, column2 FROM mydataset.mytable))" ) assert ast.find(exp.Predict) self.validate_identity( "SELECT label, predicted_label1, predicted_label AS predicted_label2 FROM ML.PREDICT(MODEL mydataset.mymodel2, (SELECT * EXCEPT (predicted_label), predicted_label AS predicted_label1 FROM ML.PREDICT(MODEL mydataset.mymodel1, TABLE mydataset.mytable)))" ) self.validate_identity( "SELECT * FROM ML.PREDICT(MODEL mydataset.mymodel, (SELECT custom_label, column1, column2 FROM mydataset.mytable), STRUCT(0.55 AS threshold))" ) self.validate_identity("SELECT COSH(1.5)") self.validate_identity( "SELECT * FROM ML.PREDICT(MODEL `my_project`.my_dataset.my_model, (SELECT * FROM input_data))" ) self.validate_identity( "SELECT * FROM ML.PREDICT(MODEL my_dataset.vision_model, (SELECT uri, ML.RESIZE_IMAGE(ML.DECODE_IMAGE(data), 480, 480, FALSE) AS input FROM my_dataset.object_table))" ) self.validate_identity( "SELECT * FROM ML.PREDICT(MODEL my_dataset.vision_model, (SELECT uri, 
ML.CONVERT_COLOR_SPACE(ML.RESIZE_IMAGE(ML.DECODE_IMAGE(data), 224, 280, TRUE), 'YIQ') AS input FROM my_dataset.object_table WHERE content_type = 'image/jpeg'))" ) ast = self.validate_identity("SELECT * FROM ML.FEATURES_AT_TIME((SELECT 1), num_rows => 1)") assert ast.find(exp.FeaturesAtTime) self.validate_identity( "SELECT * FROM ML.FEATURES_AT_TIME(TABLE mydataset.feature_table, time => '2022-06-11 10:00:00+00', num_rows => 1, ignore_feature_nulls => TRUE)" ) ast = self.validate_identity( "SELECT * FROM VECTOR_SEARCH(TABLE mydataset.base_table, 'column_to_search', TABLE mydataset.query_table, 'query_column_to_search', top_k => 2, distance_type => 'cosine', options => '{\"fraction_lists_to_search\":0.15}')" ) assert ast.find(exp.VectorSearch) self.validate_identity( "SELECT * FROM VECTOR_SEARCH(TABLE mydataset.base_table, 'column_to_search', TABLE mydataset.query_table, query_column_to_search => 'query_column_to_search', top_k => 2, distance_type => 'cosine', options => '{\"fraction_lists_to_search\":0.15}')" ) self.validate_identity( "SELECT * FROM VECTOR_SEARCH((SELECT * FROM mydataset.base_table), 'column_to_search', (SELECT * FROM mydataset.query_table), 'query_column_to_search')" ) self.validate_identity( "SELECT * FROM VECTOR_SEARCH(TABLE mydataset.base_table, 'column_to_search', TABLE mydataset.query_table)" ) self.validate_identity( "SELECT * FROM ML.TRANSLATE(MODEL `mydataset.mytranslatemodel`, TABLE `mydataset.mybqtable`, STRUCT('translate_text' AS translate_mode, 'zh-CN' AS target_language_code))" ) self.validate_identity( "SELECT * FROM ML.TRANSLATE(MODEL `mydataset.mymodel`, (SELECT comment AS text_content FROM mydataset.mytable), STRUCT('translate_text' AS translate_mode, 'en' AS target_language_code))" ).find(exp.MLTranslate).assert_is(exp.MLTranslate) self.validate_identity("TRANSLATE(x, y, z)").assert_is(exp.Translate) ast = self.validate_identity( "SELECT * FROM ML.FORECAST(MODEL `mydataset.mymodel`, STRUCT(2 AS horizon))" ) assert 
ast.find(exp.MLForecast) self.validate_identity( "SELECT * FROM ML.FORECAST(MODEL `mydataset.mymodel`, TABLE `mydataset.mybqtable`, STRUCT(2 AS horizon, 4 AS confidence_level))" ) self.validate_identity( "SELECT * FROM ML.FORECAST(MODEL `mydataset.mymodel`, (SELECT * FROM mydataset.query_table), STRUCT())" ) for name in ("GENERATE_EMBEDDING", "GENERATE_TEXT_EMBEDDING"): with self.subTest(f"Testing BigQuery's ML function {name}"): ast = self.validate_identity( f"SELECT * FROM ML.{name}(MODEL mydataset.mymodel, (SELECT label, column1, column2 FROM mydataset.mytable))" ) self.validate_identity( f"SELECT * FROM ML.{name}(MODEL mydataset.mymodel, TABLE mydataset.mytable, STRUCT(TRUE AS flatten_json_output))" ) assert ast.find(exp.GenerateEmbedding) def test_merge(self): self.validate_all( """ MERGE dataset.Inventory T USING dataset.NewArrivals S ON FALSE WHEN NOT MATCHED BY TARGET AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%' THEN DELETE""", write={ "bigquery": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED BY SOURCE AND product LIKE '%b%' THEN DELETE", "snowflake": "MERGE INTO dataset.Inventory AS T USING dataset.NewArrivals AS S ON FALSE WHEN NOT MATCHED AND product LIKE '%a%' THEN DELETE WHEN NOT MATCHED AND product LIKE '%b%' THEN DELETE", }, ) def test_rename_table(self): self.validate_all( "ALTER TABLE db.t1 RENAME TO db.t2", write={ "snowflake": "ALTER TABLE db.t1 RENAME TO db.t2", "bigquery": "ALTER TABLE db.t1 RENAME TO t2", }, ) @mock.patch("sqlglot.dialects.bigquery.logger") def test_pushdown_cte_column_names(self, logger): with self.assertRaises(UnsupportedError): transpile( "WITH cte(foo) AS (SELECT * FROM tbl) SELECT foo FROM cte", read="spark", write="bigquery", unsupported_level=ErrorLevel.RAISE, ) self.validate_all( "WITH cte AS (SELECT 1 AS foo) SELECT foo FROM cte", read={"spark": "WITH cte(foo) AS (SELECT 1) SELECT 
foo FROM cte"}, ) self.validate_all( "WITH cte AS (SELECT 1 AS foo) SELECT foo FROM cte", read={"spark": "WITH cte(foo) AS (SELECT 1 AS bar) SELECT foo FROM cte"}, ) self.validate_all( "WITH cte AS (SELECT 1 AS bar) SELECT bar FROM cte", read={"spark": "WITH cte AS (SELECT 1 AS bar) SELECT bar FROM cte"}, ) self.validate_all( "WITH cte AS (SELECT 1 AS foo, 2) SELECT foo FROM cte", read={"postgres": "WITH cte(foo) AS (SELECT 1, 2) SELECT foo FROM cte"}, ) self.validate_all( "WITH cte AS (SELECT 1 AS foo UNION ALL SELECT 2) SELECT foo FROM cte", read={"postgres": "WITH cte(foo) AS (SELECT 1 UNION ALL SELECT 2) SELECT foo FROM cte"}, ) def test_json_object(self): self.validate_identity("SELECT JSON_OBJECT() AS json_data") self.validate_identity("SELECT JSON_OBJECT('foo', 10, 'bar', TRUE) AS json_data") self.validate_identity("SELECT JSON_OBJECT('foo', 10, 'bar', ['a', 'b']) AS json_data") self.validate_identity("SELECT JSON_OBJECT('a', 10, 'a', 'foo') AS json_data") self.validate_identity( "SELECT JSON_OBJECT(['a', 'b'], [10, NULL]) AS json_data", "SELECT JSON_OBJECT('a', 10, 'b', NULL) AS json_data", ) self.validate_identity( """SELECT JSON_OBJECT(['a', 'b'], [JSON '10', JSON '"foo"']) AS json_data""", """SELECT JSON_OBJECT('a', PARSE_JSON('10'), 'b', PARSE_JSON('"foo"')) AS json_data""", ) self.validate_identity( "SELECT JSON_OBJECT(['a', 'b'], [STRUCT(10 AS id, 'Red' AS color), STRUCT(20 AS id, 'Blue' AS color)]) AS json_data", "SELECT JSON_OBJECT('a', STRUCT(10 AS id, 'Red' AS color), 'b', STRUCT(20 AS id, 'Blue' AS color)) AS json_data", ) self.validate_identity( "SELECT JSON_OBJECT(['a', 'b'], [TO_JSON(10), TO_JSON(['foo', 'bar'])]) AS json_data", "SELECT JSON_OBJECT('a', TO_JSON(10), 'b', TO_JSON(['foo', 'bar'])) AS json_data", ) with self.assertRaises(ParseError): transpile("SELECT JSON_OBJECT('a', 1, 'b') AS json_data", read="bigquery") def test_mod(self): for sql in ("MOD(a, b)", "MOD('a', b)", "MOD(5, 2)", "MOD((a + 1) * 8, 5 - 1)"): with 
self.subTest(f"Testing BigQuery roundtrip of modulo operation: {sql}"): self.validate_identity(sql) self.validate_identity("SELECT MOD((SELECT 1), 2)") self.validate_identity( "MOD((a + 1), b)", "MOD(a + 1, b)", ) def test_inline_constructor(self): self.validate_identity( """SELECT STRUCT>(["2023-01-17"])""", """SELECT CAST(STRUCT(['2023-01-17']) AS STRUCT>)""", ) self.validate_identity( """SELECT STRUCT((SELECT 'foo')).*""", """SELECT CAST(STRUCT((SELECT 'foo')) AS STRUCT).*""", ) self.validate_all( "SELECT ARRAY[1, 2, 3]", write={ "bigquery": "SELECT ARRAY[1, 2, 3]", "duckdb": "SELECT CAST([1, 2, 3] AS DOUBLE[])", }, ) self.validate_all( "CAST(STRUCT(1) AS STRUCT)", write={ "bigquery": "CAST(CAST(STRUCT(1) AS STRUCT) AS STRUCT)", "duckdb": "CAST(CAST(ROW(1) AS STRUCT(a BIGINT)) AS STRUCT(a BIGINT))", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY>[])", write={ "bigquery": "SELECT * FROM UNNEST(ARRAY>[])", "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([] AS STRUCT(x BIGINT)[]), max_depth => 2))", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])", write={ "bigquery": "SELECT * FROM UNNEST(ARRAY>[STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')])", "duckdb": "SELECT * FROM (SELECT UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]), max_depth => 2))", }, ) self.validate_all( "SELECT STRUCT>(1, STRUCT('c_str'))", write={ "bigquery": "SELECT CAST(STRUCT(1, STRUCT('c_str')) AS STRUCT>)", "duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))", }, ) self.validate_all( "SELECT MAX_BY(name, score) FROM table1", write={ "bigquery": "SELECT MAX_BY(name, score) FROM table1", 
"duckdb": "SELECT ARG_MAX(name, score) FROM table1", }, ) self.validate_all( "SELECT MIN_BY(product, price) FROM table1", write={ "bigquery": "SELECT MIN_BY(product, price) FROM table1", "duckdb": "SELECT ARG_MIN(product, price) FROM table1", }, ) def test_convert(self): for value, expected in [ (datetime.datetime(2023, 1, 1), "CAST('2023-01-01 00:00:00' AS DATETIME)"), (datetime.datetime(2023, 1, 1, 12, 13, 14), "CAST('2023-01-01 12:13:14' AS DATETIME)"), ( datetime.datetime(2023, 1, 1, 12, 13, 14, tzinfo=datetime.timezone.utc), "CAST('2023-01-01 12:13:14+00:00' AS TIMESTAMP)", ), ( pytz.timezone("America/Los_Angeles").localize( datetime.datetime(2023, 1, 1, 12, 13, 14) ), "CAST('2023-01-01 12:13:14-08:00' AS TIMESTAMP)", ), ]: with self.subTest(value): self.assertEqual(exp.convert(value).sql(dialect=self.dialect), expected) def test_unnest(self): self.validate_all( "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", write={ "bigquery": "SELECT name, laps FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps), STRUCT('Makhloufi' AS name, [24.5, 25.4, 26.6, 26.1] AS laps)])", "duckdb": "SELECT name, laps FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}, {'name': 'Makhloufi', 'laps': [24.5, 25.4, 26.6, 26.1]}], max_depth => 2))", }, ) self.validate_all( "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", write={ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)])", "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, name, laps FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2))", }, ) self.validate_all( "SELECT 
participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", write={ "bigquery": "SELECT participant FROM UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", "duckdb": "SELECT participant FROM (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", }, ) self.validate_all( "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", write={ "bigquery": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN UNNEST([STRUCT('Rudisha' AS name, [23.4, 26.3, 26.4, 26.1] AS laps)]) AS participant", "duckdb": "WITH Races AS (SELECT '800M' AS race) SELECT race, participant FROM Races AS r CROSS JOIN (SELECT UNNEST([{'name': 'Rudisha', 'laps': [23.4, 26.3, 26.4, 26.1]}], max_depth => 2)) AS participant", }, ) self.validate_all( "SELECT * FROM UNNEST([STRUCT('Alice' AS name, STRUCT(85 AS math, 90 AS english) AS scores), STRUCT('Bob' AS name, STRUCT(92 AS math, 88 AS english) AS scores)])", write={ "bigquery": "SELECT * FROM UNNEST([STRUCT('Alice' AS name, STRUCT(85 AS math, 90 AS english) AS scores), STRUCT('Bob' AS name, STRUCT(92 AS math, 88 AS english) AS scores)])", "duckdb": "SELECT * FROM (SELECT UNNEST([{'name': 'Alice', 'scores': {'math': 85, 'english': 90}}, {'name': 'Bob', 'scores': {'math': 92, 'english': 88}}], max_depth => 2))", "snowflake": "SELECT * FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('name', 'Alice', 'scores', OBJECT_CONSTRUCT('math', 85, 'english', 90)), OBJECT_CONSTRUCT('name', 'Bob', 'scores', OBJECT_CONSTRUCT('math', 92, 'english', 88))])) AS _t0(seq, key, path, index, value, this)", "presto": "SELECT * FROM UNNEST(ARRAY[CAST(ROW('Alice', CAST(ROW(85, 90) AS ROW(math INTEGER, english INTEGER))) AS ROW(name VARCHAR, scores ROW(math INTEGER, english INTEGER))), 
CAST(ROW('Bob', CAST(ROW(92, 88) AS ROW(math INTEGER, english INTEGER))) AS ROW(name VARCHAR, scores ROW(math INTEGER, english INTEGER)))])", "trino": "SELECT * FROM UNNEST(ARRAY[CAST(ROW('Alice', CAST(ROW(85, 90) AS ROW(math INTEGER, english INTEGER))) AS ROW(name VARCHAR, scores ROW(math INTEGER, english INTEGER))), CAST(ROW('Bob', CAST(ROW(92, 88) AS ROW(math INTEGER, english INTEGER))) AS ROW(name VARCHAR, scores ROW(math INTEGER, english INTEGER)))])", "spark2": "SELECT * FROM EXPLODE(ARRAY(STRUCT('Alice' AS name, STRUCT(85 AS math, 90 AS english) AS scores), STRUCT('Bob' AS name, STRUCT(92 AS math, 88 AS english) AS scores)))", "databricks": "SELECT * FROM EXPLODE(ARRAY(STRUCT('Alice' AS name, STRUCT(85 AS math, 90 AS english) AS scores), STRUCT('Bob' AS name, STRUCT(92 AS math, 88 AS english) AS scores)))", "hive": "SELECT * FROM EXPLODE(ARRAY(STRUCT('Alice', STRUCT(85, 90)), STRUCT('Bob', STRUCT(92, 88))))", }, ) self.validate_all( "SELECT * FROM UNNEST([STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92), STRUCT('Diana', 95)])", write={ "bigquery": "SELECT * FROM UNNEST([STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92), STRUCT('Diana', 95)])", "duckdb": "SELECT * FROM (SELECT UNNEST([{'name': 'Alice', 'score': 85}, {'name': 'Bob', 'score': 92}, {'name': 'Diana', 'score': 95}], max_depth => 2))", "snowflake": "SELECT * FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('name', 'Alice', 'score', 85), OBJECT_CONSTRUCT('name', 'Bob', 'score', 92), OBJECT_CONSTRUCT('name', 'Diana', 'score', 95)])) AS _t0(seq, key, path, index, value, this)", "presto": "SELECT * FROM UNNEST(ARRAY[CAST(ROW('Alice', 85) AS ROW(name VARCHAR, score INTEGER)), CAST(ROW('Bob', 92) AS ROW(name VARCHAR, score INTEGER)), CAST(ROW('Diana', 95) AS ROW(name VARCHAR, score INTEGER))])", "trino": "SELECT * FROM UNNEST(ARRAY[CAST(ROW('Alice', 85) AS ROW(name VARCHAR, score INTEGER)), CAST(ROW('Bob', 92) AS ROW(name VARCHAR, score INTEGER)), CAST(ROW('Diana', 95) AS ROW(name VARCHAR, 
def test_range_type(self):
    """Check RANGE typed literals and RANGE(...) constructor calls.

    For every (type, value) pair the typed literal ``<type> <value>`` is
    expected to be generated as ``CAST(<value> AS <type>)``, and parsing the
    bare type name must equal ``exp.DataType.build`` for the BigQuery dialect.
    The RANGE(...) calls that follow are checked with ``validate_identity``.
    """
    typed_literals = (
        ("RANGE", "'[2020-01-01, 2020-12-31)'"),
        ("RANGE", "'[UNBOUNDED, 2020-12-31)'"),
        ("RANGE", "'[2020-01-01 12:00:00, 2020-12-31 12:00:00)'"),
        ("RANGE", "'[2020-10-01 12:00:00+08, 2020-12-31 12:00:00+08)'"),
    )

    # The loop variable is named `range_type` so the builtin `type` is not shadowed.
    for range_type, value in typed_literals:
        with self.subTest(f"Testing BigQuery's RANGE type: {range_type} {value}"):
            self.validate_identity(
                f"SELECT {range_type} {value}", f"SELECT CAST({value} AS {range_type})"
            )

            self.assertEqual(
                self.parse_one(range_type),
                exp.DataType.build(range_type, dialect="bigquery"),
            )

    self.validate_identity(
        "SELECT RANGE(CAST('2022-12-01' AS DATE), CAST('2022-12-31' AS DATE))"
    )
    self.validate_identity("SELECT RANGE(NULL, CAST('2022-12-31' AS DATE))")
    self.validate_identity(
        "SELECT RANGE(CAST('2022-10-01 14:53:27' AS DATETIME), CAST('2022-10-01 16:00:00' AS DATETIME))"
    )
    self.validate_identity(
        "SELECT RANGE(CAST('2022-10-01 14:53:27 America/Los_Angeles' AS TIMESTAMP), CAST('2022-10-01 16:00:00 America/Los_Angeles' AS TIMESTAMP))"
    )
def test_null_ordering_in_analytic_functions(self):
    """Check explicit NULLS ordering inside window ORDER BY clauses.

    Two families of analytic calls are exercised through
    ``validate_identity``: the first with an explicit ROWS frame, the second
    with no frame. Both use ASC NULLS LAST and DESC NULLS FIRST.
    """
    orderings = (("ASC", "NULLS LAST"), ("DESC", "NULLS FIRST"))

    # Each entry: (function calls, subTest label suffix, window frame clause).
    families = (
        (
            ("FIRST_VALUE(col1)", "LAST_VALUE(col1)", "NTH_VALUE(col1, 2)"),
            " ROWS",
            " ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING",
        ),
        (
            (
                "LAG(col1)",
                "LEAD(col1)",
                "CUME_DIST()",
                "DENSE_RANK()",
                "NTILE(4)",
                "PERCENT_RANK()",
                "RANK()",
                "ROW_NUMBER()",
            ),
            "",
            "",
        ),
    )

    for calls, label_suffix, frame_clause in families:
        for func_call in calls:
            for sort_order, null_order in orderings:
                ordering = f"{sort_order} {null_order}"
                with self.subTest(f"{func_call} with {ordering}{label_suffix}"):
                    self.validate_identity(
                        "WITH t AS (SELECT 1 AS id, 2 AS col1) "
                        f"SELECT {func_call} OVER (PARTITION BY id ORDER BY col1 {ordering}{frame_clause}) FROM t"
                    )
def test_json_extract_array(self):
    """Check JSON_QUERY_ARRAY / JSON_EXTRACT_ARRAY generation per dialect.

    Both function names are run against the same JSON document and path. The
    BigQuery output must equal the input, also when generated with
    ``normalize_functions="upper"``.
    """
    json_doc = '{"fruits": [1, "oranges"]}'
    duckdb_sql = f"SELECT CAST('{json_doc}' -> '$.fruits' AS JSON[])"
    snowflake_sql = (
        f"SELECT TRANSFORM(GET_PATH(PARSE_JSON('{json_doc}'), 'fruits'), "
        "x -> PARSE_JSON(TO_JSON(x)))"
    )

    for func in ("JSON_QUERY_ARRAY", "JSON_EXTRACT_ARRAY"):
        with self.subTest(f"Testing BigQuery's {func}"):
            sql = f"SELECT {func}('{json_doc}', '$.fruits')"
            expected = {
                "bigquery": sql,
                "duckdb": duckdb_sql,
                "snowflake": snowflake_sql,
            }
            self.validate_all(sql, write=expected)

            regenerated = self.parse_one(sql).sql("bigquery", normalize_functions="upper")
            self.assertEqual(regenerated, sql)
def test_unix_micros(self):
    """Check UNIX_MICROS output for BigQuery and DuckDB.

    Covers a plain string argument and a TIMESTAMP-typed literal; both are
    expected to produce the same DuckDB EPOCH_US expression.
    """
    duckdb_sql = "SELECT EPOCH_US(CAST('2008-12-25 15:30:00+00' AS TIMESTAMPTZ))"

    # (input SQL, expected BigQuery SQL)
    cases = (
        (
            "SELECT UNIX_MICROS('2008-12-25 15:30:00+00')",
            "SELECT UNIX_MICROS('2008-12-25 15:30:00+00')",
        ),
        (
            "SELECT UNIX_MICROS(TIMESTAMP '2008-12-25 15:30:00+00')",
            "SELECT UNIX_MICROS(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP))",
        ),
    )

    for source_sql, bigquery_sql in cases:
        self.validate_all(
            source_sql,
            write={"bigquery": bigquery_sql, "duckdb": duckdb_sql},
        )
REGEXP_EXTRACT(abc, 'pattern(group)', 2) FROM table", write={ "bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2) FROM table", "duckdb": '''SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(abc, 2), ''), 'pattern(group)', 1) FROM "table"''', }, ) # Position = 1, occurrence = 1 self.validate_all( "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 1, 1) FROM table", write={ "bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 1, 1) FROM table", "duckdb": '''SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 1) FROM "table"''', }, ) # Position = 2, occurrence = 3 self.validate_all( "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2, 3) FROM table", write={ "bigquery": "SELECT REGEXP_EXTRACT(abc, 'pattern(group)', 2, 3) FROM table", "duckdb": '''SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(NULLIF(SUBSTRING(abc, 2), ''), 'pattern(group)', 1), 3) FROM "table"''', }, ) # The pattern does not capture a group (entire regular expression is extracted) self.validate_all( "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", read={ "bigquery": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "trino": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "presto": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "snowflake": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "spark": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]', 0)", "databricks": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]', 0)", }, write={ "bigquery": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "trino": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "presto": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "snowflake": "REGEXP_SUBSTR_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "duckdb": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]')", "spark": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]', 0)", "databricks": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', 'a[0-9]', 0)", }, ) # The pattern does capture >=1 group (the default is to extract the first instance) self.validate_all( "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]')", write={ 
"bigquery": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]')", "trino": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]', 1)", "presto": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]', 1)", "snowflake": "REGEXP_SUBSTR_ALL('a1_a2a3_a4A5a6', '(a)[0-9]', 1, 1, 'c', 1)", "duckdb": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]', 1)", "spark": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]')", "databricks": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]')", }, ) def test_format_temporal(self): self.validate_all( "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')", write={ "bigquery": "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')", "duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')", }, ) self.validate_all( "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')", write={ "bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %T', CAST('2023-12-25 15:30:00' AS DATETIME))", "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')", }, ) self.validate_all( "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')", write={ "bigquery": "SELECT FORMAT_DATETIME('%D', '2023-12-25 15:30:00')", "duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%m/%d/%y')", }, ) self.validate_all( "SELECT FORMAT_DATETIME('%F %T', DATETIME '2023-10-15 14:30:45')", write={ "bigquery": "SELECT FORMAT_DATETIME('%F %T', CAST('2023-10-15 14:30:45' AS DATETIME))", "duckdb": "SELECT STRFTIME(CAST('2023-10-15 14:30:45' AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')", }, ) self.validate_all( "SELECT FORMAT_DATETIME('%c', DATETIME '2008-12-25 15:30:00')", write={ "bigquery": "SELECT FORMAT_DATETIME('%c', CAST('2008-12-25 15:30:00' AS DATETIME))", "duckdb": "SELECT STRFTIME(CAST('2008-12-25 15:30:00' AS TIMESTAMP), '%a %b %-d %H:%M:%S %Y')", }, ) self.validate_all( "SELECT FORMAT_DATETIME('%Y-%m-%e', DATETIME '2020-09-09 10:15:30')", write={ "bigquery": "SELECT FORMAT_DATETIME('%Y-%m-%e', CAST('2020-09-09 10:15:30' AS DATETIME))", "duckdb": "SELECT 
def test_string_agg(self):
    """Check STRING_AGG variants and the GROUP_CONCAT -> STRING_AGG rewrite."""
    unchanged = (
        "STRING_AGG(a, ' & ')",
        "STRING_AGG(DISTINCT a, ' & ')",
        "STRING_AGG(a, ' & ' ORDER BY LENGTH(a))",
        "STRING_AGG(foo, b'|' ORDER BY bar)",
        "STRING_AGG(a)",
        "STRING_AGG(DISTINCT v, sep LIMIT 3)",
        "STRING_AGG(DISTINCT a ORDER BY b DESC, c DESC LIMIT 10)",
    )
    for statement in unchanged:
        self.validate_identity(statement)

    # GROUP_CONCAT is expected to be generated as STRING_AGG.
    group_concat_sql = "SELECT a, GROUP_CONCAT(b) FROM table GROUP BY a"
    string_agg_sql = "SELECT a, STRING_AGG(b) FROM table GROUP BY a"
    self.validate_identity(group_concat_sql, string_agg_sql)
def test_with_offset(self):
    """Check that WITH OFFSET gets an explicit ``AS offset`` alias.

    Covers WITH OFFSET followed by a set operation and by several join
    kinds. In the join cases the comma join is also expected to be generated
    as CROSS JOIN.
    """
    set_op_input = (
        "SELECT * FROM UNNEST(x) WITH OFFSET EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET"
    )
    set_op_expected = "SELECT * FROM UNNEST(x) WITH OFFSET AS offset EXCEPT DISTINCT SELECT * FROM UNNEST(y) WITH OFFSET AS offset"
    self.validate_identity(set_op_input, set_op_expected)

    join_kinds = ("LEFT", "RIGHT", "FULL", "NATURAL", "SEMI", "ANTI")
    for join_ops in join_kinds:
        with self.subTest(f"Testing {join_ops} in test_with_offset"):
            join_input = f"SELECT * FROM t1, UNNEST([1, 2]) AS hit WITH OFFSET {join_ops} JOIN foo"
            join_expected = f"SELECT * FROM t1 CROSS JOIN UNNEST([1, 2]) AS hit WITH OFFSET AS offset {join_ops} JOIN foo"
            self.validate_identity(join_input, join_expected)
def test_override_normalization_strategy(self):
    """Check that an uppercase normalization strategy is applied by qualify.

    The strategy is first supplied only through the dialect string, then
    again while ``BigQuery.NORMALIZATION_STRATEGY`` is also set to UPPERCASE.
    Both runs must produce upper-cased, quoted identifiers.
    """
    from sqlglot.dialects import BigQuery
    from sqlglot.dialects.dialect import NormalizationStrategy

    sql = "SELECT * FROM p.d.t"
    expected = "SELECT * FROM `P`.`D`.`T` AS `T`"
    ast = self.parse_one(sql)

    qualified = qualify(ast.copy(), dialect="bigquery,normalization_strategy=uppercase")
    self.assertEqual(qualified.sql("bigquery"), expected)

    # Remember the class-level value so the finally block restores exactly
    # what was there, instead of assuming a particular default.
    original_strategy = BigQuery.NORMALIZATION_STRATEGY
    try:
        BigQuery.NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

        qualified = qualify(ast.copy(), dialect="bigquery,normalization_strategy=uppercase")
        self.assertEqual(qualified.sql("bigquery"), expected)
    finally:
        # The class attribute is shared state; always undo the override.
        BigQuery.NORMALIZATION_STRATEGY = original_strategy
"DISTINCT "): self.validate_all( f"SELECT ARRAY_AGG({distinct}x ORDER BY x)", write={ "bigquery": f"SELECT ARRAY_AGG({distinct}x ORDER BY x)", "snowflake": f"SELECT ARRAY_AGG({distinct}x) WITHIN GROUP (ORDER BY x NULLS FIRST)", }, ) for nulls in ("", " IGNORE NULLS", " RESPECT NULLS"): self.validate_all( f"SELECT ARRAY_AGG(x{nulls} ORDER BY col1 ASC, col2 DESC)", write={ "bigquery": f"SELECT ARRAY_AGG(x{nulls} ORDER BY col1 ASC, col2 DESC)", "snowflake": "SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY col1 ASC NULLS FIRST, col2 DESC NULLS LAST)", }, ) def test_array_concat(self): self.validate_all( "WITH x AS ( SELECT 1 AS id), test_cte AS ( SELECT ARRAY_CONCAT(( SELECT id FROM x WHERE FALSE)) AS result ) SELECT * FROM test_cte;", write={ "snowflake": "WITH x AS (SELECT 1 AS id), test_cte AS (SELECT ARRAY_CAT((SELECT id FROM x WHERE FALSE), []) AS result) SELECT * FROM test_cte", }, ) def test_select_as_struct(self): self.validate_all( "SELECT ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t) AS array_col", write={ "bigquery": "SELECT ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t) AS array_col", "snowflake": "SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t) AS array_col", }, ) self.validate_all( "WITH t1 AS (SELECT ARRAY(SELECT AS STRUCT x1 AS alias_x1, x2 /* test */ FROM t2) AS array_col) SELECT array_col[0].alias_x1, array_col[0].x2 FROM t1", write={ "bigquery": "WITH t1 AS (SELECT ARRAY(SELECT AS STRUCT x1 AS alias_x1, x2 /* test */ FROM t2) AS array_col) SELECT array_col[0].alias_x1, array_col[0].x2 FROM t1", "snowflake": "WITH t1 AS (SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('alias_x1', x1, 'x2', x2 /* test */)) FROM t2) AS array_col) SELECT array_col[0].alias_x1, array_col[0].x2 FROM t1", }, ) self.validate_all( "WITH t1 AS (SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b) AS array_col) SELECT array_col[0].a, array_col[0].b FROM t1", write={ "bigquery": "WITH t1 AS (SELECT ARRAY(SELECT AS STRUCT 1 AS a, 2 AS b) AS array_col) SELECT 
def test_unnest_with_offset(self):
    """Check UNNEST ... WITH OFFSET generation for BigQuery and Hive-family dialects.

    Runs once without an offset alias (expected alias ``offset``) and once
    with ``AS pos``. Hive, Spark2, Spark and Databricks all share the same
    expected LATERAL VIEW POSEXPLODE output.
    """
    posexplode_dialects = ("hive", "spark2", "spark", "databricks")

    for offset, alias in (("", "offset"), ("AS pos", "pos")):
        source_sql = f"SELECT * FROM tbl CROSS JOIN UNNEST(col) AS ref WITH OFFSET {offset}"
        lateral_view_sql = f"SELECT * FROM tbl LATERAL VIEW POSEXPLODE(col) AS {alias}, ref"

        expected = {
            "bigquery": f"SELECT * FROM tbl CROSS JOIN UNNEST(col) AS ref WITH OFFSET AS {alias}",
        }
        expected.update(dict.fromkeys(posexplode_dialects, lateral_view_sql))

        self.validate_all(source_sql, write=expected)
GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", write={ "bigquery": "SELECT GENERATE_DATE_ARRAY('2016-10-05', '2016-10-08', INTERVAL '1' MONTH)", "duckdb": "SELECT CAST(GENERATE_SERIES(CAST('2016-10-05' AS DATE), CAST('2016-10-08' AS DATE), INTERVAL '1' MONTH) AS DATE[])", }, ) self.validate_all( "SELECT id, mnth FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start_month, DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL '1' MONTH)) AS mnth", write={ "bigquery": "SELECT id, mnth FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start_month, DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL '1' MONTH)) AS mnth", "duckdb": "SELECT id, mnth FROM t CROSS JOIN UNNEST(CAST(GENERATE_SERIES(start_month, DATE_TRUNC('MONTH', CURRENT_DATE), INTERVAL '1' MONTH) AS DATE[])) AS _t0(mnth)", "snowflake": "SELECT id, DATEADD(MONTH, CAST(mnth AS INT), CAST(start_month AS DATE)) AS mnth FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(MONTH, start_month, DATE_TRUNC('MONTH', CURRENT_DATE)) + 1)) AS _t0(seq, key, path, index, mnth, this)", }, ) self.validate_all( "SELECT id, mnth AS a_mnth FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start_month, DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL '1' MONTH)) AS mnth", write={ "bigquery": "SELECT id, mnth AS a_mnth FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start_month, DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL '1' MONTH)) AS mnth", "duckdb": "SELECT id, mnth AS a_mnth FROM t CROSS JOIN UNNEST(CAST(GENERATE_SERIES(start_month, DATE_TRUNC('MONTH', CURRENT_DATE), INTERVAL '1' MONTH) AS DATE[])) AS _t0(mnth)", "snowflake": "SELECT id, DATEADD(MONTH, CAST(mnth AS INT), CAST(start_month AS DATE)) AS a_mnth FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(MONTH, start_month, DATE_TRUNC('MONTH', CURRENT_DATE)) + 1)) AS _t0(seq, key, path, index, mnth, this)", }, ) self.validate_all( "SELECT id, mnth + 1 AS a_mnth FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start_month, DATE_TRUNC(CURRENT_DATE, MONTH), INTERVAL '1' 
def test_declare(self):
    """Check the DECLARE statement forms listed below via ``validate_identity``."""
    # supported cases
    supported_statements = (
        "DECLARE X INT64",
        "DECLARE X INT64 DEFAULT 1",
        "DECLARE X FLOAT64 DEFAULT 0.9",
        "DECLARE X INT64 DEFAULT (SELECT MAX(col) FROM foo)",
        "DECLARE X, Y, Z INT64",
        "DECLARE X, Y, Z INT64 DEFAULT 42",
        "DECLARE X, Y, Z INT64 DEFAULT (SELECT 42)",
        "DECLARE START_DATE DATE DEFAULT CURRENT_DATE - 1",
        "DECLARE TS TIMESTAMP DEFAULT CURRENT_TIMESTAMP() - INTERVAL '1' HOUR",
    )
    for statement in supported_statements:
        self.validate_identity(statement)
'2013-12-25')", "EXTRACT(WEEK(THURSDAY) FROM CAST('2013-12-25' AS DATE))", ) week_trunc = { "MONDAY": ("WEEK(MONDAY)", "DATE_TRUNC('WEEK', date)"), "TUESDAY": ( "WEEK(TUESDAY)", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '-1' DAY) + INTERVAL '1' DAY AS DATE)", ), "WEDNESDAY": ( "WEEK(WEDNESDAY)", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '-2' DAY) + INTERVAL '2' DAY AS DATE)", ), "THURSDAY": ( "WEEK(THURSDAY)", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '-3' DAY) + INTERVAL '3' DAY AS DATE)", ), "FRIDAY": ( "WEEK(FRIDAY)", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '-4' DAY) + INTERVAL '4' DAY AS DATE)", ), "SATURDAY": ( "WEEK(SATURDAY)", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '-5' DAY) + INTERVAL '5' DAY AS DATE)", ), "SUNDAY": ( "WEEK", "CAST(DATE_TRUNC('WEEK', date + INTERVAL '1' DAY) + INTERVAL '-1' DAY AS DATE)", ), } for day, (bq_unit, duckdb_sql) in week_trunc.items(): with self.subTest( f"Testing transpilation of DATE_TRUNC from Bigquery to Duckdb for unit: {day}" ): self.validate_all( f"SELECT DATE_TRUNC(date, WEEK({day}))", write={ "bigquery": f"SELECT DATE_TRUNC(date, {bq_unit})", "duckdb": f"SELECT {duckdb_sql}", }, ) # BigQuery → DuckDB transpilation tests for DATE_DIFF with week units self.validate_all( "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))", write={ "bigquery": "SELECT DATE_DIFF('2024-06-15', '2024-01-08', WEEK(MONDAY))", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-06-15' AS DATE)))", }, ) self.validate_all( "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK(SUNDAY))", write={ "bigquery": "SELECT DATE_DIFF('2026-01-15', '2024-01-08', WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2026-01-15' AS DATE) + INTERVAL '1' DAY))", }, ) self.validate_all( "SELECT DATE_DIFF('2024-01-15', '2022-04-28', WEEK(SATURDAY))", write={ "bigquery": "SELECT DATE_DIFF('2024-01-15', '2022-04-28', 
WEEK(SATURDAY))", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2022-04-28' AS DATE) + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '-5' DAY))", }, ) self.validate_all( "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", write={ "bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY))", }, ) # Test WEEK - Saturday to Sunday boundary (critical test for Sunday-start weeks) # In BigQuery: Saturday -> Sunday crosses week boundary = 1 week # Without fix: DuckDB treats as Monday-start weeks = 0 weeks (both in same week) self.validate_all( "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", write={ "bigquery": "SELECT DATE_DIFF('2024-01-07', '2024-01-06', WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-06' AS DATE) + INTERVAL '1' DAY), DATE_TRUNC('WEEK', CAST('2024-01-07' AS DATE) + INTERVAL '1' DAY))", }, ) self.validate_all( "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", write={ "bigquery": "SELECT DATE_DIFF('2024-01-15', '2024-01-08', ISOWEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE)))", }, ) self.validate_all( "SELECT DATE_DIFF(DATE '2024-09-15', DATE '2024-01-08', WEEK(MONDAY))", write={ "bigquery": "SELECT DATE_DIFF(CAST('2024-09-15' AS DATE), CAST('2024-01-08' AS DATE), WEEK(MONDAY))", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-08' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-09-15' AS DATE)))", }, ) self.validate_all( "SELECT DATE_DIFF(DATE '2024-01-01', DATE '2024-01-15', WEEK(SUNDAY))", write={ "bigquery": "SELECT DATE_DIFF(CAST('2024-01-01' AS DATE), CAST('2024-01-15' AS DATE), WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-01-15' AS DATE) + INTERVAL '1' DAY), 
def test_safe_math_funcs(self):
    """Check SAFE_* arithmetic functions against Spark/Databricks TRY_*.

    SAFE_NEGATE is checked with ``validate_identity``. SAFE_ADD,
    SAFE_MULTIPLY and SAFE_SUBTRACT are each paired with the TRY_*
    counterpart, both when reading and when writing.
    """
    self.validate_identity("SAFE_NEGATE(x)")

    for operation in ("ADD", "MULTIPLY", "SUBTRACT"):
        safe_call = f"SAFE_{operation}(x, y)"
        try_call = f"TRY_{operation}(x, y)"

        self.validate_all(
            safe_call,
            read={
                "bigquery": safe_call,
                "spark": try_call,
                "databricks": try_call,
            },
            write={
                "spark": try_call,
                "databricks": try_call,
            },
        )
"SELECT 1 & 1", "snowflake": "SELECT BITAND(1, 1)", }, ) def test_bitwise_not(self): self.validate_all( "SELECT ~1", write={ "bigquery": "SELECT ~1", "snowflake": "SELECT BITNOT(1)", }, ) def test_bit_aggs(self): self.validate_all( "BIT_AND(x)", read={ "bigquery": "BIT_AND(x)", "databricks": "BIT_AND(x)", "dremio": "BIT_AND(x)", "duckdb": "BIT_AND(x)", "mysql": "BIT_AND(x)", "postgres": "BIT_AND(x)", "spark": "BIT_AND(x)", }, write={ "databricks": "BIT_AND(x)", "dremio": "BIT_AND(x)", "duckdb": "BIT_AND(x)", "mysql": "BIT_AND(x)", "postgres": "BIT_AND(x)", "spark": "BIT_AND(x)", }, ) self.validate_all( "BIT_OR(x)", read={ "bigquery": "BIT_OR(x)", "databricks": "BIT_OR(x)", "dremio": "BIT_OR(x)", "duckdb": "BIT_OR(x)", "mysql": "BIT_OR(x)", "postgres": "BIT_OR(x)", "spark": "BIT_OR(x)", }, write={ "databricks": "BIT_OR(x)", "dremio": "BIT_OR(x)", "duckdb": "BIT_OR(x)", "mysql": "BIT_OR(x)", "postgres": "BIT_OR(x)", "spark": "BIT_OR(x)", }, ) self.validate_all( "BIT_XOR(x)", read={ "bigquery": "BIT_XOR(x)", "databricks": "BIT_XOR(x)", "duckdb": "BIT_XOR(x)", "mysql": "BIT_XOR(x)", "postgres": "BIT_XOR(x)", "spark": "BIT_XOR(x)", }, write={ "databricks": "BIT_XOR(x)", "duckdb": "BIT_XOR(x)", "mysql": "BIT_XOR(x)", "postgres": "BIT_XOR(x)", "spark": "BIT_XOR(x)", }, ) self.validate_all( "BIT_COUNT(x)", read={ "bigquery": "BIT_COUNT(x)", "spark": "BIT_COUNT(x)", "databricks": "BIT_COUNT(x)", "mysql": "BIT_COUNT(x)", }, write={ "spark": "BIT_COUNT(x)", "databricks": "BIT_COUNT(x)", "mysql": "BIT_COUNT(x)", }, ) def test_to_hex(self): self.validate_all( "SELECT TO_HEX(SHA1('abc'))", write={ "bigquery": "SELECT TO_HEX(SHA1('abc'))", "snowflake": "SELECT TO_CHAR(SHA1_BINARY('abc'))", }, ) def test_md5(self): self.validate_all( "SELECT MD5('abc')", write={ "bigquery": "SELECT MD5('abc')", "snowflake": "SELECT MD5_BINARY('abc')", }, ) def test_to_json_string(self): self.validate_all( """SELECT TO_JSON_STRING(STRUCT('Alice' AS name)) AS json_data""", write={ "bigquery": 
"""SELECT TO_JSON_STRING(STRUCT('Alice' AS name)) AS json_data""", "snowflake": """SELECT TO_JSON(OBJECT_CONSTRUCT('name', 'Alice')) AS json_data""", }, ) def test_concat(self): self.validate_all( "SELECT CONCAT('T.P.', ' ', 'Bar') AS author", write={ "bigquery": "SELECT CONCAT('T.P.', ' ', 'Bar') AS author", "duckdb": "SELECT 'T.P.' || ' ' || 'Bar' AS author", }, ) def test_pseudocolumns(self): schema = { "t": { "col": "INT", "a": "TIMESTAMP", "b": "TIMESTAMP", } } ast = self.validate_identity("SELECT col FROM t WHERE _PARTITIONTIME BETWEEN a AND b") self.assertIsNone(ast.find(exp.Pseudocolumn)) qualified = qualify(ast, schema=schema, dialect="bigquery") self.assertIsNotNone(qualified.find(exp.Pseudocolumn)) self.assertEqual( qualified.sql(dialect="bigquery"), "SELECT `t`.`col` AS `col` FROM `t` AS `t` WHERE `_partitiontime` BETWEEN `t`.`a` AND `t`.`b`", ) ast = self.validate_identity("SELECT _DBT_MAX_PARTITION FROM t") self.assertIsNone(ast.find(exp.Pseudocolumn)) qualified = qualify(ast, schema=schema, dialect="bigquery") self.assertIsNotNone(qualified.find(exp.Pseudocolumn)) def test_round(self): self.validate_all( "SELECT ROUND(2.25) AS value", write={ "bigquery": "SELECT ROUND(2.25) AS value", "duckdb": "SELECT ROUND(2.25) AS value", }, ) self.validate_all( "SELECT ROUND(2.25, 1) AS value", write={ "bigquery": "SELECT ROUND(2.25, 1) AS value", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(NUMERIC '2.25', 1, 'ROUND_HALF_AWAY_FROM_ZERO') AS value", write={ "bigquery": """SELECT ROUND(CAST('2.25' AS NUMERIC), 1, 'ROUND_HALF_AWAY_FROM_ZERO') AS value""", "duckdb": "SELECT ROUND(CAST('2.25' AS DECIMAL), 1) AS value", }, ) self.validate_all( "SELECT ROUND(NUMERIC '2.25', 1, 'ROUND_HALF_EVEN') AS value", write={ "bigquery": """SELECT ROUND(CAST('2.25' AS NUMERIC), 1, 'ROUND_HALF_EVEN') AS value""", "duckdb": "SELECT ROUND_EVEN(CAST('2.25' AS DECIMAL), 1) AS value", }, ) def test_approx_quantiles(self): 
def test_approx_quantiles_to_duckdb(self):
    """Check how APPROX_QUANTILES(expr, n) is generated for DuckDB.

    The expected DuckDB text is a single ``APPROX_QUANTILE(expr, [...])``
    call whose list runs from 0 to 1 (for example n=4 expects
    ``[0, 0.25, 0.5, 0.75, 1]``). Bucket counts that are non-literal,
    listed under "non-integer bucket count" ("0", "-1", "2.5"), NULL or
    missing, as well as RESPECT NULLS, are expected to raise
    ``UnsupportedError`` when generating with
    ``unsupported_level=ErrorLevel.RAISE``.
    """
    # Small literal bucket counts: the expected quantile list is spelled out.
    self.validate_all(
        "APPROX_QUANTILES(x, 1)",
        write={"duckdb": "APPROX_QUANTILE(x, [0, 1])"},
    )
    self.validate_all(
        "APPROX_QUANTILES(x, 2)",
        write={"duckdb": "APPROX_QUANTILE(x, [0, 0.5, 1])"},
    )
    self.validate_all(
        "APPROX_QUANTILES(x, 4)",
        write={"duckdb": "APPROX_QUANTILE(x, [0, 0.25, 0.5, 0.75, 1])"},
    )
    # DISTINCT is expected to be carried over into the DuckDB call.
    self.validate_all(
        "APPROX_QUANTILES(DISTINCT x, 2)",
        write={"duckdb": "APPROX_QUANTILE(DISTINCT x, [0, 0.5, 1])"},
    )

    with self.subTest("APPROX_QUANTILES 100 buckets"):
        # The full list is too long to spell out, so only spot-check it:
        # exactly one APPROX_QUANTILE call, two sample values, overall shape.
        result = self.parse_one("APPROX_QUANTILES(x, 100)").sql("duckdb")
        self.assertEqual(result.count("APPROX_QUANTILE("), 1)
        self.assertIn("0.01", result)
        self.assertIn("0.99", result)
        self.assertRegex(result, r"APPROX_QUANTILE\(x, \[.*\]\)")

    # Non-trivial first arguments are expected to pass through unchanged.
    for expr in ("x + y", "CASE WHEN x > 0 THEN x ELSE 0 END", "ABS(x)"):
        with self.subTest(expr=expr):
            self.validate_all(
                f"APPROX_QUANTILES({expr}, 2)",
                write={"duckdb": f"APPROX_QUANTILE({expr}, [0, 0.5, 1])"},
            )

    # From here on: inputs for which generation must raise UnsupportedError.
    with self.subTest("non-literal bucket count"):
        with self.assertRaises(UnsupportedError):
            self.parse_one("APPROX_QUANTILES(x, bucket_count)").sql(
                "duckdb", unsupported_level=ErrorLevel.RAISE
            )

    with self.subTest("non-integer bucket count"):
        # NOTE(review): the label says "non-integer", but the values also
        # include the integers 0 and -1 alongside 2.5.
        for value in ("0", "-1", "2.5"):
            with self.subTest(value=value):
                with self.assertRaises(UnsupportedError):
                    self.parse_one(f"APPROX_QUANTILES(x, {value})").sql(
                        "duckdb", unsupported_level=ErrorLevel.RAISE
                    )

    with self.subTest("NULL bucket count"):
        with self.assertRaises(UnsupportedError):
            self.parse_one("APPROX_QUANTILES(x, NULL)").sql(
                "duckdb", unsupported_level=ErrorLevel.RAISE
            )

    with self.subTest("missing bucket count"):
        with self.assertRaises(UnsupportedError):
            self.parse_one("APPROX_QUANTILES(x)").sql(
                "duckdb", unsupported_level=ErrorLevel.RAISE
            )

    with self.subTest("missing bucket count with DISTINCT"):
        with self.assertRaises(UnsupportedError):
            self.parse_one("APPROX_QUANTILES(DISTINCT x)").sql(
                "duckdb", unsupported_level=ErrorLevel.RAISE
            )

    with self.subTest("APPROX_QUANTILES IGNORE NULLS"):
        # No warning: IGNORE NULLS is the default behavior in DuckDB
        from sqlglot.generator import logger as generator_logger

        # Patch the generator logger's `warning` so the absence of any
        # warning call can be asserted after generation.
        with mock.patch.object(generator_logger, "warning") as mock_warning:
            self.validate_all(
                "APPROX_QUANTILES(x, 2 IGNORE NULLS)",
                write={"duckdb": "APPROX_QUANTILE(x, [0, 0.5, 1])"},
            )
            mock_warning.assert_not_called()

    with self.subTest("APPROX_QUANTILES RESPECT NULLS"):
        with self.assertRaises(UnsupportedError):
            self.parse_one("APPROX_QUANTILES(x, 2 RESPECT NULLS)").sql(
                "duckdb", unsupported_level=ErrorLevel.RAISE
            )
import ErrorLevel class TestClickhouse(Validator): dialect = "clickhouse" def test_clickhouse(self): self.validate_identity( "SELECT col.^nested, t.col2.^nested, t.col3.^nested.twice FROM t" ).selects[0].assert_is(exp.NestedJSONSelect) self.validate_identity( "cast(notEmpty(report_task_id)?report_task_id:'-1' AS text)", "CAST(CASE WHEN notEmpty(report_task_id) THEN report_task_id ELSE '-1' END AS String)", ) expr = quote_identifiers(self.parse_one("{start_date:String}"), dialect="clickhouse") self.assertEqual(expr.sql("clickhouse"), "{start_date: String}") for string_type_enum in ClickHouse.Generator.STRING_TYPE_MAPPING: self.validate_identity(f"CAST(x AS {string_type_enum.value})", "CAST(x AS String)") # Arrays, maps and tuples can't be Nullable in ClickHouse for non_nullable_type in ("ARRAY", "MAP", "STRUCT(a: INT)"): try_cast = parse_one(f"TRY_CAST(x AS {non_nullable_type})") target_type = try_cast.to.sql("clickhouse") self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS {target_type})") for nullable_type in ("INT", "UINT", "BIGINT", "FLOAT", "DOUBLE", "TEXT", "DATE", "UUID"): try_cast = parse_one(f"TRY_CAST(x AS {nullable_type})") target_type = exp.DataType.build(nullable_type, dialect="clickhouse").sql("clickhouse") self.assertEqual(try_cast.sql("clickhouse"), f"CAST(x AS Nullable({target_type}))") expr = parse_one("count(x)") self.assertEqual(expr.sql(dialect="clickhouse"), "COUNT(x)") self.validate_identity('SELECT DISTINCT ON ("id") * FROM t') self.validate_identity("SELECT 1 OR (1 = 2)") self.validate_identity("SELECT 1 AND (1 = 2)") self.validate_identity("SELECT json.a.:Int64") self.validate_identity("SELECT json.a.:JSON.b.:Int64") self.validate_identity('SELECT json.a.b.:"Array(JSON)".c') self.validate_identity('SELECT json.a.b.:"Array(Array(JSON))".c') self.validate_identity("SELECT json.a.b[].c", 'SELECT json.a.b.:"Array(JSON)".c') self.validate_identity("SELECT json.a.b[][]", 'SELECT json.a.b.:"Array(Array(JSON))"') self.validate_identity("WITH 
arrayJoin([(1, [2, 3])]) AS arr SELECT arr") self.validate_identity("CAST(1 AS Bool)") self.validate_identity("SELECT toString(CHAR(104.1, 101, 108.9, 108.9, 111, 32))") self.validate_identity("@macro").assert_is(exp.Parameter).this.assert_is(exp.Var) self.validate_identity("SELECT toFloat(like)") self.validate_identity("SELECT like") self.validate_identity("SELECT STR_TO_DATE(str, fmt, tz)") self.validate_identity("SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')") self.validate_identity("SELECT EXTRACT(YEAR FROM toDateTime('2023-02-01'))") self.validate_identity("extract(haystack, pattern)") self.validate_identity("SELECT * FROM x LIMIT 1 UNION ALL SELECT * FROM y") self.validate_identity("SELECT CAST(x AS Tuple(String, Array(Nullable(Float64))))") self.validate_identity("countIf(x, y)") self.validate_identity("x = y") self.validate_identity("x <> y") self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 0.01)") self.validate_identity("SELECT * FROM (SELECT a FROM b SAMPLE 1 / 10 OFFSET 1 / 2)") self.validate_identity("SELECT sum(foo * bar) FROM bla SAMPLE 10000000") self.validate_identity("CAST(x AS Nested(ID UInt32, Serial UInt32, EventTime DateTime))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world' = 2))") self.validate_identity("CAST(x AS Enum('hello', 'world'))") self.validate_identity("CAST(x AS Enum('hello' = 1, 'world'))") self.validate_identity("CAST(x AS Enum8('hello' = -123, 'world'))") self.validate_identity("CAST(x AS FixedString(1))") self.validate_identity("CAST(x AS LowCardinality(FixedString))") self.validate_identity("SELECT isNaN(1.0)") self.validate_identity("SELECT startsWith('Spider-Man', 'Spi')") self.validate_identity("SELECT xor(TRUE, FALSE)") self.validate_identity("CAST(['hello'], 'Array(Enum8(''hello'' = 1))')") self.validate_identity("SELECT x, COUNT() FROM y GROUP BY x WITH TOTALS") self.validate_identity("SELECT INTERVAL t.days DAY") self.validate_identity("SELECT match('abc', '([a-z]+)')") 
self.validate_identity("dictGet(x, 'y')") self.validate_identity("WITH final AS (SELECT 1) SELECT * FROM final") self.validate_identity("SELECT * FROM x FINAL") self.validate_identity("SELECT * FROM x AS y FINAL") self.validate_identity("'a' IN mapKeys(map('a', 1, 'b', 2))") self.validate_identity("CAST((1, 2) AS Tuple(a Int8, b Int16))") self.validate_identity("SELECT * FROM foo LEFT ANY JOIN bla") self.validate_identity("SELECT * FROM foo LEFT ASOF JOIN bla") self.validate_identity("SELECT * FROM foo ASOF JOIN bla") self.validate_identity("SELECT * FROM foo ANY JOIN bla") self.validate_identity("SELECT * FROM foo GLOBAL ANY JOIN bla") self.validate_identity("SELECT * FROM foo GLOBAL LEFT ANY JOIN bla") self.validate_identity("SELECT quantile(0.5)(a)") self.validate_identity("SELECT quantiles(0.5)(a) AS x FROM t") self.validate_identity("SELECT quantilesIf(0.5)(a, a > 1) AS x FROM t") self.validate_identity("SELECT quantileState(0.5)(a) AS x FROM t") self.validate_identity("SELECT deltaSumMerge(a) AS x FROM t") self.validate_identity("SELECT quantiles(0.1, 0.2, 0.3)(a)") self.validate_identity("SELECT quantileTiming(0.5)(RANGE(100))") self.validate_identity("SELECT histogram(5)(a)") self.validate_identity("SELECT groupUniqArray(2)(a)") self.validate_identity("SELECT exponentialTimeDecayedAvg(60)(a, b)") self.validate_identity("levenshteinDistance(col1, col2)", "editDistance(col1, col2)") self.validate_identity("jaroWinklerSimilarity('hello', 'world')") self.validate_identity("SELECT * FROM foo WHERE x GLOBAL IN (SELECT * FROM bar)") self.validate_identity("SELECT * FROM foo WHERE x GLOBAL NOT IN (SELECT * FROM bar)") self.validate_identity("POSITION(haystack, needle)") self.validate_identity("POSITION(haystack, needle, position)") self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DateTime)") self.validate_identity("CAST(x AS TIMESTAMPTZ)", "CAST(x AS DateTime)") self.validate_identity("CAST(x as MEDIUMINT)", "CAST(x AS Int32)") 
self.validate_identity("CAST(x AS DECIMAL(38, 2))", "CAST(x AS Decimal(38, 2))") self.validate_identity("SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src") self.validate_identity("""SELECT JSONExtractString('{"x": {"y": 1}}', 'x', 'y')""") self.validate_identity("SELECT * FROM table LIMIT 1 BY a, b") self.validate_identity("SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b") self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER test_cluster") self.validate_identity("TRUNCATE TABLE t1 ON CLUSTER '{cluster}'") self.validate_identity("TRUNCATE DATABASE db") self.validate_identity("TRUNCATE DATABASE db ON CLUSTER test_cluster") self.validate_identity("TRUNCATE DATABASE db ON CLUSTER '{cluster}'") # Numeric trunc self.validate_identity("trunc(3.14159, 2)").assert_is(exp.Trunc) self.validate_identity("trunc(3.14159)").assert_is(exp.Trunc) self.validate_all( "trunc(3.14159, 2)", read={"postgres": "TRUNC(3.14159, 2)"}, ) self.validate_identity("EXCHANGE TABLES x.a AND y.b", check_command_warning=True) self.validate_identity("CREATE TABLE test (id UInt8) ENGINE=Null()") self.validate_identity( "SELECT * FROM foo ORDER BY bar OFFSET 0 ROWS FETCH NEXT 10 ROWS WITH TIES" ) self.validate_identity( "SELECT DATE_BIN(toDateTime('2023-01-01 14:45:00'), INTERVAL '1' MINUTE, toDateTime('2023-01-01 14:35:30'), 'UTC')", ) self.validate_identity( "SELECT CAST(1730098800 AS DateTime64) AS DATETIME, 'test' AS interp ORDER BY DATETIME WITH FILL FROM toDateTime64(1730098800, 3) - INTERVAL '7' HOUR TO toDateTime64(1730185140, 3) - INTERVAL '7' HOUR STEP toIntervalSecond(900) INTERPOLATE (interp)" ) self.validate_identity( "SELECT number, COUNT() OVER (PARTITION BY number % 3) AS partition_count FROM numbers(10) WINDOW window_name AS (PARTITION BY number) QUALIFY partition_count = 4 ORDER BY number" ) self.validate_identity( "SELECT id, quantileGK(100, 0.95)(reading) OVER (PARTITION BY id ORDER BY id RANGE BETWEEN 30000 PRECEDING AND CURRENT ROW) AS window FROM table" ) 
self.validate_identity( "SELECT * FROM table LIMIT 1 BY CONCAT(datalayerVariantNo, datalayerProductId, warehouse)" ) self.validate_identity( """SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 'a')""" ) self.validate_identity( "ATTACH DATABASE DEFAULT ENGINE = ORDINARY", check_command_warning=True ) self.validate_identity( "SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL" ) self.validate_identity( "SELECT n, source FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5" ) self.validate_identity( "SELECT toDate((number * 10) * 86400) AS d1, toDate(number * 86400) AS d2, 'original' AS source FROM numbers(10) WHERE (number % 3) = 1 ORDER BY d2 WITH FILL, d1 WITH FILL STEP 5" ) self.validate_identity( "SELECT n, source, inter FROM (SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE number % 3 = 1) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1)" ) self.validate_identity( "SELECT SUM(1) AS impressions, arrayJoin(cities) AS city, arrayJoin(browsers) AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3" ) self.validate_identity( "SELECT sum(1) AS impressions, (arrayJoin(arrayZip(cities, browsers)) AS t).1 AS city, t.2 AS browser FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities, ['Firefox', 'Chrome', 'Chrome'] AS browsers) GROUP BY 2, 3" ) self.validate_identity( 'SELECT CAST(tuple(1 AS "a", 2 AS "b", 3.0 AS "c").2 AS Nullable(String))' ) self.validate_identity( "CREATE TABLE test (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()" ) self.validate_identity( "CREATE TABLE test ON CLUSTER default (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()" ) self.validate_identity( "CREATE TABLE test ON CLUSTER 
'{cluster}' (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()" ) self.validate_identity( "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple() AS SELECT * FROM test_data" ) self.validate_identity( "CREATE MATERIALIZED VIEW test_view ON CLUSTER '{cluster}' (id UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple() AS SELECT * FROM test_data" ) self.validate_identity( "CREATE MATERIALIZED VIEW test_view ON CLUSTER cl1 TO table1 AS SELECT * FROM test_data" ) self.validate_identity( "CREATE MATERIALIZED VIEW test_view ON CLUSTER '{cluster}' TO table1 AS SELECT * FROM test_data" ) self.validate_identity( "CREATE MATERIALIZED VIEW test_view TO db.table1 (id UInt8) AS SELECT * FROM test_data" ) self.validate_identity( "CREATE TABLE t (foo String CODEC(LZ4HC(9), ZSTD, DELTA), size String ALIAS formatReadableSize(size_bytes), INDEX idx1 a TYPE bloom_filter(0.001) GRANULARITY 1, INDEX idx2 a TYPE set(100) GRANULARITY 2, INDEX idx3 a TYPE minmax GRANULARITY 3)" ) self.validate_identity( "SELECT generate_series FROM generate_series(0, 10) AS g(x)", ) self.validate_identity( "SELECT t.c FROM (SELECT arrayJoin([1,2,3,4,5]) AS c) AS t WHERE (t.c + 0) NOT IN (1,2)", "SELECT t.c FROM (SELECT arrayJoin([1, 2, 3, 4, 5]) AS c) AS t WHERE NOT ((t.c + 0) IN (1, 2))", ) self.validate_identity( "SELECT * FROM t1, t2", "SELECT * FROM t1 CROSS JOIN t2", ) self.validate_identity( "SELECT and(1, 2)", "SELECT 1 AND 2", ) self.validate_identity( "SELECT or(1, 2)", "SELECT 1 OR 2", ) self.validate_identity( "SELECT generate_series FROM generate_series(0, 10) AS g", "SELECT generate_series FROM generate_series(0, 10) AS g(generate_series)", ) self.validate_identity( "INSERT INTO tab VALUES ({'key1': 1, 'key2': 10}), ({'key1': 2, 'key2': 20}), ({'key1': 3, 'key2': 30})", "INSERT INTO tab VALUES ((map('key1', 1, 'key2', 10))), ((map('key1', 2, 'key2', 20))), ((map('key1', 3, 'key2', 30)))", ) self.validate_identity( "SELECT (toUInt8('1') 
+ toUInt8('2')) IS NOT NULL", "SELECT NOT ((toUInt8('1') + toUInt8('2')) IS NULL)", ) self.validate_identity( "SELECT $1$foo$1$", "SELECT 'foo'", ) self.validate_identity( "SELECT * FROM table LIMIT 1, 2 BY a, b", "SELECT * FROM table LIMIT 2 OFFSET 1 BY a, b", ) self.validate_identity( "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin']", "SELECT SUM(1) AS impressions FROM (SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities) WHERE arrayJoin(cities) IN ('Istanbul', 'Berlin')", ) self.validate_identity("SELECT SUBSTRING_INDEX(str, delim, count)") self.validate_identity("SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)") self.validate_identity("SELECT SUBSTRING_INDEX('a.b.c.d', '.', -2)") self.validate_all( "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", write={ "databricks": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "spark": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "mysql": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", }, ) self.validate_all( "SELECT substringIndex('a.b.c.d', '.', 2)", write={ "databricks": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "spark": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "mysql": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "clickhouse": "SELECT substringIndex('a.b.c.d', '.', 2)", }, ) self.validate_all( "SELECT CAST(STR_TO_DATE(SUBSTRING(a.eta, 1, 10), '%Y-%m-%d') AS Nullable(DATE))", read={ "clickhouse": "SELECT CAST(STR_TO_DATE(SUBSTRING(a.eta, 1, 10), '%Y-%m-%d') AS Nullable(DATE))", "oracle": "SELECT to_date(substr(a.eta, 1,10), 'YYYY-MM-DD')", }, ) self.validate_all( "CHAR(67) || CHAR(65) || CHAR(84)", read={ "clickhouse": "CHAR(67) || CHAR(65) || CHAR(84)", "oracle": "CHR(67) || CHR(65) || CHR(84)", }, ) self.validate_all( "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", read={ "clickhouse": "SELECT lagInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", "oracle": "SELECT 
LAG(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", }, ) self.validate_all( "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", read={ "clickhouse": "SELECT leadInFrame(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", "oracle": "SELECT LEAD(salary, 1, 0) OVER (ORDER BY hire_date) AS prev_sal FROM employees", }, ) self.validate_all( "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS Nullable(DATE))", read={ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS Nullable(DATE))", "postgres": "SELECT TO_DATE('05 12 2000', 'DD MM YYYY')", }, write={ "clickhouse": "SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS Nullable(DATE))", "postgres": "SELECT CAST(CAST(TO_DATE('05 12 2000', 'DD MM YYYY') AS TIMESTAMP) AS DATE)", }, ) self.validate_all( "SELECT * FROM x PREWHERE y = 1 WHERE z = 2", write={ "": "SELECT * FROM x WHERE z = 2", "clickhouse": "SELECT * FROM x PREWHERE y = 1 WHERE z = 2", }, ) self.validate_all( "SELECT * FROM x AS prewhere", read={ "clickhouse": "SELECT * FROM x AS prewhere", "duckdb": "SELECT * FROM x prewhere", }, ) self.validate_all( "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)", read={ "clickhouse": "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)", "duckdb": "SELECT a, b FROM (SELECT * FROM x) AS t(a, b)", }, ) self.validate_all( "SELECT arrayJoin([1,2,3])", write={ "clickhouse": "SELECT arrayJoin([1, 2, 3])", "postgres": "SELECT UNNEST(ARRAY[1, 2, 3])", }, ) self.validate_all( "has([1], x)", read={ "postgres": "x = any(array[1])", }, ) self.validate_all( "NOT has([1], x)", read={ "postgres": "any(array[1]) <> x", }, ) self.validate_all( "has([1], x)", read={ "clickhouse": "has([1], x)", "presto": "CONTAINS(ARRAY[1], x)", "spark": "ARRAY_CONTAINS(ARRAY(1), x)", }, write={ "presto": "CONTAINS(ARRAY[1], x)", "spark": "ARRAY_CONTAINS(ARRAY(1), x)", }, ) self.validate_all( "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", 
read={ "duckdb": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", "postgres": "SELECT TIMESTAMP '2020-01-01' + INTERVAL '500 us'", }, write={ "clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND", "duckdb": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND", "postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'", }, ) self.validate_all( "SELECT CURRENT_DATE()", read={ "clickhouse": "SELECT CURRENT_DATE()", "postgres": "SELECT CURRENT_DATE", }, ) self.validate_all( "SELECT CURRENT_TIMESTAMP()", read={ "clickhouse": "SELECT CURRENT_TIMESTAMP()", "postgres": "SELECT CURRENT_TIMESTAMP", }, ) self.validate_all( "SELECT match('ThOmAs', CONCAT('(?i)', 'thomas'))", read={ "postgres": "SELECT 'ThOmAs' ~* 'thomas'", }, ) self.validate_all( "SELECT match('ThOmAs', CONCAT('(?i)', x)) FROM t", read={ "postgres": "SELECT 'ThOmAs' ~* x FROM t", }, ) self.validate_all( "SELECT '\\0'", read={ "mysql": "SELECT '\0'", }, write={ "clickhouse": "SELECT '\\0'", "mysql": "SELECT '\0'", }, ) self.validate_all( "DATE_ADD(DAY, 1, x)", read={ "clickhouse": "dateAdd(DAY, 1, x)", "presto": "DATE_ADD('DAY', 1, x)", }, write={ "clickhouse": "DATE_ADD(DAY, 1, x)", "presto": "DATE_ADD('DAY', 1, x)", "": "DATE_ADD(x, 1, 'DAY')", }, ) self.validate_all( "DATE_DIFF(DAY, a, b)", read={ "clickhouse": "dateDiff(DAY, a, b)", "presto": "DATE_DIFF('DAY', a, b)", }, write={ "clickhouse": "DATE_DIFF(DAY, a, b)", "presto": "DATE_DIFF('DAY', a, b)", "": "DATEDIFF(b, a, DAY)", }, ) self.validate_all( "SELECT xor(1, 0)", read={ "clickhouse": "SELECT xor(1, 0)", "mysql": "SELECT 1 XOR 0", }, write={ "mysql": "SELECT 1 XOR 0", }, ) self.validate_all( "SELECT xor(0, 1, xor(1, 0, 0))", write={ "clickhouse": "SELECT xor(0, 1, xor(1, 0, 0))", "mysql": "SELECT 0 XOR 1 XOR 1 XOR 0 XOR 0", }, ) self.validate_all( "SELECT xor(xor(1, 0), 1)", read={ "clickhouse": "SELECT xor(xor(1, 0), 1)", "mysql": "SELECT 1 XOR 0 XOR 1", }, write={ 
"clickhouse": "SELECT xor(xor(1, 0), 1)", "mysql": "SELECT 1 XOR 0 XOR 1", }, ) self.validate_identity("SELECT xor(0, 1, 1, 0)") self.validate_all( "CONCAT(a, b)", read={ "clickhouse": "CONCAT(a, b)", "mysql": "CONCAT(a, b)", }, write={ "mysql": "CONCAT(a, b)", "postgres": "a || b", }, ) self.validate_all( r"'Enum8(\'Sunday\' = 0)'", write={"clickhouse": "'Enum8(''Sunday'' = 0)'"} ) self.validate_all( "SELECT uniq(x) FROM (SELECT any(y) AS x FROM (SELECT 1 AS y))", read={ "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) x FROM (SELECT 1 y))", }, write={ "bigquery": "SELECT APPROX_COUNT_DISTINCT(x) FROM (SELECT ANY_VALUE(y) AS x FROM (SELECT 1 AS y))", }, ) self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname", "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST", }, ) self.validate_all( "CAST(1 AS NULLABLE(Int64))", write={ "clickhouse": "CAST(1 AS Nullable(Int64))", }, ) self.validate_all( "CAST(1 AS Nullable(DateTime64(6, 'UTC')))", write={"clickhouse": "CAST(1 AS Nullable(DateTime64(6, 'UTC')))"}, ) self.validate_all( "SELECT x #! 
comment", write={"": "SELECT x /* comment */"}, ) self.validate_all( "SELECT quantileIf(0.5)(a, true)", write={ "clickhouse": "SELECT quantileIf(0.5)(a, TRUE)", }, ) self.validate_identity( "SELECT POSITION(needle IN haystack)", "SELECT POSITION(haystack, needle)" ) self.validate_identity( "SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result = 'break'" ) self.validate_identity("SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result_") self.validate_identity("SELECT * FROM x FORMAT PrettyCompact") self.validate_identity( "SELECT * FROM x LIMIT 10 SETTINGS max_results = 100, result_ FORMAT PrettyCompact" ) self.validate_all( "SELECT * FROM foo JOIN bar USING id, name", write={"clickhouse": "SELECT * FROM foo JOIN bar USING (id, name)"}, ) self.validate_all( "SELECT * FROM foo ANY LEFT JOIN bla ON foo.c1 = bla.c2", write={"clickhouse": "SELECT * FROM foo LEFT ANY JOIN bla ON foo.c1 = bla.c2"}, ) self.validate_all( "SELECT * FROM foo GLOBAL ANY LEFT JOIN bla ON foo.c1 = bla.c2", write={"clickhouse": "SELECT * FROM foo GLOBAL LEFT ANY JOIN bla ON foo.c1 = bla.c2"}, ) self.validate_all( """ SELECT loyalty, count() FROM hits SEMI LEFT JOIN users USING (UserID) GROUP BY loyalty ORDER BY loyalty ASC """, write={ "clickhouse": "SELECT loyalty, count() FROM hits LEFT SEMI JOIN users USING (UserID)" " GROUP BY loyalty ORDER BY loyalty ASC" }, ) self.validate_all( "SELECT quantile(0.5)(a)", read={ "duckdb": "SELECT quantile(a, 0.5)", "clickhouse": "SELECT median(a)", }, write={ "clickhouse": "SELECT quantile(0.5)(a)", }, ) self.validate_all( "SELECT quantiles(0.5, 0.4)(a)", read={"duckdb": "SELECT quantile(a, [0.5, 0.4])"}, write={"clickhouse": "SELECT quantiles(0.5, 0.4)(a)"}, ) self.validate_all( "SELECT quantiles(0.5)(a)", read={"duckdb": "SELECT quantile(a, [0.5])"}, write={"clickhouse": "SELECT quantiles(0.5)(a)"}, ) self.validate_identity("SELECT isNaN(x)") self.validate_all( "SELECT IS_NAN(x), ISNAN(x)", write={"clickhouse": "SELECT isNaN(x), isNaN(x)"}, ) 
self.validate_identity("SELECT startsWith('a', 'b')") self.validate_all( "SELECT STARTS_WITH('a', 'b'), STARTSWITH('a', 'b')", write={"clickhouse": "SELECT startsWith('a', 'b'), startsWith('a', 'b')"}, ) self.validate_identity("SYSTEM STOP MERGES foo.bar", check_command_warning=True) self.validate_identity( "INSERT INTO FUNCTION s3('url', 'CSV', 'name String, value UInt32', 'gzip') SELECT name, value FROM existing_table" ) self.validate_identity( "INSERT INTO FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')", "INSERT INTO FUNCTION remote('localhost', default.simple_table) VALUES ((100), ('inserted via remote()'))", ) self.validate_identity( """INSERT INTO TABLE FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES ('test', 1, 2)""", """INSERT INTO FUNCTION hdfs('hdfs://hdfs1:9000/test', 'TSV', 'name String, column2 UInt32, column3 UInt32') VALUES (('test'), (1), (2))""", ) self.validate_identity( "INSERT INTO t (n.a, n.b) VALUES (1, [1, 2])", "INSERT INTO t (n.a, n.b) VALUES ((1), ([1, 2]))", ) self.validate_identity("SELECT 1 FORMAT TabSeparated") self.validate_identity("SELECT * FROM t FORMAT TabSeparated") self.validate_identity("SELECT FORMAT") self.validate_identity("1 AS FORMAT").assert_is(exp.Alias) self.validate_identity("SELECT formatDateTime(NOW(), '%Y-%m-%d', '%T')") self.validate_all( "SELECT formatDateTime(NOW(), '%Y-%m-%d')", read={ "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')", "mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')", }, write={ "clickhouse": "SELECT formatDateTime(NOW(), '%Y-%m-%d')", "mysql": "SELECT DATE_FORMAT(NOW(), '%Y-%m-%d')", }, ) self.validate_identity("ALTER TABLE visits DROP PARTITION 201901") self.validate_identity("ALTER TABLE visits DROP PARTITION ALL") self.validate_identity( "ALTER TABLE visits DROP PARTITION tuple(toYYYYMM(toDate('2019-01-25')))" ) self.validate_identity("ALTER TABLE visits DROP PARTITION ID '201901'") 
self.validate_identity("ALTER TABLE visits REPLACE PARTITION 201901 FROM visits_tmp") self.validate_identity("ALTER TABLE visits REPLACE PARTITION ALL FROM visits_tmp") self.validate_identity( "ALTER TABLE visits REPLACE PARTITION tuple(toYYYYMM(toDate('2019-01-25'))) FROM visits_tmp" ) self.validate_identity("ALTER TABLE visits REPLACE PARTITION ID '201901' FROM visits_tmp") self.validate_identity("ALTER TABLE visits ON CLUSTER test_cluster DROP COLUMN col1") self.validate_identity("ALTER TABLE visits ON CLUSTER '{cluster}' DROP COLUMN col1") self.validate_identity("DELETE FROM tbl ON CLUSTER test_cluster WHERE date = '2019-01-01'") self.validate_identity("DELETE FROM tbl ON CLUSTER '{cluster}' WHERE date = '2019-01-01'") self.assertIsInstance( parse_one("Tuple(select Int64)", into=exp.DataType, read="clickhouse"), exp.DataType ) self.validate_identity( "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", "INSERT INTO t (col1, col2) VALUES (('abcd'), (1234))", ) self.validate_all( "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)", write={ "clickhouse": "INSERT INTO t (col1, col2) VALUES (('abcd'), (1234))", "postgres": "INSERT INTO t (col1, col2) VALUES (('abcd'), (1234))", }, ) self.validate_identity("SELECT TRIM(TRAILING ')' FROM '( Hello, world! )')") self.validate_identity("SELECT TRIM(LEADING '(' FROM '( Hello, world! 
)')") self.validate_identity("current_timestamp").assert_is(exp.Column) self.validate_identity("SELECT * APPLY(sum) FROM columns_transformers") self.validate_identity("SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers") self.validate_identity( "SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers" ) self.validate_identity("SELECT * APPLY(sum), COLUMNS('col') APPLY(sum) APPLY(avg) FROM t") self.validate_identity( "SELECT * FROM ABC WHERE hasAny(COLUMNS('.*field') APPLY(toUInt64) APPLY(to), (SELECT groupUniqArray(toUInt64(field))))" ) self.validate_identity("SELECT col apply", "SELECT col AS apply") self.validate_identity( "SELECT name FROM data WHERE (SELECT DISTINCT name FROM data) IS NOT NULL", "SELECT name FROM data WHERE NOT ((SELECT DISTINCT name FROM data) IS NULL)", ) self.validate_identity("SELECT 1_2_3_4_5", "SELECT 12345") self.validate_identity("SELECT 1_b", "SELECT 1_b") self.validate_identity( "SELECT COUNT(1) FROM table SETTINGS additional_table_filters = {'a': 'b', 'c': 'd'}" ) self.validate_identity("SELECT arrayConcat([1, 2], [3, 4])") self.validate_identity("SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')") self.validate_identity( "SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s', 'Asia/Istanbul')" ) self.validate_identity("farmFingerprint64(x1, x2, x3)") self.validate_identity("cityHash64()") self.validate_identity("cityHash64(x)") self.validate_identity("cityHash64(x, y, z)") self.validate_identity("cosineDistance(x, y)") self.validate_identity("L2Distance(x, y)") self.validate_identity("tuple(1 = 1, 'foo' = 'foo')") self.validate_identity("SELECT LIKE(a, b)", "SELECT a LIKE b") self.validate_identity("SELECT notLike(a, b)", "SELECT NOT a LIKE b") self.validate_identity("SELECT ilike(a, b)", "SELECT a ILIKE b") self.validate_identity("currentDatabase()", "CURRENT_DATABASE()") self.validate_identity("currentSchemas(TRUE)", "CURRENT_SCHEMAS(TRUE)") 
def test_clickhouse_values(self):
    # VALUES used as a table function: count the Tuple nodes in the parsed tree.
    parsed = self.parse_one("SELECT * FROM VALUES (1, 2, 3)")
    tuple_nodes = list(parsed.find_all(exp.Tuple))
    self.assertEqual(len(tuple_nodes), 4)

    # A VALUES clause built through the expression helpers, with a table alias
    # and column names, is generated for ClickHouse as an aliased SELECT.
    select_from_values = exp.select("*").from_(
        exp.values([exp.tuple_(1, 2, 3)], alias="subq", columns=["a", "b", "c"])
    )
    generated_sql = select_from_values.sql("clickhouse")
    self.assertEqual(
        generated_sql,
        "SELECT * FROM (SELECT 1 AS a, 2 AS b, 3 AS c) AS subq",
    )

    self.validate_identity("SELECT * FROM VALUES ((1, 1), (2, 1), (3, 1), (4, 1))")
    self.validate_identity(
        "SELECT type, id FROM VALUES ('id Int, type Int', (1, 1), (2, 1), (3, 1), (4, 1))"
    )

    # Both INSERT spellings are expected to generate the same statement.
    wrapped_insert = "INSERT INTO t (col1, col2) VALUES (('abcd'), (1234))"
    insert_inputs = (
        "INSERT INTO t (col1, col2) VALUES ('abcd', 1234)",
        "INSERT INTO t (col1, col2) FORMAT Values('abcd', 1234)",
    )
    for insert_sql in insert_inputs:
        self.validate_identity(insert_sql, wrapped_insert)

    # Pairs of (ClickHouse SQL, DuckDB input that must transpile to it).
    duckdb_cases = (
        (
            "SELECT col FROM (SELECT 1 AS col) AS _t",
            "SELECT col FROM (VALUES (1)) AS _t(col)",
        ),
        (
            "SELECT col1, col2 FROM (SELECT 1 AS col1, 2 AS col2 UNION ALL SELECT 3, 4) AS _t",
            "SELECT col1, col2 FROM (VALUES (1, 2), (3, 4)) AS _t(col1, col2)",
        ),
    )
    for clickhouse_sql, duckdb_sql in duckdb_cases:
        self.validate_all(clickhouse_sql, read={"duckdb": duckdb_sql})
def test_signed_and_unsigned_types(self):
    # Same names and order as the original literal list:
    # UInt8 ... UInt256 followed by Int8 ... Int256.
    bit_widths = (8, 16, 32, 64, 128, 256)
    data_types = [f"{prefix}Int{bits}" for prefix in ("U", "") for bits in bit_widths]

    for data_type in data_types:
        # One subtest per type, matching the sibling type tests in this class,
        # so a failure for one type does not stop the remaining types from
        # being checked.
        with self.subTest(f"Casting to ClickHouse {data_type}"):
            self.validate_all(
                f"pow(2, 32)::{data_type}",
                write={"clickhouse": f"CAST(pow(2, 32) AS {data_type})"},
            )
def test_create_table_as_alias(self):
    # CREATE TABLE <target> AS <source table>: the parsed tree must equal a
    # Create node whose `expression` is the source table, and the statement
    # must round-trip unchanged.
    statement = "CREATE TABLE my_db.my_table AS another_db.another_table"

    target_table = exp.to_table("my_db.my_table")
    source_table = exp.to_table("another_db.another_table")
    expected_tree = exp.Create(this=target_table, kind="TABLE", expression=source_table)

    parsed_tree = self.parse_one(statement)
    self.assertEqual(parsed_tree, expected_tree)

    self.validate_identity(statement)
self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") # Transpiled CREATE SCHEMA may have OnCluster property set create_with_cluster = exp.Create( this=db_table_expr, kind="SCHEMA", properties=exp.Properties(expressions=[exp.OnCluster(this=exp.to_identifier("c"))]), ) self.assertEqual(create_with_cluster.sql("clickhouse"), "CREATE DATABASE foo ON CLUSTER c") ctas_with_comment = exp.Create( this=exp.table_("foo"), kind="TABLE", expression=exp.select("*").from_("db.other_table"), properties=exp.Properties( expressions=[ exp.EngineProperty(this=exp.var("Memory")), exp.SchemaCommentProperty(this=exp.Literal.string("foo")), ], ), ) self.assertEqual( ctas_with_comment.sql("clickhouse"), "CREATE TABLE foo ENGINE=Memory AS (SELECT * FROM db.other_table) COMMENT 'foo'", ) self.validate_identity("CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b") self.validate_identity("CREATE MATERIALIZED VIEW a.b TO a.c (c Int32) AS SELECT * FROM a.d") self.validate_identity("""CREATE TABLE ip_data (ip4 IPv4, ip6 IPv6) ENGINE=TinyLog()""") self.validate_identity("""CREATE TABLE dates (dt1 Date32) ENGINE=TinyLog()""") self.validate_identity("CREATE TABLE named_tuples (a Tuple(select String, i Int64))") self.validate_identity("""CREATE TABLE t (a String) EMPTY AS SELECT * FROM dummy""") self.validate_identity( "CREATE TABLE t1 (a String EPHEMERAL, b String EPHEMERAL func(), c String MATERIALIZED func(), d String ALIAS func()) ENGINE=TinyLog()" ) self.validate_identity( "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()" ) self.validate_identity( """CREATE TABLE xyz (ts DateTime, data String) ENGINE=MergeTree() ORDER BY ts SETTINGS index_granularity = 8192 COMMENT '{"key": "value"}'""" ) self.validate_identity( "INSERT INTO FUNCTION s3('a', 'b', 'c', 'd', 'e') PARTITION BY CONCAT(s1, s2, s3, s4) SETTINGS set1 = 1, set2 = '2' SELECT * FROM 
some_table SETTINGS foo = 3" ) self.validate_identity( 'CREATE TABLE data5 ("x" UInt32, "y" UInt32) ENGINE=MergeTree ORDER BY (round(y / 1000000000), cityHash64(x)) SAMPLE BY cityHash64(x)' ) self.validate_identity( "CREATE TABLE foo (x UInt32) TTL time_column + INTERVAL '1' MONTH DELETE WHERE column = 'value'" ) self.validate_identity( "CREATE FUNCTION parity_str AS (n) -> IF(n % 2, 'odd', 'even')", "CREATE FUNCTION parity_str AS n -> CASE WHEN n % 2 THEN 'odd' ELSE 'even' END", ) self.validate_identity( "CREATE TABLE a ENGINE=Memory AS SELECT 1 AS c COMMENT 'foo'", "CREATE TABLE a ENGINE=Memory AS (SELECT 1 AS c) COMMENT 'foo'", ) self.validate_identity( 'CREATE TABLE t1 ("x" UInt32, "y" Dynamic, "z" Dynamic(max_types = 10)) ENGINE=MergeTree ORDER BY x' ) self.validate_identity( "CREATE TABLE test_table (id Int32, name String) ENGINE=MergeTree PRIMARY KEY id", "CREATE TABLE test_table (id Int32, name String) ENGINE=MergeTree PRIMARY KEY (id)", ) self.validate_identity( "CREATE TABLE test_table (id Int32, name String) ENGINE=MergeTree PRIMARY KEY tuple()", "CREATE TABLE test_table (id Int32, name String) ENGINE=MergeTree PRIMARY KEY (tuple())", ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT a_constraint CHECK (a < 10)) ENGINE=MergeTree ORDER BY a" ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT c1 ASSUME (a > 5)) ENGINE=MergeTree ORDER BY a" ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT a_constraint CHECK a < 10) ENGINE=MergeTree ORDER BY a", "CREATE TABLE t (a UInt32, CONSTRAINT a_constraint CHECK (a < 10)) ENGINE=MergeTree ORDER BY a", ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT c1 ASSUME a > 5) ENGINE=MergeTree ORDER BY a", "CREATE TABLE t (a UInt32, CONSTRAINT c1 ASSUME (a > 5)) ENGINE=MergeTree ORDER BY a", ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT t CHECK (SELECT 1)) ENGINE=MergeTree ORDER BY a" ) self.validate_identity( "CREATE TABLE t (a UInt32, CONSTRAINT t ASSUME 
(SELECT 1)) ENGINE=MergeTree ORDER BY a" ) self.validate_identity("CREATE TABLE t (check UInt32)") self.validate_identity("CREATE TABLE t (assume UInt32)") self.validate_all( "CREATE DATABASE x", read={ "duckdb": "CREATE SCHEMA x", }, write={ "clickhouse": "CREATE DATABASE x", "duckdb": "CREATE SCHEMA x", }, ) self.validate_all( "DROP DATABASE x", read={ "duckdb": "DROP SCHEMA x", }, write={ "clickhouse": "DROP DATABASE x", "duckdb": "DROP SCHEMA x", }, ) self.validate_all( """ CREATE TABLE example1 ( timestamp DateTime, x UInt32 TTL now() + INTERVAL 1 MONTH, y String TTL timestamp + INTERVAL 1 DAY, z String ) ENGINE = MergeTree ORDER BY tuple() """, write={ "clickhouse": """CREATE TABLE example1 ( timestamp DateTime, x UInt32 TTL now() + INTERVAL '1' MONTH, y String TTL timestamp + INTERVAL '1' DAY, z String ) ENGINE=MergeTree ORDER BY tuple()""", }, pretty=True, ) self.validate_all( """ CREATE TABLE test (id UInt64, timestamp DateTime64, data String, max_hits UInt64, sum_hits UInt64) ENGINE = MergeTree PRIMARY KEY (id, toStartOfDay(timestamp), timestamp) TTL timestamp + INTERVAL 1 DAY GROUP BY id, toStartOfDay(timestamp) SET max_hits = max(max_hits), sum_hits = sum(sum_hits) """, write={ "clickhouse": """CREATE TABLE test ( id UInt64, timestamp DateTime64, data String, max_hits UInt64, sum_hits UInt64 ) ENGINE=MergeTree PRIMARY KEY (id, dateTrunc('DAY', timestamp), timestamp) TTL timestamp + INTERVAL '1' DAY GROUP BY id, dateTrunc('DAY', timestamp) SET max_hits = max(max_hits), sum_hits = sum(sum_hits)""", }, pretty=True, ) self.validate_all( """ CREATE TABLE test (id String, data String) ENGINE = AggregatingMergeTree() ORDER BY tuple() SETTINGS max_suspicious_broken_parts=500, parts_to_throw_insert=100 """, write={ "clickhouse": """CREATE TABLE test ( id String, data String ) ENGINE=AggregatingMergeTree() ORDER BY tuple() SETTINGS max_suspicious_broken_parts = 500, parts_to_throw_insert = 100""", }, pretty=True, ) self.validate_all( """ CREATE TABLE 
example_table ( d DateTime, a Int ) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY d TTL d + INTERVAL 1 MONTH DELETE, d + INTERVAL 1 WEEK TO VOLUME 'aaa', d + INTERVAL 2 WEEK TO DISK 'bbb'; """, write={ "clickhouse": """CREATE TABLE example_table ( d DateTime, a Int32 ) ENGINE=MergeTree PARTITION BY toYYYYMM(d) ORDER BY d TTL d + INTERVAL '1' MONTH DELETE, d + INTERVAL '1' WEEK TO VOLUME 'aaa', d + INTERVAL '2' WEEK TO DISK 'bbb'""", }, pretty=True, ) self.validate_all( """ CREATE TABLE table_with_where ( d DateTime, a Int ) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY d TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; """, write={ "clickhouse": """CREATE TABLE table_with_where ( d DateTime, a Int32 ) ENGINE=MergeTree PARTITION BY toYYYYMM(d) ORDER BY d TTL d + INTERVAL '1' MONTH DELETE WHERE toDayOfWeek(d) = 1""", }, pretty=True, ) self.validate_all( """ CREATE TABLE table_for_recompression ( d DateTime, key UInt64, value String ) ENGINE MergeTree() ORDER BY tuple() PARTITION BY key TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10)) SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; """, write={ "clickhouse": """CREATE TABLE table_for_recompression ( d DateTime, key UInt64, value String ) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY key TTL d + INTERVAL '1' MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL '1' YEAR RECOMPRESS CODEC(LZ4HC(10)) SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0""", }, pretty=True, ) self.validate_all( """ CREATE TABLE table_for_aggregation ( d DateTime, k1 Int, k2 Int, x Int, y Int ) ENGINE = MergeTree ORDER BY (k1, k2) TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); """, write={ "clickhouse": """CREATE TABLE table_for_aggregation ( d DateTime, k1 Int32, k2 Int32, x Int32, y Int32 ) ENGINE=MergeTree ORDER BY (k1, k2) TTL d + INTERVAL '1' MONTH GROUP BY k1, k2 SET x = max(x), y = min(y)""", }, pretty=True, ) 
self.validate_all( """ CREATE DICTIONARY discounts_dict ( advertiser_id UInt64, discount_start_date Date, discount_end_date Date, amount Float64 ) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'discounts')) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED(range_lookup_strategy 'max')) RANGE(MIN discount_start_date MAX discount_end_date) """, write={ "clickhouse": """CREATE DICTIONARY discounts_dict ( advertiser_id UInt64, discount_start_date DATE, discount_end_date DATE, amount Float64 ) PRIMARY KEY (id) SOURCE(CLICKHOUSE( TABLE 'discounts' )) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED( range_lookup_strategy 'max' )) RANGE(MIN discount_start_date MAX discount_end_date)""", }, pretty=True, ) self.validate_all( """ CREATE DICTIONARY my_ip_trie_dictionary ( prefix String, asn UInt32, cca2 String DEFAULT '??' ) PRIMARY KEY prefix SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses')) LAYOUT(IP_TRIE) LIFETIME(3600); """, write={ "clickhouse": """CREATE DICTIONARY my_ip_trie_dictionary ( prefix String, asn UInt32, cca2 String DEFAULT '??' 
) PRIMARY KEY (prefix) SOURCE(CLICKHOUSE( TABLE 'my_ip_addresses' )) LAYOUT(IP_TRIE()) LIFETIME(MIN 0 MAX 3600)""", }, pretty=True, ) self.validate_all( """ CREATE DICTIONARY polygons_test_dictionary ( key Array(Array(Array(Tuple(Float64, Float64)))), name String ) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'polygons_test_table')) LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) LIFETIME(0); """, write={ "clickhouse": """CREATE DICTIONARY polygons_test_dictionary ( key Array(Array(Array(Tuple(Float64, Float64)))), name String ) PRIMARY KEY (key) SOURCE(CLICKHOUSE( TABLE 'polygons_test_table' )) LAYOUT(POLYGON( STORE_POLYGON_KEY_COLUMN 1 )) LIFETIME(MIN 0 MAX 0)""", }, pretty=True, ) self.validate_identity( "CREATE DICTIONARY dict1 (key UInt64) PRIMARY KEY (key) SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB CURRENT_DATABASE())) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())" ) self.validate_identity( "CREATE DICTIONARY dict1 (key UInt64) PRIMARY KEY (key) SOURCE(FILE(PATH '/tmp/test.csv' FORMAT CSVWithNames)) LIFETIME(MIN 0 MAX 1) LAYOUT(FLAT())" ) self.validate_identity( "CREATE DICTIONARY dict1 (key UInt64) PRIMARY KEY (key) SOURCE(NULL()) LAYOUT(CACHE(SIZE_IN_CELLS 1000)) LIFETIME(MIN 0 MAX 1)" ) self.validate_identity( """CREATE DICTIONARY dict1 (key UInt64) PRIMARY KEY (key) SOURCE(EXECUTABLE(COMMAND 'echo "1"' FORMAT TabSeparated)) LIFETIME(MIN 0 MAX 1) LAYOUT(FLAT())""" ) self.validate_all( """ CREATE TABLE t ( a AggregateFunction(quantiles(0.5, 0.9), UInt64), b AggregateFunction(quantiles, UInt64), c SimpleAggregateFunction(sum, Float64), d AggregateFunction(count) )""", write={ "clickhouse": """CREATE TABLE t ( a AggregateFunction(quantiles(0.5, 0.9), UInt64), b AggregateFunction(quantiles, UInt64), c SimpleAggregateFunction(sum, Float64), d AggregateFunction(count) )""" }, pretty=True, ) self.assertIsNotNone( self.validate_identity("CREATE TABLE t1 (a String MATERIALIZED func())").find( exp.ColumnConstraint ) ) self.validate_all( """ CREATE TABLE 
def test_agg_functions_multiple_suffixes(self):
    # Each entry is (SQL that must round-trip unchanged, expected node type of
    # its first projection).
    cases = [
        # Regression test: single-suffix
        ("SELECT uniqExactIf(x, y) FROM t", exp.CombinedAggFunc),
        # Double suffix: If + Merge
        ("SELECT countIfMerge(state) FROM t", exp.CombinedAggFunc),
        ("SELECT uniqExactIfMerge(state) FROM t", exp.CombinedAggFunc),
        # Triple suffix: ArgMin + If + State (#4814)
        ("SELECT avgArgMinIfState(x, y) FROM t", exp.CombinedAggFunc),
        # Double suffix + parameters: If + State with quantile parameter
        ("SELECT quantileIfState(0.5)(col, cond) FROM t", exp.CombinedParameterizedAgg),
        # Collision-prone bases: "Map" is both a valid suffix and part of the function name.
        # These must parse as the base function (AnonymousAggFunc), not as sum/min/max + Map suffix.
        ("SELECT sumMap(k, v) FROM t", exp.AnonymousAggFunc),
        ("SELECT minMap(k, v) FROM t", exp.AnonymousAggFunc),
        ("SELECT maxMap(k, v) FROM t", exp.AnonymousAggFunc),
        # Single-suffix chains on collision-prone bases
        ("SELECT sumMapIf(k, v, cond) FROM t", exp.CombinedAggFunc),
        ("SELECT minMapIf(k, v, cond) FROM t", exp.CombinedAggFunc),
        ("SELECT maxMapIf(k, v, cond) FROM t", exp.CombinedAggFunc),
        ("SELECT sumMapState(k, v) FROM t", exp.CombinedAggFunc),
        # Multi-suffix chain on a collision-prone base
        ("SELECT sumMapIfState(k, v, cond) FROM t", exp.CombinedAggFunc),
    ]
    for sql, expected_type in cases:
        # Run every case as its own subtest so one failure does not hide the others.
        with self.subTest(sql=sql):
            self.validate_identity(sql).selects[0].assert_is(expected_type)

    # example of a nontrivial query:
    nested_query = self.validate_identity(
        "SELECT sumMergeIfMerge(s) FROM (SELECT sumMergeIfState(agg, 1 = 1) AS s "
        "FROM (SELECT sumState(toFloat64(number)) AS agg FROM numbers(10)))"
    )
    sum_merge_if_merge = nested_query.selects[0].assert_is(exp.CombinedAggFunc)
    # Use a unittest assertion instead of a bare `assert`, which is removed when
    # Python runs with -O and reports no values on failure.
    self.assertEqual(sum_merge_if_merge.name, "sumMergeIfMerge")
def test_datetime_funcs(self):
    # Each datetime func has an alias that is roundtripped to the original name e.g. (DATE_SUB, DATESUB) -> DATE_SUB
    datetime_funcs = (("DATE_SUB", "DATESUB"), ("DATE_ADD", "DATEADD"))

    # 2-arg functions of type (date, interval)
    for func in (*datetime_funcs, ("TIMESTAMP_ADD", "TIMESTAMPADD")):
        func_name = func[0]
        for func_alias in func:
            # Run as a subtest like the 3-arg and 4-arg loops below, so a
            # failure for one spelling does not skip the remaining ones.
            with self.subTest(f"Test 2-arg date-time function {func_alias}"):
                self.validate_identity(
                    f"""SELECT {func_alias}(date, INTERVAL '3' YEAR)""",
                    f"""SELECT {func_name}(date, INTERVAL '3' YEAR)""",
                )

    # 3-arg functions of type (unit, value, date)
    for func in (*datetime_funcs, ("DATE_DIFF", "DATEDIFF"), ("TIMESTAMP_SUB", "TIMESTAMPSUB")):
        func_name = func[0]
        for func_alias in func:
            with self.subTest(f"Test 3-arg date-time function {func_alias}"):
                self.validate_identity(
                    f"SELECT {func_alias}(SECOND, 1, bar)",
                    f"SELECT {func_name}(SECOND, 1, bar)",
                )

    # 4-arg functions of type (unit, value, date, timezone)
    for func in (("DATE_DIFF", "DATEDIFF"),):
        func_name = func[0]
        for func_alias in func:
            with self.subTest(f"Test 4-arg date-time function {func_alias}"):
                self.validate_identity(
                    f"SELECT {func_alias}(SECOND, 1, bar, 'UTC')",
                    f"SELECT {func_name}(SECOND, 1, bar, 'UTC')",
                )
def test_timestr_to_time(self):
    # no fractional seconds
    time_strings = [
        "2020-01-01 00:00:01",
        "2020-01-01 00:00:01+01:00",
        " 2020-01-01 00:00:01-01:00 ",
        "2020-01-01T00:00:01+01:00",
    ]
    for time_string in time_strings:
        with self.subTest(f"'{time_string}'"):
            self.assertEqual(
                exp.TimeStrToTime(this=exp.Literal.string(time_string)).sql(
                    dialect=self.dialect
                ),
                f"CAST('{time_string}' AS DateTime64(6))",
            )

    # With an explicit UTC zone every input above is expected to generate the
    # same literal.
    time_strings_no_utc = ["2020-01-01 00:00:01"] * len(time_strings)
    for utc, no_utc in zip(time_strings, time_strings_no_utc):
        # Label the subtest with the input actually under test; the previous
        # label reused the loop variable left over from the loop above.
        with self.subTest(f"'{utc}' with UTC timezone"):
            self.assertEqual(
                exp.TimeStrToTime(
                    this=exp.Literal.string(utc), zone=exp.Literal.string("UTC")
                ).sql(dialect=self.dialect),
                f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))",
            )

    # with fractional seconds
    time_strings = [
        "2020-01-01 00:00:01.001",
        "2020-01-01 00:00:01.000001",
        "2020-01-01 00:00:01.001+00:00",
        "2020-01-01 00:00:01.000001-00:00",
        "2020-01-01 00:00:01.0001",
        "2020-01-01 00:00:01.1+00:00",
    ]
    for time_string in time_strings:
        with self.subTest(f"'{time_string}'"):
            # NOTE(review): time_string is a str here, so time_string[0] is only
            # its first character and the full fractional timestamps are never
            # exercised without a zone. Kept as-is to preserve the current
            # assertions; confirm whether the whole string was intended.
            self.assertEqual(
                exp.TimeStrToTime(this=exp.Literal.string(time_string[0])).sql(
                    dialect=self.dialect
                ),
                f"CAST('{time_string[0]}' AS DateTime64(6))",
            )

    time_strings_no_utc = [
        "2020-01-01 00:00:01.001000",
        "2020-01-01 00:00:01.000001",
        "2020-01-01 00:00:01.001000",
        "2020-01-01 00:00:01.000001",
        "2020-01-01 00:00:01.000100",
        "2020-01-01 00:00:01.100000",
    ]
    for utc, no_utc in zip(time_strings, time_strings_no_utc):
        with self.subTest(f"'{utc}' with UTC timezone"):
            self.assertEqual(
                exp.TimeStrToTime(
                    this=exp.Literal.string(utc), zone=exp.Literal.string("UTC")
                ).sql(dialect=self.dialect),
                f"CAST('{no_utc}' AS DateTime64(6, 'UTC'))",
            )
def test_to_start_of(self):
    def truncation_by_dialect(unit):
        # Expected generated SQL per target dialect for truncating `x` to `unit`.
        return {
            "clickhouse, version=23.8": f"dateTrunc('{unit.lower()}', x)",
            "clickhouse, version=24.1": f"dateTrunc('{unit}', x)",
            "databricks": f"DATE_TRUNC('{unit}', x)",
            "duckdb": f"DATE_TRUNC('{unit}', x)",
            "doris": f"DATE_TRUNC(x, '{unit}')",
            "presto": f"DATE_TRUNC('{unit}', x)",
            "spark": f"DATE_TRUNC('{unit}', x)",
        }

    # toStartOf<UNIT>(x) for each unit.
    for unit in ("SECOND", "DAY", "MONTH", "YEAR"):
        self.validate_all(f"toStartOf{unit}(x)", write=truncation_by_dialect(unit))

    # toMonday(x) is expected to generate the same forms with the WEEK unit.
    self.validate_all("toMonday(x)", write=truncation_by_dialect("WEEK"))
def test_sql_security(self):
    # Statements carrying SQL SECURITY / DEFINER clauses; each one must
    # round-trip unchanged. They are checked in the order listed.
    create_view = (
        "CREATE VIEW v DEFINER='alice' SQL SECURITY DEFINER AS SELECT 1",
        "CREATE VIEW v SQL SECURITY DEFINER DEFINER='alice' AS SELECT 1",
        "CREATE VIEW v SQL SECURITY DEFINER DEFINER=CURRENT_USER AS SELECT 1",
        "CREATE VIEW v SQL SECURITY INVOKER AS SELECT 1",
        "CREATE VIEW v SQL SECURITY NONE AS SELECT 1",
    )
    create_materialized_view = (
        "CREATE MATERIALIZED VIEW v TO t SQL SECURITY DEFINER DEFINER='alice' AS SELECT 1",
        "CREATE MATERIALIZED VIEW v TO t SQL SECURITY INVOKER AS SELECT 1",
        "CREATE MATERIALIZED VIEW v TO t SQL SECURITY NONE AS SELECT 1",
    )
    alter_table = (
        "ALTER TABLE v MODIFY SQL SECURITY DEFINER DEFINER='alice'",
        "ALTER TABLE v MODIFY SQL SECURITY DEFINER DEFINER=CURRENT_USER",
    )

    for sql in (*create_view, *create_materialized_view, *alter_table):
        with self.subTest(sql):
            self.validate_identity(sql)
self.validate_identity("SELECT void FROM t") self.validate_identity("SELECT * FROM stream") self.validate_identity("SELECT * FROM STREAM t") self.validate_identity("SELECT t.current_time FROM t") self.validate_identity("ALTER TABLE labels ADD COLUMN label_score FLOAT") self.validate_identity("DESCRIBE HISTORY a.b") self.validate_identity("DESCRIBE history.tbl") self.validate_identity("CREATE TABLE t (a STRUCT>)") self.validate_identity("CREATE TABLE t (c STRUCT)") self.validate_identity("CREATE TABLE my_table TBLPROPERTIES (a.b=15)") self.validate_identity("CREATE TABLE my_table TBLPROPERTIES ('a.b'=15)") self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO HOUR)") self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO MINUTE)") self.validate_identity("SELECT CAST('11 23:4:0' AS INTERVAL DAY TO SECOND)") self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL HOUR TO MINUTE)") self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL HOUR TO SECOND)") self.validate_identity("SELECT CAST('23:00:00' AS INTERVAL MINUTE TO SECOND)") self.validate_identity("CREATE TABLE target SHALLOW CLONE source") self.validate_identity("INSERT INTO a REPLACE WHERE cond VALUES (1), (2)") self.validate_identity("CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x + 1") self.validate_identity("CREATE FUNCTION a AS b") self.validate_identity("SELECT ${x} FROM ${y} WHERE ${z} > 1") self.validate_identity("CREATE TABLE foo (x DATE GENERATED ALWAYS AS (CAST(y AS DATE)))") self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)") self.validate_identity("SELECT PARSE_JSON('{}')") self.validate_identity("SELECT RANDSTR(123)") self.validate_identity("SELECT RANDSTR(123, 456)") self.validate_identity("PARSE_URL('https://example.com/path')") self.validate_identity("PARSE_URL('https://example.com/path', 'HOST')") self.validate_identity("PARSE_URL('https://example.com/path', 'QUERY', 'param')") self.validate_identity( "CREATE TABLE IF NOT 
EXISTS db.table (a TIMESTAMP, b BOOLEAN GENERATED ALWAYS AS (NOT a IS NULL)) USING DELTA" ) self.validate_identity( "SELECT * FROM sales UNPIVOT INCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))" ) self.validate_identity( "SELECT * FROM sales UNPIVOT EXCLUDE NULLS (sales FOR quarter IN (q1 AS `Jan-Mar`))" ) self.validate_identity( "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$" ) self.validate_identity( "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$" ) self.validate_identity( "TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', city LIKE 'LA')" ) self.validate_identity( "COPY INTO target FROM `s3://link` FILEFORMAT = AVRO VALIDATE = ALL FILES = ('file1', 'file2') FORMAT_OPTIONS ('opt1'='true', 'opt2'='test') COPY_OPTIONS ('mergeSchema'='true')" ) self.validate_identity( "SELECT * FROM t1, t2", "SELECT * FROM t1 CROSS JOIN t2", ) self.validate_identity( "SELECT TIMESTAMP '2025-04-29 18.47.18'::DATE", "SELECT CAST(CAST('2025-04-29 18.47.18' AS DATE) AS TIMESTAMP)", ) self.validate_identity( "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(foo, 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t", "SELECT DATE_FORMAT(CAST(FROM_UTC_TIMESTAMP(CAST(foo AS TIMESTAMP), 'America/Los_Angeles') AS TIMESTAMP), 'yyyy-MM-dd HH:mm:ss') AS foo FROM t", ) self.validate_identity( "DATE_DIFF(day, created_at, current_date())", "DATEDIFF(DAY, created_at, CURRENT_DATE)", ).args["unit"].assert_is(exp.Var) self.validate_identity( r'SELECT r"\\foo.bar\"', r"SELECT '\\\\foo.bar\\'", ) self.validate_identity( "FROM_UTC_TIMESTAMP(x::TIMESTAMP, tz)", "FROM_UTC_TIMESTAMP(CAST(x AS TIMESTAMP), tz)", ) self.validate_identity("SELECT SUBSTRING_INDEX(str, delim, count)") self.validate_identity("BITMAP_OR_AGG(x)") self.validate_all( "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", write={ "databricks": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", "spark": "SELECT 
SUBSTRING_INDEX('a.b.c.d', '.', 2)", "mysql": "SELECT SUBSTRING_INDEX('a.b.c.d', '.', 2)", }, ) self.validate_identity( "SELECT SUBSTR('Spark' FROM 5 FOR 1)", "SELECT SUBSTRING('Spark', 5, 1)" ) self.validate_identity("SELECT SUBSTR('Spark SQL', 5)", "SELECT SUBSTRING('Spark SQL', 5)") self.validate_identity( "SELECT SUBSTR(ENCODE('Spark SQL', 'utf-8'), 5)", "SELECT SUBSTRING(ENCODE('Spark SQL', 'utf-8'), 5)", ) self.validate_all( "SELECT TYPEOF(1)", read={ "databricks": "SELECT TYPEOF(1)", "snowflake": "SELECT TYPEOF(1)", "hive": "SELECT TYPEOF(1)", "clickhouse": "SELECT toTypeName(1)", }, write={ "clickhouse": "SELECT toTypeName(1)", }, ) self.validate_all( "SELECT c1:item[1].price", read={ "spark": "SELECT GET_JSON_OBJECT(c1, '$.item[1].price')", }, write={ "databricks": "SELECT c1:item[1].price", "spark": "SELECT GET_JSON_OBJECT(c1, '$.item[1].price')", }, ) self.validate_all( "SELECT GET_JSON_OBJECT(c1, '$.item[1].price')", write={ "databricks": "SELECT c1:item[1].price", "spark": "SELECT GET_JSON_OBJECT(c1, '$.item[1].price')", }, ) self.validate_all( "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))", write={ "databricks": "CREATE TABLE foo (x INT GENERATED ALWAYS AS (YEAR(y)))", "tsql": "CREATE TABLE foo (x AS YEAR(CAST(y AS DATE)))", }, ) self.validate_all( "CREATE TABLE t1 AS (SELECT c FROM t2)", read={ "teradata": "CREATE TABLE t1 AS (SELECT c FROM t2) WITH DATA", }, ) self.validate_all( "SELECT X'1A2B'", read={ "spark2": "SELECT X'1A2B'", "spark": "SELECT X'1A2B'", "databricks": "SELECT x'1A2B'", }, write={ "spark2": "SELECT X'1A2B'", "spark": "SELECT X'1A2B'", "databricks": "SELECT X'1A2B'", }, ) with self.assertRaises(ParseError): transpile( "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo$def add_one(x):\n return x+1$$", read="databricks", ) with self.assertRaises(ParseError): transpile( "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $foo bar$def add_one(x):\n return x+1$foo bar$", read="databricks", ) 
self.validate_all( "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a", write={ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS TABLE (a INT) RETURN SELECT a", "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS TABLE SELECT a", }, ) self.validate_all( "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a", write={ "databricks": "CREATE OR REPLACE FUNCTION func(a BIGINT, b BIGINT) RETURNS BIGINT RETURN a", "duckdb": "CREATE OR REPLACE FUNCTION func(a, b) AS a", }, ) self.validate_all( "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)", read={ "databricks": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)", "spark": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)", }, write={ "spark": "SELECT ANY(col) FROM VALUES (TRUE), (FALSE) AS tab(col)", }, ) for option in ("", " (foo)", " MATCH FULL", " NOT ENFORCED"): with self.subTest(f"Databricks foreign key REFERENCES option: {option}."): self.validate_identity( f"CREATE TABLE t1 (foo BIGINT NOT NULL CONSTRAINT foo_c FOREIGN KEY REFERENCES t2{option})" ) self.validate_identity( "SELECT test, LISTAGG(email, '') AS Email FROM organizations GROUP BY test", ) self.validate_identity( "WITH t AS (VALUES ('foo_val') AS t(foo1)) SELECT foo1 FROM t", "WITH t AS (SELECT * FROM VALUES ('foo_val') AS t(foo1)) SELECT foo1 FROM t", ) self.validate_identity("NTILE() OVER (ORDER BY 1)") self.validate_identity("CURRENT_VERSION()") self.validate_all( "UNIFORM(1, 10, 5)", write={ "snowflake": "UNIFORM(1, 10, RANDOM(5))", "databricks": "UNIFORM(1, 10, 5)", }, ) self.validate_all( "UNIFORM(1, 10)", write={ "databricks": "UNIFORM(1, 10)", "snowflake": "UNIFORM(1, 10, RANDOM())", }, ) self.validate_identity("SELECT ELT(2, 'foo', 'bar', 'baz') AS Result") self.validate_identity("GETDATE()", "CURRENT_TIMESTAMP()") self.validate_identity("NOW()", "CURRENT_TIMESTAMP()") self.validate_identity("CURRENT_TIMEZONE()") 
# Exercises the Databricks colon-sign JSON path operator; reference:
# https://docs.databricks.com/sql/language-manual/functions/colonsign.html
def test_json(self):
    # Single-argument validate_identity calls: the SQL is expected to be
    # regenerated exactly as written.
    self.validate_identity("SELECT c1:price, c1:price.foo, c1:price.bar[1]")
    # NOTE(review): the ARRAY types in this method carry no element type, and
    # one literal below reads 'ARRAY>' with an unbalanced bracket — the type
    # arguments look like they were lost in extraction; confirm against the
    # upstream file before relying on these strings.
    self.validate_identity("SELECT TRY_CAST(c1:price AS ARRAY)")
    self.validate_identity("""SELECT TRY_CAST(c1:["foo bar"]["baz qux"] AS ARRAY)""")
    # Index, wildcard and nested-function access on a JSON document supplied through VALUES.
    self.validate_identity(
        """SELECT c1:item[1].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
    )
    self.validate_identity(
        """SELECT c1:item[*].price FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
    )
    self.validate_identity(
        """SELECT FROM_JSON(c1:item[*].price, 'ARRAY')[0] FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
    )
    self.validate_identity(
        """SELECT INLINE(FROM_JSON(c1:item[*], 'ARRAY>')) FROM VALUES ('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') AS T(c1)"""
    )
    # Two-argument calls: the first SQL is parsed, the second is the expected
    # regenerated form. Bracketed keys, GET_JSON_OBJECT and backtick-quoted keys
    # are all expected to come out as colon-sign paths.
    self.validate_identity(
        """SELECT c1:['price'] FROM VALUES ('{ "price": 5 }') AS T(c1)""",
        """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
    )
    self.validate_identity(
        """SELECT GET_JSON_OBJECT(c1, '$.price') FROM VALUES ('{ "price": 5 }') AS T(c1)""",
        """SELECT c1:price FROM VALUES ('{ "price": 5 }') AS T(c1)""",
    )
    self.validate_identity(
        """SELECT raw:`zip code`, raw:`fb:testid`, raw:store['bicycle'], raw:store["zip code"]""",
        """SELECT raw:["zip code"], raw:["fb:testid"], raw:store.bicycle, raw:store["zip code"]""",
    )
    # A key containing a single quote: double-quoted brackets for Databricks,
    # a doubled quote inside the string literal for Postgres.
    self.validate_all(
        "SELECT col:`fr'uit`",
        write={
            "databricks": """SELECT col:["fr'uit"]""",
            "postgres": "SELECT JSON_EXTRACT_PATH(col, 'fr''uit')",
        },
    )
def test_add_date(self):
    # (Databricks input, expected output per target dialect), checked in order.
    conversions = (
        (
            "SELECT DATEADD(year, 1, '2020-01-01')",
            {
                "tsql": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
                "databricks": "SELECT DATEADD(YEAR, 1, '2020-01-01')",
            },
        ),
        (
            # Two-argument DATEDIFF is expected to gain an explicit DAY unit
            # with its operands swapped.
            "SELECT DATEDIFF('end', 'start')",
            {"databricks": "SELECT DATEDIFF(DAY, 'start', 'end')"},
        ),
        (
            "SELECT DATE_ADD('2020-01-01', 1)",
            {
                "tsql": "SELECT DATEADD(DAY, 1, '2020-01-01')",
                "databricks": "SELECT DATEADD(DAY, 1, '2020-01-01')",
            },
        ),
    )

    for source_sql, expected_by_dialect in conversions:
        self.validate_all(source_sql, write=expected_by_dialect)
def test_grant(self):
    # GRANT statements that must be regenerated exactly as written, including
    # backtick-quoted principals.
    grant_statements = (
        "GRANT CREATE ON SCHEMA my_schema TO `alf@melmak.et`",
        "GRANT SELECT ON TABLE sample_data TO `alf@melmak.et`",
        "GRANT ALL PRIVILEGES ON TABLE forecasts TO finance",
        "GRANT SELECT ON TABLE t TO `fab9e00e-ca35-11ec-9d64-0242ac120002`",
    )

    for sql in grant_statements:
        self.validate_identity(sql)
def test_declare(self):
    # (input, expected output); None means the input must be regenerated unchanged.
    declarations = (
        ("DECLARE VAR x INT", "DECLARE x INT"),
        ("DECLARE x INT", None),
        ("DECLARE VARIABLE myvar INT DEFAULT 1", "DECLARE myvar INT = 1"),
        ("DECLARE x, y, z INT DEFAULT 1", "DECLARE x, y, z INT = 1"),
        ("DECLARE x INT = 1", None),
        ("DECLARE OR REPLACE x INT = 1", None),
    )

    for sql, expected_sql in declarations:
        self.validate_identity(sql, expected_sql)
class Validator(unittest.TestCase):
    """Shared base class for the dialect test suites.

    `dialect` names the dialect under test. It is passed as the `read` dialect
    whenever SQL is parsed through `self.parse_one`, and as the target dialect
    when `validate_identity` regenerates SQL. Subclasses override it.
    """

    dialect = None

    def parse_one(self, sql, **kwargs):
        """Parse `sql` with `self.dialect` as the read dialect, forwarding `kwargs`."""
        return parse_one(sql, read=self.dialect, **kwargs)

    def assert_duckdb_sql(
        self,
        expression: exp.Expr,
        *,
        includes: t.Optional[Iterable[str]] = None,
        excludes: t.Optional[Iterable[str]] = None,
        chr_chars: t.Optional[Iterable[str]] = None,
    ) -> str:
        """
        Generate DuckDB SQL for `expression` and check which fragments it contains.

        Args:
            expression: Expression to render with the "duckdb" dialect.
            includes: Substrings that must appear in the generated SQL.
            excludes: Substrings that must not appear in the generated SQL.
            chr_chars: Characters that must appear as `CHR(<code>)` calls, where the
                code is looked up in `WS_CONTROL_CHARS_TO_DUCK`.

        Returns:
            The generated DuckDB SQL, so callers can make further assertions on it.
        """
        duckdb_sql = expression.sql("duckdb")

        for fragment in includes or ():
            self.assertIn(fragment, duckdb_sql)

        for fragment in excludes or ():
            self.assertNotIn(fragment, duckdb_sql)

        for char in chr_chars or ():
            code = WS_CONTROL_CHARS_TO_DUCK.get(char)
            # Fail with a readable message when the lookup table has no entry.
            self.assertIsNotNone(code, f"missing DuckDB code for {repr(char)}")
            self.assertIn(f"CHR({code})", duckdb_sql)

        return duckdb_sql

    def validate_identity(
        self, sql, write_sql=None, pretty=False, check_command_warning=False, identify=False
    ):
        """
        Validate that `sql` parses and is regenerated as `write_sql` (or as itself).

        Args:
            sql (str): SQL to parse with `self.dialect`.
            write_sql (str): Expected generated SQL; when falsy, `sql` itself is expected.
            pretty (bool): Generate pretty-printed SQL.
            check_command_warning (bool): Additionally require the parser logger to emit,
                as its first record, the "contains unsupported syntax" message for `sql`.
            identify (bool): Quote identifiers in the generated SQL.

        Returns:
            The parsed expression, so callers can chain further assertions.
        """
        if check_command_warning:
            with self.assertLogs(parser_logger) as cm:
                expression = self.parse_one(sql)

            # A unittest assertion (not a bare `assert`) so the check is not stripped
            # under `python -O` and a mismatch reports both strings. Checked after the
            # `with` block so that "nothing was logged" is reported by assertLogs.
            self.assertIn(f"'{sql[:100]}' contains unsupported syntax", cm.output[0])
        else:
            expression = self.parse_one(sql)

        self.assertEqual(
            write_sql or sql, expression.sql(dialect=self.dialect, pretty=pretty, identify=identify)
        )
        return expression

    def validate_all(self, sql, read=None, write=None, pretty=False, identify=False):
        """
        Validate that:
        1. Everything in `read` transpiles to `sql`
        2. `sql` transpiles to everything in `write`

        Each dialect is checked in its own sub-test. A `write` value may be the
        `UnsupportedError` class itself, in which case generation for that dialect
        must raise `UnsupportedError`.

        Args:
            sql (str): Main SQL expression
            read (dict): Mapping of dialect -> SQL
            write (dict): Mapping of dialect -> SQL
            pretty (bool): prettify both read and write
            identify (bool): quote identifiers in both read and write
        """
        expression = self.parse_one(sql)

        for read_dialect, read_sql in (read or {}).items():
            with self.subTest(f"{read_dialect} -> {sql}"):
                self.assertEqual(
                    parse_one(read_sql, read_dialect).sql(
                        self.dialect,
                        unsupported_level=ErrorLevel.IGNORE,
                        pretty=pretty,
                        identify=identify,
                    ),
                    sql,
                )

        for write_dialect, write_sql in (write or {}).items():
            with self.subTest(f"{sql} -> {write_dialect}"):
                if write_sql is UnsupportedError:
                    # Sentinel value: the target dialect is expected to reject the expression.
                    with self.assertRaises(UnsupportedError):
                        expression.sql(write_dialect, unsupported_level=ErrorLevel.RAISE)
                else:
                    self.assertEqual(
                        expression.sql(
                            write_dialect,
                            unsupported_level=ErrorLevel.IGNORE,
                            pretty=pretty,
                            identify=identify,
                        ),
                        write_sql,
                    )
self.assertEqual(lowercase_mysql.normalization_strategy, "LOWERCASE") lowercase_mysql = Dialect.get_or_raise("mysql, normalization_strategy = lowercase") self.assertEqual(lowercase_mysql.normalization_strategy.value, "LOWERCASE") with self.assertRaises(AttributeError) as cm: Dialect.get_or_raise("mysql, normalization_strategy") self.assertEqual(str(cm.exception), "'bool' object has no attribute 'upper'") with self.assertRaises(ValueError) as cm: Dialect.get_or_raise("myqsl") self.assertEqual(str(cm.exception), "Unknown dialect 'myqsl'. Did you mean mysql?") with self.assertRaises(ValueError) as cm: Dialect.get_or_raise("asdfjasodiufjsd") self.assertEqual(str(cm.exception), "Unknown dialect 'asdfjasodiufjsd'.") oracle_with_settings = Dialect.get_or_raise( "oracle, normalization_strategy = lowercase, version = 19.5" ) self.assertEqual(oracle_with_settings.normalization_strategy.value, "LOWERCASE") self.assertEqual(oracle_with_settings.version, (19, 5, 0)) class MyDialect(Dialect): SUPPORTED_SETTINGS = {"s1", "s2", "s3", "s4", "s5"} bool_settings = Dialect.get_or_raise("mydialect, s1=TruE, s2=1, s3=FaLse, s4=0, s5=nonbool") self.assertEqual( bool_settings.settings, {"s1": True, "s2": True, "s3": False, "s4": False, "s5": "nonbool"}, ) with self.assertRaises(ValueError) as cm: Dialect.get_or_raise("tsql,normalisation_strategy=case_sensitive") self.assertEqual( "Unknown setting 'normalisation_strategy'. 
def test_compare_dialect_versions(self):
    def resolve(settings):
        # Build a dialect instance from a "name, key=value" settings string.
        return Dialect.get_or_raise(settings)

    v1 = resolve("duckdb, version=1.0")
    v1_with_strategy = resolve("duckdb, normalization_strategy=case_sensitive, version=1.0")
    v2 = resolve("duckdb, version=2.2.4")
    unpinned = resolve("duckdb")

    # A dialect with no explicit version must compare newer than any pinned one.
    self.assertTrue(unpinned.version > v2.version)
    self.assertTrue(v1.version < v2.version)

    # Other settings must not influence the version.
    self.assertTrue(v1.version == v1_with_strategy.version)

    # Two unpinned instances must report the same version.
    self.assertTrue(unpinned.version == resolve("duckdb").version)
AS VARCHAR(MAX))", "snowflake": "CAST(a AS VARCHAR)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS STRING)", "tsql": "CAST(a AS VARCHAR(MAX))", "doris": "CAST(a AS STRING)", }, ) self.validate_all( "CAST(a AS BINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", "clickhouse": "CAST(a AS Nullable(BINARY(4)))", "drill": "CAST(a AS VARBINARY(4))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", "mysql": "CAST(a AS BINARY(4))", "hive": "CAST(a AS BINARY(4))", "oracle": "CAST(a AS BLOB(4))", "postgres": "CAST(a AS BYTEA(4))", "presto": "CAST(a AS VARBINARY(4))", "redshift": "CAST(a AS VARBYTE(4))", "snowflake": "CAST(a AS BINARY(4))", "sqlite": "CAST(a AS BLOB(4))", "spark": "CAST(a AS BINARY(4))", "starrocks": "CAST(a AS BINARY(4))", }, ) self.validate_all( "CAST(a AS VARBINARY(4))", write={ "bigquery": "CAST(a AS BYTES)", "clickhouse": "CAST(a AS Nullable(String))", "duckdb": "CAST(a AS BLOB(4))", "materialize": "CAST(a AS BYTEA(4))", "mysql": "CAST(a AS VARBINARY(4))", "hive": "CAST(a AS BINARY(4))", "oracle": "CAST(a AS BLOB(4))", "postgres": "CAST(a AS BYTEA(4))", "presto": "CAST(a AS VARBINARY(4))", "redshift": "CAST(a AS VARBYTE(4))", "snowflake": "CAST(a AS VARBINARY(4))", "sqlite": "CAST(a AS BLOB(4))", "spark": "CAST(a AS BINARY(4))", "starrocks": "CAST(a AS VARBINARY(4))", }, ) self.validate_all( "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))", write={ "clickhouse": "CAST(map('a', '1') AS Map(String, Nullable(String)))", }, ) self.validate_all( "CAST(ARRAY(1, 2) AS ARRAY)", write={ "clickhouse": "CAST([1, 2] AS Array(Nullable(Int8)))", }, ) self.validate_all( "CAST((1, 2, 3, 4) AS STRUCT)", write={ "clickhouse": "CAST((1, 2, 3, 4) AS Tuple(a Nullable(Int8), b Nullable(Int16), c Nullable(Int32), d Nullable(Int64)))", }, ) self.validate_all( "SELECT ARRAY_DISTINCT(x)", write={ "clickhouse": "SELECT arrayDistinct(x)", }, ) self.validate_all( "CAST(a AS DATETIME)", write={ "postgres": "CAST(a AS TIMESTAMP)", "sqlite": "CAST(a AS DATETIME)", 
}, ) self.validate_all( "CAST(a AS STRING)", write={ "bigquery": "CAST(a AS STRING)", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS TEXT)", "mysql": "CAST(a AS CHAR)", "hive": "CAST(a AS STRING)", "oracle": "CAST(a AS CLOB)", "postgres": "CAST(a AS TEXT)", "presto": "CAST(a AS VARCHAR)", "redshift": "CAST(a AS VARCHAR(MAX))", "snowflake": "CAST(a AS VARCHAR)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS STRING)", "tsql": "CAST(a AS VARCHAR(MAX))", "doris": "CAST(a AS STRING)", }, ) self.validate_all( "CAST(a AS VARCHAR)", write={ "bigquery": "CAST(a AS STRING)", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS VARCHAR)", "mysql": "CAST(a AS CHAR)", "hive": "CAST(a AS STRING)", "oracle": "CAST(a AS VARCHAR2)", "postgres": "CAST(a AS VARCHAR)", "presto": "CAST(a AS VARCHAR)", "redshift": "CAST(a AS VARCHAR)", "snowflake": "CAST(a AS VARCHAR)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS VARCHAR)", "tsql": "CAST(a AS VARCHAR)", "doris": "CAST(a AS VARCHAR)", }, ) self.validate_all( "CAST(a AS VARCHAR(3))", write={ "bigquery": "CAST(a AS STRING)", "drill": "CAST(a AS VARCHAR(3))", "duckdb": "CAST(a AS TEXT(3))", "materialize": "CAST(a AS VARCHAR(3))", "mysql": "CAST(a AS CHAR(3))", "hive": "CAST(a AS VARCHAR(3))", "oracle": "CAST(a AS VARCHAR2(3))", "postgres": "CAST(a AS VARCHAR(3))", "presto": "CAST(a AS VARCHAR(3))", "redshift": "CAST(a AS VARCHAR(3))", "snowflake": "CAST(a AS VARCHAR(3))", "spark": "CAST(a AS VARCHAR(3))", "starrocks": "CAST(a AS VARCHAR(3))", "tsql": "CAST(a AS VARCHAR(3))", "doris": "CAST(a AS VARCHAR(3))", }, ) self.validate_all( "CAST(a AS CHARACTER VARYING)", write={ "bigquery": "CAST(a AS STRING)", "drill": "CAST(a AS VARCHAR)", "duckdb": "CAST(a AS TEXT)", "materialize": "CAST(a AS VARCHAR)", "mysql": "CAST(a AS CHAR)", "hive": "CAST(a AS STRING)", "oracle": "CAST(a AS VARCHAR2)", "postgres": "CAST(a AS VARCHAR)", "presto": "CAST(a AS VARCHAR)", 
"redshift": "CAST(a AS VARCHAR)", "snowflake": "CAST(a AS VARCHAR)", "spark": "CAST(a AS STRING)", "starrocks": "CAST(a AS VARCHAR)", "tsql": "CAST(a AS VARCHAR)", "doris": "CAST(a AS VARCHAR)", }, ) self.validate_all( "CAST(a AS CHARACTER VARYING(3))", write={ "bigquery": "CAST(a AS STRING)", "drill": "CAST(a AS VARCHAR(3))", "duckdb": "CAST(a AS TEXT(3))", "materialize": "CAST(a AS VARCHAR(3))", "mysql": "CAST(a AS CHAR(3))", "hive": "CAST(a AS VARCHAR(3))", "oracle": "CAST(a AS VARCHAR2(3))", "postgres": "CAST(a AS VARCHAR(3))", "presto": "CAST(a AS VARCHAR(3))", "redshift": "CAST(a AS VARCHAR(3))", "snowflake": "CAST(a AS VARCHAR(3))", "spark": "CAST(a AS VARCHAR(3))", "starrocks": "CAST(a AS VARCHAR(3))", "tsql": "CAST(a AS VARCHAR(3))", "doris": "CAST(a AS VARCHAR(3))", }, ) self.validate_all( "CAST(a AS SMALLINT)", write={ "bigquery": "CAST(a AS INT64)", "drill": "CAST(a AS INTEGER)", "duckdb": "CAST(a AS SMALLINT)", "materialize": "CAST(a AS SMALLINT)", "mysql": "CAST(a AS SIGNED)", "hive": "CAST(a AS SMALLINT)", "oracle": "CAST(a AS SMALLINT)", "postgres": "CAST(a AS SMALLINT)", "presto": "CAST(a AS SMALLINT)", "redshift": "CAST(a AS SMALLINT)", "snowflake": "CAST(a AS SMALLINT)", "spark": "CAST(a AS SMALLINT)", "sqlite": "CAST(a AS INTEGER)", "starrocks": "CAST(a AS SMALLINT)", "doris": "CAST(a AS SMALLINT)", }, ) self.validate_all( "CAST(a AS DOUBLE)", read={ "postgres": "CAST(a AS DOUBLE PRECISION)", "redshift": "CAST(a AS DOUBLE PRECISION)", }, write={ "bigquery": "CAST(a AS FLOAT64)", "clickhouse": "CAST(a AS Nullable(Float64))", "doris": "CAST(a AS DOUBLE)", "drill": "CAST(a AS DOUBLE)", "duckdb": "CAST(a AS DOUBLE)", "materialize": "CAST(a AS DOUBLE PRECISION)", "mysql": "CAST(a AS DOUBLE)", "hive": "CAST(a AS DOUBLE)", "oracle": "CAST(a AS DOUBLE PRECISION)", "postgres": "CAST(a AS DOUBLE PRECISION)", "presto": "CAST(a AS DOUBLE)", "redshift": "CAST(a AS DOUBLE PRECISION)", "snowflake": "CAST(a AS DOUBLE)", "spark": "CAST(a AS DOUBLE)", 
"starrocks": "CAST(a AS DOUBLE)", }, ) self.validate_all( "CAST('1 DAY' AS INTERVAL)", write={ "postgres": "CAST('1 DAY' AS INTERVAL)", "redshift": "CAST('1 DAY' AS INTERVAL)", }, ) self.validate_all( "CAST(a AS TIMESTAMP)", write={ "starrocks": "CAST(a AS DATETIME)", "redshift": "CAST(a AS TIMESTAMP)", "doris": "CAST(a AS DATETIME)", "mysql": "CAST(a AS DATETIME)", }, ) self.validate_all( "CAST(a AS TIMESTAMPTZ)", write={ "starrocks": "TIMESTAMP(a)", "redshift": "CAST(a AS TIMESTAMP WITH TIME ZONE)", "doris": "CAST(a AS DATETIME)", "mysql": "TIMESTAMP(a)", }, ) self.validate_all("CAST(a AS TINYINT)", write={"oracle": "CAST(a AS SMALLINT)"}) self.validate_all("CAST(a AS SMALLINT)", write={"oracle": "CAST(a AS SMALLINT)"}) self.validate_all("CAST(a AS BIGINT)", write={"oracle": "CAST(a AS INT)"}) self.validate_all("CAST(a AS INT)", write={"oracle": "CAST(a AS INT)"}) self.validate_all( "CAST(a AS DECIMAL)", read={"oracle": "CAST(a AS NUMBER)"}, write={"oracle": "CAST(a AS NUMBER)"}, ) self.validate_all( "CAST('127.0.0.1/32' AS INET)", read={"postgres": "INET '127.0.0.1/32'"}, ) self.assertIsNotNone( self.validate_identity("CREATE TABLE foo (bar INT AS (foo))").find( exp.ComputedColumnConstraint ) ) self.assertIsNotNone( self.validate_identity( "CREATE TABLE foo (t1 INT, t2 INT, bar INT AS (t1 * t2 * 2))" ).find(exp.ComputedColumnConstraint) ) def test_ddl(self): self.validate_all( "CREATE TABLE a LIKE b", write={ "": "CREATE TABLE a LIKE b", "bigquery": "CREATE TABLE a LIKE b", "clickhouse": "CREATE TABLE a AS b", "databricks": "CREATE TABLE a LIKE b", "doris": "CREATE TABLE a LIKE b", "drill": "CREATE TABLE a AS SELECT * FROM b LIMIT 0", "duckdb": "CREATE TABLE a AS SELECT * FROM b LIMIT 0", "hive": "CREATE TABLE a LIKE b", "mysql": "CREATE TABLE a LIKE b", "oracle": "CREATE TABLE a LIKE b", "postgres": "CREATE TABLE a (LIKE b)", "presto": "CREATE TABLE a (LIKE b)", "redshift": "CREATE TABLE a (LIKE b)", "snowflake": "CREATE TABLE a LIKE b", "spark": "CREATE TABLE 
a LIKE b", "sqlite": "CREATE TABLE a AS SELECT * FROM b LIMIT 0", "trino": "CREATE TABLE a (LIKE b)", "tsql": "SELECT TOP 0 * INTO a FROM b AS temp", }, ) def test_heredoc_strings(self): for dialect in ("clickhouse", "postgres", "redshift"): # Invalid matching tag with self.assertRaises(TokenError): parse_one("SELECT $tag1$invalid heredoc string$tag2$", dialect=dialect) # Unmatched tag with self.assertRaises(TokenError): parse_one("SELECT $tag1$invalid heredoc string", dialect=dialect) # Without tag self.validate_all( "SELECT 'this is a heredoc string'", read={ dialect: "SELECT $$this is a heredoc string$$", }, ) self.validate_all( "SELECT ''", read={ dialect: "SELECT $$$$", }, ) # With tag self.validate_all( "SELECT 'this is also a heredoc string'", read={ dialect: "SELECT $foo$this is also a heredoc string$foo$", }, ) self.validate_all( "SELECT ''", read={ dialect: "SELECT $foo$$foo$", }, ) def test_decode(self): self.validate_identity("DECODE(bin, charset)") self.validate_all( "SELECT DECODE(a, 1, 'one')", write={ "": "SELECT DECODE(a, 1, 'one')", "duckdb": "SELECT CASE WHEN a = 1 THEN 'one' END", "oracle": "SELECT DECODE(a, 1, 'one')", "redshift": "SELECT DECODE(a, 1, 'one')", "snowflake": "SELECT DECODE(a, 1, 'one')", "spark": "SELECT DECODE(a, 1, 'one')", }, ) self.validate_all( "SELECT DECODE(a, 1, 'one', 'default')", write={ "": "SELECT DECODE(a, 1, 'one', 'default')", "duckdb": "SELECT CASE WHEN a = 1 THEN 'one' ELSE 'default' END", "oracle": "SELECT DECODE(a, 1, 'one', 'default')", "redshift": "SELECT DECODE(a, 1, 'one', 'default')", "snowflake": "SELECT DECODE(a, 1, 'one', 'default')", "spark": "SELECT DECODE(a, 1, 'one', 'default')", }, ) self.validate_all( "SELECT DECODE(a, NULL, 'null')", write={ "": "SELECT DECODE(a, NULL, 'null')", "duckdb": "SELECT CASE WHEN a IS NULL THEN 'null' END", "oracle": "SELECT DECODE(a, NULL, 'null')", "redshift": "SELECT DECODE(a, NULL, 'null')", "snowflake": "SELECT DECODE(a, NULL, 'null')", "spark": "SELECT DECODE(a, 
NULL, 'null')", }, ) self.validate_all( "SELECT DECODE(a, b, c)", write={ "": "SELECT DECODE(a, b, c)", "duckdb": "SELECT CASE WHEN a = b OR (a IS NULL AND b IS NULL) THEN c END", "oracle": "SELECT DECODE(a, b, c)", "redshift": "SELECT DECODE(a, b, c)", "snowflake": "SELECT DECODE(a, b, c)", "spark": "SELECT DECODE(a, b, c)", }, ) self.validate_all( "SELECT DECODE(tbl.col, 'some_string', 'foo')", write={ "": "SELECT DECODE(tbl.col, 'some_string', 'foo')", "duckdb": "SELECT CASE WHEN tbl.col = 'some_string' THEN 'foo' END", "oracle": "SELECT DECODE(tbl.col, 'some_string', 'foo')", "redshift": "SELECT DECODE(tbl.col, 'some_string', 'foo')", "snowflake": "SELECT DECODE(tbl.col, 'some_string', 'foo')", "spark": "SELECT DECODE(tbl.col, 'some_string', 'foo')", }, ) def test_to_binary(self): self.validate_all( "TO_BINARY('1C')", read={ "": "TO_BINARY('1C')", "snowflake": "TO_BINARY('1C')", "starrocks": "TO_BINARY('1C')", "duckdb": "TO_BINARY('1C')", "spark": "TO_BINARY('1C')", "databricks": "TO_BINARY('1C')", }, write={ "snowflake": "TO_BINARY('1C')", "starrocks": "TO_BINARY('1C')", "duckdb": "TO_BINARY('1C')", "spark": "TO_BINARY('1C')", "databricks": "TO_BINARY('1C')", }, ) self.validate_all( "TO_BINARY('1C', 'HEX')", read={ "": "TO_BINARY('1C', 'HEX')", "snowflake": "TO_BINARY('1C', 'HEX')", "starrocks": "TO_BINARY('1C', 'HEX')", "spark": "TO_BINARY('1C', 'HEX')", "databricks": "TO_BINARY('1C', 'HEX')", }, write={ "snowflake": "TO_BINARY('1C', 'HEX')", "starrocks": "TO_BINARY('1C', 'HEX')", "spark": "TO_BINARY('1C', 'HEX')", "databricks": "TO_BINARY('1C', 'HEX')", }, ) def test_if_null(self): self.validate_all( "SELECT IFNULL(1, NULL) FROM foo", write={ "": "SELECT COALESCE(1, NULL) FROM foo", "redshift": "SELECT COALESCE(1, NULL) FROM foo", "postgres": "SELECT COALESCE(1, NULL) FROM foo", "mysql": "SELECT COALESCE(1, NULL) FROM foo", "duckdb": "SELECT COALESCE(1, NULL) FROM foo", "spark": "SELECT COALESCE(1, NULL) FROM foo", "bigquery": "SELECT COALESCE(1, NULL) FROM 
foo", "presto": "SELECT COALESCE(1, NULL) FROM foo", }, ) def test_is_ascii(self): self.validate_all( "SELECT IS_ASCII(x)", write={ "": "SELECT IS_ASCII(x)", "sqlite": "SELECT (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))", "mysql": "SELECT REGEXP_LIKE(x, '^[[:ascii:]]*$')", "postgres": "SELECT (x ~ '^[[:ascii:]]*$')", "tsql": "SELECT (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)", "oracle": "SELECT NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)", }, ) def test_nvl2(self): self.validate_all( "SELECT NVL2(a, b, c)", write={ "": "SELECT NVL2(a, b, c)", "bigquery": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "clickhouse": "SELECT CASE WHEN NOT (a IS NULL) THEN b ELSE c END", "databricks": "SELECT NVL2(a, b, c)", "doris": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "dremio": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "drill": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "duckdb": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "hive": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "mysql": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "oracle": "SELECT NVL2(a, b, c)", "postgres": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "presto": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "redshift": "SELECT NVL2(a, b, c)", "snowflake": "SELECT NVL2(a, b, c)", "spark": "SELECT NVL2(a, b, c)", "spark2": "SELECT NVL2(a, b, c)", "sqlite": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "starrocks": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "teradata": "SELECT NVL2(a, b, c)", "trino": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", "tsql": "SELECT CASE WHEN NOT a IS NULL THEN b ELSE c END", }, ) self.validate_all( "SELECT NVL2(a, b)", write={ "": "SELECT NVL2(a, b)", "bigquery": "SELECT CASE WHEN NOT a IS NULL THEN b END", "clickhouse": "SELECT CASE WHEN NOT (a IS NULL) THEN b END", "databricks": "SELECT NVL2(a, b)", "doris": "SELECT CASE 
WHEN NOT a IS NULL THEN b END", "dremio": "SELECT CASE WHEN NOT a IS NULL THEN b END", "drill": "SELECT CASE WHEN NOT a IS NULL THEN b END", "duckdb": "SELECT CASE WHEN NOT a IS NULL THEN b END", "hive": "SELECT CASE WHEN NOT a IS NULL THEN b END", "mysql": "SELECT CASE WHEN NOT a IS NULL THEN b END", "oracle": "SELECT NVL2(a, b)", "postgres": "SELECT CASE WHEN NOT a IS NULL THEN b END", "presto": "SELECT CASE WHEN NOT a IS NULL THEN b END", "redshift": "SELECT NVL2(a, b)", "snowflake": "SELECT NVL2(a, b)", "spark": "SELECT NVL2(a, b)", "spark2": "SELECT NVL2(a, b)", "sqlite": "SELECT CASE WHEN NOT a IS NULL THEN b END", "starrocks": "SELECT CASE WHEN NOT a IS NULL THEN b END", "teradata": "SELECT NVL2(a, b)", "trino": "SELECT CASE WHEN NOT a IS NULL THEN b END", "tsql": "SELECT CASE WHEN NOT a IS NULL THEN b END", }, ) def test_time(self): self.validate_all( "STR_TO_TIME(x, '%Y-%m-%dT%H:%M:%S')", read={ "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", }, write={ "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%T')", "duckdb": "STRPTIME(x, '%Y-%m-%dT%H:%M:%S')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS TIMESTAMP)", "presto": "DATE_PARSE(x, '%Y-%m-%dT%T')", "drill": "TO_TIMESTAMP(x, 'yyyy-MM-dd''T''HH:mm:ss')", "redshift": "TO_TIMESTAMP(x, 'YYYY-MM-DDTHH24:MI:SS')", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')", }, ) self.validate_all( "STR_TO_TIME('2020-01-01', '%Y-%m-%d')", write={ "drill": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", "duckdb": "STRPTIME('2020-01-01', '%Y-%m-%d')", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "oracle": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", "postgres": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", "presto": "DATE_PARSE('2020-01-01', '%Y-%m-%d')", "redshift": "TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", "spark": "TO_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", }, ) self.validate_all( "STR_TO_TIME(x, '%y')", write={ "drill": "TO_TIMESTAMP(x, 'yy')", "duckdb": "STRPTIME(x, '%y')", "hive": 
"CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy')) AS TIMESTAMP)", "materialize": "TO_TIMESTAMP(x, 'YY')", "presto": "DATE_PARSE(x, '%y')", "oracle": "TO_TIMESTAMP(x, 'YY')", "postgres": "TO_TIMESTAMP(x, 'YY')", "redshift": "TO_TIMESTAMP(x, 'YY')", "spark": "TO_TIMESTAMP(x, 'yy')", }, ) self.validate_all( "STR_TO_UNIX('2020-01-01', '%Y-%m-%d')", write={ "duckdb": "EPOCH(STRPTIME('2020-01-01', '%Y-%m-%d'))", "hive": "UNIX_TIMESTAMP('2020-01-01', 'yyyy-MM-dd')", "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('2020-01-01' AS VARCHAR), '%Y-%m-%d')), PARSE_DATETIME(DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d'), 'yyyy-MM-dd')))", "starrocks": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", "doris": "UNIX_TIMESTAMP('2020-01-01', '%Y-%m-%d')", }, ) self.validate_all( "TIME_STR_TO_DATE('2020-01-01')", write={ "drill": "CAST('2020-01-01' AS DATE)", "duckdb": "CAST('2020-01-01' AS DATE)", "hive": "TO_DATE('2020-01-01')", "presto": "CAST('2020-01-01' AS TIMESTAMP)", "starrocks": "TO_DATE('2020-01-01')", "doris": "TO_DATE('2020-01-01')", }, ) self.validate_all( "TIME_STR_TO_TIME('2020-01-01')", write={ "bigquery": "CAST('2020-01-01' AS DATETIME)", "databricks": "CAST('2020-01-01' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01' AS TIMESTAMP)", "tsql": "CAST('2020-01-01' AS DATETIME2)", "mysql": "CAST('2020-01-01' AS DATETIME)", "postgres": "CAST('2020-01-01' AS TIMESTAMP)", "redshift": "CAST('2020-01-01' AS TIMESTAMP)", "snowflake": "CAST('2020-01-01' AS TIMESTAMP)", "spark": "CAST('2020-01-01' AS TIMESTAMP)", "trino": "CAST('2020-01-01' AS TIMESTAMP)", "clickhouse": "CAST('2020-01-01' AS DateTime64(6))", "drill": "CAST('2020-01-01' AS TIMESTAMP)", "hive": "CAST('2020-01-01' AS TIMESTAMP)", "presto": "CAST('2020-01-01' AS TIMESTAMP)", "sqlite": "'2020-01-01'", "doris": "CAST('2020-01-01' AS DATETIME)", }, ) self.validate_all( "TIME_STR_TO_TIME('2020-01-01 12:13:14.123456+00:00')", write={ "mysql": "CAST('2020-01-01 12:13:14.123456+00:00' AS DATETIME(6))", "trino": 
"CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP(6))", "presto": "CAST('2020-01-01 12:13:14.123456+00:00' AS TIMESTAMP)", }, ) self.validate_all( "TIME_STR_TO_TIME('2020-01-01 12:13:14.123-08:00', 'America/Los_Angeles')", write={ "mysql": "TIMESTAMP('2020-01-01 12:13:14.123-08:00')", "trino": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP(3) WITH TIME ZONE)", "presto": "CAST('2020-01-01 12:13:14.123-08:00' AS TIMESTAMP WITH TIME ZONE)", }, ) self.validate_all( "TIME_STR_TO_TIME('2020-01-01 12:13:14-08:00', 'America/Los_Angeles')", write={ "bigquery": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "databricks": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "duckdb": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", "tsql": "CAST('2020-01-01 12:13:14-08:00' AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", "mysql": "TIMESTAMP('2020-01-01 12:13:14-08:00')", "postgres": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", "redshift": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", "snowflake": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMPTZ)", "spark": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "trino": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", "clickhouse": "CAST('2020-01-01 12:13:14' AS DateTime64(6, 'America/Los_Angeles'))", "drill": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "hive": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP)", "presto": "CAST('2020-01-01 12:13:14-08:00' AS TIMESTAMP WITH TIME ZONE)", "sqlite": "'2020-01-01 12:13:14-08:00'", "doris": "CAST('2020-01-01 12:13:14-08:00' AS DATETIME)", }, ) self.validate_all( "TIME_STR_TO_TIME(col, 'America/Los_Angeles')", write={ "bigquery": "CAST(col AS TIMESTAMP)", "databricks": "CAST(col AS TIMESTAMP)", "duckdb": "CAST(col AS TIMESTAMPTZ)", "tsql": "CAST(col AS DATETIMEOFFSET) AT TIME ZONE 'UTC'", "mysql": "TIMESTAMP(col)", "postgres": "CAST(col AS TIMESTAMPTZ)", "redshift": "CAST(col AS TIMESTAMP WITH TIME ZONE)", "snowflake": "CAST(col AS 
TIMESTAMPTZ)", "spark": "CAST(col AS TIMESTAMP)", "trino": "CAST(col AS TIMESTAMP WITH TIME ZONE)", "clickhouse": "CAST(col AS DateTime64(6, 'America/Los_Angeles'))", "drill": "CAST(col AS TIMESTAMP)", "hive": "CAST(col AS TIMESTAMP)", "presto": "CAST(col AS TIMESTAMP WITH TIME ZONE)", "sqlite": "col", "doris": "CAST(col AS DATETIME)", }, ) self.validate_all( "TIME_STR_TO_UNIX('2020-01-01')", write={ "duckdb": "EPOCH(CAST('2020-01-01' AS TIMESTAMP))", "hive": "UNIX_TIMESTAMP('2020-01-01')", "mysql": "UNIX_TIMESTAMP('2020-01-01')", "presto": "TO_UNIXTIME(DATE_PARSE('2020-01-01', '%Y-%m-%d %T'))", "doris": "UNIX_TIMESTAMP('2020-01-01')", }, ) self.validate_all( "TIME_TO_STR(x, '%Y-%m-%d')", write={ "bigquery": "FORMAT_DATE('%F', x)", "drill": "TO_CHAR(x, 'yyyy-MM-dd')", "duckdb": "STRFTIME(x, '%Y-%m-%d')", "hive": "DATE_FORMAT(x, 'yyyy-MM-dd')", "materialize": "TO_CHAR(x, 'YYYY-MM-DD')", "oracle": "TO_CHAR(x, 'YYYY-MM-DD')", "postgres": "TO_CHAR(x, 'YYYY-MM-DD')", "presto": "DATE_FORMAT(x, '%Y-%m-%d')", "redshift": "TO_CHAR(x, 'YYYY-MM-DD')", "doris": "DATE_FORMAT(x, '%Y-%m-%d')", }, ) self.validate_all( "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", write={ "redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')", "tsql": "FORMAT(a, 'yyyy-MM-dd HH:mm:ss.ffffff')", }, ) self.validate_all( "TIME_TO_TIME_STR(x)", write={ "drill": "CAST(x AS VARCHAR)", "duckdb": "CAST(x AS TEXT)", "hive": "CAST(x AS STRING)", "presto": "CAST(x AS VARCHAR)", "redshift": "CAST(x AS VARCHAR(MAX))", "doris": "CAST(x AS STRING)", }, ) self.validate_all( "TIME_TO_UNIX(x)", write={ "drill": "UNIX_TIMESTAMP(x)", "duckdb": "EPOCH(x)", "hive": "UNIX_TIMESTAMP(x)", "presto": "TO_UNIXTIME(x)", "doris": "UNIX_TIMESTAMP(x)", }, ) self.validate_all( "TS_OR_DS_TO_DATE_STR(x)", write={ "duckdb": "SUBSTRING(CAST(x AS TEXT), 1, 10)", "hive": "SUBSTRING(CAST(x AS STRING), 1, 10)", "presto": "SUBSTRING(CAST(x AS VARCHAR), 1, 10)", "doris": "SUBSTRING(CAST(x AS STRING), 1, 10)", }, ) self.validate_all( 
"TS_OR_DS_TO_DATE(x)", write={ "bigquery": "CAST(x AS DATE)", "duckdb": "CAST(x AS DATE)", "hive": "TO_DATE(x)", "materialize": "CAST(x AS DATE)", "postgres": "CAST(x AS DATE)", "presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)", "snowflake": "TO_DATE(x)", "doris": "TO_DATE(x)", "mysql": "DATE(x)", }, ) self.validate_all( "TS_OR_DS_TO_DATE(x, '%-d')", write={ "duckdb": "CAST(STRPTIME(x, '%-d') AS DATE)", "hive": "TO_DATE(x, 'd')", "presto": "CAST(DATE_PARSE(x, '%e') AS DATE)", "spark": "TO_DATE(x, 'd')", }, ) self.validate_all( "UNIX_TO_STR(x, y)", write={ "duckdb": "STRFTIME(TO_TIMESTAMP(x), y)", "hive": "FROM_UNIXTIME(x, y)", "presto": "DATE_FORMAT(FROM_UNIXTIME(x), y)", "starrocks": "FROM_UNIXTIME(x, y)", "doris": "FROM_UNIXTIME(x, y)", }, ) self.validate_all( "UNIX_TO_TIME(x)", write={ "duckdb": "TO_TIMESTAMP(x)", "hive": "FROM_UNIXTIME(x)", "oracle": "TO_DATE('1970-01-01', 'YYYY-MM-DD') + (x / 86400)", "materialize": "TO_TIMESTAMP(x)", "postgres": "TO_TIMESTAMP(x)", "presto": "FROM_UNIXTIME(x)", "starrocks": "FROM_UNIXTIME(x)", "doris": "FROM_UNIXTIME(x)", "exasol": "FROM_POSIX_TIME(x)", }, ) self.validate_all( "UNIX_TO_TIME_STR(x)", write={ "duckdb": "CAST(TO_TIMESTAMP(x) AS TEXT)", "hive": "FROM_UNIXTIME(x)", "presto": "CAST(FROM_UNIXTIME(x) AS VARCHAR)", }, ) self.validate_all( "DATE_TO_DATE_STR(x)", write={ "drill": "CAST(x AS VARCHAR)", "duckdb": "CAST(x AS TEXT)", "hive": "CAST(x AS STRING)", "presto": "CAST(x AS VARCHAR)", }, ) self.validate_all( "DATE_TO_DI(x)", write={ "drill": "CAST(TO_DATE(x, 'yyyyMMdd') AS INT)", "duckdb": "CAST(STRFTIME(x, '%Y%m%d') AS INT)", "hive": "CAST(DATE_FORMAT(x, 'yyyyMMdd') AS INT)", "presto": "CAST(DATE_FORMAT(x, '%Y%m%d') AS INT)", }, ) self.validate_all( "DI_TO_DATE(x)", write={ "drill": "TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')", "duckdb": "CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)", "hive": "TO_DATE(CAST(x AS STRING), 'yyyyMMdd')", "presto": "CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)", }, ) 
self.validate_all( "TS_OR_DI_TO_DI(x)", write={ "duckdb": "CAST(SUBSTR(REPLACE(CAST(x AS TEXT), '-', ''), 1, 8) AS INT)", "hive": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)", "presto": "CAST(SUBSTR(REPLACE(CAST(x AS VARCHAR), '-', ''), 1, 8) AS INT)", "spark": "CAST(SUBSTR(REPLACE(CAST(x AS STRING), '-', ''), 1, 8) AS INT)", }, ) self.validate_all( "DATE_ADD(x, 1, 'DAY')", read={ "snowflake": "DATEADD('DAY', 1, x)", "dremio": "DATE_ADD(x, 1)", }, write={ "bigquery": "DATE_ADD(x, INTERVAL 1 DAY)", "drill": "DATE_ADD(x, INTERVAL 1 DAY)", "duckdb": "x + INTERVAL 1 DAY", "hive": "DATE_ADD(x, 1)", "materialize": "x + INTERVAL '1 DAY'", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", "postgres": "x + INTERVAL '1 DAY'", "presto": "DATE_ADD('DAY', 1, x)", "snowflake": "DATEADD(DAY, 1, x)", "spark": "DATE_ADD(x, 1)", "sqlite": "DATE(x, '1 DAY')", "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", "tsql": "DATEADD(DAY, 1, x)", "doris": "DATE_ADD(x, INTERVAL 1 DAY)", "dremio": "DATE_ADD(x, 1)", }, ) self.validate_all( "DATE_ADD(x, 1)", write={ "bigquery": "DATE_ADD(x, INTERVAL 1 DAY)", "drill": "DATE_ADD(x, INTERVAL 1 DAY)", "duckdb": "x + INTERVAL 1 DAY", "hive": "DATE_ADD(x, 1)", "mysql": "DATE_ADD(x, INTERVAL 1 DAY)", "presto": "DATE_ADD('DAY', 1, x)", "spark": "DATE_ADD(x, 1)", "starrocks": "DATE_ADD(x, INTERVAL 1 DAY)", "doris": "DATE_ADD(x, INTERVAL 1 DAY)", "dremio": "DATE_ADD(x, 1)", }, ) self.validate_all( "DATE_TRUNC('DAY', x)", read={ "bigquery": "DATE_TRUNC(x, day)", "spark": "TRUNC(x, 'day')", }, write={ "bigquery": "DATE_TRUNC(x, DAY)", "duckdb": "DATE_TRUNC('DAY', x)", "mysql": "DATE(x)", "presto": "DATE_TRUNC('DAY', x)", "materialize": "DATE_TRUNC('DAY', x)", "postgres": "DATE_TRUNC('DAY', x)", "snowflake": "DATE_TRUNC('DAY', x)", "starrocks": "DATE_TRUNC('DAY', x)", "spark": "TRUNC(x, 'DAY')", "doris": "DATE_TRUNC(x, 'DAY')", }, ) self.validate_all( "TIMESTAMP_TRUNC(x, DAY)", read={ "bigquery": "TIMESTAMP_TRUNC(x, day)", "duckdb": "DATE_TRUNC('day', 
x)", "materialize": "DATE_TRUNC('day', x)", "presto": "DATE_TRUNC('day', x)", "postgres": "DATE_TRUNC('day', x)", "snowflake": "DATE_TRUNC('day', x)", "starrocks": "DATE_TRUNC('day', x)", "spark": "DATE_TRUNC('day', x)", "doris": "DATE_TRUNC('day', x)", }, ) self.validate_all( "DATE_TRUNC('DAY', CAST(x AS DATE))", read={ "presto": "DATE_TRUNC('DAY', x::DATE)", "snowflake": "DATE_TRUNC('DAY', x::DATE)", }, ) self.validate_all( "TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)", read={"postgres": "DATE_TRUNC('day', x::DATE)"}, ) self.validate_all( "TIMESTAMP_TRUNC(CAST(x AS DATE), DAY)", read={"starrocks": "DATE_TRUNC('day', x::DATE)"}, ) self.validate_all( "DATE_TRUNC('week', x)", write={ "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')", }, ) self.validate_all( "DATE_TRUNC('month', x)", write={ "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')", }, ) self.validate_all( "DATE_TRUNC('quarter', x)", write={ "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')", }, ) self.validate_all( "DATE_TRUNC('year', x)", write={ "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')", }, ) self.validate_all( "DATE_TRUNC('millennium', x)", write={ "mysql": UnsupportedError, }, ) self.validate_all( "DATE_TRUNC('YEAR', x)", read={ "bigquery": "DATE_TRUNC(x, year)", "spark": "TRUNC(x, 'year')", }, write={ "bigquery": "DATE_TRUNC(x, YEAR)", "materialize": "DATE_TRUNC('YEAR', x)", "mysql": "STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')", "postgres": "DATE_TRUNC('YEAR', x)", "snowflake": "DATE_TRUNC('YEAR', x)", "starrocks": "DATE_TRUNC('YEAR', x)", "spark": "TRUNC(x, 'YEAR')", "doris": "DATE_TRUNC(x, 'YEAR')", }, ) self.validate_all( "TIMESTAMP_TRUNC(x, YEAR)", read={ "bigquery": "TIMESTAMP_TRUNC(x, year)", "materialize": "DATE_TRUNC('YEAR', x)", "postgres": "DATE_TRUNC(year, x)", "spark": "DATE_TRUNC('year', x)", "snowflake": "DATE_TRUNC(year, x)", "starrocks": "DATE_TRUNC('year', x)", }, write={ "bigquery": 
"TIMESTAMP_TRUNC(x, YEAR)", "spark": "DATE_TRUNC('YEAR', x)", "doris": "DATE_TRUNC(x, 'YEAR')", }, ) self.validate_all( "DATE_TRUNC('millennium', x)", write={ "mysql": UnsupportedError, }, ) self.validate_all( "NEXT_DAY(x, y)", write={ "snowflake": "NEXT_DAY(x, y)", "databricks": "NEXT_DAY(x, y)", "oracle": "NEXT_DAY(x, y)", "redshift": "NEXT_DAY(x, y)", }, ) self.validate_all( "STR_TO_DATE(x, '%Y-%m-%dT%H:%M:%S')", write={ "drill": "TO_DATE(x, 'yyyy-MM-dd''T''HH:mm:ss')", "mysql": "STR_TO_DATE(x, '%Y-%m-%dT%T')", "starrocks": "STR_TO_DATE(x, '%Y-%m-%dT%T')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yyyy-MM-ddTHH:mm:ss')) AS DATE)", "presto": "CAST(DATE_PARSE(x, '%Y-%m-%dT%T') AS DATE)", "spark": "TO_DATE(x, 'yyyy-MM-ddTHH:mm:ss')", "doris": "STR_TO_DATE(x, '%Y-%m-%dT%T')", }, ) self.validate_all( "STR_TO_DATE(x, '%Y-%m-%d')", write={ "drill": "CAST(x AS DATE)", "mysql": "STR_TO_DATE(x, '%Y-%m-%d')", "starrocks": "STR_TO_DATE(x, '%Y-%m-%d')", "hive": "CAST(x AS DATE)", "presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)", "spark": "TO_DATE(x)", "doris": "STR_TO_DATE(x, '%Y-%m-%d')", }, ) self.validate_all( "DATE_STR_TO_DATE(x)", write={ "drill": "CAST(x AS DATE)", "duckdb": "CAST(x AS DATE)", "hive": "CAST(x AS DATE)", "presto": "CAST(x AS DATE)", "spark": "CAST(x AS DATE)", "sqlite": "x", "tsql": "CAST(x AS DATE)", }, ) self.validate_all( "TS_OR_DS_ADD('2021-02-01', 1, 'DAY')", write={ "drill": "DATE_ADD(CAST('2021-02-01' AS DATE), INTERVAL 1 DAY)", "duckdb": "CAST('2021-02-01' AS DATE) + INTERVAL 1 DAY", "hive": "DATE_ADD('2021-02-01', 1)", "presto": "DATE_ADD('DAY', 1, CAST(CAST('2021-02-01' AS TIMESTAMP) AS DATE))", "spark": "DATE_ADD('2021-02-01', 1)", "mysql": "DATE_ADD('2021-02-01', INTERVAL 1 DAY)", }, ) self.validate_all( "TS_OR_DS_ADD(x, 1, 'DAY')", write={ "presto": "DATE_ADD('DAY', 1, CAST(CAST(x AS TIMESTAMP) AS DATE))", "hive": "DATE_ADD(x, 1)", }, ) self.validate_all( "TS_OR_DS_ADD(CURRENT_DATE, 1, 'DAY')", write={ "presto": "DATE_ADD('DAY', 1, 
CAST(CAST(CURRENT_DATE AS TIMESTAMP) AS DATE))", "hive": "DATE_ADD(CURRENT_DATE, 1)", }, ) self.validate_all( "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", write={ "drill": "DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL 1 DAY)", "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY", "hive": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", "presto": "DATE_ADD('DAY', 1, CAST('2020-01-01' AS DATE))", "spark": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", "dremio": "DATE_ADD(CAST('2020-01-01' AS DATE), 1)", }, ) self.validate_all( "TIMESTAMP '2022-01-01'", write={ "drill": "CAST('2022-01-01' AS TIMESTAMP)", "mysql": "CAST('2022-01-01' AS DATETIME)", "starrocks": "CAST('2022-01-01' AS DATETIME)", "hive": "CAST('2022-01-01' AS TIMESTAMP)", "doris": "CAST('2022-01-01' AS DATETIME)", }, ) self.validate_all( "TIMESTAMP('2022-01-01')", write={ "mysql": "TIMESTAMP('2022-01-01')", "starrocks": "TIMESTAMP('2022-01-01')", "hive": "TIMESTAMP('2022-01-01')", "doris": "TIMESTAMP('2022-01-01')", }, ) self.validate_all( "TIMESTAMP_TRUNC(x, DAY, 'UTC')", write={ "": "TIMESTAMP_TRUNC(x, DAY, 'UTC')", "duckdb": "DATE_TRUNC('DAY', x AT TIME ZONE 'UTC') AT TIME ZONE 'UTC'", "materialize": "DATE_TRUNC('DAY', x, 'UTC')", "presto": "DATE_TRUNC('DAY', x)", "postgres": "DATE_TRUNC('DAY', x, 'UTC')", "snowflake": "DATE_TRUNC('DAY', x)", "databricks": "DATE_TRUNC('DAY', x)", "clickhouse": "dateTrunc('DAY', x, 'UTC')", }, ) for unit in ("DAY", "MONTH", "YEAR"): self.validate_all( f"{unit}(x)", read={ dialect: f"{unit}(x)" for dialect in ( "bigquery", "drill", "duckdb", "presto", ) }, write={ dialect: f"{unit}(x)" for dialect in ( "bigquery", "drill", "duckdb", "mysql", "presto", "hive", "spark", ) }, ) self.validate_all( f"{unit}(TS_OR_DS_TO_DATE(x))", write={ dialect: f"{unit}(x)" for dialect in ( "mysql", "doris", "starrocks", ) }, ) self.validate_all( f"{unit}(CAST(x AS DATE))", read={ dialect: f"{unit}(x)" for dialect in ( "mysql", "doris", "starrocks", ) }, ) def test_array(self): self.validate_all( 
"ARRAY(0, 1, 2)", write={ "bigquery": "[0, 1, 2]", "duckdb": "[0, 1, 2]", "presto": "ARRAY[0, 1, 2]", "spark": "ARRAY(0, 1, 2)", }, ) self.validate_all( "ARRAY_SIZE(x)", write={ "bigquery": "ARRAY_LENGTH(x)", "duckdb": "ARRAY_LENGTH(x)", "drill": "REPEATED_COUNT(x)", "presto": "CARDINALITY(x)", "spark": "SIZE(x)", }, ) self.validate_all( "ARRAY_SUM(ARRAY(1, 2))", write={ "trino": "REDUCE(ARRAY[1, 2], 0, (acc, x) -> acc + x, acc -> acc)", "duckdb": "LIST_SUM([1, 2])", "hive": "ARRAY_SUM(ARRAY(1, 2))", "presto": "ARRAY_SUM(ARRAY[1, 2])", "spark": "AGGREGATE(ARRAY(1, 2), 0, (acc, x) -> acc + x, acc -> acc)", }, ) self.validate_all( "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", write={ "trino": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", "duckdb": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", "hive": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", "spark": "AGGREGATE(x, 0, (acc, x) -> acc + x, acc -> acc)", "presto": "REDUCE(x, 0, (acc, x) -> acc + x, acc -> acc)", }, ) self.validate_all( "ARRAY_INTERSECT(x, y)", read={ "hive": "ARRAY_INTERSECT(x, y)", "spark2": "ARRAY_INTERSECT(x, y)", "spark": "ARRAY_INTERSECT(x, y)", "databricks": "ARRAY_INTERSECT(x, y)", "presto": "ARRAY_INTERSECT(x, y)", "trino": "ARRAY_INTERSECT(x, y)", "snowflake": "ARRAY_INTERSECTION(x, y)", "starrocks": "ARRAY_INTERSECT(x, y)", "duckdb": "ARRAY_INTERSECT(x, y)", }, write={ "hive": "ARRAY_INTERSECT(x, y)", "spark2": "ARRAY_INTERSECT(x, y)", "spark": "ARRAY_INTERSECT(x, y)", "databricks": "ARRAY_INTERSECT(x, y)", "presto": "ARRAY_INTERSECT(x, y)", "trino": "ARRAY_INTERSECT(x, y)", "snowflake": "ARRAY_INTERSECTION(x, y)", "starrocks": "ARRAY_INTERSECT(x, y)", "duckdb": "ARRAY_INTERSECT(x, y)", }, ) self.validate_identity("SELECT ARRAY_INTERSECT(x, y, z)") self.validate_all( "ARRAY_REVERSE(x)", read={ "clickhouse": "arrayReverse(x)", "bigquery": "ARRAY_REVERSE(x)", "snowflake": "ARRAY_REVERSE(x)", "duckdb": "ARRAY_REVERSE(x)", }, write={ "clickhouse": "arrayReverse(x)", "bigquery": 
"ARRAY_REVERSE(x)", "snowflake": "ARRAY_REVERSE(x)", "duckdb": "ARRAY_REVERSE(x)", }, ) self.validate_all( "ARRAY_SLICE(x, 1, 3)", read={ "clickhouse": "arraySlice(x, 1, 3)", "bigquery": "ARRAY_SLICE(x, 1, 3)", "snowflake": "ARRAY_SLICE(x, 1, 3)", "duckdb": "ARRAY_SLICE(x, 1, 3)", "spark2": "SLICE(x, 1, 3)", "spark": "SLICE(x, 1, 3)", "databricks": "SLICE(x, 1, 3)", "presto": "SLICE(x, 1, 3)", "trino": "SLICE(x, 1, 3)", }, write={ "clickhouse": "arraySlice(x, 1, 3)", "bigquery": "ARRAY_SLICE(x, 1, 3)", "snowflake": "ARRAY_SLICE(x, 1, 3)", "duckdb": "ARRAY_SLICE(x, 1, 3)", "spark2": "SLICE(x, 1, 3)", "spark": "SLICE(x, 1, 3)", "databricks": "SLICE(x, 1, 3)", "presto": "SLICE(x, 1, 3)", "trino": "SLICE(x, 1, 3)", }, ) self.validate_all( "SORT_ARRAY(x)", write={ "duckdb": "LIST_SORT(x)", "hive": "SORT_ARRAY(x)", "presto": "ARRAY_SORT(x)", "snowflake": "ARRAY_SORT(x)", "spark": "SORT_ARRAY(x)", }, ) # Test basic syntax transpilation for ARRAY_PREPEND self.validate_all( "ARRAY_PREPEND(arr, x)", read={ "duckdb": "LIST_PREPEND(x, arr)", "postgres": "ARRAY_PREPEND(x, arr)", }, write={ "duckdb": "LIST_PREPEND(x, arr)", "postgres": "ARRAY_PREPEND(x, arr)", }, ) # Test basic syntax transpilation for array creation semantics self.validate_all( "ARRAY_APPEND(arr, x)", read={ "duckdb": "LIST_APPEND(arr, x)", "postgres": "ARRAY_APPEND(arr, x)", }, write={ "duckdb": "LIST_APPEND(arr, x)", "postgres": "ARRAY_APPEND(arr, x)", }, ) # Test NULL propagation semantics: NULL-propagating dialects → array-creating dialects for source_dialect in ("snowflake", "databricks", "spark"): with self.subTest(f"NULL propagation: {source_dialect} → DuckDB/PostgreSQL"): expr = parse_one("ARRAY_APPEND(arr, x)", dialect=source_dialect) self.assertEqual( expr.sql("duckdb"), "CASE WHEN arr IS NULL THEN NULL ELSE LIST_APPEND(arr, x) END", ) self.assertEqual( expr.sql("postgres"), "CASE WHEN arr IS NULL THEN NULL ELSE ARRAY_APPEND(arr, x) END", ) # Test array creation semantics: array-creating dialects → 
NULL-propagating dialects for source_dialect, source_sql in ( ("duckdb", "LIST_APPEND(arr, x)"), ("postgres", "ARRAY_APPEND(arr, x)"), ): with self.subTest(f"Array creation: {source_dialect} → Snowflake/Databricks/Spark"): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual( expr.sql("snowflake"), "ARRAY_APPEND(COALESCE(arr, []), x)", ) self.assertEqual( expr.sql("databricks"), "ARRAY_APPEND(COALESCE(arr, ARRAY()), x)", ) self.assertEqual( expr.sql("spark"), "ARRAY_APPEND(COALESCE(arr, ARRAY()), x)", ) # Test identity transpilation (should NOT add wrappers) for dialect, sql in ( ("duckdb", "LIST_APPEND(arr, x)"), ("postgres", "ARRAY_APPEND(arr, x)"), ("snowflake", "ARRAY_APPEND(arr, x)"), ("databricks", "ARRAY_APPEND(arr, x)"), ("spark", "ARRAY_APPEND(arr, x)"), ): with self.subTest(f"Identity: {dialect} → {dialect}"): expr = parse_one(sql, dialect=dialect) self.assertEqual(expr.sql(dialect), sql) # Test NULL propagation semantics for ARRAY_PREPEND: NULL-propagating dialects → array-creating dialects for source_dialect in ("snowflake", "databricks", "spark"): with self.subTest( f"ARRAY_PREPEND NULL propagation: {source_dialect} → DuckDB/PostgreSQL" ): expr = parse_one("ARRAY_PREPEND(arr, x)", dialect=source_dialect) self.assertEqual( expr.sql("duckdb"), "CASE WHEN arr IS NULL THEN NULL ELSE LIST_PREPEND(x, arr) END", ) self.assertEqual( expr.sql("postgres"), "CASE WHEN arr IS NULL THEN NULL ELSE ARRAY_PREPEND(x, arr) END", ) # Test ARRAY_PREPEND array creation semantics: array-creating dialects → NULL-propagating dialects for source_dialect, source_sql in ( ("duckdb", "LIST_PREPEND(x, arr)"), ("postgres", "ARRAY_PREPEND(x, arr)"), ): with self.subTest( f"ARRAY_PREPEND array creation: {source_dialect} → Snowflake/Databricks/Spark" ): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual( expr.sql("snowflake"), "ARRAY_PREPEND(COALESCE(arr, []), x)", ) self.assertEqual( expr.sql("databricks"), "ARRAY_PREPEND(COALESCE(arr, ARRAY()), 
x)", ) self.assertEqual( expr.sql("spark"), "ARRAY_PREPEND(COALESCE(arr, ARRAY()), x)", ) # Test ARRAY_PREPEND identity transpilation (should NOT add wrappers) for dialect, sql in ( ("duckdb", "LIST_PREPEND(x, arr)"), ("postgres", "ARRAY_PREPEND(x, arr)"), ("snowflake", "ARRAY_PREPEND(arr, x)"), ("databricks", "ARRAY_PREPEND(arr, x)"), ("spark", "ARRAY_PREPEND(arr, x)"), ): with self.subTest(f"ARRAY_PREPEND identity: {dialect} → {dialect}"): expr = parse_one(sql, dialect=dialect) self.assertEqual(expr.sql(dialect), sql) # Test NULL propagation semantics for ARRAY_CAT: NULL-propagating dialects → NULL-skipping dialects for source_dialect, source_sql in ( ("snowflake", "ARRAY_CAT(arr1, arr2)"), ("redshift", "ARRAY_CONCAT(arr1, arr2)"), ): with self.subTest(f"ARRAY_CAT NULL propagation: {source_dialect} → DuckDB/PostgreSQL"): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual( expr.sql("duckdb"), "CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL ELSE LIST_CONCAT(arr1, arr2) END", ) self.assertEqual( expr.sql("postgres"), "CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL ELSE ARRAY_CAT(arr1, arr2) END", ) # Test NULL skipping semantics: NULL-skipping dialects → NULL-propagating dialects for source_dialect, source_sql in ( ("duckdb", "LIST_CONCAT(arr1, arr2)"), ("postgres", "ARRAY_CAT(arr1, arr2)"), ): with self.subTest(f"ARRAY_CAT NULL skipping: {source_dialect} → Snowflake/Redshift"): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual( expr.sql("snowflake"), "ARRAY_CAT(COALESCE(arr1, []), COALESCE(arr2, []))", ) self.assertEqual( expr.sql("redshift"), "ARRAY_CONCAT(COALESCE(arr1, ARRAY()), COALESCE(arr2, ARRAY()))", ) # Test ARRAY_CAT identity transpilation (should NOT add wrappers) for dialect, sql in ( ("duckdb", "LIST_CONCAT(arr1, arr2)"), ("postgres", "ARRAY_CAT(arr1, arr2)"), ("snowflake", "ARRAY_CAT(arr1, arr2)"), ("redshift", "ARRAY_CONCAT(arr1, arr2)"), ): with self.subTest(f"ARRAY_CAT identity: {dialect} → {dialect}"): 
expr = parse_one(sql, dialect=dialect) self.assertEqual(expr.sql(dialect), sql) # Test ARRAY_CAT with variadic arguments (3+ arrays) # Verify that ALL arguments are checked in NULL condition for source_dialect, source_sql, expected in ( ( "snowflake", "ARRAY_CAT(arr1, arr2, arr3)", "CASE WHEN arr1 IS NULL OR arr2 IS NULL OR arr3 IS NULL THEN NULL ELSE LIST_CONCAT(arr1, arr2, arr3) END", ), ( "redshift", "ARRAY_CONCAT(arr1, arr2, arr3)", "CASE WHEN arr1 IS NULL OR arr2 IS NULL OR arr3 IS NULL THEN NULL ELSE LIST_CONCAT(arr1, arr2, arr3) END", ), ): with self.subTest(f"ARRAY_CAT variadic NULL propagation: {source_dialect} → DuckDB"): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual(expr.sql("duckdb"), expected) # Test variadic COALESCE wrapping: ALL args should be wrapped for source_dialect, source_sql, expected_snowflake, expected_redshift in ( ( "duckdb", "LIST_CONCAT(arr1, arr2, arr3)", "ARRAY_CAT(COALESCE(arr1, []), ARRAY_CAT(COALESCE(arr2, []), COALESCE(arr3, [])))", "ARRAY_CONCAT(COALESCE(arr1, ARRAY()), ARRAY_CONCAT(COALESCE(arr2, ARRAY()), COALESCE(arr3, ARRAY())))", ), ( "postgres", "ARRAY_CAT(arr1, arr2, arr3)", "ARRAY_CAT(COALESCE(arr1, []), ARRAY_CAT(COALESCE(arr2, []), COALESCE(arr3, [])))", "ARRAY_CONCAT(COALESCE(arr1, ARRAY()), ARRAY_CONCAT(COALESCE(arr2, ARRAY()), COALESCE(arr3, ARRAY())))", ), ): with self.subTest( f"ARRAY_CAT variadic COALESCE wrapping: {source_dialect} → Snowflake/Redshift" ): expr = parse_one(source_sql, dialect=source_dialect) self.assertEqual(expr.sql("snowflake"), expected_snowflake) self.assertEqual(expr.sql("redshift"), expected_redshift) # Test PostgreSQL → Snowflake (2 args) expr = parse_one("ARRAY_CAT(arr1, arr2)", dialect="postgres") self.assertEqual( expr.sql("snowflake"), "ARRAY_CAT(COALESCE(arr1, []), COALESCE(arr2, []))", ) # Test edge case: array literal optimization (no wrapper needed) expr = parse_one("ARRAY_CAT([1, 2], arr2)", dialect="snowflake") self.assertEqual(expr.sql("duckdb"), 
"LIST_CONCAT([1, 2], arr2)") # Test edge case: single argument expr = parse_one("ARRAY_CAT(arr1)", dialect="snowflake") self.assertEqual(expr.sql("duckdb"), "LIST_CONCAT(arr1)") # Test ARRAY_MAX transpilation across dialects self.validate_all( "ARRAY_MAX(x)", read={ "athena": "array_max(x)", "clickhouse": "arrayMax(x)", "databricks": "array_max(x)", "duckdb": "list_max(x)", "presto": "array_max(x)", "snowflake": "ARRAY_MAX(x)", "spark": "array_max(x)", "trino": "array_max(x)", }, write={ "athena": "ARRAY_MAX(x)", "clickhouse": "arrayMax(x)", "databricks": "ARRAY_MAX(x)", "duckdb": "LIST_MAX(x)", "presto": "ARRAY_MAX(x)", "snowflake": "ARRAY_MAX(x)", "spark": "ARRAY_MAX(x)", "trino": "ARRAY_MAX(x)", }, ) # Test ARRAY_MIN transpilation across dialects self.validate_all( "ARRAY_MIN(x)", read={ "athena": "array_min(x)", "clickhouse": "arrayMin(x)", "databricks": "array_min(x)", "duckdb": "list_min(x)", "presto": "array_min(x)", "snowflake": "ARRAY_MIN(x)", "spark": "array_min(x)", "trino": "array_min(x)", }, write={ "athena": "ARRAY_MIN(x)", "clickhouse": "arrayMin(x)", "databricks": "ARRAY_MIN(x)", "duckdb": "LIST_MIN(x)", "presto": "ARRAY_MIN(x)", "snowflake": "ARRAY_MIN(x)", "spark": "ARRAY_MIN(x)", "trino": "ARRAY_MIN(x)", }, ) self.validate_all( "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", read={ "spark": "SELECT array_except(array(1, 2, 3), array(2))", "databricks": "SELECT array_except(array(1, 2, 3), array(2))", }, write={ "snowflake": "SELECT ARRAY_EXCEPT([1, 2, 3], [2])", "spark": "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", "databricks": "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))", "trino": "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])", "presto": "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])", "athena": "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])", "duckdb": "SELECT CASE WHEN [1, 2, 3] IS NULL OR [2] IS NULL THEN NULL ELSE LIST_FILTER(LIST_DISTINCT([1, 2, 3]), e -> LENGTH(LIST_FILTER([2], x -> x IS NOT DISTINCT FROM e)) = 0) END", }, ) 
def test_order_by(self):
    # Three ORDER BY behaviours are pinned here: a dangling comma after the
    # last term, per-dialect rendering of explicit NULLS FIRST/LAST, and how
    # ORDER BY ALL is parsed.
    self.validate_identity(
        "SELECT c FROM t ORDER BY a, b,",
        "SELECT c FROM t ORDER BY a, b",
    )

    explicit_nulls = "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname"
    rewritten_nulls = "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST"
    expected_by_dialect = {
        "bigquery": explicit_nulls,
        "duckdb": rewritten_nulls,
        "presto": rewritten_nulls,
        "hive": explicit_nulls,
        "spark": explicit_nulls,
    }
    self.validate_all(explicit_nulls, write=expected_by_dialect)

    order_by_all_sql = "SELECT * FROM t ORDER BY ALL"

    # Default dialect: the ordered term is asserted to be a Column.
    default_ordered = self.validate_identity(order_by_all_sql).find(exp.Ordered)
    default_ordered.this.assert_is(exp.Column)

    # These dialects are asserted to yield a Var for the same text.
    for dialect in ("duckdb", "spark", "databricks"):
        with self.subTest(f"Testing ORDER BY ALL in {dialect}"):
            ordered = parse_one(order_by_all_sql, read=dialect).find(exp.Ordered)
            ordered.this.assert_is(exp.Var)


def test_json(self):
    # Each row is (generic SQL, keyword arguments for validate_all). Rows are
    # replayed in order, one validate_all call per row.
    extraction_cases = (
        (
            """JSON_EXTRACT(x, '$["a b"]')""",
            {
                "write": {
                    "": """JSON_EXTRACT(x, '$["a b"]')""",
                    "bigquery": """JSON_EXTRACT(x, '$[\\'a b\\']')""",
                    "clickhouse": "JSONExtractString(x, 'a b')",
                    "duckdb": """x -> '$."a b"'""",
                    "mysql": """JSON_EXTRACT(x, '$."a b"')""",
                    "postgres": "JSON_EXTRACT_PATH(x, 'a b')",
                    "presto": """JSON_EXTRACT(x, '$["a b"]')""",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'a b')",
                    "snowflake": """GET_PATH(PARSE_JSON(x), '["a b"]')""",
                    "spark": """GET_JSON_OBJECT(x, '$[\\'a b\\']')""",
                    "sqlite": """x -> '$."a b"'""",
                    "trino": """JSON_EXTRACT(x, '$["a b"]')""",
                    "tsql": """ISNULL(JSON_QUERY(x, '$."a b"'), JSON_VALUE(x, '$."a b"'))""",
                },
            },
        ),
        (
            "JSON_EXTRACT(x, '$.y')",
            {
                "read": {
                    "bigquery": "JSON_EXTRACT(x, '$.y')",
                    "duckdb": "x -> 'y'",
                    "doris": "JSON_EXTRACT(x, '$.y')",
                    "mysql": "JSON_EXTRACT(x, '$.y')",
                    "postgres": "x->'y'",
                    "presto": "JSON_EXTRACT(x, '$.y')",
                    "snowflake": "GET_PATH(x, 'y')",
                    "sqlite": "x -> '$.y'",
                    "starrocks": "x -> '$.y'",
                },
                "write": {
                    "bigquery": "JSON_EXTRACT(x, '$.y')",
                    "clickhouse": "JSONExtractString(x, 'y')",
                    "doris": "JSON_EXTRACT(x, '$.y')",
                    "duckdb": "x -> '$.y'",
                    "mysql": "JSON_EXTRACT(x, '$.y')",
                    "oracle": "JSON_EXTRACT(x, '$.y')",
                    "postgres": "JSON_EXTRACT_PATH(x, 'y')",
                    "presto": "JSON_EXTRACT(x, '$.y')",
                    "snowflake": "GET_PATH(PARSE_JSON(x), 'y')",
                    "spark": "GET_JSON_OBJECT(x, '$.y')",
                    "sqlite": "x -> '$.y'",
                    "starrocks": "x -> '$.y'",
                    "tsql": "ISNULL(JSON_QUERY(x, '$.y'), JSON_VALUE(x, '$.y'))",
                },
            },
        ),
        (
            "JSON_EXTRACT_SCALAR(x, '$.y')",
            {
                "read": {
                    "bigquery": "JSON_EXTRACT_SCALAR(x, '$.y')",
                    "clickhouse": "JSONExtractString(x, 'y')",
                    "duckdb": "x ->> 'y'",
                    "postgres": "x ->> 'y'",
                    "presto": "JSON_EXTRACT_SCALAR(x, '$.y')",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "spark": "GET_JSON_OBJECT(x, '$.y')",
                    "snowflake": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "sqlite": "x ->> '$.y'",
                },
                "write": {
                    "bigquery": "JSON_EXTRACT_SCALAR(x, '$.y')",
                    "clickhouse": "JSONExtractString(x, 'y')",
                    "duckdb": "x ->> '$.y'",
                    "postgres": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "presto": "JSON_EXTRACT_SCALAR(x, '$.y')",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "snowflake": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "spark": "GET_JSON_OBJECT(x, '$.y')",
                    "sqlite": "x ->> '$.y'",
                    "tsql": "ISNULL(JSON_QUERY(x, '$.y'), JSON_VALUE(x, '$.y'))",
                },
            },
        ),
        (
            "JSON_EXTRACT(x, '$.y[0].z')",
            {
                "read": {
                    "bigquery": "JSON_EXTRACT(x, '$.y[0].z')",
                    "duckdb": "x -> '$.y[0].z'",
                    "doris": "JSON_EXTRACT(x, '$.y[0].z')",
                    "mysql": "JSON_EXTRACT(x, '$.y[0].z')",
                    "presto": "JSON_EXTRACT(x, '$.y[0].z')",
                    "snowflake": "GET_PATH(x, 'y[0].z')",
                    "sqlite": "x -> '$.y[0].z'",
                    "starrocks": "x -> '$.y[0].z'",
                },
                "write": {
                    "bigquery": "JSON_EXTRACT(x, '$.y[0].z')",
                    "clickhouse": "JSONExtractString(x, 'y', 1, 'z')",
                    "doris": "JSON_EXTRACT(x, '$.y[0].z')",
                    "duckdb": "x -> '$.y[0].z'",
                    "mysql": "JSON_EXTRACT(x, '$.y[0].z')",
                    "oracle": "JSON_EXTRACT(x, '$.y[0].z')",
                    "postgres": "JSON_EXTRACT_PATH(x, 'y', '0', 'z')",
                    "presto": "JSON_EXTRACT(x, '$.y[0].z')",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'y', '0', 'z')",
                    "snowflake": "GET_PATH(PARSE_JSON(x), 'y[0].z')",
                    "spark": "GET_JSON_OBJECT(x, '$.y[0].z')",
                    "sqlite": "x -> '$.y[0].z'",
                    "starrocks": "x -> '$.y[0].z'",
                    "tsql": "ISNULL(JSON_QUERY(x, '$.y[0].z'), JSON_VALUE(x, '$.y[0].z'))",
                },
            },
        ),
        (
            "JSON_EXTRACT_SCALAR(x, '$.y[0].z')",
            {
                "read": {
                    "bigquery": "JSON_EXTRACT_SCALAR(x, '$.y[0].z')",
                    "clickhouse": "JSONExtractString(x, 'y', 1, 'z')",
                    "duckdb": "x ->> '$.y[0].z'",
                    "presto": "JSON_EXTRACT_SCALAR(x, '$.y[0].z')",
                    "snowflake": "JSON_EXTRACT_PATH_TEXT(x, 'y[0].z')",
                    "spark": 'GET_JSON_OBJECT(x, "$.y[0].z")',
                    "sqlite": "x ->> '$.y[0].z'",
                },
                "write": {
                    "bigquery": "JSON_EXTRACT_SCALAR(x, '$.y[0].z')",
                    "clickhouse": "JSONExtractString(x, 'y', 1, 'z')",
                    "duckdb": "x ->> '$.y[0].z'",
                    "postgres": "JSON_EXTRACT_PATH_TEXT(x, 'y', '0', 'z')",
                    "presto": "JSON_EXTRACT_SCALAR(x, '$.y[0].z')",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'y', '0', 'z')",
                    "snowflake": "JSON_EXTRACT_PATH_TEXT(x, 'y[0].z')",
                    "spark": "GET_JSON_OBJECT(x, '$.y[0].z')",
                    "sqlite": "x ->> '$.y[0].z'",
                    "tsql": "ISNULL(JSON_QUERY(x, '$.y[0].z'), JSON_VALUE(x, '$.y[0].z'))",
                },
            },
        ),
        # The array-wildcard path is validated twice. This row expects
        # UnsupportedError from dialects that cannot express it...
        (
            "JSON_EXTRACT(x, '$.y[*]')",
            {
                "write": {
                    "bigquery": UnsupportedError,
                    "clickhouse": UnsupportedError,
                    "duckdb": "x -> '$.y[*]'",
                    "mysql": "JSON_EXTRACT(x, '$.y[*]')",
                    "postgres": UnsupportedError,
                    "presto": "JSON_EXTRACT(x, '$.y[*]')",
                    "redshift": UnsupportedError,
                    "snowflake": UnsupportedError,
                    "spark": "GET_JSON_OBJECT(x, '$.y[*]')",
                    "sqlite": UnsupportedError,
                    "tsql": UnsupportedError,
                },
            },
        ),
        # ...and this row expects the SQL those same dialects emit with the
        # wildcard dropped.
        # NOTE(review): presumably validate_all treats an UnsupportedError
        # expectation differently from a string one - confirm in the helper.
        (
            "JSON_EXTRACT(x, '$.y[*]')",
            {
                "write": {
                    "bigquery": "JSON_EXTRACT(x, '$.y')",
                    "clickhouse": "JSONExtractString(x, 'y')",
                    "postgres": "JSON_EXTRACT_PATH(x, 'y')",
                    "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'y')",
                    "snowflake": "GET_PATH(PARSE_JSON(x), 'y')",
                    "sqlite": "x -> '$.y'",
                    "tsql": "ISNULL(JSON_QUERY(x, '$.y'), JSON_VALUE(x, '$.y'))",
                },
            },
        ),
        (
            "JSON_EXTRACT(x, '$.y.*')",
            {
                "write": {
                    "bigquery": UnsupportedError,
                    "clickhouse": UnsupportedError,
                    "duckdb": "x -> '$.y.*'",
                    "mysql": "JSON_EXTRACT(x, '$.y.*')",
                    "postgres": UnsupportedError,
                    "presto": "JSON_EXTRACT(x, '$.y.*')",
                    "redshift": UnsupportedError,
                    "snowflake": UnsupportedError,
                    "spark": UnsupportedError,
                    "sqlite": UnsupportedError,
                    "tsql": UnsupportedError,
                },
            },
        ),
    )
    for sql, expectations in extraction_cases:
        self.validate_all(sql, **expectations)

    # A key made only of digits must survive a same-dialect round trip.
    digit_key_sql = """select '{"0": "v"}' -> '0'"""
    digit_key_expected = """SELECT '{"0": "v"}' -> '0'"""
    for dialect in ("duckdb", "starrocks"):
        with self.subTest(f"Generating json extraction with digit-prefixed key ({dialect})"):
            generated = parse_one(digit_key_sql, read=dialect).sql(dialect=dialect)
            self.assertEqual(generated, digit_key_expected)
def test_cross_join(self):
    # Each row is (generic SQL, expected output per write dialect); rows are
    # passed to validate_all in order. UnsupportedError marks dialects that are
    # expected to reject the construct.
    unnest_cases = (
        (
            "SELECT a FROM x CROSS JOIN UNNEST(y) AS t (a)",
            {
                "drill": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            },
        ),
        (
            "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t (a, b)",
            {
                "drill": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
                "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y, z) AS t(a, b)",
                "spark": "SELECT a, b FROM x LATERAL VIEW INLINE(ARRAYS_ZIP(y, z)) t AS a, b",
            },
        ),
        (
            "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t (a)",
            {
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) WITH ORDINALITY AS t(a)",
                "spark2": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, a",
                "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, a",
                "databricks": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, a",
            },
        ),
        # UNNEST without a column alias
        (
            "SELECT * FROM x CROSS JOIN UNNEST(y) AS t",
            {
                "presto": "SELECT * FROM x CROSS JOIN UNNEST(y) AS t",
                "spark": UnsupportedError,
                "databricks": UnsupportedError,
            },
        ),
        # UNNEST of a MAP object into multiple columns under a single alias
        (
            "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t (a, b)",
            {
                "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a, b)",
                "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a, b",
                "hive": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a, b",
            },
        ),
        # UNNEST of several expressions, each mapped to its own alias
        (
            "SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 'pig')) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)",
            {
                "presto": "SELECT numbers, animals, n, a FROM (SELECT ARRAY[2, 5] AS numbers, ARRAY['dog', 'cat', 'bird'] AS animals UNION ALL SELECT ARRAY[7, 8, 9], ARRAY['cow', 'pig']) AS x CROSS JOIN UNNEST(numbers, animals) AS t(n, a)",
                "spark": "SELECT numbers, animals, n, a FROM (SELECT ARRAY(2, 5) AS numbers, ARRAY('dog', 'cat', 'bird') AS animals UNION ALL SELECT ARRAY(7, 8, 9), ARRAY('cow', 'pig')) AS x LATERAL VIEW INLINE(ARRAYS_ZIP(numbers, animals)) t AS n, a",
                "hive": UnsupportedError,
            },
        ),
        # UNNEST of one column into more than two aliases (STRUCT)
        (
            "SELECT a, b, c, d, e FROM x CROSS JOIN UNNEST(y) AS t(a, b, c, d)",
            {
                "presto": "SELECT a, b, c, d, e FROM x CROSS JOIN UNNEST(y) AS t(a, b, c, d)",
                "spark": UnsupportedError,
                "hive": UnsupportedError,
            },
        ),
    )
    for sql, expected_by_dialect in unnest_cases:
        self.validate_all(sql, write=expected_by_dialect)
def test_multiple_chained_unnest(self):
    # Three chained CROSS JOIN UNNESTs: presto keeps the input text, while
    # spark and hive are expected to emit one LATERAL VIEW EXPLODE per join.
    chained_unnest = "SELECT * FROM x CROSS JOIN UNNEST(a) AS j(lista) CROSS JOIN UNNEST(b) AS k(listb) CROSS JOIN UNNEST(c) AS l(listc)"
    chained_lateral_view = "SELECT * FROM x LATERAL VIEW EXPLODE(a) j AS lista LATERAL VIEW EXPLODE(b) k AS listb LATERAL VIEW EXPLODE(c) l AS listc"
    expected_by_dialect = {
        "presto": chained_unnest,
        "spark": chained_lateral_view,
        "hive": chained_lateral_view,
    }
    self.validate_all(chained_unnest, write=expected_by_dialect)


def test_lateral_subquery(self):
    # Both JOIN LATERAL subquery forms must round-trip unchanged.
    for lateral_sql in (
        "SELECT art FROM tbl1 INNER JOIN LATERAL (SELECT art FROM tbl2) AS tbl2 ON tbl1.art = tbl2.art",
        "SELECT * FROM tbl AS t LEFT JOIN LATERAL (SELECT * FROM b WHERE b.t_id = t.t_id) AS t ON TRUE",
    ):
        self.validate_identity(lateral_sql)


def test_set_operators(self):
    # UNION / INTERSECT / EXCEPT across dialects, including which dialects are
    # expected to spell out DISTINCT. Rows are (generic SQL, validate_all
    # keyword arguments) and are replayed in order.
    union = "SELECT * FROM a UNION SELECT * FROM b"
    union_distinct = "SELECT * FROM a UNION DISTINCT SELECT * FROM b"
    union_all = "SELECT * FROM a UNION ALL SELECT * FROM b"
    intersect = "SELECT * FROM a INTERSECT SELECT * FROM b"
    intersect_distinct = "SELECT * FROM a INTERSECT DISTINCT SELECT * FROM b"
    intersect_all = "SELECT * FROM a INTERSECT ALL SELECT * FROM b"
    except_ = "SELECT * FROM a EXCEPT SELECT * FROM b"
    except_distinct = "SELECT * FROM a EXCEPT DISTINCT SELECT * FROM b"
    except_all = "SELECT * FROM a EXCEPT ALL SELECT * FROM b"

    engines = ("bigquery", "clickhouse", "duckdb", "presto", "spark")

    def distinct_aware(distinct_sql, bare_sql):
        # bigquery/clickhouse get the DISTINCT spelling, the rest the bare one.
        return {
            "bigquery": distinct_sql,
            "clickhouse": distinct_sql,
            "duckdb": bare_sql,
            "presto": bare_sql,
            "spark": bare_sql,
        }

    set_operator_cases = (
        (
            "SELECT * FROM a UNION SELECT * FROM b ORDER BY x LIMIT 1",
            {
                "write": {
                    "": "SELECT * FROM a UNION SELECT * FROM b ORDER BY x LIMIT 1",
                    "clickhouse": "SELECT * FROM (SELECT * FROM a UNION DISTINCT SELECT * FROM b) AS _l_0 ORDER BY x NULLS FIRST LIMIT 1",
                    "tsql": "SELECT TOP 1 * FROM (SELECT * FROM a UNION SELECT * FROM b) AS _l_0 ORDER BY x",
                },
            },
        ),
        (
            union,
            {
                "read": distinct_aware(union_distinct, union),
                "write": {
                    "bigquery": union_distinct,
                    "drill": union,
                    "duckdb": union,
                    "presto": union,
                    "spark": union,
                },
            },
        ),
        (
            union_all,
            {
                "read": dict.fromkeys(engines, union_all),
                "write": dict.fromkeys(("bigquery", "duckdb", "presto", "spark"), union_all),
            },
        ),
        (
            intersect,
            {
                "read": distinct_aware(intersect_distinct, intersect),
                "write": distinct_aware(intersect_distinct, intersect),
            },
        ),
        (
            except_,
            {
                "read": distinct_aware(except_distinct, except_),
                "write": distinct_aware(except_distinct, except_),
            },
        ),
        (
            union_distinct,
            {
                "write": {
                    "bigquery": union_distinct,
                    "duckdb": union,
                    "presto": union,
                    "spark": union,
                },
            },
        ),
        (
            intersect_distinct,
            {"write": distinct_aware(intersect_distinct, intersect)},
        ),
        (
            intersect_all,
            {
                "write": {
                    "bigquery": intersect_all,
                    "clickhouse": intersect,
                    "duckdb": intersect_all,
                    "presto": intersect_all,
                    "spark": intersect_all,
                },
            },
        ),
        (
            except_distinct,
            {"write": distinct_aware(except_distinct, except_)},
        ),
        (
            except_all,
            {"read": dict.fromkeys(engines, except_all)},
        ),
    )
    for sql, expectations in set_operator_cases:
        self.validate_all(sql, **expectations)
def test_operators(self):
    # Cross-dialect expectations for operator-like expressions: LIKE/ILIKE,
    # string position lookups, CONCAT variants, IF/CASE, LEVENSHTEIN, array
    # filters, division, MOD and ARRAY_REMOVE. Uniform expectation tables are
    # built with dict.fromkeys; key order matches the hand-written tables.
    for identity_sql in (
        "some.column LIKE 'foo' || another.column || 'bar' || LOWER(x)",
        "some.column LIKE 'foo' + another.column + 'bar'",
    ):
        self.validate_identity(identity_sql)

    self.validate_all("LIKE(x, 'z')", write={"": "'z' LIKE x"})

    piped_concat = "a || b || c"
    self.validate_all(
        "CONCAT(a, b, c)",
        write={
            "": "CONCAT(a, b, c)",
            "redshift": piped_concat,
            "sqlite": piped_concat,
        },
    )

    ilike = "x ILIKE '%y'"
    lowered_like = "LOWER(x) LIKE LOWER('%y')"
    self.validate_all(
        ilike,
        read=dict.fromkeys(("clickhouse", "duckdb", "postgres", "snowflake"), ilike),
        write={
            "bigquery": lowered_like,
            "clickhouse": ilike,
            "drill": "x `ILIKE` '%y'",
            "duckdb": ilike,
            "hive": lowered_like,
            "mysql": lowered_like,
            "oracle": lowered_like,
            "postgres": ilike,
            "presto": lowered_like,
            "snowflake": ilike,
            "spark": ilike,
            "sqlite": lowered_like,
            "starrocks": lowered_like,
            "trino": lowered_like,
            "doris": lowered_like,
        },
    )

    # --- STR_POSITION with two arguments -------------------------------
    str_position = "STR_POSITION(haystack, needle)"
    strpos = "STRPOS(haystack, needle)"
    instr = "INSTR(haystack, needle)"
    locate = "LOCATE(needle, haystack)"
    charindex = "CHARINDEX(needle, haystack)"
    position_in = "POSITION(needle IN haystack)"

    two_arg_reads = (
        dict.fromkeys(
            (
                "athena",
                "clickhouse",
                "databricks",
                "drill",
                "duckdb",
                "materialize",
                "mysql",
                "postgres",
                "presto",
                "redshift",
                "risingwave",
                "snowflake",
                "spark",
                "spark2",
                "teradata",
                "trino",
            ),
            "POSITION(needle in haystack)",
        ),
        {
            "clickhouse": "POSITION(haystack, needle)",
            "databricks": "POSITION(needle, haystack)",
            "snowflake": "POSITION(needle, haystack)",
            "spark2": "POSITION(needle, haystack)",
        },
        dict.fromkeys(
            ("athena", "bigquery", "drill", "duckdb", "postgres", "presto", "redshift", "trino"),
            strpos,
        ),
        dict.fromkeys(
            (
                "bigquery",
                "databricks",
                "doris",
                "duckdb",
                "hive",
                "mysql",
                "oracle",
                "spark",
                "spark2",
                "sqlite",
                "starrocks",
                "teradata",
            ),
            instr,
        ),
        dict.fromkeys(
            (
                "clickhouse",
                "databricks",
                "doris",
                "hive",
                "mysql",
                "spark",
                "spark2",
                "starrocks",
                "teradata",
            ),
            locate,
        ),
        dict.fromkeys(("athena", "databricks", "snowflake", "tsql"), charindex),
    )
    for accepted_inputs in two_arg_reads:
        self.validate_all(str_position, read=accepted_inputs)

    self.validate_all(
        str_position,
        read={
            "tableau": "FIND(haystack, needle)",
        },
        write={
            "athena": strpos,
            "bigquery": instr,
            "clickhouse": "POSITION(haystack, needle)",
            "databricks": locate,
            "doris": locate,
            "drill": strpos,
            "duckdb": strpos,
            "hive": locate,
            "materialize": position_in,
            "mysql": locate,
            "oracle": instr,
            "postgres": position_in,
            "presto": strpos,
            "redshift": position_in,
            "risingwave": position_in,
            "snowflake": charindex,
            "spark": locate,
            "spark2": locate,
            "sqlite": instr,
            "tableau": "FIND(haystack, needle)",
            "teradata": instr,
            "trino": strpos,
            "tsql": charindex,
        },
    )

    # --- STR_POSITION with a start position ----------------------------
    str_position_from = "STR_POSITION(haystack, needle, position)"
    instr_from = "INSTR(haystack, needle, position)"
    locate_from = "LOCATE(needle, haystack, position)"
    charindex_from = "CHARINDEX(needle, haystack, position)"

    three_arg_reads = (
        {
            "clickhouse": "POSITION(haystack, needle, position)",
            "databricks": "POSITION(needle, haystack, position)",
            "snowflake": "POSITION(needle, haystack, position)",
            "spark": "POSITION(needle, haystack, position)",
            "spark2": "POSITION(needle, haystack, position)",
        },
        dict.fromkeys(
            (
                "doris",
                "hive",
                "mysql",
                "spark",
                "spark2",
                "starrocks",
                "teradata",
                "clickhouse",
                "databricks",
            ),
            locate_from,
        ),
        dict.fromkeys(("bigquery", "doris", "oracle", "teradata"), instr_from),
        dict.fromkeys(("databricks", "snowflake", "tsql"), charindex_from),
    )
    for accepted_inputs in three_arg_reads:
        self.validate_all(str_position_from, read=accepted_inputs)

    # Dialects without a native start-position argument are expected to search
    # a SUBSTRING and shift the hit back by `position - 1`.
    if_strpos = "IF(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)"
    case_position = "CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM position)) = 0 THEN 0 ELSE POSITION(needle IN SUBSTRING(haystack FROM position)) + position - 1 END"
    self.validate_all(
        str_position_from,
        write={
            "athena": if_strpos,
            "bigquery": instr_from,
            "clickhouse": "POSITION(haystack, needle, position)",
            "databricks": locate_from,
            "doris": locate_from,
            "drill": "`IF`(STRPOS(SUBSTRING(haystack, position), needle) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle) + position - 1)",
            "duckdb": "CASE WHEN STRPOS(SUBSTRING(haystack, position), needle) = 0 THEN 0 ELSE STRPOS(SUBSTRING(haystack, position), needle) + position - 1 END",
            "hive": locate_from,
            "materialize": case_position,
            "mysql": locate_from,
            "oracle": instr_from,
            "postgres": case_position,
            "presto": if_strpos,
            "redshift": case_position,
            "risingwave": case_position,
            "snowflake": charindex_from,
            "spark": locate_from,
            "spark2": locate_from,
            "sqlite": "IIF(INSTR(SUBSTRING(haystack, position), needle) = 0, 0, INSTR(SUBSTRING(haystack, position), needle) + position - 1)",
            "tableau": "IF FIND(SUBSTRING(haystack, position), needle) = 0 THEN 0 ELSE FIND(SUBSTRING(haystack, position), needle) + position - 1 END",
            "teradata": instr_from,
            "trino": if_strpos,
            "tsql": charindex_from,
        },
    )

    # --- STR_POSITION with position and occurrence ---------------------
    instr_nth = "INSTR(haystack, needle, position, occurrence)"
    if_strpos_nth = "IF(STRPOS(SUBSTRING(haystack, position), needle, occurrence) = 0, 0, STRPOS(SUBSTRING(haystack, position), needle, occurrence) + position - 1)"
    self.validate_all(
        "STR_POSITION(haystack, needle, position, occurrence)",
        read=dict.fromkeys(("bigquery", "oracle", "teradata"), instr_nth),
        write={
            "bigquery": instr_nth,
            "oracle": instr_nth,
            "presto": if_strpos_nth,
            "tableau": "IF FINDNTH(SUBSTRING(haystack, position), needle, occurrence) = 0 THEN 0 ELSE FINDNTH(SUBSTRING(haystack, position), needle, occurrence) + position - 1 END",
            "teradata": instr_nth,
            "trino": if_strpos_nth,
        },
    )

    # --- CONCAT_WS / CONCAT ---------------------------------------------
    concat_ws_literals = "CONCAT_WS('-', 'a', 'b')"
    concat_ws_literals_cast = "CONCAT_WS('-', CAST('a' AS VARCHAR), CAST('b' AS VARCHAR))"
    self.validate_all(
        concat_ws_literals,
        write={
            "clickhouse": concat_ws_literals,
            "duckdb": concat_ws_literals,
            "presto": concat_ws_literals_cast,
            "hive": concat_ws_literals,
            "spark": concat_ws_literals,
            "trino": concat_ws_literals_cast,
        },
    )

    concat_ws_column = "CONCAT_WS('-', x)"
    concat_ws_column_cast = "CONCAT_WS('-', CAST(x AS VARCHAR))"
    self.validate_all(
        concat_ws_column,
        write={
            "clickhouse": concat_ws_column,
            "duckdb": concat_ws_column,
            "hive": concat_ws_column,
            "presto": concat_ws_column_cast,
            "spark": concat_ws_column,
            "trino": concat_ws_column_cast,
        },
    )

    self.validate_all(
        "CONCAT(a)",
        write={
            "clickhouse": "CONCAT(a)",
            "presto": "CAST(a AS VARCHAR)",
            "trino": "CAST(a AS VARCHAR)",
            "tsql": "a",
        },
    )
    self.validate_all(
        "CONCAT(COALESCE(a, ''))",
        read=dict.fromkeys(("drill", "duckdb", "postgres", "tsql"), "CONCAT(a)"),
    )

    # --- IF / CASE / indexing / literals --------------------------------
    if_call = "IF(x > 1, 1, 0)"
    self.validate_all(
        if_call,
        write={
            "drill": "`IF`(x > 1, 1, 0)",
            "duckdb": "CASE WHEN x > 1 THEN 1 ELSE 0 END",
            "presto": if_call,
            "hive": if_call,
            "spark": if_call,
            "tableau": "IF x > 1 THEN 1 ELSE 0 END",
        },
    )

    # Expressions expected to come out unchanged in every listed dialect.
    for unchanged_sql, dialects in (
        (
            "CASE WHEN 1 THEN x ELSE 0 END",
            ("drill", "duckdb", "presto", "hive", "spark", "tableau"),
        ),
        ("x[y]", ("drill", "duckdb", "presto", "hive", "spark")),
        ("""'["x"]'""", ("duckdb", "presto", "hive", "spark")),
    ):
        self.validate_all(unchanged_sql, write=dict.fromkeys(dialects, unchanged_sql))

    backtick_alias = "TRUE OR NULL AS `foo`"
    double_quote_alias = 'TRUE OR NULL AS "foo"'
    self.validate_all(
        'true or null as "foo"',
        write={
            "bigquery": backtick_alias,
            "drill": backtick_alias,
            "duckdb": double_quote_alias,
            "presto": double_quote_alias,
            "hive": backtick_alias,
            "spark": backtick_alias,
        },
    )

    select_if = "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) AS foo FROM baz"
    self.validate_all(
        "SELECT IF(COALESCE(bar, 0) = 1, TRUE, FALSE) as foo FROM baz",
        write={
            "bigquery": select_if,
            "duckdb": "SELECT CASE WHEN COALESCE(bar, 0) = 1 THEN TRUE ELSE FALSE END AS foo FROM baz",
            "presto": select_if,
            "hive": select_if,
            "spark": select_if,
        },
    )

    # --- LEVENSHTEIN ----------------------------------------------------
    levenshtein = "LEVENSHTEIN(col1, col2)"
    levenshtein_distance = "LEVENSHTEIN_DISTANCE(col1, col2)"
    levenshtein_spellings = {
        "bigquery": "EDIT_DISTANCE(col1, col2)",
        "clickhouse": "editDistance(col1, col2)",
        "drill": levenshtein_distance,
        "duckdb": levenshtein,
        "hive": levenshtein,
        "spark": levenshtein,
        "postgres": levenshtein,
        "presto": levenshtein_distance,
        "snowflake": "EDITDISTANCE(col1, col2)",
        "sqlite": "EDITDIST3(col1, col2)",
        "trino": levenshtein_distance,
    }
    # The read and write tables are identical; each call gets its own dict.
    self.validate_all(
        levenshtein,
        read=dict(levenshtein_spellings),
        write=dict(levenshtein_spellings),
    )

    # With extra arguments only postgres is expected to produce SQL.
    for extended_sql, postgres_sql in (
        ("LEVENSHTEIN(col1, col2, 1, 2, 3)", "LEVENSHTEIN(col1, col2, 1, 2, 3)"),
        (
            "LEVENSHTEIN(col1, col2, 1, 2, 3, 4)",
            "LEVENSHTEIN_LESS_EQUAL(col1, col2, 1, 2, 3, 4)",
        ),
    ):
        expected_by_dialect = dict.fromkeys(
            (
                "bigquery",
                "clickhouse",
                "drill",
                "duckdb",
                "hive",
                "spark",
                "postgres",
                "presto",
                "snowflake",
                "sqlite",
                "trino",
            ),
            UnsupportedError,
        )
        # Overwriting an existing key keeps its position in the dict.
        expected_by_dialect["postgres"] = postgres_sql
        self.validate_all(extended_sql, write=expected_by_dialect)

    levenshtein_coalesce = "LEVENSHTEIN(COALESCE(col1, col2), COALESCE(col2, col1))"
    levenshtein_distance_coalesce = (
        "LEVENSHTEIN_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))"
    )
    self.validate_all(
        "LEVENSHTEIN(coalesce(col1, col2), coalesce(col2, col1))",
        write={
            "bigquery": "EDIT_DISTANCE(COALESCE(col1, col2), COALESCE(col2, col1))",
            "duckdb": levenshtein_coalesce,
            "drill": levenshtein_distance_coalesce,
            "presto": levenshtein_distance_coalesce,
            "hive": levenshtein_coalesce,
            "spark": levenshtein_coalesce,
        },
    )

    # --- Array filters --------------------------------------------------
    filter_call = "FILTER(the_array, x -> x > 0)"
    array_filter_call = "ARRAY_FILTER(the_array, x -> x > 0)"
    self.validate_all(
        array_filter_call,
        write=dict.fromkeys(("presto", "hive", "spark"), filter_call),
    )
    self.validate_all(
        filter_call,
        write={
            "presto": filter_call,
            "starrocks": array_filter_call,
        },
    )

    # --- Division and modulo --------------------------------------------
    plain_division = "a / b"
    self.validate_all(
        plain_division,
        write={
            **dict.fromkeys(
                (
                    "bigquery",
                    "clickhouse",
                    "databricks",
                    "duckdb",
                    "hive",
                    "mysql",
                    "oracle",
                    "snowflake",
                    "spark",
                    "starrocks",
                ),
                plain_division,
            ),
            "drill": "CAST(a AS DOUBLE) / b",
            "postgres": "CAST(a AS DOUBLE PRECISION) / b",
            "presto": "CAST(a AS DOUBLE) / b",
            "redshift": "CAST(a AS DOUBLE PRECISION) / b",
            "sqlite": "CAST(a AS REAL) / b",
            "teradata": "CAST(a AS DOUBLE PRECISION) / b",
            "trino": "CAST(a AS DOUBLE) / b",
            "tsql": "CAST(a AS FLOAT) / b",
        },
    )

    # MOD(...) becomes the % operator everywhere listed except bigquery.
    for mod_call, mod_operator in (
        ("MOD(8 - 1 + 7, 7)", "(8 - 1 + 7) % 7"),
        ("MOD(a, b + 1)", "a % (b + 1)"),
    ):
        self.validate_all(
            mod_call,
            write={
                "": mod_operator,
                "hive": mod_operator,
                "presto": mod_operator,
                "snowflake": mod_operator,
                "bigquery": mod_call,
            },
        )

    # --- ARRAY_REMOVE -----------------------------------------------------
    array_remove = "ARRAY_REMOVE(the_array, target)"
    self.validate_all(
        array_remove,
        write={
            "": array_remove,
            "clickhouse": "arrayFilter(_u -> _u <> target, the_array)",
            "duckdb": "LIST_FILTER(the_array, _u -> _u <> target)",
            "bigquery": "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)",
            "hive": array_remove,
            "postgres": array_remove,
            "presto": array_remove,
            "starrocks": array_remove,
            "databricks": array_remove,
            "snowflake": array_remove,
        },
    )
def test_typeddiv(self):
    # Renders a typed and an untyped Div under two dialects ("presto" and
    # "hive") for several operand-type pairs. Each row of the table lists the
    # four expected strings in the order
    #   (typed, presto), (typed, hive), (untyped, presto), (untyped, hive).
    typed_div = exp.Div(this=exp.column("a"), expression=exp.column("b"), typed=True)
    untyped_div = exp.Div(this=exp.column("a"), expression=exp.column("b"))

    typed_div_dialect = "presto"
    div_dialect = "hive"

    int_type = exp.DataType.Type.INT
    float_type = exp.DataType.Type.FLOAT

    renderings = [
        (node, dialect)
        for node in (typed_div, untyped_div)
        for dialect in (typed_div_dialect, div_dialect)
    ]

    expected_by_operand_types = (
        ((None, None), ("a / b", "a / b", "CAST(a AS DOUBLE) / b", "a / b")),
        (
            (int_type, int_type),
            ("a / b", "CAST(a / b AS BIGINT)", "CAST(a AS DOUBLE) / b", "a / b"),
        ),
        ((float_type, float_type), ("a / b", "a / b", "a / b", "a / b")),
        ((int_type, float_type), ("a / b", "a / b", "a / b", "a / b")),
    )

    for types, expected_outputs in expected_by_operand_types:
        for (node, dialect), expected in zip(renderings, expected_outputs):
            with self.subTest(f"{node.__class__.__name__} {types} {dialect} -> {expected}"):
                # Annotate a copy so the shared template nodes stay untouched.
                annotated = node.copy()
                annotated.left.type = types[0]
                annotated.right.type = types[1]
                self.assertEqual(expected, annotated.sql(dialect=dialect))
b"), (div, (None, None), typed_div_dialect, "CAST(a AS DOUBLE) / b"), (div, (None, None), div_dialect, "a / b"), (typed_div, (INT, INT), typed_div_dialect, "a / b"), (typed_div, (INT, INT), div_dialect, "CAST(a / b AS BIGINT)"), (div, (INT, INT), typed_div_dialect, "CAST(a AS DOUBLE) / b"), (div, (INT, INT), div_dialect, "a / b"), (typed_div, (FLOAT, FLOAT), typed_div_dialect, "a / b"), (typed_div, (FLOAT, FLOAT), div_dialect, "a / b"), (div, (FLOAT, FLOAT), typed_div_dialect, "a / b"), (div, (FLOAT, FLOAT), div_dialect, "a / b"), (typed_div, (INT, FLOAT), typed_div_dialect, "a / b"), (typed_div, (INT, FLOAT), div_dialect, "a / b"), (div, (INT, FLOAT), typed_div_dialect, "a / b"), (div, (INT, FLOAT), div_dialect, "a / b"), ]: with self.subTest(f"{expression.__class__.__name__} {types} {dialect} -> {expected}"): expression = expression.copy() expression.left.type = types[0] expression.right.type = types[1] self.assertEqual(expected, expression.sql(dialect=dialect)) def test_safediv(self): safe_div = exp.Div(this=exp.column("a"), expression=exp.column("b"), safe=True) div = exp.Div(this=exp.column("a"), expression=exp.column("b")) safe_div_dialect = "mysql" div_dialect = "snowflake" for expression, dialect, expected in [ (safe_div, safe_div_dialect, "a / b"), (safe_div, div_dialect, "a / NULLIF(b, 0)"), (div, safe_div_dialect, "a / b"), (div, div_dialect, "a / b"), ]: with self.subTest(f"{expression.__class__.__name__} {dialect} -> {expected}"): self.assertEqual(expected, expression.sql(dialect=dialect)) self.assertEqual( parse_one("CAST(x AS DECIMAL) / y", read="mysql").sql(dialect="postgres"), "CAST(x AS DECIMAL) / NULLIF(y, 0)", ) def test_limit(self): self.validate_all( "SELECT * FROM data LIMIT 10, 20", write={"sqlite": "SELECT * FROM data LIMIT 20 OFFSET 10"}, ) self.validate_all( "SELECT x FROM y LIMIT 10", read={ "teradata": "SELECT TOP 10 x FROM y", "tsql": "SELECT TOP 10 x FROM y", "snowflake": "SELECT TOP 10 x FROM y", }, write={ "sqlite": "SELECT x FROM y 
LIMIT 10", "oracle": "SELECT x FROM y FETCH FIRST 10 ROWS ONLY", "tsql": "SELECT TOP 10 x FROM y", }, ) self.validate_all( "SELECT x FROM y LIMIT 10 OFFSET 5", write={ "sqlite": "SELECT x FROM y LIMIT 10 OFFSET 5", "oracle": "SELECT x FROM y OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY", }, ) self.validate_all( "SELECT x FROM y OFFSET 10 FETCH FIRST 3 ROWS ONLY", write={ "sqlite": "SELECT x FROM y LIMIT 3 OFFSET 10", "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", }, ) self.validate_all( "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", write={ "oracle": "SELECT x FROM y OFFSET 10 ROWS FETCH FIRST 3 ROWS ONLY", }, ) self.validate_all( '"x" + "y"', read={ "clickhouse": '`x` + "y"', "sqlite": '`x` + "y"', "redshift": '"x" + "y"', }, ) self.validate_all( "[1, 2]", write={ "bigquery": "[1, 2]", "clickhouse": "[1, 2]", }, ) self.validate_all( "SELECT * FROM VALUES ('x'), ('y') AS t(z)", write={ "spark": "SELECT * FROM VALUES ('x'), ('y') AS t(z)", }, ) self.validate_all( "CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR, v2 VARCHAR2, nv NVARCHAR, nv2 NVARCHAR2)", write={ "duckdb": "CREATE TABLE t (c TEXT, nc TEXT, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)", "hive": "CREATE TABLE t (c STRING, nc STRING, v1 STRING, v2 STRING, nv STRING, nv2 STRING)", "oracle": "CREATE TABLE t (c CHAR, nc NCHAR, v1 VARCHAR2, v2 VARCHAR2, nv NVARCHAR2, nv2 NVARCHAR2)", "postgres": "CREATE TABLE t (c CHAR, nc CHAR, v1 VARCHAR, v2 VARCHAR, nv VARCHAR, nv2 VARCHAR)", "sqlite": "CREATE TABLE t (c TEXT, nc TEXT, v1 TEXT, v2 TEXT, nv TEXT, nv2 TEXT)", }, ) self.validate_all( "POWER(1.2, 3.4)", read={ "hive": "pow(1.2, 3.4)", "postgres": "power(1.2, 3.4)", }, ) self.validate_all( "CREATE INDEX my_idx ON tbl(a, b)", read={ "hive": "CREATE INDEX my_idx ON TABLE tbl(a, b)", "sqlite": "CREATE INDEX my_idx ON tbl(a, b)", }, write={ "hive": "CREATE INDEX my_idx ON TABLE tbl(a, b)", "postgres": "CREATE INDEX my_idx ON tbl(a NULLS FIRST, b NULLS FIRST)", "sqlite": "CREATE INDEX my_idx ON 
tbl(a, b)", }, ) self.validate_all( "CREATE UNIQUE INDEX my_idx ON tbl(a, b)", read={ "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl(a, b)", "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl(a, b)", }, write={ "hive": "CREATE UNIQUE INDEX my_idx ON TABLE tbl(a, b)", "postgres": "CREATE UNIQUE INDEX my_idx ON tbl(a NULLS FIRST, b NULLS FIRST)", "sqlite": "CREATE UNIQUE INDEX my_idx ON tbl(a, b)", }, ) self.validate_all( "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 TEXT, c2 TEXT(1024))", write={ "duckdb": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 TEXT, c2 TEXT(1024))", "hive": "CREATE TABLE t (b1 BINARY, b2 BINARY(1024), c1 STRING, c2 VARCHAR(1024))", "oracle": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 CLOB, c2 CLOB(1024))", "postgres": "CREATE TABLE t (b1 BYTEA, b2 BYTEA(1024), c1 TEXT, c2 TEXT(1024))", "sqlite": "CREATE TABLE t (b1 BLOB, b2 BLOB(1024), c1 TEXT, c2 TEXT(1024))", "redshift": "CREATE TABLE t (b1 VARBYTE, b2 VARBYTE(1024), c1 VARCHAR(MAX), c2 VARCHAR(1024))", }, ) def test_alias(self): self.validate_all( "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT x AS x FROM t GROUP BY x", write={ "": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT x AS x FROM t GROUP BY x", "hive": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT x AS x FROM t GROUP BY x", "oracle": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT x AS x FROM t GROUP BY x", "presto": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT x AS x FROM t GROUP BY x", }, ) self.validate_all( "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT SUM(x) AS y, y AS x FROM t GROUP BY y", write={ "": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT SUM(x) AS y, y AS x FROM t GROUP BY y", "hive": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT SUM(x) AS y, y AS x FROM t GROUP BY y", "oracle": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT SUM(x) AS y, y AS x FROM t GROUP BY y", "presto": "WITH t AS (SELECT 1 AS x, 2 AS y) SELECT SUM(x) AS y, y AS x FROM t GROUP BY y", }, ) self.validate_all( 'SELECT 1 AS "foo"', read={ "mysql": "SELECT 1 'foo'", "sqlite": "SELECT 1 
'foo'", "tsql": "SELECT 1 'foo'", }, ) for dialect in ( "presto", "hive", "postgres", "clickhouse", "bigquery", "snowflake", "duckdb", ): with self.subTest(f"string alias: {dialect}"): with self.assertRaises(ParseError): parse_one("SELECT 1 'foo'", dialect=dialect) self.validate_all( "SELECT y x FROM my_table t", write={ "drill": "SELECT y AS x FROM my_table AS t", "hive": "SELECT y AS x FROM my_table AS t", "oracle": "SELECT y AS x FROM my_table t", "postgres": "SELECT y AS x FROM my_table AS t", "sqlite": "SELECT y AS x FROM my_table AS t", }, ) self.validate_all( "SELECT * FROM (SELECT * FROM my_table AS t) AS tbl", write={ "drill": "SELECT * FROM (SELECT * FROM my_table AS t) AS tbl", "hive": "SELECT * FROM (SELECT * FROM my_table AS t) AS tbl", "oracle": "SELECT * FROM (SELECT * FROM my_table t) tbl", "postgres": "SELECT * FROM (SELECT * FROM my_table AS t) AS tbl", "sqlite": "SELECT * FROM (SELECT * FROM my_table AS t) AS tbl", }, ) self.validate_all( "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", write={ "hive": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", "oracle": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 t CROSS JOIN cte2 WHERE cte1.a = cte2.c", "postgres": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", "sqlite": "WITH cte1 AS (SELECT a, b FROM table1), cte2 AS (SELECT c, e AS d FROM table2) SELECT b, d AS dd FROM cte1 AS t CROSS JOIN cte2 WHERE cte1.a = cte2.c", }, ) self.validate_all( "SELECT * FROM (SELECT 1 AS col) AS apply", read={ "": "SELECT * FROM (SELECT 1 AS col) apply", "hive": "SELECT * FROM (SELECT 1 AS col) apply", "postgres": "SELECT * FROM (SELECT 1 AS col) 
apply", "duckdb": "SELECT * FROM (SELECT 1 AS col) apply", "presto": "SELECT * FROM (SELECT 1 AS col) apply", "spark": "SELECT * FROM (SELECT 1 AS col) apply", "spark2": "SELECT * FROM (SELECT 1 AS col) apply", "trino": "SELECT * FROM (SELECT 1 AS col) apply", "snowflake": "SELECT * FROM (SELECT 1 AS col) apply", "bigquery": "SELECT * FROM (SELECT 1 AS col) apply", "athena": "SELECT * FROM (SELECT 1 AS col) apply", }, ) def test_nullsafe_eq(self): self.validate_all( "SELECT a IS NOT DISTINCT FROM b", read={ "mysql": "SELECT a <=> b", "postgres": "SELECT a IS NOT DISTINCT FROM b", }, write={ "mysql": "SELECT a <=> b", "postgres": "SELECT a IS NOT DISTINCT FROM b", }, ) def test_nullsafe_neq(self): self.validate_all( "SELECT a IS DISTINCT FROM b", read={ "postgres": "SELECT a IS DISTINCT FROM b", }, write={ "mysql": "SELECT NOT a <=> b", "postgres": "SELECT a IS DISTINCT FROM b", }, ) def test_hash_comments(self): self.validate_all( "SELECT 1 /* arbitrary content,,, until end-of-line */", read={ "mysql": "SELECT 1 # arbitrary content,,, until end-of-line", "bigquery": "SELECT 1 # arbitrary content,,, until end-of-line", "clickhouse": "SELECT 1 #! 
arbitrary content,,, until end-of-line", }, ) self.validate_all( """/* comment1 */ SELECT x, /* comment2 */ y /* comment3 */""", read={ "mysql": """SELECT # comment1 x, # comment2 y # comment3""", "bigquery": """SELECT # comment1 x, # comment2 y # comment3""", "clickhouse": """SELECT # comment1 x, # comment2 y # comment3""", }, pretty=True, ) def test_transactions(self): self.validate_all( "BEGIN TRANSACTION", write={ "bigquery": "BEGIN TRANSACTION", "mysql": "BEGIN", "postgres": "BEGIN", "presto": "START TRANSACTION", "trino": "START TRANSACTION", "redshift": "BEGIN", "snowflake": "BEGIN", "sqlite": "BEGIN TRANSACTION", "tsql": "BEGIN TRANSACTION", }, ) self.validate_all( "BEGIN READ WRITE, ISOLATION LEVEL SERIALIZABLE", read={ "presto": "START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE", "trino": "START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE", }, ) self.validate_all( "BEGIN ISOLATION LEVEL REPEATABLE READ", read={ "presto": "START TRANSACTION ISOLATION LEVEL REPEATABLE READ", "trino": "START TRANSACTION ISOLATION LEVEL REPEATABLE READ", }, ) self.validate_all( "BEGIN IMMEDIATE TRANSACTION", write={"sqlite": "BEGIN IMMEDIATE TRANSACTION"}, ) def test_merge(self): self.validate_all( """ MERGE INTO target USING source ON target.id = source.id WHEN NOT MATCHED THEN INSERT (id) values (source.id) """, write={ "bigquery": "MERGE INTO target USING source ON target.id = source.id WHEN NOT MATCHED THEN INSERT (id) VALUES (source.id)", "snowflake": "MERGE INTO target USING source ON target.id = source.id WHEN NOT MATCHED THEN INSERT (id) VALUES (source.id)", "spark": "MERGE INTO target USING source ON target.id = source.id WHEN NOT MATCHED THEN INSERT (id) VALUES (source.id)", }, ) self.validate_all( """ MERGE INTO target USING source ON target.id = source.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val) """, write={ "bigquery": 
"MERGE INTO target USING source ON target.id = source.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)", "snowflake": "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)", "spark": "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)", }, ) self.validate_all( """ MERGE INTO target USING source ON target.id = source.id WHEN MATCHED THEN UPDATE * WHEN NOT MATCHED THEN INSERT * """, write={ "spark": "MERGE INTO target USING source ON target.id = source.id WHEN MATCHED THEN UPDATE * WHEN NOT MATCHED THEN INSERT *", }, ) self.validate_all( """ MERGE a b USING c d ON b.id = d.id WHEN MATCHED AND EXISTS ( SELECT b.name EXCEPT SELECT d.name ) THEN UPDATE SET b.name = d.name """, write={ "bigquery": "MERGE INTO a AS b USING c AS d ON b.id = d.id WHEN MATCHED AND EXISTS(SELECT b.name EXCEPT DISTINCT SELECT d.name) THEN UPDATE SET b.name = d.name", "snowflake": "MERGE INTO a AS b USING c AS d ON b.id = d.id WHEN MATCHED AND EXISTS(SELECT b.name EXCEPT SELECT d.name) THEN UPDATE SET b.name = d.name", "spark": "MERGE INTO a AS b USING c AS d ON b.id = d.id WHEN MATCHED AND EXISTS(SELECT b.name EXCEPT SELECT d.name) THEN UPDATE SET b.name = d.name", }, ) # needs to preserve the target alias in then WHEN condition but not in the THEN clause self.validate_all( """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET target.b = 'FOO' WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""", write={ "trino": """MERGE INTO foo AS 
target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED AND target.a <> src.a THEN UPDATE SET b = 'FOO' WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", }, ) # needs to preserve the target alias in then WHEN condition and function but not in the THEN clause self.validate_all( """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED THEN UPDATE SET target.b = COALESCE(src.b, target.b) WHEN NOT MATCHED THEN INSERT (target.a, target.b) VALUES (src.a, src.b)""", write={ "trino": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED THEN UPDATE SET b = COALESCE(src.b, target.b) WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", "postgres": """MERGE INTO foo AS target USING (SELECT a, b FROM tbl) AS src ON src.a = target.a WHEN MATCHED THEN UPDATE SET b = COALESCE(src.b, target.b) WHEN NOT MATCHED THEN INSERT (a, b) VALUES (src.a, src.b)""", }, ) def test_substring(self): self.validate_all( "SUBSTR('123456', 2, 3)", write={ "bigquery": "SUBSTRING('123456', 2, 3)", "oracle": "SUBSTR('123456', 2, 3)", "postgres": "SUBSTRING('123456' FROM 2 FOR 3)", }, ) self.validate_all( "SUBSTRING('123456', 2, 3)", write={ "bigquery": "SUBSTRING('123456', 2, 3)", "oracle": "SUBSTR('123456', 2, 3)", "postgres": "SUBSTRING('123456' FROM 2 FOR 3)", }, ) def test_logarithm(self): for base in (2, 10): with self.subTest(f"Transpiling LOG base {base}"): self.validate_all( f"LOG({base}, a)", read={ "": f"LOG{base}(a)", "bigquery": f"LOG{base}(a)", "clickhouse": f"LOG{base}(a)", "databricks": f"LOG{base}(a)", "dremio": f"LOG{base}(a)", "duckdb": f"LOG{base}(a)", "mysql": f"LOG{base}(a)", "postgres": f"LOG{base}(a)", "presto": f"LOG{base}(a)", "spark": 
f"LOG{base}(a)", "sqlite": f"LOG{base}(a)", "trino": f"LOG{base}(a)", "tsql": f"LOG{base}(a)", }, write={ "bigquery": f"LOG(a, {base})", "clickhouse": f"LOG{base}(a)", "dremio": f"LOG({base}, a)", "duckdb": f"LOG({base}, a)", "mysql": f"LOG({base}, a)", "oracle": f"LOG({base}, a)", "postgres": f"LOG({base}, a)", "presto": f"LOG{base}(a)", "redshift": f"LOG({base}, a)", "snowflake": f"LOG({base}, a)", "spark2": f"LOG({base}, a)", "spark": f"LOG({base}, a)", "sqlite": f"LOG({base}, a)", "starrocks": f"LOG({base}, a)", "tableau": f"LOG(a, {base})", "trino": f"LOG({base}, a)", "tsql": f"LOG(a, {base})", }, ) self.validate_all( "LOG(x)", read={ "duckdb": "LOG(x)", "postgres": "LOG(x)", "redshift": "LOG(x)", "sqlite": "LOG(x)", "teradata": "LOG(x)", }, ) self.validate_all( "LN(x)", read={ "dremio": "LOG(x)", "bigquery": "LOG(x)", "clickhouse": "LOG(x)", "databricks": "LOG(x)", "drill": "LOG(x)", "hive": "LOG(x)", "mysql": "LOG(x)", "tsql": "LOG(x)", }, ) self.validate_all( "LOG(b, n)", read={ "bigquery": "LOG(n, b)", "databricks": "LOG(b, n)", "drill": "LOG(b, n)", "duckdb": "LOG(b, n)", "hive": "LOG(b, n)", "mysql": "LOG(b, n)", "oracle": "LOG(b, n)", "postgres": "LOG(b, n)", "snowflake": "LOG(b, n)", "spark": "LOG(b, n)", "sqlite": "LOG(b, n)", "trino": "LOG(b, n)", "tsql": "LOG(n, b)", }, write={ "clickhouse": UnsupportedError, "presto": UnsupportedError, }, ) def test_count_if(self): self.validate_identity("COUNT_IF(DISTINCT cond)") self.validate_all( "SELECT COUNT_IF(cond) FILTER", write={"": "SELECT COUNT_IF(cond) AS FILTER"} ) self.validate_all( "SELECT COUNT_IF(col % 2 = 0) FROM foo", write={ "": "SELECT COUNT_IF(col % 2 = 0) FROM foo", "databricks": "SELECT COUNT_IF(col % 2 = 0) FROM foo", "presto": "SELECT COUNT_IF(col % 2 = 0) FROM foo", "snowflake": "SELECT COUNT_IF(col % 2 = 0) FROM foo", "sqlite": "SELECT SUM(IIF(col % 2 = 0, 1, 0)) FROM foo", "tsql": "SELECT COUNT_IF(col % 2 = 0) FROM foo", "postgres": "SELECT SUM(CASE WHEN col % 2 = 0 THEN 1 ELSE 0 END) 
FROM foo", "redshift": "SELECT SUM(CASE WHEN col % 2 = 0 THEN 1 ELSE 0 END) FROM foo", }, ) self.validate_all( "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", read={ "": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", "databricks": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", "tsql": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", }, write={ "": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", "databricks": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", "presto": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", "sqlite": "SELECT SUM(IIF(col % 2 = 0, 1, 0)) FILTER(WHERE col < 1000) FROM foo", "tsql": "SELECT COUNT_IF(col % 2 = 0) FILTER(WHERE col < 1000) FROM foo", }, ) def test_cast_to_user_defined_type(self): self.validate_identity("CAST(x AS some_udt(1234))") self.validate_all( "CAST(x AS some_udt)", write={ "": "CAST(x AS some_udt)", "oracle": "CAST(x AS some_udt)", "postgres": "CAST(x AS some_udt)", "presto": "CAST(x AS some_udt)", "teradata": "CAST(x AS some_udt)", "tsql": "CAST(x AS some_udt)", }, ) with self.assertRaises(ParseError): parse_one("CAST(x AS some_udt)", read="bigquery") def test_qualify(self): self.validate_all( "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1", write={ "duckdb": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1", "snowflake": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1", "clickhouse": "SELECT * FROM t QUALIFY COUNT(*) OVER () > 1", "mysql": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", "oracle": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1", "postgres": "SELECT * FROM (SELECT *, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", "tsql": "SELECT * FROM (SELECT *, COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", }, ) self.validate_all( 'SELECT "user id", some_id, 1 as other_id, 2 as "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1', write={ 
"duckdb": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1', "snowflake": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1', "clickhouse": 'SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id" FROM t QUALIFY COUNT(*) OVER () > 1', "mysql": "SELECT `user id`, some_id, other_id, `2 nd id` FROM (SELECT `user id`, some_id, 1 AS other_id, 2 AS `2 nd id`, COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", "oracle": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) _t WHERE _w > 1', "postgres": 'SELECT "user id", some_id, other_id, "2 nd id" FROM (SELECT "user id", some_id, 1 AS other_id, 2 AS "2 nd id", COUNT(*) OVER () AS _w FROM t) AS _t WHERE _w > 1', "tsql": "SELECT [user id], some_id, other_id, [2 nd id] FROM (SELECT [user id] AS [user id], some_id AS some_id, 1 AS other_id, 2 AS [2 nd id], COUNT_BIG(*) OVER () AS _w FROM t) AS _t WHERE _w > 1", }, ) def test_window_exclude(self): for option in ("CURRENT ROW", "TIES", "GROUP"): self.validate_all( f"SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE {option})", write={ "duckdb": f"SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE {option})", "postgres": f"SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE {option})", "sqlite": f"SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE {option})", "oracle": f"SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE {option})", }, ) # EXCLUDE NO OTHERS is the default behaviour self.validate_all( "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", read={ "duckdb": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS)", "postgres": "SELECT SUM(X) OVER 
(PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS)", "sqlite": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS)", "oracle": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE NO OTHERS)", }, write={ "duckdb": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", "postgres": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", "sqlite": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", "oracle": "SELECT SUM(X) OVER (PARTITION BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", }, ) def test_nested_ctes(self): self.validate_all( "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", write={ "bigquery": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "clickhouse": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "databricks": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", "duckdb": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "hive": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", "mysql": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "postgres": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "presto": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "redshift": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "snowflake": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "spark": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", "spark2": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c FROM t) AS subq", "sqlite": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "trino": "SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq", "tsql": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT c AS c FROM t) AS 
subq", }, ) self.validate_all( "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", write={ "bigquery": "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", "duckdb": "SELECT * FROM (SELECT * FROM (WITH t AS (SELECT 1 AS c) SELECT c FROM t) AS subq1) AS subq2", "hive": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT * FROM (SELECT c FROM t) AS subq1) AS subq2", "tsql": "WITH t AS (SELECT 1 AS c) SELECT * FROM (SELECT * FROM (SELECT c AS c FROM t) AS subq1) AS subq2", }, ) self.validate_all( "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", write={ "duckdb": "WITH t1(x) AS (SELECT 1) SELECT * FROM (WITH t2(y) AS (SELECT 2) SELECT y FROM t2) AS subq", "tsql": "WITH t1(x) AS (SELECT 1), t2(y) AS (SELECT 2) SELECT * FROM (SELECT y AS y FROM t2) AS subq", }, ) self.validate_all( """ WITH c AS ( WITH b AS ( WITH a1 AS ( SELECT 1 ), a2 AS ( SELECT 2 ) SELECT * FROM a1, a2 ) SELECT * FROM b ) SELECT * FROM c""", write={ "duckdb": "WITH c AS (WITH b AS (WITH a1 AS (SELECT 1), a2 AS (SELECT 2) SELECT * FROM a1, a2) SELECT * FROM b) SELECT * FROM c", "hive": "WITH a1 AS (SELECT 1), a2 AS (SELECT 2), b AS (SELECT * FROM a1, a2), c AS (SELECT * FROM b) SELECT * FROM c", }, ) self.validate_all( """ WITH subquery1 AS ( WITH tmp AS ( SELECT * FROM table0 ) SELECT * FROM tmp ), subquery2 AS ( WITH tmp2 AS ( SELECT * FROM table1 WHERE a IN subquery1 ) SELECT * FROM tmp2 ) SELECT * FROM subquery2 """, write={ "hive": """WITH tmp AS ( SELECT * FROM table0 ), subquery1 AS ( SELECT * FROM tmp ), tmp2 AS ( SELECT * FROM table1 WHERE a IN subquery1 ), subquery2 AS ( SELECT * FROM tmp2 ) SELECT * FROM subquery2""", }, pretty=True, ) def test_unsupported_null_ordering(self): # We'll transpile a portable query from the following dialects to MySQL / T-SQL, which # both treat NULLs as small values, so the expected output queries should be equivalent with_last_nulls = 
"duckdb" with_small_nulls = "spark" with_large_nulls = "postgres" sql = "SELECT * FROM t ORDER BY c" sql_nulls_last = "SELECT * FROM t ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END, c" sql_nulls_first = "SELECT * FROM t ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c" for read_dialect, desc, nulls_first, expected_sql in ( (with_last_nulls, False, None, sql_nulls_last), (with_last_nulls, True, None, sql), (with_last_nulls, False, True, sql), (with_last_nulls, True, True, sql_nulls_first), (with_last_nulls, False, False, sql_nulls_last), (with_last_nulls, True, False, sql), (with_small_nulls, False, None, sql), (with_small_nulls, True, None, sql), (with_small_nulls, False, True, sql), (with_small_nulls, True, True, sql_nulls_first), (with_small_nulls, False, False, sql_nulls_last), (with_small_nulls, True, False, sql), (with_large_nulls, False, None, sql_nulls_last), (with_large_nulls, True, None, sql_nulls_first), (with_large_nulls, False, True, sql), (with_large_nulls, True, True, sql_nulls_first), (with_large_nulls, False, False, sql_nulls_last), (with_large_nulls, True, False, sql), ): with self.subTest( f"read: {read_dialect}, descending: {desc}, nulls first: {nulls_first}" ): sort_order = " DESC" if desc else "" null_order = ( " NULLS FIRST" if nulls_first else (" NULLS LAST" if nulls_first is not None else "") ) expected_sql = f"{expected_sql}{sort_order}" expression = parse_one(f"{sql}{sort_order}{null_order}", read=read_dialect) self.assertEqual(expression.sql(dialect="mysql"), expected_sql) self.assertEqual(expression.sql(dialect="tsql"), expected_sql) def test_random(self): self.validate_all( "RAND()", write={ "bigquery": "RAND()", "clickhouse": "randCanonical()", "databricks": "RAND()", "doris": "RAND()", "drill": "RAND()", "duckdb": "RANDOM()", "hive": "RAND()", "mysql": "RAND()", "oracle": "DBMS_RANDOM.VALUE()", "postgres": "RANDOM()", "presto": "RAND()", "spark": "RAND()", "sqlite": "RANDOM()", "tsql": "RAND()", }, read={ "bigquery": "RAND()", 
"clickhouse": "randCanonical()", "databricks": "RAND()", "doris": "RAND()", "drill": "RAND()", "duckdb": "RANDOM()", "hive": "RAND()", "mysql": "RAND()", "oracle": "DBMS_RANDOM.VALUE()", "postgres": "RANDOM()", "presto": "RAND()", "spark": "RAND()", "sqlite": "RANDOM()", "tsql": "RAND()", }, ) def test_array_any(self): self.validate_all( "ARRAY_ANY(arr, x -> pred)", write={ "": "ARRAY_ANY(arr, x -> pred)", "bigquery": "(ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)", "clickhouse": "(LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)", "databricks": "(SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)", "doris": UnsupportedError, "drill": UnsupportedError, "duckdb": "(ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)", "hive": UnsupportedError, "mysql": UnsupportedError, "oracle": UnsupportedError, "postgres": "(ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)", "presto": "ANY_MATCH(arr, x -> pred)", "redshift": UnsupportedError, "snowflake": UnsupportedError, "spark": "(SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)", "spark2": "(SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)", "sqlite": UnsupportedError, "starrocks": UnsupportedError, "tableau": UnsupportedError, "teradata": "(CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)", "trino": "ANY_MATCH(arr, x -> pred)", "tsql": UnsupportedError, }, ) def test_truncate(self): self.validate_identity("TRUNCATE TABLE table") self.validate_identity("TRUNCATE TABLE db.schema.test") self.validate_identity("TRUNCATE TABLE IF EXISTS db.schema.test") self.validate_identity("TRUNCATE TABLE t1, t2, t3") def test_create_sequence(self): self.validate_identity("CREATE SEQUENCE seq") self.validate_identity( "CREATE TEMPORARY SEQUENCE seq AS SMALLINT START WITH 3 INCREMENT BY 2 MINVALUE 1 MAXVALUE 10 CACHE 1 NO CYCLE OWNED BY table.col" ) self.validate_identity( "CREATE 
SEQUENCE seq START WITH 1 NO MINVALUE NO MAXVALUE CYCLE NO CACHE" ) self.validate_identity("CREATE OR REPLACE TEMPORARY SEQUENCE seq INCREMENT BY 1 NO CYCLE") self.validate_identity( "CREATE OR REPLACE SEQUENCE IF NOT EXISTS seq COMMENT='test comment' ORDER" ) self.validate_identity( "CREATE SEQUENCE schema.seq SHARING=METADATA NOORDER NOKEEP SCALE EXTEND SHARD EXTEND SESSION" ) self.validate_identity( "CREATE SEQUENCE schema.seq SHARING=DATA ORDER KEEP NOSCALE NOSHARD GLOBAL" ) self.validate_identity( "CREATE SEQUENCE schema.seq SHARING=DATA NOCACHE NOCYCLE SCALE NOEXTEND" ) self.validate_identity( """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE OWNED BY NONE""", """CREATE TEMPORARY SEQUENCE seq AS BIGINT INCREMENT BY 2 MINVALUE 1 CACHE 1 NOMAXVALUE NO CYCLE""", ) self.validate_identity( """CREATE TEMPORARY SEQUENCE seq START 1""", """CREATE TEMPORARY SEQUENCE seq START WITH 1""", ) self.validate_identity( """CREATE TEMPORARY SEQUENCE seq START WITH = 1 INCREMENT BY = 2""", """CREATE TEMPORARY SEQUENCE seq START WITH 1 INCREMENT BY 2""", ) def test_reserved_keywords(self): order = exp.select("*").from_("order") for dialect in ("duckdb", "presto", "redshift"): dialect = Dialect.get_or_raise(dialect) self.assertEqual( order.sql(dialect=dialect), f"SELECT * FROM {dialect.IDENTIFIER_START}order{dialect.IDENTIFIER_END}", ) self.validate_identity( """SELECT partition.d FROM t PARTITION (d)""", """SELECT partition.d FROM t AS PARTITION(d)""", ) def test_string_functions(self): for pad_func in ("LPAD", "RPAD"): ch_alias = "LEFTPAD" if pad_func == "LPAD" else "RIGHTPAD" for fill_pattern in ("", ", ' '"): with self.subTest(f"Testing {pad_func}() with pattern {fill_pattern}"): self.validate_all( f"SELECT {pad_func}('bar', 5{fill_pattern})", read={ "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})", "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})", "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})", 
"postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})", "clickhouse": f"SELECT {ch_alias}('bar', 5{fill_pattern})", }, write={ "": f"SELECT {pad_func}('bar', 5{fill_pattern})", "spark": f"SELECT {pad_func}('bar', 5{fill_pattern})", "postgres": f"SELECT {pad_func}('bar', 5{fill_pattern})", "clickhouse": f"SELECT {pad_func}('bar', 5{fill_pattern})", "snowflake": f"SELECT {pad_func}('bar', 5{fill_pattern})", "databricks": f"SELECT {pad_func}('bar', 5{fill_pattern})", "duckdb": f"SELECT {pad_func}('bar', 5, ' ')", "mysql": f"SELECT {pad_func}('bar', 5, ' ')", "hive": f"SELECT {pad_func}('bar', 5, ' ')", "spark2": f"SELECT {pad_func}('bar', 5, ' ')", "presto": f"SELECT {pad_func}('bar', 5, ' ')", "trino": f"SELECT {pad_func}('bar', 5, ' ')", }, ) def test_generate_date_array(self): self.validate_all( "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))", write={ "bigquery": "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", "databricks": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", "duckdb": "SELECT * FROM UNNEST(CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK) AS DATE[]))", "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", "postgres": "SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1 WEEK') AS _t(value)) AS _unnested_generate_series", "presto": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", 
"redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates", "snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1))) AS _t0(seq, key, path, index, value, this))", "spark": "SELECT * FROM EXPLODE(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), INTERVAL '1' WEEK))", "trino": "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))", "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates", }, ) self.validate_all( "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates", write={ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE 
CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), dates AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates) SELECT * FROM dates", }, ) self.validate_all( "WITH dates1 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))), dates2 AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-03-01', INTERVAL 1 MONTH))) SELECT * FROM dates1 CROSS JOIN dates2", write={ "mysql": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_value, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) FROM _generated_dates_1 WHERE CAST(DATE_ADD(date_value, INTERVAL 1 MONTH) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", "redshift": "WITH RECURSIVE _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value 
UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", "tsql": "WITH _generated_dates(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_value) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_value) AS DATE) <= CAST('2020-02-01' AS DATE)), _generated_dates_1(date_value) AS (SELECT CAST('2020-01-01' AS DATE) AS date_value UNION ALL SELECT CAST(DATEADD(MONTH, 1, date_value) AS DATE) FROM _generated_dates_1 WHERE CAST(DATEADD(MONTH, 1, date_value) AS DATE) <= CAST('2020-03-01' AS DATE)), dates1 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates) AS _generated_dates), dates2 AS (SELECT * FROM (SELECT date_value AS date_value FROM _generated_dates_1) AS _generated_dates_1) SELECT * FROM dates1 CROSS JOIN dates2", }, ) self.validate_all( "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK)) AS _q(date_week)", write={ "mysql": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) FROM _generated_dates WHERE CAST(DATE_ADD(date_week, INTERVAL 1 WEEK) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", "redshift": "WITH RECURSIVE _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week FROM _generated_dates) AS _generated_dates", 
"snowflake": "SELECT * FROM (SELECT DATEADD(WEEK, CAST(date_week AS INT), CAST('2020-01-01' AS DATE)) AS date_week FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1))) AS _q(seq, key, path, index, date_week, this)) AS _q(date_week)", "tsql": "WITH _generated_dates(date_week) AS (SELECT CAST('2020-01-01' AS DATE) AS date_week UNION ALL SELECT CAST(DATEADD(WEEK, 1, date_week) AS DATE) FROM _generated_dates WHERE CAST(DATEADD(WEEK, 1, date_week) AS DATE) <= CAST('2020-02-01' AS DATE)) SELECT * FROM (SELECT date_week AS date_week FROM _generated_dates) AS _generated_dates", }, ) self.validate_all( "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))", write={ "snowflake": "SELECT ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(WEEK, CAST(value AS INT), CAST('2020-01-01' AS DATE)) AS value FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE)) + 1))) AS _t0(seq, key, path, index, value, this))))", }, ) def test_set_operation_specifiers(self): self.validate_all( "SELECT 1 EXCEPT ALL SELECT 1", write={ "": "SELECT 1 EXCEPT ALL SELECT 1", "bigquery": UnsupportedError, "clickhouse": "SELECT 1 EXCEPT SELECT 1", "databricks": "SELECT 1 EXCEPT ALL SELECT 1", "duckdb": "SELECT 1 EXCEPT ALL SELECT 1", "mysql": "SELECT 1 EXCEPT ALL SELECT 1", "oracle": "SELECT 1 EXCEPT ALL SELECT 1", "postgres": "SELECT 1 EXCEPT ALL SELECT 1", "presto": UnsupportedError, "redshift": UnsupportedError, "snowflake": UnsupportedError, "spark": "SELECT 1 EXCEPT ALL SELECT 1", "sqlite": UnsupportedError, "starrocks": UnsupportedError, "trino": "SELECT 1 EXCEPT ALL SELECT 1", "tsql": UnsupportedError, }, ) def test_normalize(self): for form in ("", ", nfkc"): with self.subTest(f"Testing NORMALIZE('str'{form}) roundtrip"): self.validate_all( f"SELECT NORMALIZE('str'{form})", read={ "presto": f"SELECT 
NORMALIZE('str'{form})", "trino": f"SELECT NORMALIZE('str'{form})", "bigquery": f"SELECT NORMALIZE('str'{form})", }, write={ "presto": f"SELECT NORMALIZE('str'{form})", "trino": f"SELECT NORMALIZE('str'{form})", "bigquery": f"SELECT NORMALIZE('str'{form})", }, ) self.assertIsInstance(parse_one("NORMALIZE('str', NFD)").args.get("form"), exp.Var) def test_coalesce(self): """ Validate that "expressions" is a list for all the exp.Coalesce instances; This is important as some optimizer rules are coalesce specific and will iterate on "expressions" """ # Check the 2-arg aliases for func in ("COALESCE", "IFNULL", "NVL"): self.assertIsInstance(self.parse_one(f"{func}(1, 2)").expressions, list) # Check the varlen case coalesce = self.parse_one("COALESCE(x, y, z)") self.assertIsInstance(coalesce.expressions, list) self.assertIsNone(coalesce.args.get("is_nvl")) # Check Oracle's NVL which is decoupled from COALESCE oracle_nvl = parse_one("NVL(x, y)", read="oracle") self.assertIsInstance(oracle_nvl.expressions, list) self.assertTrue(oracle_nvl.args.get("is_nvl")) # Check T-SQL's ISNULL which is parsed into exp.Coalesce self.assertIsInstance(parse_one("ISNULL(x, y)", read="tsql").expressions, list) def test_trim(self): self.validate_all( "TRIM('abc', 'a')", read={ "bigquery": "TRIM('abc', 'a')", "snowflake": "TRIM('abc', 'a')", "hive": "TRIM('abc', 'a')", "spark2": "TRIM('a', 'abc')", "spark": "TRIM('a', 'abc')", "databricks": "TRIM('a', 'abc')", }, write={ "bigquery": "TRIM('abc', 'a')", "snowflake": "TRIM('abc', 'a')", "hive": "TRIM('a' FROM 'abc')", "spark2": "TRIM('a' FROM 'abc')", "spark": "TRIM('a' FROM 'abc')", "databricks": "TRIM('a' FROM 'abc')", }, ) self.validate_all( "LTRIM('Hello World', 'H')", read={ "": "LTRIM('Hello World', 'H')", "oracle": "LTRIM('Hello World', 'H')", "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')", "snowflake": "LTRIM('Hello World', 'H')", "bigquery": "LTRIM('Hello World', 'H')", "hive": "LTRIM('Hello World', 'H')", "spark2": "LTRIM('H', 
'Hello World')", "spark": "LTRIM('H', 'Hello World')", "databricks": "LTRIM('H', 'Hello World')", }, write={ "clickhouse": "TRIM(LEADING 'H' FROM 'Hello World')", "oracle": "LTRIM('Hello World', 'H')", "snowflake": "LTRIM('Hello World', 'H')", "bigquery": "LTRIM('Hello World', 'H')", "hive": "TRIM(LEADING 'H' FROM 'Hello World')", "spark2": "TRIM(LEADING 'H' FROM 'Hello World')", "spark": "TRIM(LEADING 'H' FROM 'Hello World')", "databricks": "TRIM(LEADING 'H' FROM 'Hello World')", }, ) self.validate_all( "RTRIM('Hello World', 'd')", read={ "": "RTRIM('Hello World', 'd')", "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')", "oracle": "RTRIM('Hello World', 'd')", "snowflake": "RTRIM('Hello World', 'd')", "bigquery": "RTRIM('Hello World', 'd')", "hive": "RTRIM('Hello World', 'd')", "spark2": "RTRIM('d', 'Hello World')", "spark": "RTRIM('d', 'Hello World')", "databricks": "RTRIM('d', 'Hello World')", }, write={ "clickhouse": "TRIM(TRAILING 'd' FROM 'Hello World')", "oracle": "RTRIM('Hello World', 'd')", "snowflake": "RTRIM('Hello World', 'd')", "bigquery": "RTRIM('Hello World', 'd')", "hive": "TRIM(TRAILING 'd' FROM 'Hello World')", "spark2": "TRIM(TRAILING 'd' FROM 'Hello World')", "spark": "TRIM(TRAILING 'd' FROM 'Hello World')", "databricks": "TRIM(TRAILING 'd' FROM 'Hello World')", }, ) self.validate_all( "LTRIM('Hello World')", read={ "": "LTRIM('Hello World')", "clickhouse": "LTRIM('Hello World')", "oracle": "LTRIM('Hello World')", "snowflake": "LTRIM('Hello World')", "bigquery": "LTRIM('Hello World')", "hive": "LTRIM('Hello World')", "spark2": "LTRIM('Hello World')", "spark": "LTRIM('Hello World')", "databricks": "LTRIM('Hello World')", }, write={ "clickhouse": "LTRIM('Hello World')", "oracle": "LTRIM('Hello World')", "snowflake": "LTRIM('Hello World')", "bigquery": "LTRIM('Hello World')", "hive": "LTRIM('Hello World')", "spark2": "LTRIM('Hello World')", "spark": "LTRIM('Hello World')", "databricks": "LTRIM('Hello World')", }, ) self.validate_all( 
"RTRIM('Hello World')", read={ "": "RTRIM('Hello World')", "clickhouse": "RTRIM('Hello World')", "oracle": "RTRIM('Hello World')", "snowflake": "RTRIM('Hello World')", "bigquery": "RTRIM('Hello World')", "hive": "RTRIM('Hello World')", "spark2": "RTRIM('Hello World')", "spark": "RTRIM('Hello World')", "databricks": "RTRIM('Hello World')", }, write={ "clickhouse": "RTRIM('Hello World')", "oracle": "RTRIM('Hello World')", "snowflake": "RTRIM('Hello World')", "bigquery": "RTRIM('Hello World')", "hive": "RTRIM('Hello World')", "spark2": "RTRIM('Hello World')", "spark": "RTRIM('Hello World')", "databricks": "RTRIM('Hello World')", }, ) def test_uuid(self): self.validate_all( "UUID()", read={ "hive": "UUID()", "spark2": "UUID()", "spark": "UUID()", "databricks": "UUID()", "duckdb": "UUID()", "presto": "UUID()", "trino": "UUID()", "mysql": "UUID()", "postgres": "GEN_RANDOM_UUID()", "snowflake": "UUID_STRING()", "tsql": "NEWID()", }, write={ "hive": "UUID()", "spark2": "UUID()", "spark": "UUID()", "databricks": "UUID()", "duckdb": "UUID()", "presto": "UUID()", "trino": "UUID()", "mysql": "UUID()", "postgres": "GEN_RANDOM_UUID()", "bigquery": "GENERATE_UUID()", "snowflake": "UUID_STRING()", "tsql": "NEWID()", }, ) def test_escaped_identifier_delimiter(self): for dialect in ("databricks", "hive", "mysql", "spark2", "spark"): with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"): self.validate_all( 'SELECT 1 AS "x`"', read={ dialect: "SELECT 1 AS `x```", }, write={ dialect: "SELECT 1 AS `x```", }, ) for dialect in ( "", "clickhouse", "duckdb", "postgres", "presto", "trino", "redshift", "snowflake", "sqlite", ): with self.subTest(f"Testing escaped double-quote in identifier name for {dialect}"): self.validate_all( 'SELECT 1 AS "x"""', read={ dialect: 'SELECT 1 AS "x"""', }, write={ dialect: 'SELECT 1 AS "x"""', }, ) for dialect in ("clickhouse", "sqlite"): with self.subTest(f"Testing escaped backtick in identifier name for {dialect}"): 
self.validate_all( 'SELECT 1 AS "x`"', read={ dialect: "SELECT 1 AS `x```", }, write={ dialect: 'SELECT 1 AS "x`"', }, ) self.validate_all( 'SELECT 1 AS "x`"', read={ "clickhouse": "SELECT 1 AS `x\\``", }, write={ "clickhouse": 'SELECT 1 AS "x`"', }, ) for name in ('"x\\""', '`x"`'): with self.subTest(f"Testing ClickHouse delimiter escaping: {name}"): self.validate_all( 'SELECT 1 AS "x"""', read={ "clickhouse": f"SELECT 1 AS {name}", }, write={ "clickhouse": 'SELECT 1 AS "x"""', }, ) for name in ("[[x]]]", '"[x]"'): with self.subTest(f"Testing T-SQL delimiter escaping: {name}"): self.validate_all( 'SELECT 1 AS "[x]"', read={ "tsql": f"SELECT 1 AS {name}", }, write={ "tsql": "SELECT 1 AS [[x]]]", }, ) for name in ('[x"]', '"x"""'): with self.subTest(f"Testing T-SQL delimiter escaping: {name}"): self.validate_all( 'SELECT 1 AS "x"""', read={ "tsql": f"SELECT 1 AS {name}", }, write={ "tsql": 'SELECT 1 AS [x"]', }, ) def test_median(self): for suffix in ( "", " OVER ()", ): self.validate_all( f"MEDIAN(x){suffix}", read={ "snowflake": f"MEDIAN(x){suffix}", "duckdb": f"MEDIAN(x){suffix}", "spark": f"MEDIAN(x){suffix}", "databricks": f"MEDIAN(x){suffix}", "redshift": f"MEDIAN(x){suffix}", "oracle": f"MEDIAN(x){suffix}", }, write={ "snowflake": f"MEDIAN(x){suffix}", "duckdb": f"MEDIAN(x){suffix}", "spark": f"MEDIAN(x){suffix}", "databricks": f"MEDIAN(x){suffix}", "redshift": f"MEDIAN(x){suffix}", "oracle": f"MEDIAN(x){suffix}", "clickhouse": f"median(x){suffix}", "postgres": f"PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", }, ) def test_current_schema(self): self.validate_all( "CURRENT_SCHEMA()", read={ "mysql": "SCHEMA()", "postgres": "CURRENT_SCHEMA()", "tsql": "SCHEMA_NAME()", }, write={ "sqlite": "'main'", "mysql": "SCHEMA()", "postgres": "CURRENT_SCHEMA", "tsql": "SCHEMA_NAME()", }, ) def test_integer_hex_strings(self): # Hex strings such as 0xCC represent INTEGER values in the read dialects integer_dialects = ("bigquery", "clickhouse") for read_dialect in 
integer_dialects: for write_dialect in ( "", "duckdb", "databricks", "snowflake", "spark", "redshift", ): with self.subTest(f"Testing hex string -> INTEGER evaluation for {read_dialect}"): self.assertEqual( parse_one("SELECT 0xCC", read=read_dialect).sql(write_dialect), "SELECT 204" ) for other_integer_dialects in integer_dialects: self.assertEqual( parse_one("SELECT 0xCC", read=read_dialect).sql(other_integer_dialects), "SELECT 0xCC", ) def test_ascii(self): self.validate_all( "ASCII('A')", read={ "bigquery": "ASCII('A')", "clickhouse": "ASCII('A')", "databricks": "ASCII('A')", "hive": "ASCII('A')", "mysql": "ASCII('A')", "postgres": "ASCII('A')", "redshift": "ASCII('A')", "snowflake": "ASCII('A')", "tsql": "ASCII('A')", }, write={ "bigquery": "ASCII('A')", "clickhouse": "ASCII('A')", "databricks": "ASCII('A')", "hive": "ASCII('A')", "mysql": "ASCII('A')", "postgres": "ASCII('A')", "redshift": "ASCII('A')", "snowflake": "ASCII('A')", "tsql": "ASCII('A')", }, ) def test_between(self): between = exp.column("x").between(1, 2) self.assertEqual(between.sql("postgres"), "x BETWEEN 1 AND 2") self.assertEqual(between.sql("redshift"), "x BETWEEN 1 AND 2") self.assertFalse("symmetric" in between.args) self.validate_all( "SELECT x BETWEEN 2 AND 10", read={ "": "SELECT x BETWEEN 2 AND 10", "clickhouse": "SELECT x BETWEEN 2 AND 10", "dremio": "SELECT x BETWEEN 2 AND 10", "duckdb": "SELECT x BETWEEN 2 AND 10", "materialize": "SELECT x BETWEEN 2 AND 10", "mysql": "SELECT x BETWEEN 2 AND 10", "oracle": "SELECT x BETWEEN 2 AND 10", "postgres": "SELECT x BETWEEN 2 AND 10", "redshift": "SELECT x BETWEEN 2 AND 10", "risingwave": "SELECT x BETWEEN 2 AND 10", "tsql": "SELECT x BETWEEN 2 AND 10", }, write={ "": "SELECT x BETWEEN 2 AND 10", "clickhouse": "SELECT x BETWEEN 2 AND 10", "dremio": "SELECT x BETWEEN 2 AND 10", "duckdb": "SELECT x BETWEEN 2 AND 10", "materialize": "SELECT x BETWEEN 2 AND 10", "mysql": "SELECT x BETWEEN 2 AND 10", "oracle": "SELECT x BETWEEN 2 AND 10", 
"postgres": "SELECT x BETWEEN 2 AND 10", "redshift": "SELECT x BETWEEN 2 AND 10", "risingwave": "SELECT x BETWEEN 2 AND 10", "tsql": "SELECT x BETWEEN 2 AND 10", }, ) self.validate_all( "SELECT x BETWEEN SYMMETRIC 10 AND 2", write={ "": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "clickhouse": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "dremio": "SELECT x BETWEEN SYMMETRIC 10 AND 2", "duckdb": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "materialize": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "mysql": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "oracle": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "postgres": "SELECT x BETWEEN SYMMETRIC 10 AND 2", "redshift": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "risingwave": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", "tsql": "SELECT (x BETWEEN 10 AND 2 OR x BETWEEN 2 AND 10)", }, ) self.validate_all( "SELECT x BETWEEN ASYMMETRIC 10 AND 2", write={ "": "SELECT x BETWEEN 10 AND 2", "clickhouse": "SELECT x BETWEEN 10 AND 2", "dremio": "SELECT x BETWEEN ASYMMETRIC 10 AND 2", "duckdb": "SELECT x BETWEEN 10 AND 2", "materialize": "SELECT x BETWEEN 10 AND 2", "mysql": "SELECT x BETWEEN 10 AND 2", "oracle": "SELECT x BETWEEN 10 AND 2", "postgres": "SELECT x BETWEEN ASYMMETRIC 10 AND 2", "redshift": "SELECT x BETWEEN 10 AND 2", "risingwave": "SELECT x BETWEEN 10 AND 2", "tsql": "SELECT x BETWEEN 10 AND 2", }, ) def test_like_quantifiers(self): for quantifier in ("ANY", "ALL"): connector = "OR" if quantifier == "ANY" else "AND" with self.subTest(f"Testing LIKE {quantifier}"): self.validate_all( f"SELECT col LIKE {quantifier} (x, y, z)", read={ "": f"SELECT col LIKE {quantifier} (x, y, z)", "bigquery": f"SELECT col LIKE {quantifier} (x, y, z)", "snowflake": f"SELECT col LIKE {quantifier} (x, y, z)", "spark": f"SELECT col LIKE {quantifier} (x, y, z)", "databricks": f"SELECT col LIKE {quantifier} (x, y, z)", }, write={ "bigquery": f"SELECT col LIKE {quantifier} 
(x, y, z)", "snowflake": f"SELECT col LIKE {quantifier} (x, y, z)", "spark": f"SELECT col LIKE {quantifier} (x, y, z)", "databricks": f"SELECT col LIKE {quantifier} (x, y, z)", "duckdb": f"SELECT (col LIKE x {connector} col LIKE y) {connector} col LIKE z", }, ) with self.subTest(f"Testing ILIKE {quantifier}"): self.validate_all( f"SELECT col ILIKE {quantifier} (x, y, z)", write={ "": f"SELECT col ILIKE {quantifier} (x, y, z)", "duckdb": f"SELECT (col ILIKE x {connector} col ILIKE y) {connector} col ILIKE z", }, ) self.validate_all( "SELECT 'foo' LIKE ANY((('bar', 'fo%')))", write={ "": "SELECT 'foo' LIKE ANY((('bar', 'fo%')))", "duckdb": "SELECT 'foo' LIKE 'bar' OR 'foo' LIKE 'fo%'", }, ) def test_date_to_unix_date(self): self.validate_all( "DATE_FROM_UNIX_DATE(1)", write={ "": "DATE_ADD(CAST('1970-01-01' AS DATE), 1, 'DAY')", "bigquery": "DATE_FROM_UNIX_DATE(1)", "spark": "DATE_FROM_UNIX_DATE(1)", "databricks": "DATE_FROM_UNIX_DATE(1)", "snowflake": "DATEADD(DAY, 1, CAST('1970-01-01' AS DATE))", "duckdb": "CAST('1970-01-01' AS DATE) + INTERVAL 1 DAY", "redshift": "DATEADD(DAY, 1, CAST('1970-01-01' AS DATE))", "presto": "DATE_ADD('DAY', 1, CAST('1970-01-01' AS DATE))", "trino": "DATE_ADD('DAY', 1, CAST('1970-01-01' AS DATE))", }, ) def test_week_of_year(self): self.validate_all( "WEEKOFYEAR(CAST('2025-01-01' AS DATE))", write={ "duckdb": "WEEKOFYEAR(CAST('2025-01-01' AS DATE))", "exasol": "WEEK(CAST('2025-01-01' AS DATE))", "hive": "WEEKOFYEAR(CAST('2025-01-01' AS DATE))", "mysql": "WEEKOFYEAR(CAST('2025-01-01' AS DATE))", "spark": "WEEKOFYEAR(CAST('2025-01-01' AS DATE))", "snowflake": "WEEKISO(CAST('2025-01-01' AS DATE))", }, ) def test_justify(self): self.validate_all( "JUSTIFY_DAYS(INTERVAL '1' DAY)", read={ "": "JUSTIFY_DAYS(INTERVAL '1' DAY)", "bigquery": "JUSTIFY_DAYS(INTERVAL '1' DAY)", "postgres": "JUSTIFY_DAYS(INTERVAL '1 DAY')", "materialize": "JUSTIFY_DAYS(INTERVAL '1 DAY')", }, write={ "bigquery": "JUSTIFY_DAYS(INTERVAL '1' DAY)", "postgres": 
"JUSTIFY_DAYS(INTERVAL '1 DAY')", "materialize": "JUSTIFY_DAYS(INTERVAL '1 DAY')", }, ) self.validate_all( "JUSTIFY_HOURS(INTERVAL '1' HOUR)", read={ "": "JUSTIFY_HOURS(INTERVAL '1' HOUR)", "bigquery": "JUSTIFY_HOURS(INTERVAL '1' HOUR)", "postgres": "JUSTIFY_HOURS(INTERVAL '1 HOUR')", "materialize": "JUSTIFY_HOURS(INTERVAL '1 HOUR')", }, write={ "bigquery": "JUSTIFY_HOURS(INTERVAL '1' HOUR)", "postgres": "JUSTIFY_HOURS(INTERVAL '1 HOUR')", "materialize": "JUSTIFY_HOURS(INTERVAL '1 HOUR')", }, ) self.validate_all( "JUSTIFY_INTERVAL(INTERVAL '1' HOUR)", read={ "": "JUSTIFY_INTERVAL(INTERVAL '1' HOUR)", "bigquery": "JUSTIFY_INTERVAL(INTERVAL '1' HOUR)", "postgres": "JUSTIFY_INTERVAL(INTERVAL '1 HOUR')", "materialize": "JUSTIFY_INTERVAL(INTERVAL '1 HOUR')", }, write={ "bigquery": "JUSTIFY_INTERVAL(INTERVAL '1' HOUR)", "postgres": "JUSTIFY_INTERVAL(INTERVAL '1 HOUR')", "materialize": "JUSTIFY_INTERVAL(INTERVAL '1 HOUR')", }, ) def test_unix_time(self): self.validate_all( "UNIX_MICROS(foo)", read={ "": "UNIX_MICROS(foo)", "bigquery": "UNIX_MICROS(foo)", "spark": "UNIX_MICROS(foo)", "databricks": "UNIX_MICROS(foo)", }, write={ "bigquery": "UNIX_MICROS(foo)", "spark": "UNIX_MICROS(foo)", "databricks": "UNIX_MICROS(foo)", }, ) self.validate_all( "UNIX_MILLIS(foo)", read={ "": "UNIX_MILLIS(foo)", "bigquery": "UNIX_MILLIS(foo)", "spark": "UNIX_MILLIS(foo)", "databricks": "UNIX_MILLIS(foo)", }, write={ "bigquery": "UNIX_MILLIS(foo)", "spark": "UNIX_MILLIS(foo)", "databricks": "UNIX_MILLIS(foo)", }, ) def test_reverse(self): self.validate_all( "REVERSE(x)", read={ "": "REVERSE(x)", "bigquery": "REVERSE(x)", "hive": "REVERSE(x)", "spark2": "REVERSE(x)", "spark": "REVERSE(x)", "databricks": "REVERSE(x)", "mysql": "REVERSE(x)", "postgres": "REVERSE(x)", "tsql": "REVERSE(x)", "snowflake": "REVERSE(x)", "doris": "REVERSE(x)", "presto": "REVERSE(x)", "trino": "REVERSE(x)", "clickhouse": "REVERSE(x)", "redshift": "REVERSE(x)", }, write={ "bigquery": "REVERSE(x)", "hive": "REVERSE(x)", 
"spark2": "REVERSE(x)", "spark": "REVERSE(x)", "databricks": "REVERSE(x)", "mysql": "REVERSE(x)", "postgres": "REVERSE(x)", "tsql": "REVERSE(x)", "snowflake": "REVERSE(x)", "doris": "REVERSE(x)", "presto": "REVERSE(x)", "trino": "REVERSE(x)", "clickhouse": "REVERSE(x)", "redshift": "REVERSE(x)", }, ) def test_regr_count(self): self.validate_all( "REGR_COUNT(x, y)", read={ "": "REGR_COUNT(x, y)", "databricks": "REGR_COUNT(x, y)", "duckdb": "REGR_COUNT(x, y)", "exasol": "REGR_COUNT(x, y)", "hive": "REGR_COUNT(x, y)", "oracle": "REGR_COUNT(x, y)", "postgres": "REGR_COUNT(x, y)", "presto": "REGR_COUNT(x, y)", "snowflake": "REGR_COUNT(x, y)", "spark": "REGR_COUNT(x, y)", "teradata": "REGR_COUNT(x, y)", "trino": "REGR_COUNT(x, y)", }, write={ "": "REGR_COUNT(x, y)", "databricks": "REGR_COUNT(x, y)", "duckdb": "REGR_COUNT(x, y)", "exasol": "REGR_COUNT(x, y)", "hive": "REGR_COUNT(x, y)", "oracle": "REGR_COUNT(x, y)", "postgres": "REGR_COUNT(x, y)", "presto": "REGR_COUNT(x, y)", "snowflake": "REGR_COUNT(x, y)", "spark": "REGR_COUNT(x, y)", "teradata": "REGR_COUNT(x, y)", "trino": "REGR_COUNT(x, y)", }, ) def test_regr_intercept(self): self.validate_all( "REGR_INTERCEPT(x, y)", read={ "": "REGR_INTERCEPT(x, y)", "databricks": "REGR_INTERCEPT(x, y)", "duckdb": "REGR_INTERCEPT(x, y)", "exasol": "REGR_INTERCEPT(x, y)", "hive": "REGR_INTERCEPT(x, y)", "oracle": "REGR_INTERCEPT(x, y)", "postgres": "REGR_INTERCEPT(x, y)", "presto": "REGR_INTERCEPT(x, y)", "snowflake": "REGR_INTERCEPT(x, y)", "spark": "REGR_INTERCEPT(x, y)", "teradata": "REGR_INTERCEPT(x, y)", }, write={ "": "REGR_INTERCEPT(x, y)", "databricks": "REGR_INTERCEPT(x, y)", "duckdb": "REGR_INTERCEPT(x, y)", "exasol": "REGR_INTERCEPT(x, y)", "hive": "REGR_INTERCEPT(x, y)", "oracle": "REGR_INTERCEPT(x, y)", "postgres": "REGR_INTERCEPT(x, y)", "presto": "REGR_INTERCEPT(x, y)", "snowflake": "REGR_INTERCEPT(x, y)", "spark": "REGR_INTERCEPT(x, y)", "teradata": "REGR_INTERCEPT(x, y)", }, ) def test_regr_r2(self): 
self.validate_all( "REGR_R2(x, y)", read={ "": "REGR_R2(x, y)", "databricks": "REGR_R2(x, y)", "duckdb": "REGR_R2(x, y)", "exasol": "REGR_R2(x, y)", "hive": "REGR_R2(x, y)", "oracle": "REGR_R2(x, y)", "postgres": "REGR_R2(x, y)", "presto": "REGR_R2(x, y)", "snowflake": "REGR_R2(x, y)", "spark": "REGR_R2(x, y)", "teradata": "REGR_R2(x, y)", }, write={ "": "REGR_R2(x, y)", "databricks": "REGR_R2(x, y)", "duckdb": "REGR_R2(x, y)", "exasol": "REGR_R2(x, y)", "hive": "REGR_R2(x, y)", "oracle": "REGR_R2(x, y)", "postgres": "REGR_R2(x, y)", "presto": "REGR_R2(x, y)", "snowflake": "REGR_R2(x, y)", "spark": "REGR_R2(x, y)", "teradata": "REGR_R2(x, y)", }, ) def test_regr_slope(self): self.validate_all( "REGR_SLOPE(x, y)", read={ "": "REGR_SLOPE(x, y)", "databricks": "REGR_SLOPE(x, y)", "duckdb": "REGR_SLOPE(x, y)", "exasol": "REGR_SLOPE(x, y)", "oracle": "REGR_SLOPE(x, y)", "postgres": "REGR_SLOPE(x, y)", "presto": "REGR_SLOPE(x, y)", "snowflake": "REGR_SLOPE(x, y)", "spark": "REGR_SLOPE(x, y)", "teradata": "REGR_SLOPE(x, y)", "trino": "REGR_SLOPE(x, y)", }, write={ "": "REGR_SLOPE(x, y)", "databricks": "REGR_SLOPE(x, y)", "duckdb": "REGR_SLOPE(x, y)", "exasol": "REGR_SLOPE(x, y)", "oracle": "REGR_SLOPE(x, y)", "postgres": "REGR_SLOPE(x, y)", "presto": "REGR_SLOPE(x, y)", "snowflake": "REGR_SLOPE(x, y)", "spark": "REGR_SLOPE(x, y)", "teradata": "REGR_SLOPE(x, y)", "trino": "REGR_SLOPE(x, y)", }, ) def test_regr_sxx(self): self.validate_all( "REGR_SXX(x, y)", read={ "": "REGR_SXX(x, y)", "databricks": "REGR_SXX(x, y)", "duckdb": "REGR_SXX(x, y)", "exasol": "REGR_SXX(x, y)", "hive": "REGR_SXX(x, y)", "oracle": "REGR_SXX(x, y)", "postgres": "REGR_SXX(x, y)", "presto": "REGR_SXX(x, y)", "snowflake": "REGR_SXX(x, y)", "spark": "REGR_SXX(x, y)", "teradata": "REGR_SXX(x, y)", }, write={ "": "REGR_SXX(x, y)", "databricks": "REGR_SXX(x, y)", "duckdb": "REGR_SXX(x, y)", "exasol": "REGR_SXX(x, y)", "hive": "REGR_SXX(x, y)", "oracle": "REGR_SXX(x, y)", "postgres": "REGR_SXX(x, y)", 
"presto": "REGR_SXX(x, y)", "snowflake": "REGR_SXX(x, y)", "spark": "REGR_SXX(x, y)", "teradata": "REGR_SXX(x, y)", }, ) def test_regr_sxy(self): self.validate_all( "REGR_SXY(x, y)", read={ "": "REGR_SXY(x, y)", "databricks": "REGR_SXY(x, y)", "duckdb": "REGR_SXY(x, y)", "exasol": "REGR_SXY(x, y)", "hive": "REGR_SXY(x, y)", "oracle": "REGR_SXY(x, y)", "postgres": "REGR_SXY(x, y)", "presto": "REGR_SXY(x, y)", "snowflake": "REGR_SXY(x, y)", "spark": "REGR_SXY(x, y)", "teradata": "REGR_SXY(x, y)", }, write={ "": "REGR_SXY(x, y)", "databricks": "REGR_SXY(x, y)", "duckdb": "REGR_SXY(x, y)", "exasol": "REGR_SXY(x, y)", "hive": "REGR_SXY(x, y)", "oracle": "REGR_SXY(x, y)", "postgres": "REGR_SXY(x, y)", "presto": "REGR_SXY(x, y)", "snowflake": "REGR_SXY(x, y)", "spark": "REGR_SXY(x, y)", "teradata": "REGR_SXY(x, y)", }, ) def test_regr_syy(self): self.validate_all( "REGR_SYY(x, y)", read={ "": "REGR_SYY(x, y)", "databricks": "REGR_SYY(x, y)", "duckdb": "REGR_SYY(x, y)", "exasol": "REGR_SYY(x, y)", "hive": "REGR_SYY(x, y)", "oracle": "REGR_SYY(x, y)", "postgres": "REGR_SYY(x, y)", "presto": "REGR_SYY(x, y)", "snowflake": "REGR_SYY(x, y)", "spark": "REGR_SYY(x, y)", "teradata": "REGR_SYY(x, y)", }, write={ "": "REGR_SYY(x, y)", "databricks": "REGR_SYY(x, y)", "duckdb": "REGR_SYY(x, y)", "exasol": "REGR_SYY(x, y)", "hive": "REGR_SYY(x, y)", "oracle": "REGR_SYY(x, y)", "postgres": "REGR_SYY(x, y)", "presto": "REGR_SYY(x, y)", "snowflake": "REGR_SYY(x, y)", "spark": "REGR_SYY(x, y)", "teradata": "REGR_SYY(x, y)", }, ) def test_translate(self): self.validate_all( "TRANSLATE(x, y, z)", read={ "": "TRANSLATE(x, y, z)", "bigquery": "TRANSLATE(x, y, z)", "hive": "TRANSLATE(x, y, z)", "spark2": "TRANSLATE(x, y, z)", "spark": "TRANSLATE(x, y, z)", "databricks": "TRANSLATE(x, y, z)", "postgres": "TRANSLATE(x, y, z)", "tsql": "TRANSLATE(x, y, z)", "snowflake": "TRANSLATE(x, y, z)", "doris": "TRANSLATE(x, y, z)", "trino": "TRANSLATE(x, y, z)", "clickhouse": "TRANSLATE(x, y, z)", 
"redshift": "TRANSLATE(x, y, z)", "oracle": "TRANSLATE(x, y, z)", }, write={ "": "TRANSLATE(x, y, z)", "bigquery": "TRANSLATE(x, y, z)", "hive": "TRANSLATE(x, y, z)", "spark2": "TRANSLATE(x, y, z)", "spark": "TRANSLATE(x, y, z)", "databricks": "TRANSLATE(x, y, z)", "postgres": "TRANSLATE(x, y, z)", "tsql": "TRANSLATE(x, y, z)", "snowflake": "TRANSLATE(x, y, z)", "doris": "TRANSLATE(x, y, z)", "trino": "TRANSLATE(x, y, z)", "clickhouse": "TRANSLATE(x, y, z)", "redshift": "TRANSLATE(x, y, z)", "oracle": "TRANSLATE(x, y, z)", }, ) def test_soundex(self): self.validate_all( "SOUNDEX(x)", read={ "": "SOUNDEX(x)", "bigquery": "SOUNDEX(x)", "hive": "SOUNDEX(x)", "spark2": "SOUNDEX(x)", "spark": "SOUNDEX(x)", "databricks": "SOUNDEX(x)", "mysql": "SOUNDEX(x)", "postgres": "SOUNDEX(x)", "tsql": "SOUNDEX(x)", "snowflake": "SOUNDEX(x)", "dremio": "SOUNDEX(x)", "trino": "SOUNDEX(x)", "clickhouse": "SOUNDEX(x)", "redshift": "SOUNDEX(x)", "oracle": "SOUNDEX(x)", }, write={ "bigquery": "SOUNDEX(x)", "hive": "SOUNDEX(x)", "spark2": "SOUNDEX(x)", "spark": "SOUNDEX(x)", "databricks": "SOUNDEX(x)", "mysql": "SOUNDEX(x)", "postgres": "SOUNDEX(x)", "tsql": "SOUNDEX(x)", "snowflake": "SOUNDEX(x)", "dremio": "SOUNDEX(x)", "trino": "SOUNDEX(x)", "clickhouse": "SOUNDEX(x)", "redshift": "SOUNDEX(x)", "oracle": "SOUNDEX(x)", }, ) def test_grouping(self): self.validate_all( "GROUPING(x)", read={ "": "GROUPING(x)", "bigquery": "GROUPING(x)", "hive": "GROUPING(x)", "spark2": "GROUPING(x)", "spark": "GROUPING(x)", "databricks": "GROUPING(x)", "mysql": "GROUPING(x)", "postgres": "GROUPING(x)", "tsql": "GROUPING(x)", "snowflake": "GROUPING(x)", "clickhouse": "GROUPING(x)", "redshift": "GROUPING(x)", "oracle": "GROUPING(x)", }, write={ "bigquery": "GROUPING(x)", "hive": "GROUPING(x)", "spark2": "GROUPING(x)", "spark": "GROUPING(x)", "databricks": "GROUPING(x)", "mysql": "GROUPING(x)", "postgres": "GROUPING(x)", "tsql": "GROUPING(x)", "snowflake": "GROUPING(x)", "clickhouse": "GROUPING(x)", 
"redshift": "GROUPING(x)", "oracle": "GROUPING(x)", }, ) self.validate_all( "GROUPING(col1, col2, col3)", read={ "": "GROUPING(col1, col2, col3)", "snowflake": "GROUPING(col1, col2, col3)", "mysql": "GROUPING(col1, col2, col3)", "postgres": "GROUPING(col1, col2, col3)", "clickhouse": "GROUPING(col1, col2, col3)", "redshift": "GROUPING(col1, col2, col3)", }, write={ "snowflake": "GROUPING(col1, col2, col3)", "mysql": "GROUPING(col1, col2, col3)", "postgres": "GROUPING(col1, col2, col3)", "clickhouse": "GROUPING(col1, col2, col3)", "redshift": "GROUPING(col1, col2, col3)", }, ) def test_farm_fingerprint(self): self.validate_all( "FARM_FINGERPRINT(x)", read={ "": "FARM_FINGERPRINT(x)", "bigquery": "FARM_FINGERPRINT(x)", "clickhouse": "farmFingerprint64(x)", "redshift": "FARMFINGERPRINT64(x)", }, write={ "bigquery": "FARM_FINGERPRINT(x)", "clickhouse": "farmFingerprint64(x)", "redshift": "FARMFINGERPRINT64(x)", }, ) def test_from_to_base32(self): self.validate_all( "FROM_BASE32(x)", read={ "": "FROM_BASE32(x)", "bigquery": "FROM_BASE32(x)", "presto": "FROM_BASE32(x)", "trino": "FROM_BASE32(x)", }, write={ "bigquery": "FROM_BASE32(x)", "presto": "FROM_BASE32(x)", "trino": "FROM_BASE32(x)", }, ) self.validate_all( "TO_BASE32(x)", read={ "": "TO_BASE32(x)", "bigquery": "TO_BASE32(x)", "presto": "TO_BASE32(x)", "trino": "TO_BASE32(x)", }, write={ "bigquery": "TO_BASE32(x)", "presto": "TO_BASE32(x)", "trino": "TO_BASE32(x)", }, ) def test_regexp_instr(self): self.validate_all( "REGEXP_INSTR(src, reg)", read={ "": "REGEXP_INSTR(src, reg)", "bigquery": "REGEXP_INSTR(src, reg)", "snowflake": "REGEXP_INSTR(src, reg)", "oracle": "REGEXP_INSTR(src, reg)", "spark": "REGEXP_INSTR(src, reg)", "databricks": "REGEXP_INSTR(src, reg)", "tsql": "REGEXP_INSTR(src, reg)", "mysql": "REGEXP_INSTR(src, reg)", "postgres": "REGEXP_INSTR(src, reg)", "redshift": "REGEXP_INSTR(src, reg)", }, write={ "bigquery": "REGEXP_INSTR(src, reg)", "snowflake": "REGEXP_INSTR(src, reg)", "oracle": 
"REGEXP_INSTR(src, reg)", "spark": "REGEXP_INSTR(src, reg)", "databricks": "REGEXP_INSTR(src, reg)", "tsql": "REGEXP_INSTR(src, reg)", "mysql": "REGEXP_INSTR(src, reg)", "postgres": "REGEXP_INSTR(src, reg)", "redshift": "REGEXP_INSTR(src, reg)", }, ) self.validate_all( "REGEXP_INSTR(src, reg, pos, occ, opt)", read={ "": "REGEXP_INSTR(src, reg, pos, occ, opt)", "bigquery": "REGEXP_INSTR(src, reg, pos, occ, opt)", "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt)", "mysql": "REGEXP_INSTR(src, reg, pos, occ, opt)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt)", "redshift": "REGEXP_INSTR(src, reg, pos, occ, opt)", }, write={ "bigquery": "REGEXP_INSTR(src, reg, pos, occ, opt)", "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt)", "mysql": "REGEXP_INSTR(src, reg, pos, occ, opt)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt)", "redshift": "REGEXP_INSTR(src, reg, pos, occ, opt)", }, ) self.validate_all( "REGEXP_INSTR(src, reg, pos, occ, opt, par)", read={ "": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "mysql": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "redshift": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", }, write={ "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "mysql": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", "redshift": "REGEXP_INSTR(src, reg, pos, occ, opt, par)", }, ) self.validate_all( "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", read={ 
"": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", }, write={ "snowflake": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "oracle": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "tsql": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", "postgres": "REGEXP_INSTR(src, reg, pos, occ, opt, par, grp)", }, ) def test_format(self): self.validate_all( "FORMAT('str fmt1 fmt2', 1, 'a')", read={ "": "FORMAT('str fmt1 fmt2', 1, 'a')", "bigquery": "FORMAT('str fmt1 fmt2', 1, 'a')", "postgres": "FORMAT('str fmt1 fmt2', 1, 'a')", "duckdb": "FORMAT('str fmt1 fmt2', 1, 'a')", }, write={ "bigquery": "FORMAT('str fmt1 fmt2', 1, 'a')", "postgres": "FORMAT('str fmt1 fmt2', 1, 'a')", "spark2": "FORMAT_STRING('str fmt1 fmt2', 1, 'a')", "spark": "FORMAT_STRING('str fmt1 fmt2', 1, 'a')", "databricks": "FORMAT_STRING('str fmt1 fmt2', 1, 'a')", "duckdb": "FORMAT('str fmt1 fmt2', 1, 'a')", }, ) def test_json_array_append(self): self.validate_all( """JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', 1)""", read={ "": """JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', 1)""", "bigquery": """JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', 1)""", }, write={ "bigquery": """JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', 1)""", "mysql": """JSON_ARRAY_APPEND('["a", "b", "c"]', '$', 1)""", }, ) def test_json_array_insert(self): self.validate_all( """JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1)""", read={ "": """JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1)""", "bigquery": """JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1)""", }, write={ "bigquery": """JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1)""", "mysql": """JSON_ARRAY_INSERT('["a", ["b", 
"c"], "d"]', '$[1]', 1)""", }, ) def test_json_remove(self): self.validate_all( """JSON_REMOVE(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', '$[1]')""", read={ "": """JSON_REMOVE(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', '$[1]')""", "bigquery": """JSON_REMOVE(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', '$[1]')""", }, write={ "bigquery": """JSON_REMOVE(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', '$[1]')""", "mysql": """JSON_REMOVE('["a", ["b", "c"], "d"]', '$[1]', '$[1]')""", "sqlite": """JSON_REMOVE('["a", ["b", "c"], "d"]', '$[1]', '$[1]')""", }, ) def test_json_set(self): self.validate_all( """JSON_SET(PARSE_JSON('{"a": 1}'), '$', PARSE_JSON('{"b": 2, "c": 3}'))""", read={ "": """JSON_SET(PARSE_JSON('{"a": 1}'), '$', PARSE_JSON('{"b": 2, "c": 3}'))""", "bigquery": """JSON_SET(PARSE_JSON('{"a": 1}'), '$', PARSE_JSON('{"b": 2, "c": 3}'))""", }, write={ "bigquery": """JSON_SET(PARSE_JSON('{"a": 1}'), '$', PARSE_JSON('{"b": 2, "c": 3}'))""", "mysql": """JSON_SET('{"a": 1}', '$', '{"b": 2, "c": 3}')""", "sqlite": """JSON_SET('{"a": 1}', '$', '{"b": 2, "c": 3}')""", "doris": """JSON_SET('{"a": 1}', '$', '{"b": 2, "c": 3}')""", }, ) def test_json_strip_nulls(self): self.validate_all( """JSON_STRIP_NULLS(PARSE_JSON('[{"f1":1,"f2":null},2,null,3]'))""", read={ "": """JSON_STRIP_NULLS(PARSE_JSON('[{"f1":1,"f2":null},2,null,3]'))""", "bigquery": """JSON_STRIP_NULLS(PARSE_JSON('[{"f1":1,"f2":null},2,null,3]'))""", }, write={ "bigquery": """JSON_STRIP_NULLS(PARSE_JSON('[{"f1":1,"f2":null},2,null,3]'))""", "postgres": """JSON_STRIP_NULLS(CAST('[{"f1":1,"f2":null},2,null,3]' AS JSON))""", }, ) def test_is_unknown(self): # In many dialects `<...> IS UNKNOWN` is equivalent to `<...> IS NULL` self.validate_all( "x IS NULL", read={ "": "x IS UNKNOWN", "bigquery": "x IS UNKNOWN", "mysql": "x IS UNKNOWN", "postgres": "x IS UNKNOWN", "redshift": "x IS UNKNOWN", "duckdb": "x IS UNKNOWN", "spark": "x IS UNKNOWN", "databricks": "x IS UNKNOWN", }, ) self.validate_all( "NOT x IS 
NULL", read={ "": "x IS NOT UNKNOWN", "bigquery": "x IS NOT UNKNOWN", "mysql": "x IS NOT UNKNOWN", "postgres": "x IS NOT UNKNOWN", "redshift": "x IS NOT UNKNOWN", "duckdb": "x IS NOT UNKNOWN", "spark": "x IS NOT UNKNOWN", "databricks": "x IS NOT UNKNOWN", }, ) def test_is_with_dcolon(self): self.validate_all( "SELECT CAST(col IS NULL AS BOOLEAN) FROM (SELECT 1 AS col) AS t", read={ "": "SELECT col IS NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "duckdb": "SELECT col IS NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "redshift": "SELECT col IS NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "postgres": "SELECT col IS NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", }, ) self.validate_all( "SELECT CAST(NOT col IS NULL AS BOOLEAN) FROM (SELECT 1 AS col) AS t", read={ "": "SELECT col IS NOT NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "duckdb": "SELECT col IS NOT NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "redshift": "SELECT col IS NOT NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", "postgres": "SELECT col IS NOT NULL::BOOLEAN FROM (SELECT 1 AS col) AS t", }, ) def test_regexp_replace(self): for target_dialect in ("postgres", "duckdb"): # Transpilations from other dialects to Postgres or DuckDB should append 'g' # since their semantics is to replace all occurrences of the pattern. 
for read_dialect in ("", "bigquery", "presto", "trino", "spark", "databricks"): with self.subTest( f"Testing REGEXP_REPLACE appending 'g' flag from {read_dialect} to {target_dialect}" ): sql = parse_one("REGEXP_REPLACE('aaa', 'a', 'b')", read=read_dialect).sql( target_dialect ) self.assertEqual(sql, "REGEXP_REPLACE('aaa', 'a', 'b', 'g')") def test_subquery_unwrap(self): self.validate_identity( "WITH sub_query AS (SELECT a FROM table) (SELECT a FROM sub_query)", "WITH sub_query AS (SELECT a FROM table) SELECT a FROM sub_query", ) self.validate_identity( "WITH sub_query AS (SELECT a FROM table) ((((SELECT a FROM sub_query))))", "WITH sub_query AS (SELECT a FROM table) SELECT a FROM sub_query", ) def test_initcap(self): delimiter_chars = { "": Dialect.INITCAP_DEFAULT_DELIMITER_CHARS, "bigquery": BigQuery.INITCAP_DEFAULT_DELIMITER_CHARS, "snowflake": Snowflake.INITCAP_DEFAULT_DELIMITER_CHARS, "spark": Spark2.INITCAP_DEFAULT_DELIMITER_CHARS, } with self.subTest("INITCAP without explicit delimiters"): self.assertEqual(exp.Initcap(this=exp.Literal.string("col")).sql(), "INITCAP('col')") self.assertEqual(exp.Initcap(this=exp.column("col")).sql(), "INITCAP(col)") for dialect in delimiter_chars: with self.subTest(f"Round-tripping default delimiters for {dialect or 'default'}"): self.assertEqual( parse_one("INITCAP(col)", read=dialect).sql(dialect), "INITCAP(col)" ) for read_dialect in ("", "spark"): for write_dialect in ("bigquery", "snowflake"): with self.subTest( f"Default delimiters emitted from {read_dialect or 'default'} to {write_dialect}" ): escaped_delimiters = exp.Literal.string(delimiter_chars[read_dialect]).sql( write_dialect ) self.assertEqual( parse_one("INITCAP(col)", read=read_dialect).sql(write_dialect), f"INITCAP(col, {escaped_delimiters})", ) def assert_default_duckdb_sql(read_dialect: str, default_chars: str) -> None: chr_chars = [char for char in WS_CONTROL_CHARS_TO_DUCK if char in default_chars] expression = parse_one("INITCAP(col)", read=read_dialect) 
self.assert_duckdb_sql( expression, includes=("ARRAY_TO_STRING(", "REGEXP_MATCHES(", "LIST_TRANSFORM("), chr_chars=chr_chars, ) for dialect, default_chars in delimiter_chars.items(): with self.subTest(f"DuckDB rewrite for {dialect or 'default'} default delimiters"): assert_default_duckdb_sql(dialect, default_chars) def assert_custom_duckdb_sql( query: str, *, includes: t.Optional[Iterable[str]] = None, excludes: t.Optional[Iterable[str]] = None, chr_chars: t.Optional[Iterable[str]] = None, ) -> None: for dialect in ("bigquery", "snowflake"): with self.subTest(f"DuckDB generation for {query} from {dialect}"): expression = parse_one(query, read=dialect) self.assert_duckdb_sql( expression, includes=includes, excludes=excludes, chr_chars=chr_chars ) assert_custom_duckdb_sql( "INITCAP(col, '')", includes=("UPPER(LEFT(",), excludes=("REGEXP_MATCHES(",) ) assert_custom_duckdb_sql("INITCAP(col, NULL)", includes=("REGEXP_MATCHES(", "REPLACE(")) assert_custom_duckdb_sql("INITCAP(col, ' ')", includes=("' '",)) assert_custom_duckdb_sql("INITCAP(col, '@')", includes=("'@'",), excludes=("CHR(",)) assert_custom_duckdb_sql("INITCAP(col, '_@')", includes=("'_@'",)) assert_custom_duckdb_sql(r"INITCAP(col, '\\\\')", includes=("\\\\",)) assert_custom_duckdb_sql( "INITCAP(col, '\u000b')", chr_chars=("\u000b",), ) assert_custom_duckdb_sql( "INITCAP(col, (SELECT delimiter FROM settings LIMIT 1))", includes=("SELECT delimiter FROM settings", "REPLACE("), ) def test_initcap_custom_delimiter_warning(self): expression = parse_one("INITCAP(col, '_')", read="bigquery") for dialect in ("postgres", "presto"): with self.subTest(f"INITCAP unsupported custom delimiters warning for {dialect}"): with self.assertLogs(generator_logger, level="WARNING") as cm: expression.sql(dialect) self.assertIn("INITCAP does not support custom delimiters", cm.output[0]) def test_parse_at_time_zone(self): parsed_expr = self.validate_identity( "SELECT CAST('2001-02-17 08:38:40' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME 
ZONE 'Asia/Tokyo'" ).expressions[0] self.assertEqual(parsed_expr.args.get("zone").sql(), "'Asia/Tokyo'") self.assertEqual(parsed_expr.this.args.get("zone").sql(), "'UTC'") parsed_expr = self.validate_identity( "SELECT CAST('2001-02-17 08:38:40' AS TIMESTAMP) AT TIME ZONE INTERVAL '3' HOURS AT TIME ZONE 'Asia/Tokyo'" ).expressions[0] self.assertEqual(parsed_expr.args.get("zone").sql("postgres"), "'Asia/Tokyo'") self.assertEqual(parsed_expr.this.args.get("zone").sql("postgres"), "INTERVAL '3 HOURS'") def test_underscore_scientific_notation(self): for dialect in ("duckdb", "clickhouse"): for notation in ("e", "E"): for sign in ("", "-", "+"): with self.subTest(f"Testing notation: {notation}, sign: {sign} for {dialect}"): number = f"1_2{notation}{sign}1_0" expected = f"12{notation}{sign}10" self.assertEqual(parse_one(number, read=dialect).sql(dialect), expected) number = f"12.3_4{notation}{sign}5_6_7" expected = f"12.34{notation}{sign}567" self.assertEqual(parse_one(number, read=dialect).sql(dialect), expected) with self.subTest(f"Testing underscore separated numbers for {dialect}"): ast = parse_one("1_2_3_4_5", read=dialect) self.assertTrue(ast.is_int) self.assertEqual(ast.to_py(), 12345) self.assertEqual(ast.sql(dialect), "12345") def test_localtime_and_localtimestamp(self): for func in ("LOCALTIME", "LOCALTIMESTAMP"): with self.subTest(f"Testing {func}"): dialects = { "postgres": f"SELECT {func}", "duckdb": f"SELECT {func}", "redshift": f"SELECT {func}", "presto": f"SELECT {func}", "trino": f"SELECT {func}", "mysql": f"SELECT {func}", "singlestore": f"SELECT {func}", } if func == "LOCALTIMESTAMP": dialects["oracle"] = f"SELECT {func}" self.validate_all( f"SELECT {func}", read=dialects, write=dialects, ) with self.subTest(f"Testing {func} with precision"): dialects = { "postgres": f"SELECT {func}(2)", "redshift": f"SELECT {func}(2)", "presto": f"SELECT {func}(2)", "trino": f"SELECT {func}(2)", "mysql": f"SELECT {func}(2)", "singlestore": f"SELECT {func}(2)", } if 
func == "LOCALTIMESTAMP": dialects["oracle"] = f"SELECT {func}(2)" self.validate_all( f"SELECT {func}(2)", read=dialects, write=dialects, ) exp_type = exp.Localtime if func == "LOCALTIME" else exp.Localtimestamp for func_variant in (func, f"{func}(2)"): with self.subTest(f"Testing {func_variant} function node"): self.validate_identity(f"SELECT {func_variant}").selects[0].assert_is(exp_type) for dialect in ( "tsql", "oracle", "sqlite", "hive", "spark2", "spark", "databricks", "bigquery", ): for func in ("localtime", "localtimestamp"): # oracle supports localtimestamp but not localtime if func == "localtimestamp" and dialect == "oracle": continue with self.subTest(f"Testing {func} identifier in {dialect}"): sql = f"SELECT {func}" select = parse_one(sql, dialect=dialect) select.selects[0].assert_is(exp.Column) self.assertEqual(select.sql(dialect), sql) def test_current_catalog(self): sql = "SELECT CURRENT_CATALOG" unsupported_dialects = [ "bigquery", "mysql", "oracle", "clickhouse", "snowflake", "spark", "databricks", "presto", ] for dialect in unsupported_dialects: with self.subTest(f"Testing CURRENT_CATALOG as Column in {dialect}"): select = parse_one(sql, dialect=dialect) select.selects[0].assert_is(exp.Column) self.assertEqual(select.sql(dialect), sql) supported_dialects = [ "postgres", "duckdb", "trino", "databricks", ] for dialect in supported_dialects: with self.subTest(f"Testing CURRENT_CATALOG expression in {dialect}"): if dialect == "databricks": sql = "SELECT CURRENT_CATALOG()" select = parse_one(sql, dialect=dialect) select.selects[0].assert_is(exp.CurrentCatalog) self.assertEqual(select.sql(dialect), sql) def test_session_user(self): no_paren_sql = "SELECT SESSION_USER" func_sql = "SELECT SESSION_USER()" # These dialects support only SESSION_USER() for dialect in ("bigquery", "mysql"): with self.subTest(f"Testing that SESSION_USER is parsed as a Column in {dialect}"): select = parse_one(no_paren_sql, dialect=dialect) 
select.selects[0].assert_is(exp.Column) self.assertEqual(select.sql(dialect), no_paren_sql) select = parse_one(func_sql, dialect=dialect) select.selects[0].assert_is(exp.SessionUser) self.assertEqual(select.sql(dialect), func_sql) # These dialects support either only SESSION_USER or both no_paren_dialects = [ "postgres", "duckdb", "databricks", "tsql", "spark", ] for dialect in no_paren_dialects: with self.subTest( f"Testing that SESSION_USER is parsed as a SessionUser expression in {dialect}" ): select = parse_one(no_paren_sql, dialect=dialect) select.selects[0].assert_is(exp.SessionUser) self.assertEqual(select.sql(dialect), no_paren_sql) # These dialects support both SESSION_USER and SESSION_USER() if dialect in ("databricks", "spark", "duckdb"): self.assertEqual( parse_one(func_sql, dialect=dialect).sql(dialect), no_paren_sql ) def test_operator(self): expr = self.validate_identity("1 OPERATOR(+) 2 OPERATOR(*) 3") expr.left.assert_is(exp.Operator) expr.left.left.assert_is(exp.Literal) expr.left.right.assert_is(exp.Literal) expr.right.assert_is(exp.Literal) self.assertEqual(expr.sql(dialect="postgres"), "1 OPERATOR(+) 2 OPERATOR(*) 3") self.validate_identity("SELECT operator FROM t") self.validate_identity("SELECT 1 OPERATOR(+) 2") self.validate_identity("SELECT 1 OPERATOR(+) /* foo */ 2") self.validate_identity("SELECT 1 OPERATOR(pg_catalog.+) 2") def test_json_keys(self): self.validate_all( "JSON_KEYS(foo)", read={ "": "JSON_KEYS(foo)", "spark": "JSON_OBJECT_KEYS(foo)", "databricks": "JSON_OBJECT_KEYS(foo)", "mysql": "JSON_KEYS(foo)", "starrocks": "JSON_KEYS(foo)", "duckdb": "JSON_KEYS(foo)", "snowflake": "OBJECT_KEYS(foo)", "doris": "JSON_KEYS(foo)", "singlestore": "JSON_KEYS(foo)", }, write={ "spark": "JSON_OBJECT_KEYS(foo)", "databricks": "JSON_OBJECT_KEYS(foo)", "mysql": "JSON_KEYS(foo)", "starrocks": "JSON_KEYS(foo)", "duckdb": "JSON_KEYS(foo)", "snowflake": "OBJECT_KEYS(foo)", "doris": "JSON_KEYS(foo)", "singlestore": "JSON_KEYS(foo)", }, ) 
self.validate_all( "JSON_KEYS(foo, '$.a')", read={ "": "JSON_KEYS(foo, '$.a')", "mysql": "JSON_KEYS(foo, '$.a')", "starrocks": "JSON_KEYS(foo, '$.a')", "duckdb": "JSON_KEYS(foo, '$.a')", "doris": "JSON_KEYS(foo, '$.a')", }, write={ "mysql": "JSON_KEYS(foo, '$.a')", "starrocks": "JSON_KEYS(foo, '$.a')", "duckdb": "JSON_KEYS(foo, '$.a')", "doris": "JSON_KEYS(foo, '$.a')", }, ) def test_interval_with_units_dcolon(self): self.validate_identity( "SELECT interval '00:00:01'::interval AS foo", "SELECT CAST(INTERVAL '00:00:01' AS INTERVAL) AS foo", ) self.validate_identity( "SELECT ROW_NUMBER() OVER(PARTITION BY event_time + interval '00:00:01'::interval) AS foo FROM t", "SELECT ROW_NUMBER() OVER (PARTITION BY event_time + CAST(INTERVAL '00:00:01' AS INTERVAL)) AS foo FROM t", ) @unittest.skipIf(_PARSER_IS_COMPILED, "mypyc compiled parsers cannot be subclassed") def test_patch_dialect_parser(self): class CustomSnowflakeParser(SnowflakeParser): FUNCTIONS = { **SnowflakeParser.FUNCTIONS, "MY_CUSTOM_FUNC": exp.Length.from_arg_list, } original = Snowflake.parser_class try: Snowflake.parser_class = CustomSnowflakeParser result = parse_one("SELECT 1", dialect="snowflake") self.assertIsInstance(result, exp.Select) result = parse_one("SELECT MY_CUSTOM_FUNC(a)", dialect="snowflake") self.assertIsInstance(result.find(exp.Length), exp.Length) finally: Snowflake.parser_class = original @unittest.skipIf(_PARSER_IS_COMPILED, "mypyc compiled parsers cannot be subclassed") def test_custom_dialect(self): class MyDialect(Dialect): class Parser(SnowflakeParser): FUNCTIONS = { **SnowflakeParser.FUNCTIONS, "DOUBLE_IT": lambda args: exp.Mul( this=exp.Literal.number(2), expression=args[0] if args else exp.Null(), ), } result = parse_one("SELECT DOUBLE_IT(5)", dialect=MyDialect) self.assertIsInstance(result.expressions[0], exp.Mul) self.assertEqual(result.sql(), "SELECT 2 * 5") ================================================ FILE: tests/dialects/test_doris.py 
================================================ from tests.dialects.test_dialect import Validator class TestDoris(Validator): dialect = "doris" def test_doris(self): self.validate_all( "SELECT TO_DATE('2020-02-02 00:00:00')", write={ "doris": "SELECT TO_DATE('2020-02-02 00:00:00')", "oracle": "SELECT CAST('2020-02-02 00:00:00' AS DATE)", }, ) self.validate_all( "SELECT MAX_BY(a, b), MIN_BY(c, d)", read={ "clickhouse": "SELECT argMax(a, b), argMin(c, d)", }, ) self.validate_all( "SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))", read={ "clickhouse": "SELECT arraySum(x -> x*x, [2, 3])", }, write={ "clickhouse": "SELECT arraySum(x -> x * x, [2, 3])", "doris": "SELECT ARRAY_SUM(x -> x * x, ARRAY(2, 3))", }, ) self.validate_all( "MONTHS_ADD(d, n)", read={ "oracle": "ADD_MONTHS(d, n)", }, write={ "doris": "MONTHS_ADD(d, n)", "oracle": "ADD_MONTHS(d, n)", }, ) self.validate_all( """SELECT JSON_EXTRACT(CAST('{"key": 1}' AS JSONB), '$.key')""", read={ "postgres": """SELECT '{"key": 1}'::jsonb ->> 'key'""", }, write={ "doris": """SELECT JSON_EXTRACT(CAST('{"key": 1}' AS JSONB), '$.key')""", "postgres": """SELECT JSON_EXTRACT_PATH(CAST('{"key": 1}' AS JSONB), 'key')""", }, ) self.validate_all( "SELECT GROUP_CONCAT('aa', ',')", read={ "doris": "SELECT GROUP_CONCAT('aa', ',')", "mysql": "SELECT GROUP_CONCAT('aa' SEPARATOR ',')", "postgres": "SELECT STRING_AGG('aa', ',')", }, ) self.validate_all( "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)", read={ "doris": "SELECT LAG(1, 1, NULL) OVER (ORDER BY 1)", "postgres": "SELECT LAG(1) OVER (ORDER BY 1)", }, ) self.validate_all( "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)", read={ "doris": "SELECT LAG(1, 2, NULL) OVER (ORDER BY 1)", "postgres": "SELECT LAG(1, 2) OVER (ORDER BY 1)", }, ) self.validate_all( "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)", read={ "doris": "SELECT LEAD(1, 1, NULL) OVER (ORDER BY 1)", "postgres": "SELECT LEAD(1) OVER (ORDER BY 1)", }, ) self.validate_all( "SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)", read={ "doris": 
"SELECT LEAD(1, 2, NULL) OVER (ORDER BY 1)", "postgres": "SELECT LEAD(1, 2) OVER (ORDER BY 1)", }, ) self.validate_identity("""JSON_TYPE('{"foo": "1" }', '$.foo')""") self.validate_identity("L2_DISTANCE(x, y)") def test_identity(self): self.validate_identity("CREATE TABLE t (c INT) PROPERTIES ('x'='y')") self.validate_identity("CREATE TABLE t (c INT) COMMENT 'c'") self.validate_identity("COALECSE(a, b, c, d)") self.validate_identity("SELECT CAST(`a`.`b` AS INT) FROM foo") self.validate_identity("SELECT APPROX_COUNT_DISTINCT(a) FROM x") self.validate_identity( "CREATE TABLE IF NOT EXISTS example_tbl_unique (user_id BIGINT NOT NULL, user_name VARCHAR(50) NOT NULL, city VARCHAR(20), age SMALLINT, sex TINYINT) UNIQUE KEY (user_id, user_name) DISTRIBUTED BY HASH (user_id) BUCKETS 10 PROPERTIES ('enable_unique_key_merge_on_write'='true')" ) self.validate_identity("INSERT OVERWRITE TABLE test PARTITION(p1, p2) VALUES (1, 2)") def test_time(self): self.validate_identity("TIMESTAMP('2022-01-01')") self.validate_identity("DATE_TRUNC(event_date, 'DAY')") self.validate_identity("DATE_TRUNC('2010-12-02 19:28:30', 'HOUR')") self.validate_identity("CURRENT_DATE()") def test_regex(self): self.validate_all( "SELECT REGEXP_LIKE(abc, '%foo%')", write={ "doris": "SELECT REGEXP(abc, '%foo%')", }, ) def test_analyze(self): self.validate_identity("ANALYZE TABLE tbl") self.validate_identity("ANALYZE DATABASE db") self.validate_identity("ANALYZE TABLE TBL(c1, c2)") def test_key(self): self.validate_identity("CREATE TABLE test_table (c1 INT, c2 INT) UNIQUE KEY (c1)") self.validate_identity("CREATE TABLE test_table (c1 INT, c2 INT) DUPLICATE KEY (c1)") self.validate_identity("CREATE MATERIALIZED VIEW test_table (c1 INT, c2 INT) KEY (c1)") def test_distributed(self): self.validate_identity( "CREATE TABLE test_table (c1 INT, c2 INT) UNIQUE KEY (c1) DISTRIBUTED BY HASH (c1)" ) self.validate_identity("CREATE TABLE test_table (c1 INT, c2 INT) DISTRIBUTED BY RANDOM") self.validate_identity( 
"CREATE TABLE test_table (c1 INT, c2 INT) DISTRIBUTED BY RANDOM BUCKETS 1" ) def test_partition(self): self.validate_identity( "CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY RANGE (`c2`) (PARTITION `p201701` VALUES LESS THAN ('2017-02-01'), PARTITION `p201702` VALUES LESS THAN ('2017-03-01'))" ) self.validate_identity( "CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY RANGE (`c2`) (PARTITION `p201701` VALUES [('2017-01-01'), ('2017-02-01')), PARTITION `other` VALUES LESS THAN (MAXVALUE))" ) self.validate_identity( "CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY RANGE (`c2`) (FROM ('2000-11-14') TO ('2021-11-14') INTERVAL 2 YEAR)" ) self.validate_identity("CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY (c2)") self.validate_identity("CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY (c1, c2)") self.validate_identity( "CREATE TABLE test_table (c1 INT, c2 DATE) PARTITION BY (DATE_TRUNC(c2, 'MONTH'))" ) self.validate_identity( "CREATE TABLE test_table (c1 INT) PARTITION BY LIST (`c1`) (PARTITION p1 VALUES IN (1, 2), PARTITION p2 VALUES IN (3))" ) def test_table_alias_conversion(self): """Test conversion from postgres to Doris for DELETE/UPDATE statements with table aliases.""" # Test cases for DELETE statements with table aliases self.validate_all( "DELETE FROM sales s WHERE s.id = 1", read={ "postgres": "DELETE FROM sales AS s WHERE s.id = 1", }, write={ "doris": "DELETE FROM sales s WHERE s.id = 1", "postgres": "DELETE FROM sales AS s WHERE s.id = 1", }, ) # DELETE with multiple table references self.validate_all( "DELETE FROM orders o WHERE o.customer_id IN (SELECT c.id FROM customers AS c WHERE c.status_code = 'inactive')", read={ "postgres": "DELETE FROM orders AS o WHERE o.customer_id IN (SELECT c.id FROM customers AS c WHERE c.status_code = 'inactive')", }, write={ "doris": "DELETE FROM orders o WHERE o.customer_id IN (SELECT c.id FROM customers AS c WHERE c.status_code = 'inactive')", "postgres": "DELETE FROM orders AS o WHERE 
o.customer_id IN (SELECT c.id FROM customers AS c WHERE c.status_code = 'inactive')", }, ) # DELETE with EXISTS clause self.validate_all( "DELETE FROM temp_data t WHERE NOT EXISTS(SELECT 1 FROM main_data AS m WHERE m.id = t.id)", read={ "postgres": "DELETE FROM temp_data AS t WHERE NOT EXISTS(SELECT 1 FROM main_data AS m WHERE m.id = t.id)", }, write={ "doris": "DELETE FROM temp_data t WHERE NOT EXISTS(SELECT 1 FROM main_data AS m WHERE m.id = t.id)", "postgres": "DELETE FROM temp_data AS t WHERE NOT EXISTS(SELECT 1 FROM main_data AS m WHERE m.id = t.id)", }, ) # UPDATE statements with table aliases self.validate_all( "UPDATE employees e SET e.salary = e.salary * 1.1 WHERE e.department = 'IT'", read={ "postgres": "UPDATE employees AS e SET e.salary = e.salary * 1.1 WHERE e.department = 'IT'", }, write={ "doris": "UPDATE employees e SET e.salary = e.salary * 1.1 WHERE e.department = 'IT'", "postgres": "UPDATE employees AS e SET e.salary = e.salary * 1.1 WHERE e.department = 'IT'", }, ) # UPDATE with multiple columns self.validate_all( "UPDATE accounts a SET a.balance = a.balance + 100, a.status_code = 'active' WHERE a.account_type = 'savings'", read={ "postgres": "UPDATE accounts AS a SET a.balance = a.balance + 100, a.status_code = 'active' WHERE a.account_type = 'savings'", }, write={ "doris": "UPDATE accounts a SET a.balance = a.balance + 100, a.status_code = 'active' WHERE a.account_type = 'savings'", "postgres": "UPDATE accounts AS a SET a.balance = a.balance + 100, a.status_code = 'active' WHERE a.account_type = 'savings'", }, ) # UPDATE with multiple table references in subquery self.validate_all( "UPDATE prices p SET p.amount = p.amount * 0.9 WHERE p.product_id IN (SELECT pr.id FROM products AS pr JOIN categories AS c ON pr.category_id = c.id WHERE c.foo = 'Electronics')", read={ "postgres": "UPDATE prices AS p SET p.amount = p.amount * 0.9 WHERE p.product_id IN (SELECT pr.id FROM products AS pr JOIN categories AS c ON pr.category_id = c.id WHERE c.foo = 
'Electronics')", }, write={ "doris": "UPDATE prices p SET p.amount = p.amount * 0.9 WHERE p.product_id IN (SELECT pr.id FROM products AS pr JOIN categories AS c ON pr.category_id = c.id WHERE c.foo = 'Electronics')", "postgres": "UPDATE prices AS p SET p.amount = p.amount * 0.9 WHERE p.product_id IN (SELECT pr.id FROM products AS pr JOIN categories AS c ON pr.category_id = c.id WHERE c.foo = 'Electronics')", }, ) def test_rename_table(self): self.validate_all( "ALTER TABLE db.t1 RENAME TO db.t2", write={ "snowflake": "ALTER TABLE db.t1 RENAME TO db.t2", "duckdb": "ALTER TABLE db.t1 RENAME TO t2", "doris": "ALTER TABLE db.t1 RENAME t2", }, ) def test_materialized_view_properties(self): # BUILD modes self.validate_identity("CREATE MATERIALIZED VIEW mv BUILD IMMEDIATE AS SELECT 1") self.validate_identity("CREATE MATERIALIZED VIEW mv BUILD DEFERRED AS SELECT 1") # REFRESH methods with triggers self.validate_identity("CREATE MATERIALIZED VIEW mv REFRESH COMPLETE ON MANUAL AS SELECT 1") self.validate_identity("CREATE MATERIALIZED VIEW mv REFRESH AUTO ON COMMIT AS SELECT 1") self.validate_identity( "CREATE MATERIALIZED VIEW mv REFRESH AUTO ON SCHEDULE EVERY 5 MINUTE STARTS '2025-01-01 00:00:00' AS SELECT 1" ) # Combined BUILD and REFRESH self.validate_identity( "CREATE MATERIALIZED VIEW mv BUILD DEFERRED REFRESH AUTO ON SCHEDULE EVERY 10 MINUTE AS SELECT 1" ) ================================================ FILE: tests/dialects/test_dremio.py ================================================ from tests.dialects.test_dialect import Validator from sqlglot import parse_one, exp, UnsupportedError, ErrorLevel, transpile, ParseError from sqlglot.optimizer.annotate_types import annotate_types class TestDremio(Validator): dialect = "dremio" maxDiff = None def test_type_mappings(self): self.validate_identity("CAST(x AS SMALLINT)", "CAST(x AS INT)") self.validate_identity("CAST(x AS TINYINT)", "CAST(x AS INT)") self.validate_identity("CAST(x AS BINARY)", "CAST(x AS VARBINARY)") 
self.validate_identity("CAST(x AS TEXT)", "CAST(x AS VARCHAR)") self.validate_identity("CAST(x AS NCHAR)", "CAST(x AS VARCHAR)") self.validate_identity("CAST(x AS CHAR)", "CAST(x AS VARCHAR)") self.validate_identity("CAST(x AS TIMESTAMPNTZ)", "CAST(x AS TIMESTAMP)") self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS TIMESTAMP)") self.validate_identity("CAST(x AS ARRAY)", "CAST(x AS LIST)") self.validate_identity("CAST(x AS BIT)", "CAST(x AS BOOLEAN)") # unsupported types with self.assertRaises(UnsupportedError): transpile( "CAST(x AS TIMESTAMPTZ)", read="oracle", write="dremio", unsupported_level=ErrorLevel.IMMEDIATE, ) with self.assertRaises(UnsupportedError): transpile( "CAST(x AS TIMESTAMPLTZ)", read="oracle", write="dremio", unsupported_level=ErrorLevel.IMMEDIATE, ) def test_concat_coalesce(self): self.validate_all( "SELECT CONCAT('a', NULL)", write={ "dremio": "SELECT CONCAT('a', NULL)", "": "SELECT CONCAT('a', COALESCE(NULL, ''))", }, ) def test_typed_division(self): def _div_result_type(sql: str, dialect: str): tree = parse_one(sql, read=dialect) annotate_types(tree, dialect=dialect) return tree.find(exp.Div).type.this assert _div_result_type("SELECT 5 / 2", "dremio") == exp.DataType.Type.BIGINT assert _div_result_type("SELECT 5 / 2", "oracle") == exp.DataType.Type.DOUBLE def test_user_defined_types_unsupported(self): with self.assertRaises(ParseError): self.parse_one("CAST(x AS MY_CUSTOM_TYPE)") def test_null_ordering(self): # NULLS LAST is the default, so generator can drop the clause self.validate_identity( "SELECT * FROM t ORDER BY a NULLS LAST", "SELECT * FROM t ORDER BY a" ) self.validate_identity( "SELECT * FROM t ORDER BY a DESC NULLS LAST", "SELECT * FROM t ORDER BY a DESC" ) # If the clause is not the default, it must be kept self.validate_identity( "SELECT * FROM t ORDER BY a NULLS FIRST", ) self.validate_identity( "SELECT * FROM t ORDER BY a DESC NULLS FIRST", ) def test_convert_timezone(self): self.validate_all( "SELECT 
CONVERT_TIMEZONE('America/Chicago', DateColumn)", write={ "dremio": "SELECT CONVERT_TIMEZONE('America/Chicago', DateColumn)", "": "SELECT DateColumn AT TIME ZONE 'America/Chicago'", }, ) def test_interval_plural(self): self.validate_identity("INTERVAL '7' DAYS", "INTERVAL '7' DAY") def test_limit_only_literals(self): self.validate_identity("SELECT * FROM t LIMIT 1 + 1", "SELECT * FROM t LIMIT 2") def test_multi_arg_distinct_unsupported(self): self.validate_identity( "SELECT COUNT(DISTINCT a, b) FROM t", "SELECT COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END) FROM t", ) def test_time_mapping(self): ts = "CAST('2025-06-24 12:34:56' AS TIMESTAMP)" self.validate_all( f"SELECT TO_CHAR({ts}, 'yyyy-mm-dd hh24:mi:ss')", read={ "dremio": f"SELECT TO_CHAR({ts}, 'yyyy-mm-dd hh24:mi:ss')", "postgres": f"SELECT TO_CHAR({ts}, 'YYYY-MM-DD HH24:MI:SS')", "oracle": f"SELECT TO_CHAR({ts}, 'YYYY-MM-DD HH24:MI:SS')", "duckdb": f"SELECT STRFTIME({ts}, '%Y-%m-%d %H:%M:%S')", }, write={ "dremio": f"SELECT TO_CHAR({ts}, 'yyyy-mm-dd hh24:mi:ss')", "postgres": f"SELECT TO_CHAR({ts}, 'YYYY-MM-DD HH24:MI:SS')", "oracle": f"SELECT TO_CHAR({ts}, 'YYYY-MM-DD HH24:MI:SS')", "duckdb": f"SELECT STRFTIME({ts}, '%Y-%m-%d %H:%M:%S')", }, ) self.validate_all( f"SELECT TO_CHAR({ts}, 'yy-ddd hh24:mi:ss.fff tzd')", read={ "dremio": f"SELECT TO_CHAR({ts}, 'yy-ddd hh24:mi:ss.fff tzd')", "postgres": f"SELECT TO_CHAR({ts}, 'YY-DDD HH24:MI:SS.US TZ')", "oracle": f"SELECT TO_CHAR({ts}, 'YY-DDD HH24:MI:SS.FF6 %Z')", "duckdb": f"SELECT STRFTIME({ts}, '%y-%j %H:%M:%S.%f %Z')", }, write={ "dremio": f"SELECT TO_CHAR({ts}, 'yy-ddd hh24:mi:ss.fff tzd')", "postgres": f"SELECT TO_CHAR({ts}, 'YY-DDD HH24:MI:SS.US TZ')", "oracle": f"SELECT TO_CHAR({ts}, 'YY-DDD HH24:MI:SS.FF6 %Z')", "duckdb": f"SELECT STRFTIME({ts}, '%y-%j %H:%M:%S.%f %Z')", }, ) def test_to_char_special(self): # Numeric formats should have is_numeric=True to_char = self.validate_identity("TO_CHAR(5555, 
def test_date_add(self):
    # Cases passed to validate_identity with a single argument.
    single_argument_cases = (
        "SELECT DATE_ADD(col, 1)",
        "SELECT DATE_ADD(col, CAST(1 AS INTERVAL HOUR))",
    )
    for sql in single_argument_cases:
        self.validate_identity(sql)

    # The input uses a TIMESTAMP '...' literal; the second argument spells
    # the same value as CAST('...' AS TIMESTAMP).
    literal_form = "SELECT DATE_ADD(TIMESTAMP '2022-01-01 12:00:00', CAST(-1 AS INTERVAL HOUR))"
    cast_form = (
        "SELECT DATE_ADD(CAST('2022-01-01 12:00:00' AS TIMESTAMP), CAST(-1 AS INTERVAL HOUR))"
    )
    self.validate_identity(literal_form, cast_form)
class TestDrill(Validator):
    # Validator subclass whose checks run with the "drill" dialect name.
    dialect = "drill"

    def test_drill(self):
        # Table-function and PIVOT queries passed to validate_identity
        # with a single argument.
        identity_cases = (
            "SELECT * FROM table(dfs.`test_data.xlsx`(type => 'excel', sheetName => 'secondSheet'))",
            "SELECT * FROM (SELECT * FROM t) PIVOT(avg(c1) AS ac1 FOR c2 IN ('V' AS v))",
        )
        for sql in identity_cases:
            self.validate_identity(sql)

        # The input has a bare interval quantity; both write targets expect
        # the quantity as a quoted string.
        quoted_interval = "SELECT '2021-01-01' + INTERVAL '1' MONTH"
        self.validate_all(
            "SELECT '2021-01-01' + INTERVAL 1 MONTH",
            write={
                "drill": quoted_interval,
                "mysql": quoted_interval,
            },
        )

    def test_analyze(self):
        # ANALYZE TABLE statements, with and without a SAMPLE clause.
        analyze_statements = (
            "ANALYZE TABLE tbl COMPUTE STATISTICS",
            "ANALYZE TABLE tbl COMPUTE STATISTICS SAMPLE 5 PERCENT",
        )
        for statement in analyze_statements:
            self.validate_identity(statement)
class TestDruid(Validator):
    # Validator subclass whose checks run with the "druid" dialect name.
    dialect = "druid"

    def test_druid(self):
        # Queries passed to validate_identity with a single argument,
        # in the original order.
        identity_queries = (
            "SELECT MOD(1000, 60)",
            "SELECT CEIL(__time TO WEEK) FROM t",
            "SELECT CEIL(col) FROM t",
            "SELECT CEIL(price, 2) AS rounded_price FROM t",
            "SELECT FLOOR(__time TO WEEK) FROM t",
            "SELECT FLOOR(col) FROM t",
            "SELECT FLOOR(price, 2) AS rounded_price FROM t",
            "SELECT CURRENT_TIMESTAMP",
            "SELECT ARRAY[1, 2, 3]",
        )
        for query in identity_queries:
            self.validate_identity(query)

        # Validate across all dialects: every member of Dialects is expected
        # to produce the same FLOOR ... TO WEEK text.
        floor_to_week = "FLOOR(__time TO WEEK)"
        expected_everywhere = dict.fromkeys(
            (member.value for member in Dialects), floor_to_week
        )
        self.validate_all(floor_to_week, write=expected_everywhere)
schema_name", ) self.validate_identity("SELECT str[0:1]") self.validate_identity("SELECT COSH(1.5)") self.validate_identity("SELECT MODE(category)") self.validate_identity("SELECT e'\\n'") self.validate_identity("SELECT e'\\t'") self.validate_identity( "SELECT e'update table_name set a = \\'foo\\' where 1 = 0' AS x FROM tab", "SELECT e'update table_name set a = ''foo'' where 1 = 0' AS x FROM tab", ) with self.assertRaises(ParseError): parse_one("1 //", read="duckdb") expr = annotate_types( self.validate_identity( "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT t.col['b'] FROM _data, UNNEST(_data.col) AS t(col) WHERE t.col['a'] = 1", "WITH _data AS (SELECT [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}] AS col) SELECT t.col['b'] FROM _data JOIN UNNEST(_data.col) AS t(col) ON TRUE WHERE t.col['a'] = 1", ) ) self.assertEqual( expr.sql(dialect="bigquery"), "WITH _data AS (SELECT [STRUCT(1 AS a, 2 AS b), STRUCT(2 AS a, 3 AS b)] AS col) SELECT col.b FROM _data, UNNEST(_data.col) AS col WHERE col.a = 1", ) struct_array_type = exp.maybe_parse( "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]", into=exp.DataType, dialect="duckdb", ) self.assertEqual( struct_array_type.sql("duckdb"), "STRUCT(k TEXT, v STRUCT(v_str TEXT, v_int INT, v_int_arr INT[]))[]", ) self.validate_all( "(c LIKE 'a' OR c LIKE 'b') AND other_cond", read={ "databricks": "c LIKE ANY ('a', 'b') AND other_cond", }, ) self.validate_all( "SELECT FIRST_VALUE(c IGNORE NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t", write={ "duckdb": "SELECT FIRST_VALUE(c IGNORE NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t", "sqlite": UnsupportedError, "mysql": UnsupportedError, "postgres": UnsupportedError, }, ) self.validate_all( "SELECT FIRST_VALUE(c RESPECT NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t", write={ "duckdb": "SELECT FIRST_VALUE(c RESPECT NULLS) OVER (PARTITION BY gb ORDER BY ob) FROM t", "sqlite": "SELECT FIRST_VALUE(c) OVER (PARTITION BY gb ORDER BY ob NULLS LAST) FROM 
t", "mysql": "SELECT FIRST_VALUE(c) RESPECT NULLS OVER (PARTITION BY gb ORDER BY CASE WHEN ob IS NULL THEN 1 ELSE 0 END, ob) FROM t", "postgres": UnsupportedError, }, ) self.validate_all( "CAST(x AS UUID)", write={ "bigquery": "CAST(x AS STRING)", "duckdb": "CAST(x AS UUID)", }, ) self.validate_all( "SELECT APPROX_TOP_K(category, 3) FROM t", write={ "snowflake": "SELECT APPROX_TOP_K(category, 3) FROM t", "duckdb": UnsupportedError, }, ) self.validate_all( """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""", read={ "duckdb": """SELECT CASE WHEN JSON_VALID('{"x: 1}') THEN '{"x: 1}' ELSE NULL END""", "snowflake": """SELECT TRY_PARSE_JSON('{"x: 1}')""", }, ) self.validate_all( "SELECT straight_join", write={ "duckdb": "SELECT straight_join", "mysql": "SELECT `straight_join`", }, ) self.validate_all( 'STRUCT_PACK("a b" := 1)', write={ "duckdb": "{'a b': 1}", "spark": "STRUCT(1 AS `a b`)", "snowflake": "OBJECT_CONSTRUCT('a b', 1)", }, ) self.validate_all( "ARRAY_TO_STRING(arr, delim)", read={ "bigquery": "ARRAY_TO_STRING(arr, delim)", "postgres": "ARRAY_TO_STRING(arr, delim)", "presto": "ARRAY_JOIN(arr, delim)", "spark": "ARRAY_JOIN(arr, delim)", }, write={ "bigquery": "ARRAY_TO_STRING(arr, delim)", "duckdb": "ARRAY_TO_STRING(arr, delim)", "postgres": "ARRAY_TO_STRING(arr, delim)", "presto": "ARRAY_JOIN(arr, delim)", "snowflake": "ARRAY_TO_STRING(arr, delim)", "spark": "ARRAY_JOIN(arr, delim)", "tsql": "STRING_AGG(arr, delim)", }, ) self.validate_all( "SELECT CASE WHEN delim IS NULL THEN NULL ELSE ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(CAST(x AS TEXT), '')), delim) END", read={ "snowflake": "SELECT ARRAY_TO_STRING(arr, delim)", }, ) self.validate_all( "SELECT SUM(X) OVER (ORDER BY x)", write={ "bigquery": "SELECT SUM(X) OVER (ORDER BY x)", "duckdb": "SELECT SUM(X) OVER (ORDER BY x)", "mysql": "SELECT SUM(X) OVER (ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x)", }, ) self.validate_all( "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 
PRECEDING AND CURRENT ROW)", write={ "bigquery": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", "duckdb": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", "mysql": "SELECT SUM(X) OVER (ORDER BY x RANGE BETWEEN 1 PRECEDING AND CURRENT ROW)", }, ) self.validate_all( "SELECT * FROM x ORDER BY 1 NULLS LAST", write={ "duckdb": "SELECT * FROM x ORDER BY 1", "mysql": "SELECT * FROM x ORDER BY 1", }, ) self.validate_all( "CREATE TEMPORARY FUNCTION f1(a, b) AS (a + b)", read={ "bigquery": "CREATE TEMP FUNCTION f1(a INT64, b INT64) AS (a + b)", }, ) self.validate_identity("SELECT GET_BIT(CAST('0110010' AS BIT), 2)") self.validate_identity("SELECT 1 WHERE x > $1") self.validate_identity("SELECT 1 WHERE x > $name") self.validate_identity("""SELECT '{"x": 1}' -> c FROM t""") self.assertEqual( parse_one("select * from t limit (select 5)").sql(dialect="duckdb"), exp.select("*").from_("t").limit(exp.select("5").subquery()).sql(dialect="duckdb"), ) self.assertEqual( parse_one("select * from t offset (select 5)").sql(dialect="duckdb"), exp.select("*").from_("t").offset(exp.select("5").subquery()).sql(dialect="duckdb"), ) self.validate_all( "{'a': 1, 'b': '2'}", write={ "presto": "CAST(ROW(1, '2') AS ROW(a INTEGER, b VARCHAR))", }, ) self.validate_all( "struct_pack(a := 1, b := 2)", write={ "presto": "CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))", }, ) self.validate_all( "struct_pack(a := 1, b := x)", write={ "duckdb": "{'a': 1, 'b': x}", "presto": UnsupportedError, }, ) for join_type in ("SEMI", "ANTI"): exists = "EXISTS" if join_type == "SEMI" else "NOT EXISTS" self.validate_all( f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", write={ "bigquery": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "clickhouse": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "databricks": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "doris": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 
WHERE t1.x = t2.x)", "drill": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "duckdb": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "hive": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "mysql": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "oracle": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "postgres": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "presto": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "redshift": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "snowflake": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "spark": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "sqlite": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "starrocks": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "teradata": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "trino": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", "tsql": f"SELECT * FROM t1 WHERE {exists}(SELECT 1 FROM t2 WHERE t1.x = t2.x)", }, ) self.validate_all( f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", read={ "duckdb": f"SELECT * FROM t1 {join_type} JOIN t2 ON t1.x = t2.x", "spark": f"SELECT * FROM t1 LEFT {join_type} JOIN t2 ON t1.x = t2.x", }, ) self.validate_identity("SELECT EXP(1)") self.validate_identity("""SELECT '{"duck": [1, 2, 3]}' -> '$.duck[#-1]'""") self.validate_all( "SELECT RANGE(1, 5)", write={ "duckdb": "SELECT RANGE(1, 5)", "spark": "SELECT SEQUENCE(1, 4)", "snowflake": "SELECT ARRAY_GENERATE_RANGE(1, 5)", }, ) self.validate_all( "SELECT RANGE(1, 5, 2)", write={ "duckdb": "SELECT RANGE(1, 5, 2)", "spark": "SELECT SEQUENCE(1, 3, 2)", }, ) self.validate_all( "SELECT RANGE(1, 1)", write={ "duckdb": "SELECT RANGE(1, 1)", "spark": "SELECT ARRAY()", }, ) self.validate_all( "SELECT RANGE(5, 1, -1)", write={ 
"duckdb": "SELECT RANGE(5, 1, -1)", "spark": "SELECT SEQUENCE(5, 2, -1)", "snowflake": "SELECT ARRAY_GENERATE_RANGE(5, 1, -1)", }, ) self.validate_all( "SELECT RANGE(5, 1, 0)", write={ "duckdb": "SELECT RANGE(5, 1, 0)", "spark": "SELECT ARRAY()", }, ) self.validate_all( "WITH t AS (SELECT 5 AS c) SELECT RANGE(1, c) FROM t", write={ "duckdb": "WITH t AS (SELECT 5 AS c) SELECT RANGE(1, c) FROM t", "spark": "WITH t AS (SELECT 5 AS c) SELECT IF((c - 1) < 1, ARRAY(), SEQUENCE(1, (c - 1))) FROM t", }, ) # Test edge case: RANGE(1, 2) should return [1], not [] self.validate_all( "WITH t AS (SELECT 2 AS c) SELECT RANGE(1, c) FROM t", write={ "duckdb": "WITH t AS (SELECT 2 AS c) SELECT RANGE(1, c) FROM t", "spark": "WITH t AS (SELECT 2 AS c) SELECT IF((c - 1) < 1, ARRAY(), SEQUENCE(1, (c - 1))) FROM t", }, ) self.validate_all( "SELECT RANGE(1, 2)", write={ "duckdb": "SELECT RANGE(1, 2)", "spark": "SELECT SEQUENCE(1, 1)", }, ) self.validate_all( """SELECT JSON_EXTRACT('{"duck": [1, 2, 3]}', '/duck/0')""", write={ "": """SELECT JSON_EXTRACT('{"duck": [1, 2, 3]}', '/duck/0')""", "duckdb": """SELECT '{"duck": [1, 2, 3]}' -> '/duck/0'""", }, ) self.validate_all( """SELECT JSON('{"fruit":"banana"}') -> 'fruit'""", write={ "duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""", "snowflake": """SELECT GET_PATH(PARSE_JSON('{"fruit":"banana"}'), 'fruit')""", }, ) self.validate_all( """SELECT JSON('{"fruit": {"foo": "banana"}}') -> 'fruit' -> 'foo'""", write={ "duckdb": """SELECT JSON('{"fruit": {"foo": "banana"}}') -> '$.fruit' -> '$.foo'""", "snowflake": """SELECT GET_PATH(GET_PATH(PARSE_JSON('{"fruit": {"foo": "banana"}}'), 'fruit'), 'foo')""", }, ) self.validate_all( "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", read={ "bigquery": "SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table", "duckdb": "SELECT {'bla': column1, 'foo': column2, 'bar': column3} AS data FROM source_table", }, write={ "bigquery": 
"SELECT STRUCT(column1 AS bla, column2 AS foo, column3 AS bar) AS data FROM source_table", }, ) self.validate_all( "WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER (WHERE x > 1) FROM cte", write={ "duckdb": "WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(x) FILTER(WHERE x > 1) FROM cte", "snowflake": "WITH cte(x) AS (SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) SELECT AVG(IFF(x > 1, x, NULL)) FROM cte", }, ) self.validate_all( "SELECT AVG(x) FILTER (WHERE TRUE) FROM t", write={ "duckdb": "SELECT AVG(x) FILTER(WHERE TRUE) FROM t", "snowflake": "SELECT AVG(IFF(TRUE, x, NULL)) FROM t", }, ) self.validate_all( "SELECT UNNEST(ARRAY[1, 2, 3]), UNNEST(ARRAY[4, 5]), UNNEST(ARRAY[6])", write={ "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > 
CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))", }, ) self.validate_all( "SELECT UNNEST(ARRAY[1, 2, 3]), UNNEST(ARRAY[4, 5]), UNNEST(ARRAY[6]) FROM x", write={ "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_3, col_2, NULL) AS col_2, IF(pos = pos_4, col_3, NULL) AS col_3 FROM x CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2, 3]), ARRAY_LENGTH([4, 5]), ARRAY_LENGTH([6])) - 1)) AS pos CROSS JOIN UNNEST([1, 2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5]) AS col_2 WITH OFFSET AS pos_3 CROSS JOIN UNNEST([6]) AS col_3 WITH OFFSET AS pos_4 WHERE ((pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5]) - 1) AND pos_3 = (ARRAY_LENGTH([4, 5]) - 1)))) AND (pos = pos_4 OR (pos > (ARRAY_LENGTH([6]) - 1) AND pos_4 = (ARRAY_LENGTH([6]) - 1)))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u.pos = _u_4.pos_4, _u_4.col_3) AS col_3 FROM x CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2, 3]), CARDINALITY(ARRAY[4, 5]), CARDINALITY(ARRAY[6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5]) WITH ORDINALITY AS _u_3(col_2, pos_3) CROSS JOIN UNNEST(ARRAY[6]) WITH ORDINALITY AS _u_4(col_3, pos_4) WHERE ((_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5])))) AND (_u.pos = _u_4.pos_4 OR (_u.pos > CARDINALITY(ARRAY[6]) AND _u_4.pos_4 = CARDINALITY(ARRAY[6])))", }, ) self.validate_all( "SELECT UNNEST(x) + 1", write={ 
"bigquery": "SELECT IF(pos = pos_2, col, NULL) + 1 AS col FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))", }, ) self.validate_all( "SELECT UNNEST(x) + 1 AS y", write={ "bigquery": "SELECT IF(pos = pos_2, y, NULL) + 1 AS y FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS y WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))", }, ) self.validate_all( "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2025-10-12' AS DATE))", read={ "snowflake": "SELECT DATEDIFF('day', '2020-01-01', '2025-10-12')", }, ) self.validate_all( "SELECT DATE_DIFF('SECOND', CAST('2020-01-01' AS DATE), CAST('2025-10-12 00:56:42.345' AS TIMESTAMP))", read={ "duckdb": "SELECT DATE_DIFF('SECOND', CAST('2020-01-01' AS DATE), CAST('2025-10-12 00:56:42.345' AS TIMESTAMP))", "snowflake": "SELECT DATEDIFF('second', '2020-01-01', '2025-10-12 00:56:42.345')", }, ) self.validate_all( "SELECT DATE_DIFF('SECOND', CAST('2020-01-01' AS DATE), CAST('2025-10-12 00:56:42.345+07:00' AS TIMESTAMPTZ))", read={ "duckdb": "SELECT DATE_DIFF('SECOND', CAST('2020-01-01' AS DATE), CAST('2025-10-12 00:56:42.345+07:00' AS TIMESTAMPTZ))", "snowflake": "SELECT DATEDIFF('second', '2020-01-01', '2025-10-12 00:56:42.345+07:00')", }, ) # https://github.com/duckdb/duckdb/releases/tag/v0.8.0 self.assertEqual( parse_one("a / b", read="duckdb").assert_is(exp.Div).sql(dialect="duckdb"), "a / b" ) self.assertEqual( parse_one("a // b", read="duckdb").assert_is(exp.IntDiv).sql(dialect="duckdb"), "a // b" ) self.validate_identity( "SELECT tbl.x*1e4+tbl.y FROM tbl", "SELECT tbl.x * 1e4 + tbl.y FROM tbl", ) self.validate_identity("DAYNAME(x)") self.validate_identity("MONTHNAME(x)") self.validate_identity( "SELECT LIST_TRANSFORM([5, NULL, 6], (x, y) -> COALESCE(x, y, 0) + 1)" ) 
self.validate_identity( "SELECT LIST_TRANSFORM([5, NULL, 6], LAMBDA x, y : COALESCE(x, y, 0) + 1)" ) self.validate_identity( "SELECT LIST_TRANSFORM(LIST_FILTER([0, 1, 2, 3, 4, 5], LAMBDA x : x % 2 = 0), LAMBDA y : y * y)" ) self.validate_identity( """ARG_MIN({'d': "DATE", 'ts': "TIMESTAMP", 'i': "INT", 'b': "BIGINT", 's': "VARCHAR"}, "DOUBLE")""" ) self.validate_identity( "ARG_MAX(keyword_name, keyword_category, 3 ORDER BY keyword_name DESC)" ) self.validate_identity("INSERT INTO t DEFAULT VALUES RETURNING (c1)") self.validate_identity("CREATE TABLE notes (watermark TEXT)") self.validate_identity("SELECT LIST_TRANSFORM([5, NULL, 6], LAMBDA x : COALESCE(x, 0) + 1)") self.validate_identity("SELECT LIST_TRANSFORM(nbr, LAMBDA x : x + 1) FROM article AS a") self.validate_identity("SELECT * FROM my_ducklake.demo AT (VERSION => 2)") self.validate_identity("SELECT TO_BINARY('test')") self.validate_identity("SELECT UUIDV7()") self.validate_identity("SELECT TRY(LOG(0))") self.validate_identity("x::timestamp", "CAST(x AS TIMESTAMP)") self.validate_identity("x::timestamp without time zone", "CAST(x AS TIMESTAMP)") self.validate_identity("x::timestamp with time zone", "CAST(x AS TIMESTAMPTZ)") self.validate_identity("CAST(x AS FOO)") self.validate_identity("SELECT UNNEST([1, 2])").selects[0].assert_is(exp.UDTF) self.validate_identity("'red' IN flags").args["field"].assert_is(exp.Column) self.validate_identity("'red' IN tbl.flags") self.validate_identity("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))") self.validate_identity("INSERT INTO x BY NAME SELECT 1 AS y") self.validate_identity("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x") self.validate_identity("SELECT SUM(x) FILTER (x = 1)", "SELECT SUM(x) FILTER(WHERE x = 1)") self.validate_identity("SELECT * FROM GLOB(x)") self.validate_identity("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])") self.validate_identity("SELECT MAP {'x': 1}") self.validate_identity("SELECT (MAP {'x': 1})['x']") self.validate_identity("SELECT 
df1.*, df2.* FROM df1 POSITIONAL JOIN df2") self.validate_identity("MAKE_TIMESTAMP(1992, 9, 20, 13, 34, 27.123456)") self.validate_identity("MAKE_TIMESTAMP(1667810584123456)") self.validate_identity("SELECT EPOCH_MS(10) AS t") self.validate_identity("SELECT MAKE_TIMESTAMP(10) AS t") self.validate_identity("SELECT TO_TIMESTAMP(10) AS t") self.validate_identity("SELECT UNNEST(col, recursive := TRUE) FROM t") self.validate_identity("VAR_POP(a)") self.validate_identity("SELECT * FROM foo ASOF LEFT JOIN bar ON a = b") self.validate_identity("SELECT {'a': 1} AS x") self.validate_identity("SELECT {'a': {'b': {'c': 1}}, 'd': {'e': 2}} AS x") self.validate_identity("SELECT {'x': 1, 'y': 2, 'z': 3}") self.validate_identity("SELECT {'key1': 'string', 'key2': 1, 'key3': 12.345}") self.validate_identity("SELECT ROW(x, x + 1, y) FROM (SELECT 1 AS x, 'a' AS y)") self.validate_identity("SELECT (x, x + 1, y) FROM (SELECT 1 AS x, 'a' AS y)") self.validate_identity("SELECT a.x FROM (SELECT {'x': 1, 'y': 2, 'z': 3} AS a)") self.validate_identity("FROM x SELECT x UNION SELECT 1", "SELECT x FROM x UNION SELECT 1") self.validate_identity("FROM (FROM tbl)", "SELECT * FROM (SELECT * FROM tbl)") self.validate_identity("FROM tbl", "SELECT * FROM tbl") self.validate_identity( "SELECT * FROM t1 WHERE NOT EXISTS(FROM t2 WHERE t2.id = t1.id)", "SELECT * FROM t1 WHERE NOT EXISTS(SELECT * FROM t2 WHERE t2.id = t1.id)", ) self.validate_identity("x -> '$.family'") self.validate_identity("CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))") self.validate_identity("SELECT * FROM foo WHERE bar > $baz AND bla = $bob") self.validate_identity("SUMMARIZE tbl").assert_is(exp.Summarize) self.validate_identity("SUMMARIZE SELECT * FROM tbl").assert_is(exp.Summarize) self.validate_identity("CREATE TABLE tbl_summary AS SELECT * FROM (SUMMARIZE tbl)") self.validate_identity("SELECT STAR(tbl, exclude := [foo])") self.validate_identity("UNION_VALUE(k1 := 1)").find(exp.PropertyEQ).this.assert_is( exp.Identifier 
) self.validate_identity( "MERGE INTO people USING (SELECT 1 AS id, 98000.0 AS salary) AS salary_updates USING (id) WHEN MATCHED THEN UPDATE SET salary = salary_updates.salary" ) self.validate_identity( "MERGE INTO people USING (SELECT 1 AS id, 98000.0 AS salary) AS salary_updates USING (id) WHEN MATCHED THEN UPDATE" ) self.validate_identity( "SELECT species, island, COUNT(*) FROM t GROUP BY GROUPING SETS (species), GROUPING SETS (island)" ) self.validate_identity( "SELECT species, island, COUNT(*) FROM t GROUP BY CUBE (species), CUBE (island)" ) self.validate_identity( "SELECT species, island, COUNT(*) FROM t GROUP BY ROLLUP (species), ROLLUP (island)" ) self.validate_identity( "SUMMARIZE TABLE 'https://blobs.duckdb.org/data/Star_Trek-Season_1.csv'" ).assert_is(exp.Summarize) self.validate_identity( """COPY (SELECT * FROM "input.parquet" USING SAMPLE RESERVOIR (5000 ROWS)) TO 'output.parquet' WITH (FORMAT PARQUET, KV_METADATA {'origin': 'Dagster', 'dagster_run_id': '98c85a11-d05c-4935-bfa2-198214c2204'})""" ) for join_type in ("LEFT", "LEFT OUTER", "INNER"): with self.subTest(f"Testing transpilation of join {join_type} with UNNEST"): self.validate_all( f"SELECT * FROM x {join_type} JOIN UNNEST(y) ON TRUE", read={ "bigquery": f"SELECT * FROM x {join_type} JOIN UNNEST(y)", }, write={ "bigquery": f"SELECT * FROM x {join_type} JOIN UNNEST(y) ON TRUE", "duckdb": f"SELECT * FROM x {join_type} JOIN UNNEST(y) ON TRUE", }, ) self.validate_identity( """SELECT '{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""", ) self.validate_identity( "SELECT $🦆$foo$🦆$", "SELECT 'foo'", ) self.validate_identity( "SELECT * FROM t PIVOT(FIRST(t) AS t, FOR quarter IN ('Q1', 'Q2'))", "SELECT * FROM t PIVOT(FIRST(t) AS t FOR quarter IN ('Q1', 'Q2'))", ) self.validate_identity( """SELECT JSON_EXTRACT_STRING('{ "family": "anatidae", "species": [ "duck", "goose", "swan", null ] }', ['$.family', '$.species'])""", """SELECT '{ "family": 
"anatidae", "species": [ "duck", "goose", "swan", null ] }' ->> ['$.family', '$.species']""", ) self.validate_identity( "SELECT col FROM t WHERE JSON_EXTRACT_STRING(col, '$.id') NOT IN ('b')", "SELECT col FROM t WHERE NOT (col ->> '$.id') IN ('b')", ) self.validate_identity( "SELECT a, LOGICAL_OR(b) FROM foo GROUP BY a", "SELECT a, BOOL_OR(CAST(b AS BOOLEAN)) FROM foo GROUP BY a", ) self.validate_identity( "SELECT JSON_EXTRACT_STRING(c, '$.k1') = 'v1'", "SELECT (c ->> '$.k1') = 'v1'", ) self.validate_identity( "SELECT JSON_EXTRACT(c, '$.k1') = 'v1'", "SELECT (c -> '$.k1') = 'v1'", ) self.validate_identity( "SELECT JSON_EXTRACT(c, '$[*].id')[0:2]", "SELECT (c -> '$[*].id')[0:2]", ) self.validate_identity( "SELECT JSON_EXTRACT_STRING(c, '$[*].id')[0:2]", "SELECT (c ->> '$[*].id')[0:2]", ) self.validate_identity( """SELECT '{"foo": [1, 2, 3]}' -> 'foo' -> 0""", """SELECT '{"foo": [1, 2, 3]}' -> '$.foo' -> '$[0]'""", ) self.validate_identity( "SELECT ($$hello)'world$$)", "SELECT ('hello)''world')", ) self.validate_identity( "SELECT $$foo$$", "SELECT 'foo'", ) self.validate_identity( "SELECT $tag$foo$tag$", "SELECT 'foo'", ) self.validate_identity( "JSON_EXTRACT(x, '$.family')", "x -> '$.family'", ) self.validate_identity( "JSON_EXTRACT_PATH(x, '$.family')", "x -> '$.family'", ) self.validate_identity( "JSON_EXTRACT_STRING(x, '$.family')", "x ->> '$.family'", ) self.validate_identity( "JSON_EXTRACT_PATH_TEXT(x, '$.family')", "x ->> '$.family'", ) self.validate_all( "SELECT NOT (data -> '$.value')", read={ "snowflake": "SELECT NOT data:value", }, ) self.validate_all( "SELECT NOT (data -> '$.value.nested')", read={ "snowflake": "SELECT NOT data:value:nested", }, ) self.validate_all( "SELECT (data -> '$.value') = 1", read={ "snowflake": "SELECT data:value = 1", }, ) self.validate_identity( "SELECT {'yes': 'duck', 'maybe': 'goose', 'huh': NULL, 'no': 'heron'}" ) self.validate_identity( "SELECT a['x space'] FROM (SELECT {'x space': 1, 'y': 2, 'z': 3} AS a)" ) 
self.validate_identity( "PIVOT Cities ON Year IN (2000, 2010) USING SUM(Population) GROUP BY Country" ) self.validate_identity( "PIVOT Cities ON Year USING SUM(Population) AS total, MAX(Population) AS max GROUP BY Country" ) self.validate_identity( "WITH pivot_alias AS (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) SELECT * FROM pivot_alias" ) self.validate_identity( "SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias" ) self.validate_identity( "SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020) GROUP BY country)" ) self.validate_identity( # QUALIFY comes after WINDOW "SELECT schema_name, function_name, ROW_NUMBER() OVER my_window AS function_rank FROM DUCKDB_FUNCTIONS() WINDOW my_window AS (PARTITION BY schema_name ORDER BY function_name) QUALIFY ROW_NUMBER() OVER my_window < 3" ) self.validate_identity("DATE_SUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous) self.validate_identity("DATESUB('YEAR', col, '2020-01-01')").assert_is(exp.Anonymous) self.validate_identity("SELECT SHA256('abc')") self.validate_all("0b1010", write={"": "0 AS b1010"}) self.validate_all("0x1010", write={"": "0 AS x1010"}) self.validate_identity("x ~ y", "REGEXP_FULL_MATCH(x, y)") self.validate_identity("x !~ y", "NOT REGEXP_FULL_MATCH(x, y)") self.validate_identity("REGEXP_FULL_MATCH(x, y, 'i')") self.validate_all("SELECT * FROM 'x.y'", write={"duckdb": 'SELECT * FROM "x.y"'}) self.validate_all( "SELECT LIST(DISTINCT sample_col) FROM sample_table", read={ "duckdb": "SELECT LIST(DISTINCT sample_col) FROM sample_table", "spark": "SELECT COLLECT_SET(sample_col) FROM sample_table", }, ) self.validate_all( "SELECT LIST_TRANSFORM(STR_SPLIT_REGEX('abc , dfg ', ','), x -> TRIM(x))", write={ "duckdb": "SELECT LIST_TRANSFORM(STR_SPLIT_REGEX('abc , dfg ', ','), x -> TRIM(x))", "spark": "SELECT TRANSFORM(SPLIT('abc , dfg ', ','), x -> TRIM(x))", }, ) self.validate_all( "SELECT LIST_FILTER([4, 5, 6], x -> x > 4)", 
write={ "duckdb": "SELECT LIST_FILTER([4, 5, 6], x -> x > 4)", "spark": "SELECT FILTER(ARRAY(4, 5, 6), x -> x > 4)", }, ) self.validate_all( "ARRAY_COMPACT([1, NULL, 2, NULL, 3])", write={ "duckdb": "LIST_FILTER([1, NULL, 2, NULL, 3], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_COMPACT([1, NULL, 2, NULL, 3])", }, ) self.validate_all( "ARRAY_COMPACT(NULL)", write={ "duckdb": "LIST_FILTER(NULL, _u -> NOT _u IS NULL)", "snowflake": "ARRAY_COMPACT(NULL)", }, ) self.validate_all( "ARRAY_COMPACT([])", write={ "duckdb": "LIST_FILTER([], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_COMPACT([])", }, ) self.validate_all( "ARRAY_COMPACT(['a', NULL, 'b', NULL, 'c'])", write={ "duckdb": "LIST_FILTER(['a', NULL, 'b', NULL, 'c'], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_COMPACT(['a', NULL, 'b', NULL, 'c'])", }, ) self.validate_all( "ARRAY_COMPACT([[1, 2], NULL, [3, 4]])", write={ "duckdb": "LIST_FILTER([[1, 2], NULL, [3, 4]], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_COMPACT([[1, 2], NULL, [3, 4]])", }, ) self.validate_all( "ARRAY_CONSTRUCT_COMPACT(1, 2, 3, 4, 5)", write={ "duckdb": "LIST_FILTER([1, 2, 3, 4, 5], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, 2, 3, 4, 5)", }, ) self.validate_all( "ARRAY_CONSTRUCT_COMPACT()", write={ "duckdb": "LIST_FILTER([], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_CONSTRUCT_COMPACT()", }, ) self.validate_all( "ARRAY_CONSTRUCT_COMPACT('a', NULL, 'b', NULL, 'c')", write={ "duckdb": "LIST_FILTER(['a', NULL, 'b', NULL, 'c'], _u -> NOT _u IS NULL)", "snowflake": "ARRAY_CONSTRUCT_COMPACT('a', NULL, 'b', NULL, 'c')", }, ) self.validate_all( "SELECT ANY_VALUE(sample_column) FROM sample_table", write={ "duckdb": "SELECT ANY_VALUE(sample_column) FROM sample_table", "spark": "SELECT ANY_VALUE(sample_column) IGNORE NULLS FROM sample_table", }, ) self.validate_all( "COUNT_IF(x)", write={ "duckdb": "COUNT_IF(x)", "duckdb, version=1.0": "SUM(CASE WHEN x THEN 1 ELSE 0 END)", "duckdb, version=1.2": "COUNT_IF(x)", }, ) self.validate_all( 
"SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))", write={ "duckdb": "SELECT STRFTIME(CAST('2020-01-01' AS TIMESTAMP), CONCAT('%Y', '%m'))", "spark": "SELECT DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP_NTZ), CONCAT('yyyy', 'MM'))", "tsql": "SELECT FORMAT(CAST('2020-01-01' AS DATETIME2), CONCAT('yyyy', 'MM'))", }, ) self.validate_all( """SELECT CAST('{"x": 1}' AS JSON)""", read={ "duckdb": """SELECT '{"x": 1}'::JSON""", "postgres": """SELECT '{"x": 1}'::JSONB""", }, ) self.validate_all( "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))", read={ "duckdb": "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))", "snowflake": "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR produce.quarter IN ('Q1', 'Q2'))", }, ) self.validate_all( "SELECT UNNEST([1, 2, 3])", write={ "duckdb": "SELECT UNNEST([1, 2, 3])", "snowflake": "SELECT IFF(_u.pos = _u_2.pos_2, _u_2.col, NULL) AS col FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE([1, 2, 3])) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => [1, 2, 3])) AS _u_2(seq, key, path, pos_2, col, this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE([1, 2, 3]) - 1) AND _u_2.pos_2 = (ARRAY_SIZE([1, 2, 3]) - 1))", }, ) self.validate_all( "VAR_POP(x)", read={ "": "VARIANCE_POP(x)", }, write={ "": "VARIANCE_POP(x)", "duckdb": "VAR_POP(x)", }, ) self.validate_all( "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))", read={ "duckdb": "DATE_DIFF('day', CAST(b AS DATE), CAST(a AS DATE))", "hive": "DATEDIFF(a, b)", "spark": "DATEDIFF(a, b)", "spark2": "DATEDIFF(a, b)", }, ) self.validate_all( "XOR(a, b)", read={ "": "a ^ b", "bigquery": "a ^ b", "presto": "BITWISE_XOR(a, b)", "postgres": "a # b", }, write={ "": "a ^ b", "bigquery": "a ^ b", "duckdb": "XOR(a, b)", "presto": "BITWISE_XOR(a, b)", "postgres": "a # b", }, ) self.validate_all( "PIVOT_WIDER Cities ON Year USING SUM(Population)", write={"duckdb": "PIVOT Cities ON Year 
USING SUM(Population)"}, ) self.validate_all( "WITH t AS (SELECT 1) FROM t", write={"duckdb": "WITH t AS (SELECT 1) SELECT * FROM t"} ) self.validate_all( "WITH t AS (SELECT 1) SELECT * FROM (FROM t)", write={"duckdb": "WITH t AS (SELECT 1) SELECT * FROM (SELECT * FROM t)"}, ) self.validate_all( """SELECT DATEDIFF('day', t1."A", t1."B") FROM "table" AS t1""", write={ "duckdb": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""", "trino": """SELECT DATE_DIFF('DAY', t1."A", t1."B") FROM "table" AS t1""", }, ) self.validate_all( "SELECT DATE_DIFF('day', DATE '2020-01-01', DATE '2020-01-05')", write={ "duckdb": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", "trino": "SELECT DATE_DIFF('DAY', CAST('2020-01-01' AS DATE), CAST('2020-01-05' AS DATE))", }, ) self.validate_all( "WITH 'x' AS (SELECT 1) SELECT * FROM x", write={"duckdb": 'WITH "x" AS (SELECT 1) SELECT * FROM x'}, ) self.validate_all( "CREATE TABLE IF NOT EXISTS t (cola INT, colb STRING) USING ICEBERG PARTITIONED BY (colb)", write={ "duckdb": "CREATE TABLE IF NOT EXISTS t (cola INT, colb TEXT)", }, ) self.validate_all( "CREATE TABLE IF NOT EXISTS t (cola INT COMMENT 'cola', colb STRING) USING ICEBERG PARTITIONED BY (colb)", write={ "duckdb": "CREATE TABLE IF NOT EXISTS t (cola INT, colb TEXT)", }, ) self.validate_all( "[0, 1, 2]", read={ "spark": "ARRAY(0, 1, 2)", }, write={ "bigquery": "[0, 1, 2]", "duckdb": "[0, 1, 2]", "presto": "ARRAY[0, 1, 2]", "spark": "ARRAY(0, 1, 2)", }, ) self.validate_all( "SELECT ARRAY_LENGTH([0], 1) AS x", write={"duckdb": "SELECT ARRAY_LENGTH([0], 1) AS x"}, ) self.validate_identity("REGEXP_REPLACE(this, pattern, replacement)") self.validate_identity("REGEXP_REPLACE(this, pattern, replacement, 'g')") self.validate_identity("REGEXP_REPLACE(this, pattern, replacement, 'gi')") self.validate_identity("REGEXP_REPLACE(this, pattern, replacement, 'ims')") self.validate_identity("SELECT SPLIT_PART('11.22.33', '.', 1)") self.validate_identity( 
"SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table" ) self.validate_identity( "SELECT NTH_VALUE(is_deleted, 2 IGNORE NULLS) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table" ) self.validate_all( "REGEXP_MATCHES(x, y)", write={ "duckdb": "REGEXP_MATCHES(x, y)", "presto": "REGEXP_LIKE(x, y)", "hive": "x RLIKE y", "spark": "x RLIKE y", }, ) self.validate_all( "STR_SPLIT(x, 'a')", write={ "duckdb": "STR_SPLIT(x, 'a')", "presto": "SPLIT(x, 'a')", "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))", "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))", }, ) self.validate_all( "STRING_TO_ARRAY(x, 'a')", read={ "snowflake": "STRTOK_TO_ARRAY(x, 'a')", }, write={ "duckdb": "STR_SPLIT(x, 'a')", "presto": "SPLIT(x, 'a')", "hive": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))", "spark": "SPLIT(x, CONCAT('\\\\Q', 'a', '\\\\E'))", }, ) self.validate_all( "STR_SPLIT_REGEX(x, 'a')", write={ "duckdb": "STR_SPLIT_REGEX(x, 'a')", "presto": "REGEXP_SPLIT(x, 'a')", "hive": "SPLIT(x, 'a')", "spark": "SPLIT(x, 'a')", }, ) self.validate_all( "STRUCT_EXTRACT(x, 'abc')", write={ "duckdb": "STRUCT_EXTRACT(x, 'abc')", "presto": "x.abc", "hive": "x.abc", "postgres": "x.abc", "redshift": "x.abc", "spark": "x.abc", }, ) self.validate_all( "STRUCT_EXTRACT(STRUCT_EXTRACT(x, 'y'), 'abc')", write={ "duckdb": "STRUCT_EXTRACT(STRUCT_EXTRACT(x, 'y'), 'abc')", "presto": "x.y.abc", "hive": "x.y.abc", "spark": "x.y.abc", }, ) self.validate_all( "QUANTILE(x, 0.5)", write={ "duckdb": "QUANTILE(x, 0.5)", "presto": "APPROX_PERCENTILE(x, 0.5)", "hive": "PERCENTILE(x, 0.5)", "spark": "PERCENTILE(x, 0.5)", }, ) self.validate_all( "UNNEST(x)", read={ "spark": "EXPLODE(x)", }, write={ "duckdb": "UNNEST(x)", "spark": "EXPLODE(x)", }, ) self.validate_all( "1d", write={ "duckdb": "1 AS d", "spark": "1 AS d", }, ) self.validate_all( "POWER(TRY_CAST(2 AS SMALLINT), 3)", read={ "hive": "POW(2S, 3)", "spark": "POW(2S, 3)", }, ) self.validate_all( "LIST_SUM([1, 2])", read={ "spark": 
"ARRAY_SUM(ARRAY(1, 2))", }, ) self.validate_identity("SELECT LIST_MAX(values) FROM table1") self.validate_identity("SELECT LIST_MIN(values) FROM table1") self.validate_all( "STRUCT_PACK(x := 1, y := '2')", write={ "bigquery": "STRUCT(1 AS x, '2' AS y)", "duckdb": "{'x': 1, 'y': '2'}", "spark": "STRUCT(1 AS x, '2' AS y)", }, ) self.validate_all( "STRUCT_PACK(key1 := 'value1', key2 := 42)", write={ "bigquery": "STRUCT('value1' AS key1, 42 AS key2)", "duckdb": "{'key1': 'value1', 'key2': 42}", "spark": "STRUCT('value1' AS key1, 42 AS key2)", }, ) self.validate_all( "ARRAY_REVERSE_SORT(x)", write={ "duckdb": "ARRAY_REVERSE_SORT(x)", "presto": "ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)", "hive": "SORT_ARRAY(x, FALSE)", "spark": "SORT_ARRAY(x, FALSE)", }, ) self.validate_all( "LIST_REVERSE_SORT(x)", write={ "duckdb": "ARRAY_REVERSE_SORT(x)", "presto": "ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)", "hive": "SORT_ARRAY(x, FALSE)", "spark": "SORT_ARRAY(x, FALSE)", }, ) self.validate_all( "LIST_SORT(x)", write={ "duckdb": "LIST_SORT(x)", "presto": "ARRAY_SORT(x)", "hive": "SORT_ARRAY(x)", "spark": "SORT_ARRAY(x)", }, ) self.validate_identity("SELECT LIST_SORT(x, 'ASC')") self.validate_identity("SELECT LIST_SORT(x, 'DESC')") self.validate_identity("SELECT LIST_SORT(x, 'ASC', 'NULLS FIRST')") self.validate_identity("SELECT LIST_SORT(x, 'ASC', 'NULLS LAST')") self.validate_identity("SELECT LIST_SORT(x, 'DESC', 'NULLS FIRST')") self.validate_identity("SELECT LIST_SORT(x, 'DESC', 'NULLS LAST')") self.validate_identity("SELECT LIST_SORT(x, 'DE' || 'SC')") self.validate_identity("SELECT LIST_SORT(x, 'DESC', 'NULLS' || ' FIRST')") self.validate_identity("SELECT LIST_SORT(x, 'DE' || 'SC', 'NULLS' || ' FIRST')") self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname 
ASC NULLS LAST, lname NULLS LAST", "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname", }, ) self.validate_all( "MONTH('2021-03-01')", write={ "duckdb": "MONTH('2021-03-01')", "presto": "MONTH('2021-03-01')", "hive": "MONTH('2021-03-01')", "spark": "MONTH('2021-03-01')", }, ) self.validate_all( "LIST_CONCAT([1, 2], [3, 4])", read={ "bigquery": "ARRAY_CONCAT([1, 2], [3, 4])", "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY[3, 4])", "snowflake": "ARRAY_CAT([1, 2], [3, 4])", }, write={ "bigquery": "ARRAY_CONCAT([1, 2], [3, 4])", "duckdb": "LIST_CONCAT([1, 2], [3, 4])", "hive": "CONCAT(ARRAY(1, 2), ARRAY(3, 4))", "postgres": "ARRAY_CAT(ARRAY[1, 2], ARRAY[3, 4])", "presto": "CONCAT(ARRAY[1, 2], ARRAY[3, 4])", "snowflake": "ARRAY_CAT([1, 2], [3, 4])", "spark": "CONCAT(ARRAY(1, 2), ARRAY(3, 4))", }, ) self.validate_all( "SELECT CAST(TRY_CAST(x AS DATE) AS DATE) + INTERVAL 1 DAY", read={ "hive": "SELECT DATE_ADD(TO_DATE(x), 1)", }, ) self.validate_all( "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL 3 DAY", read={ "hive": "SELECT DATE_ADD('2018-01-01 00:00:00', 3)", }, write={ "duckdb": "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY", "hive": "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY", }, ) self.validate_all( "SELECT CAST('2020-05-06' AS DATE) - INTERVAL '5' DAY", read={"bigquery": "SELECT DATE_SUB(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"}, ) self.validate_all( "SELECT CAST('2020-05-06' AS DATE) + INTERVAL '5' DAY", read={"bigquery": "SELECT DATE_ADD(CAST('2020-05-06' AS DATE), INTERVAL 5 DAY)"}, ) self.validate_identity( "SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t", "SELECT QUANTILE_CONT(y, 0.25 ORDER BY y DESC) FROM t", ) self.validate_identity( "SELECT PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY y DESC) FROM t", "SELECT QUANTILE_DISC(y, 0.25 ORDER BY y DESC) FROM t", ) self.validate_all( "SELECT QUANTILE_CONT(x, q) FROM t", write={ "duckdb": "SELECT 
QUANTILE_CONT(x, q) FROM t", "postgres": "SELECT PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) FROM t", "snowflake": "SELECT PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) FROM t", }, ) self.validate_all( "SELECT QUANTILE_DISC(x, q) FROM t", write={ "duckdb": "SELECT QUANTILE_DISC(x, q) FROM t", "postgres": "SELECT PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) FROM t", "snowflake": "SELECT PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) FROM t", }, ) self.validate_all( "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t", read={ "duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t", "bigquery": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t", "snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern') FROM t", }, write={ "duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t", "bigquery": "SELECT REGEXP_EXTRACT(a, 'pattern') FROM t", "snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern') FROM t", }, ) self.validate_all( "SELECT REGEXP_EXTRACT(a, 'pattern', 2, 'i') FROM t", read={ "snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2) FROM t", }, write={ "duckdb": "SELECT REGEXP_EXTRACT(a, 'pattern', 2, 'i') FROM t", "snowflake": "SELECT REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2) FROM t", }, ) # group=0 is the default and gets normalized away when no following params self.validate_identity( "SELECT REGEXP_EXTRACT(a, 'pattern', 0)", "SELECT REGEXP_EXTRACT(a, 'pattern')", ) # group is kept when there are following params (flags) self.validate_identity("SELECT REGEXP_EXTRACT(a, 'pattern', 0, 'i')") self.validate_identity("SELECT REGEXP_EXTRACT(a, 'pattern', 1, 'i')") # REGEXP_EXTRACT_ALL round-trip tests (same normalization rules as REGEXP_EXTRACT) self.validate_identity( "SELECT REGEXP_EXTRACT_ALL(s, 'pattern', 0)", "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')", ) self.validate_identity("SELECT REGEXP_EXTRACT_ALL(s, 'pattern', 1)") self.validate_identity("SELECT REGEXP_EXTRACT_ALL(s, 'pattern', 0, 'i')") self.validate_identity("SELECT REGEXP_EXTRACT_ALL(s, 'pattern', 1, 'im')") # Array slicing for 
occurrence self.validate_identity( "SELECT REGEXP_EXTRACT_ALL(s, 'pattern', 0)[2:]", "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')[2:]", ) self.validate_identity("SELECT ISNAN(x)") self.validate_all( "SELECT COUNT_IF(x)", write={ "duckdb": "SELECT COUNT_IF(x)", "bigquery": "SELECT COUNTIF(x)", }, ) self.validate_identity("SELECT * FROM RANGE(1, 5, 10)") self.validate_identity("SELECT * FROM GENERATE_SERIES(2, 13, 4)") self.validate_all( "WITH t AS (SELECT i, i * i * i * i * i AS i5 FROM RANGE(1, 5) t(i)) SELECT * FROM t", write={ "duckdb": "WITH t AS (SELECT i, i * i * i * i * i AS i5 FROM RANGE(1, 5) AS t(i)) SELECT * FROM t", "sqlite": "WITH t AS (SELECT i, i * i * i * i * i AS i5 FROM (SELECT value AS i FROM GENERATE_SERIES(1, 5)) AS t) SELECT * FROM t", }, ) self.validate_identity( """SELECT i FROM RANGE(5) AS _(i) ORDER BY i ASC""", """SELECT i FROM RANGE(0, 5) AS _(i) ORDER BY i ASC""", ) self.validate_identity( """SELECT i FROM GENERATE_SERIES(12) AS _(i) ORDER BY i ASC""", """SELECT i FROM GENERATE_SERIES(0, 12) AS _(i) ORDER BY i ASC""", ) self.validate_identity( "COPY lineitem FROM 'lineitem.ndjson' WITH (FORMAT JSON, DELIMITER ',', AUTO_DETECT TRUE, COMPRESSION SNAPPY, CODEC ZSTD, FORCE_NOT_NULL (col1, col2))" ) self.validate_identity( "COPY (SELECT 42 AS a, 'hello' AS b) TO 'query.json' WITH (FORMAT JSON, ARRAY TRUE)" ) self.validate_identity("COPY lineitem (l_orderkey) TO 'orderkey.tbl' WITH (DELIMITER '|')") self.validate_all( "VARIANCE(a)", write={ "duckdb": "VARIANCE(a)", "clickhouse": "varSamp(a)", }, ) self.validate_all( "STDDEV(a)", write={ "duckdb": "STDDEV(a)", "clickhouse": "stddevSamp(a)", }, ) self.validate_all( "DATE_TRUNC('DAY', x)", write={ "duckdb": "DATE_TRUNC('DAY', x)", "clickhouse": "dateTrunc('DAY', x)", }, ) self.validate_identity("EDITDIST3(col1, col2)", "LEVENSHTEIN(col1, col2)") self.validate_identity("JARO_WINKLER_SIMILARITY('hello', 'world')") self.validate_identity("SELECT LENGTH(foo)") self.validate_identity("SELECT ARRAY[1, 2, 
3]", "SELECT [1, 2, 3]") self.validate_identity("SELECT * FROM (DESCRIBE t)") self.validate_identity("SELECT UNNEST([*COLUMNS('alias_.*')]) AS column_name") self.validate_identity( "SELECT COALESCE(*COLUMNS(*)) FROM (SELECT NULL, 2, 3) AS t(a, b, c)" ) self.validate_identity( "SELECT id, STRUCT_PACK(*COLUMNS('m\\d')) AS measurements FROM many_measurements", """SELECT id, {'_0': *COLUMNS('m\\d')} AS measurements FROM many_measurements""", ) self.validate_identity("SELECT COLUMNS(c -> c LIKE '%num%') FROM numbers") self.validate_identity( "SELECT MIN(COLUMNS(* REPLACE (number + id AS number))), COUNT(COLUMNS(* EXCLUDE (number))) FROM numbers" ) self.validate_identity("SELECT COLUMNS(*) + COLUMNS(*) FROM numbers") self.validate_identity("SELECT COLUMNS('(id|numbers?)') FROM numbers") self.validate_identity( "SELECT COALESCE(COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)" ) self.validate_identity( "SELECT COALESCE(*COLUMNS(['a', 'b', 'c'])) AS result FROM (SELECT NULL AS a, 42 AS b, TRUE AS c)" ) self.validate_all( "SELECT UNNEST(foo) AS x", write={ "redshift": UnsupportedError, }, ) self.validate_identity("a ^ b", "POWER(a, b)") self.validate_identity("a ** b", "POWER(a, b)") self.validate_identity("a ~~~ b", "a GLOB b") self.validate_identity("a ~~ b", "a LIKE b") self.validate_identity("a @> b") self.validate_identity("a <@ b", "b @> a") self.validate_identity("a && b").assert_is(exp.ArrayOverlaps) self.validate_identity("a ^@ b", "STARTS_WITH(a, b)") self.validate_identity( "a !~~ b", "NOT a LIKE b", ) self.validate_identity( "a !~~* b", "NOT a ILIKE b", ) self.validate_all( "SELECT e'Hello\nworld'", write={ "duckdb": "SELECT e'Hello\\nworld'", "bigquery": "SELECT CAST(b'Hello\\nworld' AS STRING)", }, ) self.validate_all( "SELECT REGEXP_MATCHES('ThOmAs', 'thomas', 'i')", read={ "postgres": "SELECT 'ThOmAs' ~* 'thomas'", }, ) self.validate_identity( "SELECT DATE_ADD(CAST('2020-01-01' AS DATE), INTERVAL 1 DAY)", "SELECT 
CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY", ) self.validate_identity("ARRAY_SLICE(x, 1, 3, 2)") self.validate_identity("SELECT #2, #1 FROM (VALUES (1, 'foo'))") self.validate_identity("SELECT #2 AS a, #1 AS b FROM (VALUES (1, 'foo'))") self.validate_all( "LIST_CONTAINS([1, 2, NULL], 1)", write={ "duckdb": "ARRAY_CONTAINS([1, 2, NULL], 1)", "postgres": "CASE WHEN 1 IS NULL THEN NULL ELSE COALESCE(1 = ANY(ARRAY[1, 2, NULL]), FALSE) END", }, ) self.validate_all( "LIST_CONTAINS([1, 2, NULL], NULL)", write={ "duckdb": "ARRAY_CONTAINS([1, 2, NULL], NULL)", "postgres": "CASE WHEN NULL IS NULL THEN NULL ELSE COALESCE(NULL = ANY(ARRAY[1, 2, NULL]), FALSE) END", }, ) self.validate_all( "LIST_HAS_ANY([1, 2, 3], [1,2])", write={ "duckdb": "[1, 2, 3] && [1, 2]", "postgres": "ARRAY[1, 2, 3] && ARRAY[1, 2]", }, ) self.validate_identity("LISTAGG(x, ', ')") self.validate_identity("STRING_AGG(x, ', ')", "LISTAGG(x, ', ')") self.validate_all( "SELECT CONCAT(foo)", write={ "duckdb": "SELECT CONCAT(foo)", "spark": "SELECT CONCAT(COALESCE(foo, ''))", }, ) self.validate_all( "SELECT CONCAT(COALESCE(['abc'], []), ['bcg'])", write={ "duckdb": "SELECT CONCAT(COALESCE(['abc'], []), ['bcg'])", "spark": "SELECT CONCAT(COALESCE(ARRAY('abc'), ARRAY()), ARRAY('bcg'))", }, ) self.validate_identity( "SELECT CUME_DIST( ORDER BY foo) OVER (ORDER BY 1) FROM (SELECT 1 AS foo)" ) self.validate_identity( "SELECT NTILE(1 ORDER BY foo) OVER (ORDER BY 1) FROM (SELECT 1 AS foo)" ) self.validate_identity( "SELECT RANK( ORDER BY foo) OVER (ORDER BY 1) FROM (SELECT 1 AS foo)" ) self.validate_identity( "SELECT PERCENT_RANK( ORDER BY foo) OVER (ORDER BY 1) FROM (SELECT 1 AS foo)" ) self.validate_identity("LIST_COSINE_DISTANCE(x, y)") self.validate_identity("LIST_DISTANCE(x, y)") self.validate_identity("SELECT * FROM t LIMIT 10 PERCENT") self.validate_identity("SELECT * FROM t LIMIT 10%", "SELECT * FROM t LIMIT 10 PERCENT") self.validate_identity("SELECT * FROM t LIMIT 10 PERCENT OFFSET 1") 
self.validate_identity( "SELECT * FROM t LIMIT 10% OFFSET 1", "SELECT * FROM t LIMIT 10 PERCENT OFFSET 1" ) self.validate_identity( "SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))", "SELECT CAST(ROW(1, 2) AS STRUCT(a INT, b INT))", ) self.validate_identity("SELECT row") self.validate_identity( "SELECT TRY_STRPTIME('2013-04-28T20:57:01.123456789+07:00', '%Y-%m-%dT%H:%M:%S.%n%z')" ) self.validate_identity( "DELETE FROM t USING (VALUES (1)) AS t1(c), (VALUES (1), (2)) AS t2(c) WHERE t.c = t1.c AND t.c = t2.c" ) self.validate_identity( "FROM (FROM t1 UNION FROM t2)", "SELECT * FROM (SELECT * FROM t1 UNION SELECT * FROM t2)", ) self.validate_identity( "FROM (FROM (SELECT 1) AS t2(c), (SELECT t2.c AS c0))", "SELECT * FROM (SELECT * FROM (SELECT 1) AS t2(c), (SELECT t2.c AS c0))", ) self.validate_identity( "FROM (FROM (SELECT 2000 as amount) t GROUP BY amount HAVING SUM(amount) > 1000)", "SELECT * FROM (SELECT * FROM (SELECT 2000 AS amount) AS t GROUP BY amount HAVING SUM(amount) > 1000)", ) self.validate_identity( "(FROM (SELECT 1) t1(c) EXCEPT FROM (SELECT 2) t2(c)) UNION ALL (FROM (SELECT 3) t3(c) EXCEPT FROM (SELECT 4) t4(c))", "(SELECT * FROM (SELECT 1) AS t1(c) EXCEPT SELECT * FROM (SELECT 2) AS t2(c)) UNION ALL (SELECT * FROM (SELECT 3) AS t3(c) EXCEPT SELECT * FROM (SELECT 4) AS t4(c))", ) for option in ( "ORDER BY 1", "LIMIT 1", "OFFSET 1", "ORDER BY 1 LIMIT 1", "ORDER BY 1 OFFSET 1", "ORDER BY 1 LIMIT 1 OFFSET 1", "LIMIT 1 OFFSET 1", ): with self.subTest(f"Testing DuckDB VALUES with modifier option: {option}"): self.validate_identity( f"SELECT 1 FROM (SELECT 1) AS t(c) WHERE ((VALUES (1), (c) {option}) INTERSECT (SELECT 1))" ) self.validate_identity("FORMAT('foo')") self.validate_identity("FORMAT('foo', 'foo2', 'foo3')") self.assertEqual( annotate_types(self.parse_one("LOWER('HELLO')")).sql("duckdb"), "LOWER('HELLO')" ) self.assertEqual( annotate_types(self.parse_one("UPPER('hello')")).sql("duckdb"), "UPPER('hello')" ) self.validate_all( "SELECT UUID()", 
write={ "duckdb": "SELECT UUID()", "bigquery": "SELECT GENERATE_UUID()", }, ) self.assertEqual( annotate_types( self.parse_one("SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result") ).sql("duckdb"), "SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result", ) self.validate_identity("SELECT REPLACE('apple pie', 'pie', 'cobbler') AS result") self.validate_identity( "SELECT REPLACE(CAST(CAST('apple pie' AS BLOB) AS TEXT), CAST(CAST('pie' AS BLOB) AS TEXT), CAST(CAST('cobbler' AS BLOB) AS TEXT)) AS result" ) self.assertEqual( annotate_types(self.parse_one("SELECT TRIM('***apple***', '*') AS result")).sql( "duckdb" ), "SELECT TRIM('***apple***', '*') AS result", ) self.validate_identity("SELECT TRIM('***apple***', '*') AS result") self.validate_identity( "SELECT CAST(TRIM(CAST(CAST('***apple***' AS BLOB) AS TEXT), CAST(CAST('*' AS BLOB) AS TEXT)) AS BLOB) AS result" ) self.validate_identity("SELECT GREATEST(1.0, 2.5, NULL, 3.7)") self.validate_identity("FROM t1, t2 SELECT *", "SELECT * FROM t1, t2") self.validate_identity("ROUND(2.256, 1)") # TODO: This is incorrect AST, DATE_PART creates a STRUCT of values but it's stored in 'year' arg self.validate_identity( "SELECT MAKE_DATE(DATE_PART(['year', 'month', 'day'], CURRENT_DATE))" ) self.validate_identity("SELECT * FROM t PIVOT(SUM(y) FOR foo IN y_enum)") self.validate_identity( "SELECT 20_000 AS literal", "SELECT 20000 AS literal", ) self.validate_identity("SELECT 1_2E+1_0::FLOAT", "SELECT CAST(12E+10 AS REAL)") # Test BITMAP_BUCKET_NUMBER transpilation from Snowflake to DuckDB self.validate_all( "CASE WHEN 2500 > 0 THEN ((2500 - 1) // 32768) + 1 ELSE 2500 // 32768 END", read={ "snowflake": "BITMAP_BUCKET_NUMBER(2500)", }, ) self.validate_all( "CASE WHEN 32768 > 0 THEN ((32768 - 1) // 32768) + 1 ELSE 32768 // 32768 END", read={ "snowflake": "BITMAP_BUCKET_NUMBER(32768)", }, ) self.validate_all( "CASE WHEN 32769 > 0 THEN ((32769 - 1) // 32768) + 1 ELSE 32769 // 32768 END", read={ "snowflake": 
"BITMAP_BUCKET_NUMBER(32769)", }, ) self.validate_all( "CASE WHEN -100 > 0 THEN ((-100 - 1) // 32768) + 1 ELSE -100 // 32768 END", read={ "snowflake": "BITMAP_BUCKET_NUMBER(-100)", }, ) self.validate_all( "CASE WHEN NULL > 0 THEN ((NULL - 1) // 32768) + 1 ELSE NULL // 32768 END", read={ "snowflake": "BITMAP_BUCKET_NUMBER(NULL)", }, ) self.validate_all( "ARRAY_CONTAINS(MAP_KEYS(CAST({'k1': 'v1', 'k2': 'v2', 'k3': 'v3'} AS MAP(TEXT, TEXT))), 'k1')", read={ "snowflake": "MAP_CONTAINS_KEY('k1', {'k1': 'v1', 'k2': 'v2', 'k3': 'v3'}::MAP(VARCHAR, VARCHAR))", }, ) self.validate_identity("SELECT [1, 2, 3][1 + 1:LENGTH([1, 2, 3]) + -1]") self.validate_identity("VERSION()") self.validate_identity("SELECT TODAY()", "SELECT CURRENT_DATE") self.validate_identity("SELECT GET_CURRENT_TIME()", "SELECT CURRENT_TIME") self.validate_identity("CURRENT_LOCALTIMESTAMP()", "LOCALTIMESTAMP").assert_is( exp.Localtimestamp ) self.validate_identity( "SELECT SUM(x) OVER (ORDER BY x GROUPS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t" ) self.validate_identity("SELECT file[:256] FROM GLOB('*')").selects[0].this.assert_is( exp.Column ) self.validate_identity("SELECT file[256] FROM GLOB('*')").selects[0].this.assert_is( exp.Column ) self.validate_identity( "SELECT LAST_VALUE(x ORDER BY x IGNORE NULLS) OVER (ORDER BY x) FROM t" ) self.validate_identity( "SELECT LAST_VALUE(x ORDER BY x RESPECT NULLS) OVER (ORDER BY x) FROM t" ) def test_array_index(self): with self.assertLogs(helper_logger) as cm: self.validate_all( "SELECT some_arr[1] AS first FROM blah", read={ "bigquery": "SELECT some_arr[0] AS first FROM blah", }, write={ "bigquery": "SELECT some_arr[0] AS first FROM blah", "duckdb": "SELECT some_arr[1] AS first FROM blah", "presto": "SELECT some_arr[1] AS first FROM blah", }, ) self.validate_identity( "[x.STRING_SPLIT(' ')[i] FOR x IN ['1', '2', 3] IF x.CONTAINS('1')]" ) self.validate_identity("SELECT [4, 5, 6] AS l, [x FOR x, i IN l IF i = 2] AS filtered") self.validate_identity( """SELECT 
LIST_VALUE(1)[i]""", """SELECT [1][i]""", ) self.validate_identity( """{'x': LIST_VALUE(1)[i]}""", """{'x': [1][i]}""", ) self.validate_identity( """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': LIST_VALUE(1, 2, 3)[i], 'f2': LIST_VALUE(1, 2, 3)[i]})""", """SELECT LIST_APPLY(RANGE(1, 4), i -> {'f1': [1, 2, 3][i], 'f2': [1, 2, 3][i]})""", ) self.assertEqual( cm.output, [ "INFO:sqlglot:Applying array index offset (-1)", "INFO:sqlglot:Applying array index offset (1)", "INFO:sqlglot:Applying array index offset (1)", "INFO:sqlglot:Applying array index offset (1)", ], ) def test_array_insert(self): # Test ARRAY_INSERT inserts at beginning self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([99], [1, 2, 3]) END", read={ "": "ARRAY_INSERT([1, 2, 3], 0, 99)", "snowflake": "ARRAY_INSERT([1, 2, 3], 0, 99)", "spark": "ARRAY_INSERT(ARRAY(1, 2, 3), 1, 99)", }, ) # Test ARRAY_INSERT inserts after first element self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:1], [99], [1, 2, 3][2:]) END", read={ "": "ARRAY_INSERT([1, 2, 3], 1, 99)", "snowflake": "ARRAY_INSERT([1, 2, 3], 1, 99)", "spark": "ARRAY_INSERT(ARRAY(1, 2, 3), 2, 99)", }, ) # Test ARRAY_INSERT inserts at end self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:3], [99], [1, 2, 3][4:]) END", read={ "": "ARRAY_INSERT([1, 2, 3], 3, 99)", "snowflake": "ARRAY_INSERT([1, 2, 3], 3, 99)", "spark": "ARRAY_INSERT(ARRAY(1, 2, 3), 4, 99)", }, ) # Test ARRAY_INSERT inserts before last element using negative position self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:LENGTH([1, 2, 3]) + -1], [99], [1, 2, 3][LENGTH([1, 2, 3]) + -1 + 1:]) END", read={ "": "ARRAY_INSERT([1, 2, 3], -1, 99)", "snowflake": "ARRAY_INSERT([1, 2, 3], -1, 99)", "spark": "ARRAY_INSERT(ARRAY(1, 2, 3), -2, 99)", }, ) def test_array_remove(self): # Test NULL propagation with column reference: Snowflake → DuckDB self.validate_all( "CASE WHEN 
target IS NULL THEN NULL ELSE LIST_FILTER(the_array, _u -> _u <> target) END", read={ "snowflake": "ARRAY_REMOVE(the_array, target)", }, ) # Test literal values: Snowflake → DuckDB self.validate_all( "LIST_FILTER([1, 2, 3], _u -> _u <> 2)", read={ "snowflake": "ARRAY_REMOVE([1, 2, 3], 2)", }, ) # Test NULL literal: Snowflake → DuckDB self.validate_all( "CASE WHEN NULL IS NULL THEN NULL ELSE LIST_FILTER([1, 2, 3], _u -> _u <> NULL) END", read={ "snowflake": "ARRAY_REMOVE([1, 2, 3], NULL)", }, ) def test_array_remove_at(self): # Test remove first element (position 0) self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE [1, 2, 3][2:] END", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], 0)", }, ) # Test remove middle element (position 1) self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:1], [1, 2, 3][3:]) END", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], 1)", }, ) # Test remove last element with positive index (position 2 for 3-element array) self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:2], [1, 2, 3][4:]) END", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], 2)", }, ) # Test remove last element with negative index (position -1) self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE [1, 2, 3][1:LENGTH([1, 2, 3]) + -1] END", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], -1)", }, ) # Test remove second-to-last element (position -2) self.validate_all( "CASE WHEN [1, 2, 3] IS NULL THEN NULL ELSE LIST_CONCAT([1, 2, 3][1:LENGTH([1, 2, 3]) + -2], [1, 2, 3][LENGTH([1, 2, 3]) + -2 + 2:]) END", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], -2)", }, ) # Test single element array self.validate_all( "CASE WHEN [99] IS NULL THEN NULL ELSE [99][2:] END", read={ "snowflake": "ARRAY_REMOVE_AT([99], 0)", }, ) # Test NULL array with column reference self.validate_all( "CASE WHEN arr IS NULL THEN NULL ELSE arr[2:] END", read={ "snowflake": "ARRAY_REMOVE_AT(arr, 0)", }, ) # Test 
non-literal position (should remain untranspiled) self.validate_all( "ARRAY_REMOVE_AT([1, 2, 3], pos)", read={ "snowflake": "ARRAY_REMOVE_AT([1, 2, 3], pos)", }, ) def test_time(self): self.validate_identity("SELECT CURRENT_DATE") self.validate_identity("SELECT CURRENT_TIMESTAMP") self.validate_all( "SELECT CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)", read={ "bigquery": "SELECT CURRENT_DATE('UTC')", "duckdb": "SELECT CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)", }, ) self.validate_all( "SELECT MAKE_DATE(2016, 12, 25)", read={ "bigquery": "SELECT DATE(2016, 12, 25)", }, write={ "bigquery": "SELECT DATE(2016, 12, 25)", "duckdb": "SELECT MAKE_DATE(2016, 12, 25)", }, ) self.validate_all( "SELECT CAST(CAST('2016-12-25 23:59:59' AS TIMESTAMP) AS DATE)", read={"bigquery": "SELECT DATE(DATETIME '2016-12-25 23:59:59')"}, ) self.validate_all( "SELECT CAST(CAST(CAST('2016-12-25' AS TIMESTAMPTZ) AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'America/Los_Angeles' AS DATE)", read={ "bigquery": "SELECT DATE(TIMESTAMP '2016-12-25', 'America/Los_Angeles')", }, ) self.validate_all( "SELECT CAST(CAST('2024-01-15 23:30:00' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin' AS DATE)", read={ "bigquery": "SELECT DATE('2024-01-15 23:30:00', 'Europe/Berlin')", }, ) self.validate_all( "SELECT CAST(CAST(STRPTIME('05/06/2020', '%m/%d/%Y') AS DATE) AS DATE)", read={"bigquery": "SELECT DATE(PARSE_DATE('%m/%d/%Y', '05/06/2020'))"}, ) self.validate_all( "SELECT CAST('2020-01-01' AS DATE) + INTERVAL '-1' DAY", read={"mysql": "SELECT DATE '2020-01-01' + INTERVAL -1 DAY"}, ) self.validate_all( "SELECT INTERVAL '1 quarter'", write={"duckdb": "SELECT INTERVAL '1' QUARTER"}, ) self.validate_all( "SELECT ((DATE_TRUNC('DAY', CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP) + INTERVAL (0 - ((ISODOW(CAST(CAST(DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7) % 7) DAY) + INTERVAL (-5) WEEK)) AS t1", read={ "presto": "SELECT 
((DATE_ADD('week', -5, DATE_TRUNC('DAY', DATE_ADD('day', (0 - MOD((DAY_OF_WEEK(CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)) % 7) - 1 + 7, 7)), CAST(CAST(DATE_TRUNC('DAY', NOW()) AS DATE) AS TIMESTAMP)))))) AS t1", }, ) self.validate_all( "EPOCH(x)", read={ "presto": "TO_UNIXTIME(x)", }, write={ "bigquery": "TIME_TO_UNIX(x)", "duckdb": "EPOCH(x)", "presto": "TO_UNIXTIME(x)", "spark": "UNIX_TIMESTAMP(x)", }, ) self.validate_all( "EPOCH_MS(x)", write={ "bigquery": "TIMESTAMP_MILLIS(x)", "clickhouse": "fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))", "duckdb": "EPOCH_MS(x)", "mysql": "FROM_UNIXTIME(x / POWER(10, 3))", "postgres": "TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))", "presto": "FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))", "spark": "TIMESTAMP_MILLIS(x)", }, ) self.validate_all( "STRFTIME(x, '%y-%-m-%S')", write={ "bigquery": "FORMAT_DATE('%y-%-m-%S', x)", "duckdb": "STRFTIME(x, '%y-%-m-%S')", "postgres": "TO_CHAR(x, 'YY-FMMM-SS')", "presto": "DATE_FORMAT(x, '%y-%c-%s')", "spark": "DATE_FORMAT(x, 'yy-M-ss')", }, ) self.validate_all( "SHA1(x)", write={ "duckdb": "SHA1(x)", "": "SHA(x)", }, ) self.validate_all( "STRFTIME(x, '%Y-%m-%d %H:%M:%S')", write={ "bigquery": "FORMAT_DATE('%F %T', x)", "duckdb": "STRFTIME(x, '%Y-%m-%d %H:%M:%S')", "presto": "DATE_FORMAT(x, '%Y-%m-%d %T')", "hive": "DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss')", }, ) self.validate_all( "STRPTIME(x, '%y-%-m')", write={ "bigquery": "PARSE_TIMESTAMP('%y-%-m', x)", "duckdb": "STRPTIME(x, '%y-%-m')", "presto": "DATE_PARSE(x, '%y-%c')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'yy-M')) AS TIMESTAMP)", "spark": "TO_TIMESTAMP(x, 'yy-M')", }, ) self.validate_all( "TO_TIMESTAMP(x)", write={ "bigquery": "TIMESTAMP_SECONDS(x)", "duckdb": "TO_TIMESTAMP(x)", "presto": "FROM_UNIXTIME(x)", "hive": "FROM_UNIXTIME(x)", }, ) self.validate_all( "STRPTIME(x, '%-m/%-d/%y %-I:%M %p')", write={ "bigquery": "PARSE_TIMESTAMP('%-m/%e/%y %-I:%M %p', x)", "duckdb": "STRPTIME(x, 
'%-m/%-d/%y %-I:%M %p')", "presto": "DATE_PARSE(x, '%c/%e/%y %l:%i %p')", "hive": "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'M/d/yy h:mm a')) AS TIMESTAMP)", "spark": "TO_TIMESTAMP(x, 'M/d/yy h:mm a')", }, ) self.validate_all( "CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'", read={ "snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))", }, write={ "bigquery": "TIMESTAMP(DATETIME(CAST(start AS TIMESTAMP), 'America/New_York'))", "duckdb": "CAST(start AS TIMESTAMPTZ) AT TIME ZONE 'America/New_York'", "snowflake": "CONVERT_TIMEZONE('America/New_York', CAST(start AS TIMESTAMPTZ))", }, ) self.validate_all( "SELECT TIMESTAMP 'foo'", write={ "duckdb": "SELECT CAST('foo' AS TIMESTAMP)", "hive": "SELECT CAST('foo' AS TIMESTAMP)", "spark2": "SELECT CAST('foo' AS TIMESTAMP)", "spark": "SELECT CAST('foo' AS TIMESTAMP_NTZ)", "postgres": "SELECT CAST('foo' AS TIMESTAMP)", "mysql": "SELECT CAST('foo' AS DATETIME)", "clickhouse": "SELECT CAST('foo' AS Nullable(DateTime))", "databricks": "SELECT CAST('foo' AS TIMESTAMP_NTZ)", "snowflake": "SELECT CAST('foo' AS TIMESTAMPNTZ)", "redshift": "SELECT CAST('foo' AS TIMESTAMP)", "tsql": "SELECT CAST('foo' AS DATETIME2)", "presto": "SELECT CAST('foo' AS TIMESTAMP)", "trino": "SELECT CAST('foo' AS TIMESTAMP)", "oracle": "SELECT CAST('foo' AS TIMESTAMP)", "bigquery": "SELECT CAST('foo' AS DATETIME)", "starrocks": "SELECT CAST('foo' AS DATETIME)", }, ) def test_sample(self): self.validate_identity( "SELECT * FROM tbl USING SAMPLE 5", "SELECT * FROM tbl USING SAMPLE RESERVOIR (5 ROWS)", ) self.validate_identity( "SELECT * FROM tbl USING SAMPLE 10%", "SELECT * FROM tbl USING SAMPLE SYSTEM (10 PERCENT)", ) self.validate_identity( "SELECT * FROM tbl USING SAMPLE 10 PERCENT (bernoulli)", "SELECT * FROM tbl USING SAMPLE BERNOULLI (10 PERCENT)", ) self.validate_identity( "SELECT * FROM tbl USING SAMPLE reservoir(50 ROWS) REPEATABLE (100)", "SELECT * FROM tbl USING SAMPLE RESERVOIR (50 ROWS) REPEATABLE (100)", ) 
def test_array(self):
    """Run validate_identity on ARRAY built from a subquery, bare and parenthesized."""
    subquery_arrays = (
        "ARRAY(SELECT id FROM t)",
        "ARRAY((SELECT id FROM t))",
    )
    for array_sql in subquery_arrays:
        self.validate_identity(array_sql)
self.validate_identity("CAST(x AS INT32)", "CAST(x AS INT)") self.validate_identity("CAST(x AS INT16)", "CAST(x AS SMALLINT)") self.validate_identity("CAST(x AS INT8)", "CAST(x AS BIGINT)") self.validate_identity("CAST(x AS NUMERIC(1, 2))", "CAST(x AS DECIMAL(1, 2))") self.validate_identity("CAST(x AS HUGEINT)", "CAST(x AS INT128)") self.validate_identity("CAST(x AS UHUGEINT)", "CAST(x AS UINT128)") self.validate_identity("CAST(x AS CHAR)", "CAST(x AS TEXT)") self.validate_identity("CAST(x AS BPCHAR)", "CAST(x AS TEXT)") self.validate_identity("CAST(x AS STRING)", "CAST(x AS TEXT)") self.validate_identity("CAST(x AS VARCHAR)", "CAST(x AS TEXT)") self.validate_identity("CAST(x AS INT1)", "CAST(x AS TINYINT)") self.validate_identity("CAST(x AS FLOAT4)", "CAST(x AS REAL)") self.validate_identity("CAST(x AS FLOAT)", "CAST(x AS REAL)") self.validate_identity("CAST(x AS INT4)", "CAST(x AS INT)") self.validate_identity("CAST(x AS INTEGER)", "CAST(x AS INT)") self.validate_identity("CAST(x AS SIGNED)", "CAST(x AS INT)") self.validate_identity("CAST(x AS BLOB)", "CAST(x AS BLOB)") self.validate_identity("CAST(x AS BYTEA)", "CAST(x AS BLOB)") self.validate_identity("CAST(x AS BINARY)", "CAST(x AS BLOB)") self.validate_identity("CAST(x AS VARBINARY)", "CAST(x AS BLOB)") self.validate_identity("CAST(x AS LOGICAL)", "CAST(x AS BOOLEAN)") self.validate_identity("""CAST({'i': 1, 's': 'foo'} AS STRUCT("s" TEXT, "i" INT))""") self.validate_identity( "CAST(ROW(1, ROW(1)) AS STRUCT(number BIGINT, row STRUCT(number BIGINT)))" ) self.validate_identity( "123::CHARACTER VARYING", "CAST(123 AS TEXT)", ) self.validate_identity( "CAST([[STRUCT_PACK(a := 1)]] AS STRUCT(a BIGINT)[][])", "CAST([[{'a': 1}]] AS STRUCT(a BIGINT)[][])", ) self.validate_identity( "CAST([STRUCT_PACK(a := 1)] AS STRUCT(a BIGINT)[])", "CAST([{'a': 1}] AS STRUCT(a BIGINT)[])", ) self.validate_identity( "STRUCT_PACK(a := 'b')::json", "CAST({'a': 'b'} AS JSON)", ) self.validate_identity( "STRUCT_PACK(a := 'b')::STRUCT(a 
TEXT)", "CAST({'a': 'b'} AS STRUCT(a TEXT))", ) self.validate_all( "CAST(x AS TIME)", read={ "duckdb": "CAST(x AS TIME)", "presto": "CAST(x AS TIME(6))", }, ) self.validate_all( "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", read={ "duckdb": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMP)", "snowflake": "SELECT CAST('2020-01-01 12:05:01' AS TIMESTAMPNTZ)", }, ) self.validate_all( "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", read={ "duckdb": "SELECT CAST('2020-01-01' AS DATE) + INTERVAL (day_offset) DAY FROM t", "mysql": "SELECT DATE '2020-01-01' + INTERVAL day_offset DAY FROM t", }, ) self.validate_all( "SELECT CAST('09:05:03' AS TIME) + INTERVAL 2 HOUR", read={ "snowflake": "SELECT TIMEADD(HOUR, 2, TO_TIME('09:05:03'))", }, write={ "duckdb": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2' HOUR", "snowflake": "SELECT CAST('09:05:03' AS TIME) + INTERVAL '2 HOUR'", }, ) self.validate_all( "CAST(x AS VARCHAR(5))", write={ "duckdb": "CAST(x AS TEXT)", "postgres": "CAST(x AS TEXT)", }, ) self.validate_all( "CAST(x AS DECIMAL(38, 0))", read={ "snowflake": "CAST(x AS NUMBER)", "duckdb": "CAST(x AS DECIMAL(38, 0))", }, write={ "snowflake": "CAST(x AS DECIMAL(38, 0))", }, ) self.validate_all( "CAST(x AS NUMERIC)", write={ "duckdb": "CAST(x AS DECIMAL(18, 3))", "postgres": "CAST(x AS DECIMAL(18, 3))", }, ) self.validate_all( "CAST(x AS DECIMAL)", write={ "duckdb": "CAST(x AS DECIMAL(18, 3))", "postgres": "CAST(x AS DECIMAL(18, 3))", }, ) self.validate_all( "CAST(x AS BIT)", read={ "duckdb": "CAST(x AS BITSTRING)", }, write={ "duckdb": "CAST(x AS BIT)", "tsql": "CAST(x AS BIT)", }, ) self.validate_all( "cast([[1]] as int[][])", write={ "duckdb": "CAST([[1]] AS INT[][])", "spark": "CAST(ARRAY(ARRAY(1)) AS ARRAY>)", }, ) self.validate_all( "CAST(x AS DATE) + INTERVAL (7 * -1) DAY", read={ "spark": "DATE_SUB(x, 7)", }, ) self.validate_all( "TRY_CAST(1 AS DOUBLE)", read={ "hive": "1d", "spark": "1d", }, ) self.validate_all( "CAST(x AS DATE)", 
def test_sha(self):
    """Run validate_identity on SHA1/SHA256 applied to a string literal and to a column."""
    # Round-trip: DuckDB SHA1 should not add unnecessary casts
    for hash_func in ("SHA1", "SHA256"):
        for argument in ("'foo'", "x"):
            self.validate_identity(f"{hash_func}({argument})")
"ISNAN(x)", read={"bigquery": "IS_NAN(x)"}, write={"bigquery": "IS_NAN(x)", "duckdb": "ISNAN(x)"}, ) def test_isinf(self): self.validate_all( "ISINF(x)", read={"bigquery": "IS_INF(x)"}, write={"bigquery": "IS_INF(x)", "duckdb": "ISINF(x)"}, ) def test_parameter_token(self): self.validate_all( "SELECT $foo", read={"bigquery": "SELECT @foo"}, write={"bigquery": "SELECT @foo", "duckdb": "SELECT $foo"}, ) def test_ignore_nulls(self): # Note that DuckDB differentiates window functions (e.g. LEAD, LAG) from aggregate functions (e.g. SUM) from sqlglot.dialects.duckdb import DuckDB agg_funcs = (exp.Sum, exp.Max, exp.Min) for func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS + agg_funcs: func = func_type(this=exp.to_identifier("col")) ignore_null = exp.IgnoreNulls(this=func) windowed_ignore_null = exp.Window(this=ignore_null) if func_type in DuckDB.Generator.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS: self.assertIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb")) else: with self.assertLogs(generator_logger) as cm: self.assertEqual(ignore_null.sql("duckdb"), func.sql("duckdb")) self.assertNotIn("IGNORE NULLS", windowed_ignore_null.sql("duckdb")) self.assertEqual( str(cm.output[0]), "WARNING:sqlglot:IGNORE NULLS is not supported for non-window functions.", ) def test_attach_detach(self): # ATTACH self.validate_identity("ATTACH 'file.db'") self.validate_identity("ATTACH ':memory:' AS db_alias") self.validate_identity("ATTACH IF NOT EXISTS 'file.db' AS db_alias") self.validate_identity("ATTACH 'file.db' AS db_alias (READ_ONLY)") self.validate_identity("ATTACH 'file.db' (READ_ONLY FALSE, TYPE sqlite)") self.validate_identity("ATTACH 'file.db' (TYPE POSTGRES, SCHEMA 'public')") self.validate_identity("ATTACH DATABASE 'file.db'", "ATTACH 'file.db'") # DETACH self.validate_identity("DETACH new_database") # when 'if exists' is set, the syntax is DETACH DATABASE, not DETACH # ref: https://duckdb.org/docs/stable/sql/statements/attach.html#detach-syntax 
def test_simplified_pivot_unpivot(self):
    """Run validate_identity on each simplified PIVOT / UNPIVOT statement, in order."""
    statements = (
        # Stand-alone PIVOT statements
        "PIVOT Cities ON Year USING SUM(Population)",
        "PIVOT Cities ON Year USING FIRST(Population)",
        "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country",
        "PIVOT Cities ON Country, Name USING SUM(Population)",
        "PIVOT Cities ON Country || '_' || Name USING SUM(Population)",
        "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country, Name",
        # Stand-alone UNPIVOT statements
        "UNPIVOT (SELECT 1 AS col1, 2 AS col2) ON foo, bar",
        "UNPIVOT monthly_sales ON jan, feb, mar, apr, may, jun INTO NAME month VALUE sales",
        "UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales",
        "UNPIVOT monthly_sales ON (jan, feb, mar) AS q1, (apr, may, jun) AS q2 INTO NAME quarter VALUE month_1_sales, month_2_sales, month_3_sales",
        # UNPIVOT / PIVOT nested in a CTE or subquery
        "WITH unpivot_alias AS (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) SELECT * FROM unpivot_alias",
        "SELECT * FROM (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) AS unpivot_alias",
        "WITH cities(country, name, year, population) AS (SELECT 'NL', 'Amsterdam', 2000, 1005 UNION ALL SELECT 'US', 'Seattle', 2020, 738) PIVOT cities ON year USING SUM(population)",
    )
    for statement in statements:
        self.validate_identity(statement)
def test_analyze(self):
    """Run validate_identity on a bare ANALYZE statement."""
    bare_analyze = "ANALYZE"
    self.validate_identity(bare_analyze)
def test_extract_date_parts(self):
    """Check EXTRACT date parts: WEEK synonyms collapse, the other parts stay as written."""
    # Both are synonyms for ISO week
    week_synonyms = ("WEEK", "WEEKOFYEAR")
    # All of these should remain as is, they don't have synonyms
    unchanged_parts = (
        "WEEKDAY",
        "ISOYEAR",
        "ISODOW",
        "YEARWEEK",
        "TIMEZONE_HOUR",
        "TIMEZONE_MINUTE",
    )

    for synonym in week_synonyms:
        extract_sql = f"EXTRACT({synonym} FROM foo)"
        self.validate_identity(extract_sql, "EXTRACT(WEEK FROM foo)")

    for part_name in unchanged_parts:
        extract_sql = f"EXTRACT({part_name} FROM foo)"
        with self.subTest(f"Testing DuckDB {extract_sql}"):
            self.validate_identity(extract_sql)
def test_install(self):
    """Check INSTALL / FORCE INSTALL statements and the shape of the parsed node.

    The plain and forced forms are both expected to produce an ``exp.Install``
    whose ``name`` is the extension name. The plain form's ``this`` is also
    expected to be an ``exp.Identifier``.
    """
    ast = self.validate_identity("INSTALL httpfs")
    # These used to be bare `... .name == "httpfs"` comparisons whose result was
    # discarded, so nothing was checked; use real assertions instead.
    self.assertEqual(ast.assert_is(exp.Install).name, "httpfs")
    # assertIsInstance instead of a bare `assert`, which is stripped under `python -O`.
    self.assertIsInstance(ast.this, exp.Identifier)

    self.validate_identity("INSTALL httpfs FROM community")
    self.validate_identity("INSTALL httpfs FROM 'https://extensions.duckdb.org'")

    forced_ast = self.validate_identity("FORCE INSTALL httpfs")
    self.assertEqual(forced_ast.assert_is(exp.Install).name, "httpfs")
    self.validate_identity("FORCE INSTALL httpfs FROM community")
    self.validate_identity("FORCE INSTALL httpfs FROM 'https://extensions.duckdb.org'")

    # FORCE CHECKPOINT is validated with check_command_warning=True.
    self.validate_identity("FORCE CHECKPOINT db", check_command_warning=True)
def test_approx_percentile(self):
    """Check the APPROX_QUANTILE <-> Snowflake APPROX_PERCENTILE mapping.

    Also checks the DuckDB SQL generated from a type-annotated Snowflake
    expression against a fixed expected string.
    """
    duckdb_sql = "SELECT APPROX_QUANTILE(a, 0.5) FROM t"
    snowflake_sql = "SELECT APPROX_PERCENTILE(a, 0.5) FROM t"
    self.validate_all(
        duckdb_sql,
        read={"snowflake": snowflake_sql},
        write={"duckdb": duckdb_sql, "snowflake": snowflake_sql},
    )

    # Annotate types on the Snowflake parse tree before generating DuckDB SQL.
    parsed = parse_one(
        "SELECT APPROX_PERCENTILE(CAST(a AS DOUBLE), 0.5) FROM t", read="snowflake"
    )
    annotated = annotate_types(parsed)
    generated = annotated.sql(dialect="duckdb")
    self.assertEqual(
        generated,
        "SELECT CAST(APPROX_QUANTILE(CAST(a AS DOUBLE), 0.5) AS DOUBLE) FROM t",
    )
def test_map_size(self):
    """Check that Snowflake MAP_SIZE reads as CARDINALITY, which is written back for DuckDB."""
    # (CARDINALITY form, Snowflake MAP_SIZE form)
    cases = (
        (
            "SELECT CARDINALITY(CAST({'a': 1, 'b': 2, 'c': 3} AS MAP(TEXT, DECIMAL(38, 0)))) AS map_size",
            "SELECT MAP_SIZE({'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER)) AS map_size",
        ),
        (
            "SELECT id, CARDINALITY(attrs) AS attr_count FROM demo_maps",
            "SELECT id, MAP_SIZE(attrs) AS attr_count FROM demo_maps",
        ),
    )
    for cardinality_sql, map_size_sql in cases:
        self.validate_all(
            cardinality_sql,
            read={"snowflake": map_size_sql},
            write={"duckdb": cardinality_sql},
        )
def test_to_array(self):
    """Check the SQL each Snowflake TO_ARRAY call reads as, and that DuckDB writes it back."""
    # (expected SQL, Snowflake TO_ARRAY input)
    cases = (
        (
            "SELECT CASE WHEN 'hello, snowman' IS NULL THEN NULL ELSE ['hello, snowman'] END AS result",
            "SELECT TO_ARRAY('hello, snowman') AS result",
        ),
        (
            "SELECT CASE WHEN 4.2 IS NULL THEN NULL ELSE [4.2] END AS result",
            "SELECT TO_ARRAY(4.2) AS result",
        ),
        (
            "SELECT ['a', 'b'] AS result",
            "SELECT TO_ARRAY(ARRAY_CONSTRUCT('a', 'b')) AS result",
        ),
    )
    for expected_sql, to_array_sql in cases:
        self.validate_all(
            expected_sql,
            read={"snowflake": to_array_sql},
            write={"duckdb": expected_sql},
        )
class TestDune(Validator):
    """Validator-based tests for the "dune" dialect."""

    dialect = "dune"

    def test_dune(self):
        """Check 256-bit integer casts and hex-literal handling for several lengths."""
        for wide_int_cast in ("CAST(x AS INT256)", "CAST(x AS UINT256)"):
            self.validate_identity(wide_int_cast)

        hex_payloads = (
            "deadbeef",
            "deadbeefdead",
            "deadbeefdeadbeef",
            "deadbeefdeadbeefde",
            "deadbeefdeadbeefdead",
            "deadbeefdeadbeefdeadbeef",
            "deadbeefdeadbeefdeadbeefdeadbeef",
        )
        for digits in hex_payloads:
            with self.subTest(f"Transpiling hex literal {digits}"):
                # The three spellings of the same literal used below.
                prefixed = f"SELECT 0x{digits}"
                upper_quoted = f"SELECT X'{digits}'"
                lower_quoted = f"SELECT x'{digits}'"

                self.parse_one(f"0x{digits}").assert_is(exp.HexString)
                self.validate_all(
                    prefixed,
                    read={
                        "dune": upper_quoted,
                        "postgres": lower_quoted,
                        "trino": upper_quoted,
                    },
                    write={
                        "dune": prefixed,
                        "postgres": lower_quoted,
                        "trino": lower_quoted,
                    },
                )
def test_exasol_keywords(self):
    """Check that each listed keyword, used as an alias, comes back double-quoted."""
    for keyword in ("CS", "ADD", "BOOLEAN", "CALL", "CONTROL"):
        with self.subTest(keyword=keyword):
            bare_alias_sql = f"SELECT 1 AS {keyword}"
            quoted_alias_sql = f'SELECT 1 AS "{keyword}"'
            self.validate_identity(bare_alias_sql, quoted_alias_sql)
def test_mod(self):
    """Check MOD(x, 10) against its Teradata, MySQL and Exasol spellings."""
    mod_call = "SELECT MOD(x, 10)"
    written_as = {
        "teradata": "SELECT x MOD 10",
        "mysql": "SELECT x % 10",
        "exasol": mod_call,
    }
    self.validate_all(mod_call, read={"exasol": mod_call}, write=written_as)
"duckdb": "SELECT x & 1", "presto": "SELECT BITWISE_AND(x, 1)", "spark": "SELECT x & 1", }, write={ "exasol": "SELECT BIT_AND(x, 1)", "duckdb": "SELECT x & 1", "hive": "SELECT x & 1", "presto": "SELECT BITWISE_AND(x, 1)", "spark": "SELECT x & 1", }, ) self.validate_all( "SELECT BIT_OR(x, 1)", read={ "exasol": "SELECT BIT_OR(x, 1)", "duckdb": "SELECT x | 1", "presto": "SELECT BITWISE_OR(x, 1)", "spark": "SELECT x | 1", }, write={ "exasol": "SELECT BIT_OR(x, 1)", "duckdb": "SELECT x | 1", "hive": "SELECT x | 1", "presto": "SELECT BITWISE_OR(x, 1)", "spark": "SELECT x | 1", }, ) self.validate_all( "SELECT BIT_XOR(x, 1)", read={ "": "SELECT x ^ 1", "exasol": "SELECT BIT_XOR(x, 1)", "bigquery": "SELECT x ^ 1", "presto": "SELECT BITWISE_XOR(x, 1)", "postgres": "SELECT x # 1", }, write={ "": "SELECT x ^ 1", "exasol": "SELECT BIT_XOR(x, 1)", "bigquery": "SELECT x ^ 1", "duckdb": "SELECT XOR(x, 1)", "presto": "SELECT BITWISE_XOR(x, 1)", "postgres": "SELECT x # 1", }, ) self.validate_all( "SELECT BIT_NOT(x)", read={ "exasol": "SELECT BIT_NOT(x)", "duckdb": "SELECT ~x", "presto": "SELECT BITWISE_NOT(x)", "spark": "SELECT ~x", }, write={ "exasol": "SELECT BIT_NOT(x)", "duckdb": "SELECT ~x", "hive": "SELECT ~x", "presto": "SELECT BITWISE_NOT(x)", "spark": "SELECT ~x", }, ) self.validate_all( "SELECT BIT_LSHIFT(x, 1)", read={ "exasol": "SELECT BIT_LSHIFT(x, 1)", "spark": "SELECT SHIFTLEFT(x, 1)", "duckdb": "SELECT x << 1", "hive": "SELECT x << 1", }, write={ "exasol": "SELECT BIT_LSHIFT(x, 1)", "duckdb": "SELECT x << 1", "presto": "SELECT BITWISE_ARITHMETIC_SHIFT_LEFT(x, 1)", "hive": "SELECT x << 1", "spark": "SELECT SHIFTLEFT(x, 1)", }, ) self.validate_all( "SELECT BIT_RSHIFT(x, 1)", read={ "exasol": "SELECT BIT_RSHIFT(x, 1)", "spark": "SELECT SHIFTRIGHT(x, 1)", "duckdb": "SELECT x >> 1", "hive": "SELECT x >> 1", }, write={ "exasol": "SELECT BIT_RSHIFT(x, 1)", "duckdb": "SELECT x >> 1", "presto": "SELECT BITWISE_ARITHMETIC_SHIFT_RIGHT(x, 1)", "hive": "SELECT x >> 1", "spark": 
"SELECT SHIFTRIGHT(x, 1)", }, ) def test_aggregateFunctions(self): self.validate_all( "SELECT department, EVERY(age >= 30) AS EVERY FROM employee_table GROUP BY department", read={ "exasol": "SELECT department, EVERY(age >= 30) AS EVERY FROM employee_table GROUP BY department", }, write={ "exasol": "SELECT department, EVERY(age >= 30) AS EVERY FROM employee_table GROUP BY department", "duckdb": "SELECT department, ALL (age >= 30) AS EVERY FROM employee_table GROUP BY department", }, ) ( self.validate_all( "SELECT VAR_POP(current_salary)", write={ "exasol": "SELECT VAR_POP(current_salary)", "duckdb": "SELECT VAR_POP(current_salary)", "presto": "SELECT VAR_POP(current_salary)", }, read={ "exasol": "SELECT VAR_POP(current_salary)", "duckdb": "SELECT VAR_POP(current_salary)", "presto": "SELECT VAR_POP(current_salary)", }, ), ) self.validate_all( "SELECT APPROXIMATE_COUNT_DISTINCT(y)", read={ "spark": "SELECT APPROX_COUNT_DISTINCT(y)", "exasol": "SELECT APPROXIMATE_COUNT_DISTINCT(y)", }, write={ "redshift": "SELECT APPROXIMATE COUNT(DISTINCT y)", "spark": "SELECT APPROX_COUNT_DISTINCT(y)", "exasol": "SELECT APPROXIMATE_COUNT_DISTINCT(y)", }, ) for func in ("RANK", "DENSE_RANK"): with self.subTest(func=func): self.validate_all( f"SELECT a, b, {func}(b) OVER (ORDER BY b) FROM (VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1)) AS tab(a, b)", write={ "exasol": f"SELECT a, b, {func}() OVER (ORDER BY b) FROM (VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1)) AS tab(a, b)", "databricks": f"SELECT a, b, {func}(b) OVER (ORDER BY b NULLS LAST) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) AS tab(a, b)", "spark": f"SELECT a, b, {func}(b) OVER (ORDER BY b NULLS LAST) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) AS tab(a, b)", }, ) def test_stringFunctions(self): self.validate_identity( "TO_CHAR(CAST(TO_DATE(date, 'YYYYMMDD') AS TIMESTAMP), 'DY') AS day_of_week" ) self.validate_identity("SELECT TO_CHAR(12345.67890, '9999999.999999999') AS TO_CHAR") 
self.validate_identity( "SELECT TO_CHAR(DATE '1999-12-31') AS TO_CHAR", "SELECT TO_CHAR(CAST('1999-12-31' AS DATE)) AS TO_CHAR", ) self.validate_identity( "SELECT TO_CHAR(TIMESTAMP '1999-12-31 23:59:00', 'HH24:MI:SS DD-MM-YYYY') AS TO_CHAR", "SELECT TO_CHAR(CAST('1999-12-31 23:59:00' AS TIMESTAMP), 'HH24:MI:SS DD-MM-YYYY') AS TO_CHAR", ) self.validate_identity("SELECT TO_CHAR(12345.6789) AS TO_CHAR") self.validate_identity("SELECT TO_CHAR(-12345.67890, '000G000G000D000000MI') AS TO_CHAR") self.validate_all( "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMP), 'DAY MONTH YYYY')", read={ "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')", }, ) self.validate_identity( "SELECT id, department, hire_date, GROUP_CONCAT(id ORDER BY hire_date SEPARATOR ',') OVER (PARTITION BY department rows between 1 preceding and 1 following) GROUP_CONCAT_RESULT from employee_table ORDER BY department, hire_date", "SELECT id, department, hire_date, LISTAGG(id, ',') WITHIN GROUP (ORDER BY hire_date) OVER (PARTITION BY department rows BETWEEN 1 preceding AND 1 following) AS GROUP_CONCAT_RESULT FROM employee_table ORDER BY department, hire_date", ) self.validate_all( "GROUP_CONCAT(DISTINCT x ORDER BY y DESC)", write={ "exasol": "LISTAGG(DISTINCT x, ',') WITHIN GROUP (ORDER BY y DESC)", "mysql": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR ',')", "tsql": "STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y DESC)", "databricks": "LISTAGG(DISTINCT x, ',') WITHIN GROUP (ORDER BY y DESC)", }, ) self.validate_all( "EDIT_DISTANCE(col1, col2)", read={ "exasol": "EDIT_DISTANCE(col1, col2)", "bigquery": "EDIT_DISTANCE(col1, col2)", "clickhouse": "editDistance(col1, col2)", "drill": "LEVENSHTEIN_DISTANCE(col1, col2)", "duckdb": "LEVENSHTEIN(col1, col2)", "hive": "LEVENSHTEIN(col1, col2)", }, write={ "exasol": "EDIT_DISTANCE(col1, col2)", "bigquery": "EDIT_DISTANCE(col1, col2)", "clickhouse": "editDistance(col1, col2)", "drill": "LEVENSHTEIN_DISTANCE(col1, col2)", "duckdb": 
"LEVENSHTEIN(col1, col2)", "hive": "LEVENSHTEIN(col1, col2)", }, ) ( self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", write={ "bigquery": "REGEXP_REPLACE(subject, pattern, replacement)", "exasol": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement)", "hive": "REGEXP_REPLACE(subject, pattern, replacement)", "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", "spark": "REGEXP_REPLACE(subject, pattern, replacement, position)", }, read={ "exasol": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", "spark": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence)", }, ), ) ( self.validate_all( "SELECT TO_CHAR(CAST('1999-12-31' AS DATE)) AS TO_CHAR", write={ "exasol": "SELECT TO_CHAR(CAST('1999-12-31' AS DATE)) AS TO_CHAR", "presto": "SELECT DATE_FORMAT(CAST('1999-12-31' AS DATE)) AS TO_CHAR", "oracle": "SELECT TO_CHAR(CAST('1999-12-31' AS DATE)) AS TO_CHAR", "redshift": "SELECT CAST(CAST('1999-12-31' AS DATE) AS VARCHAR(MAX)) AS TO_CHAR", "postgres": "SELECT CAST(CAST('1999-12-31' AS DATE) AS TEXT) AS TO_CHAR", }, read={ "exasol": "SELECT TO_CHAR(DATE '1999-12-31') AS TO_CHAR", }, ), ) self.validate_all( "STRPOS(haystack, needle)", write={ "exasol": "INSTR(haystack, needle)", "bigquery": "INSTR(haystack, needle)", "databricks": "LOCATE(needle, haystack)", "oracle": "INSTR(haystack, needle)", "presto": "STRPOS(haystack, needle)", }, ) self.validate_all( r"SELECT REGEXP_SUBSTR('My mail address is my_mail@yahoo.com', '(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,4}') AS EMAIL", write={ "exasol": r"SELECT REGEXP_SUBSTR('My mail address is my_mail@yahoo.com', '(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,4}') AS EMAIL", "bigquery": r"SELECT REGEXP_EXTRACT('My mail address is my_mail@yahoo.com', 
'(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,4}') AS EMAIL", "snowflake": r"SELECT REGEXP_SUBSTR('My mail address is my_mail@yahoo.com', '(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,4}') AS EMAIL", "presto": r"SELECT REGEXP_EXTRACT('My mail address is my_mail@yahoo.com', '(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,4}') AS EMAIL", }, ) self.validate_all( "SELECT SUBSTR('www.apache.org', 1, NVL(NULLIF(INSTR('www.apache.org', '.', 1, 2), 0) - 1, LENGTH('www.apache.org')))", read={ "databricks": "SELECT substring_index('www.apache.org', '.', 2)", }, ) self.validate_all( "SELECT SUBSTR('555A66A777', 1, NVL(NULLIF(INSTR('555A66A777', 'a', 1, 2), 0) - 1, LENGTH('555A66A777')))", read={ "databricks": "SELECT substring_index('555A66A777' COLLATE UTF8_BINARY, 'a', 2)", }, ) self.validate_all( "SELECT SUBSTR('555A66A777', 1, NVL(NULLIF(INSTR(LOWER('555A66A777'), 'a', 1, 2), 0) - 1, LENGTH('555A66A777')))", read={ "databricks": "SELECT substring_index('555A66A777' COLLATE UTF8_LCASE, 'a', 2)", }, ) self.validate_all( "SELECT SUBSTR('A|a|A', 1, NVL(NULLIF(INSTR(LOWER('A|a|A'), LOWER('A'), 1, 2), 0) - 1, LENGTH('A|a|A')))", read={ "databricks": "SELECT substring_index('A|a|A' COLLATE UTF8_LCASE, 'A' COLLATE UTF8_LCASE, 2)", }, ) def test_datetime_functions(self): formats = { "HH12": "hour_12", "HH24": "hour_24", "ID": "iso_weekday", "IW": "iso_week_number", "uW": "week_number_uW", "VW": "week_number_VW", "IYYY": "iso_year", "MI": "minutes", "SS": "seconds", "DAY": "day_full", "DY": "day_abbr", } self.validate_identity( "SELECT TO_DATE('31-12-1999', 'dd-mm-yyyy') AS TO_DATE", "SELECT TO_DATE('31-12-1999', 'DD-MM-YYYY') AS TO_DATE", ) self.validate_identity( "SELECT TO_DATE('31-12-1999', 'dd-mm-YY') AS TO_DATE", "SELECT TO_DATE('31-12-1999', 'DD-MM-YY') AS TO_DATE", ) self.validate_identity("SELECT TO_DATE('31-DECEMBER-1999', 'DD-MONTH-YYYY') AS TO_DATE") self.validate_identity("SELECT TO_DATE('31-DEC-1999', 'DD-MON-YYYY') AS TO_DATE") self.validate_identity("SELECT 
WEEKOFYEAR('2024-05-22')", "SELECT WEEK('2024-05-22')") for fmt, alias in formats.items(): with self.subTest(f"Testing TO_CHAR with format '{fmt}'"): self.validate_identity( f"SELECT TO_CHAR(CAST('2024-07-08 13:45:00' AS TIMESTAMP), '{fmt}') AS {alias}" ) self.validate_all( "SELECT TO_CHAR(CAST('2024-07-08 13:45:00' AS TIMESTAMP), 'DY')", write={ "exasol": "SELECT TO_CHAR(CAST('2024-07-08 13:45:00' AS TIMESTAMP), 'DY')", "oracle": "SELECT TO_CHAR(CAST('2024-07-08 13:45:00' AS TIMESTAMP), 'DY')", "postgres": "SELECT TO_CHAR(CAST('2024-07-08 13:45:00' AS TIMESTAMP), 'TMDy')", "databricks": "SELECT DATE_FORMAT(CAST('2024-07-08 13:45:00' AS TIMESTAMP), 'EEE')", }, ) self.validate_all( "TO_DATE(x, 'YYYY-MM-DD')", write={ "exasol": "TO_DATE(x, 'YYYY-MM-DD')", "duckdb": "CAST(x AS DATE)", "hive": "TO_DATE(x)", "presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)", "spark": "TO_DATE(x)", "snowflake": "TO_DATE(x, 'yyyy-mm-DD')", "databricks": "TO_DATE(x)", }, ) self.validate_all( "TO_DATE(x, 'YYYY')", write={ "exasol": "TO_DATE(x, 'YYYY')", "duckdb": "CAST(STRPTIME(x, '%Y') AS DATE)", "hive": "TO_DATE(x, 'yyyy')", "presto": "CAST(DATE_PARSE(x, '%Y') AS DATE)", "spark": "TO_DATE(x, 'yyyy')", "snowflake": "TO_DATE(x, 'yyyy')", "databricks": "TO_DATE(x, 'yyyy')", }, ) self.validate_identity( "SELECT CONVERT_TZ(CAST('2012-03-25 02:30:00' AS TIMESTAMP), 'Europe/Berlin', 'UTC', 'INVALID REJECT AMBIGUOUS REJECT') AS CONVERT_TZ" ) self.validate_all( "SELECT CONVERT_TZ('2012-05-10 12:00:00', 'Europe/Berlin', 'America/New_York')", read={ "exasol": "SELECT CONVERT_TZ('2012-05-10 12:00:00', 'Europe/Berlin', 'America/New_York')", "mysql": "SELECT CONVERT_TZ('2012-05-10 12:00:00', 'Europe/Berlin', 'America/New_York')", "databricks": "SELECT CONVERT_TIMEZONE('Europe/Berlin', 'America/New_York', '2012-05-10 12:00:00')", }, write={ "exasol": "SELECT CONVERT_TZ('2012-05-10 12:00:00', 'Europe/Berlin', 'America/New_York')", "mysql": "SELECT CONVERT_TZ('2012-05-10 12:00:00', 'Europe/Berlin', 
'America/New_York')", "databricks": "SELECT CONVERT_TIMEZONE('Europe/Berlin', 'America/New_York', '2012-05-10 12:00:00')", "snowflake": "SELECT CONVERT_TIMEZONE('Europe/Berlin', 'America/New_York', '2012-05-10 12:00:00')", "spark": "SELECT CONVERT_TIMEZONE('Europe/Berlin', 'America/New_York', '2012-05-10 12:00:00')", "redshift": "SELECT CONVERT_TIMEZONE('Europe/Berlin', 'America/New_York', '2012-05-10 12:00:00')", "duckdb": "SELECT CAST('2012-05-10 12:00:00' AS TIMESTAMP) AT TIME ZONE 'Europe/Berlin' AT TIME ZONE 'America/New_York'", }, ) self.validate_identity( "TIME_TO_STR(b, '%Y-%m-%d %H:%M:%S')", "TO_CHAR(b, 'YYYY-MM-DD HH:MI:SS')", ) self.validate_identity( "SELECT TIME_TO_STR(CAST(STR_TO_TIME(date, '%Y%m%d') AS DATE), '%a') AS day_of_week", "SELECT TO_CHAR(CAST(TO_DATE(date, 'YYYYMMDD') AS DATE), 'DY') AS day_of_week", ) self.validate_identity( "SELECT CAST(CAST(CURRENT_TIMESTAMP() AS TIMESTAMP) AT TIME ZONE 'CET' AS DATE) - 1", "SELECT CAST(CONVERT_TZ(CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), 'UTC', 'CET') AS DATE) - 1", ) units = ["MM", "QUARTER", "WEEK", "MINUTE", "YEAR"] for unit in units: with self.subTest(f"Testing DATE_TRUNC with format '{unit}'"): self.validate_all( f"SELECT TRUNC(CAST('2006-12-31' AS DATE), '{unit}') AS TRUNC", write={ "exasol": f"SELECT DATE_TRUNC('{unit}', DATE '2006-12-31') AS TRUNC", "presto": f"SELECT DATE_TRUNC('{unit}', CAST('2006-12-31' AS DATE)) AS TRUNC", "databricks": f"SELECT TRUNC(CAST('2006-12-31' AS DATE), '{unit}') AS TRUNC", }, ) self.validate_all( f"SELECT DATE_TRUNC('{unit}', TIMESTAMP '2006-12-31T23:59:59') DATE_TRUNC", write={ "exasol": f"SELECT DATE_TRUNC('{unit}', TIMESTAMP '2006-12-31 23:59:59') AS DATE_TRUNC", "presto": f"SELECT DATE_TRUNC('{unit}', CAST('2006-12-31T23:59:59' AS TIMESTAMP)) AS DATE_TRUNC", "databricks": f"SELECT DATE_TRUNC('{unit}', CAST('2006-12-31T23:59:59' AS TIMESTAMP)) AS DATE_TRUNC", }, ) self.validate_all( f"SELECT DATE_TRUNC('{unit}', CURRENT_TIMESTAMP) DATE_TRUNC", write={ "exasol": 
f"SELECT DATE_TRUNC('{unit}', CURRENT_TIMESTAMP()) AS DATE_TRUNC", "presto": f"SELECT DATE_TRUNC('{unit}', CURRENT_TIMESTAMP) AS DATE_TRUNC", "databricks": f"SELECT DATE_TRUNC('{unit}', CURRENT_TIMESTAMP()) AS DATE_TRUNC", }, ) from sqlglot.dialects.exasol import DATE_UNITS for unit in DATE_UNITS: with self.subTest(f"Testing ADD_{unit}S"): self.validate_all( f"SELECT ADD_{unit}S(DATE '2000-02-28', 1)", write={ "exasol": f"SELECT ADD_{unit}S(CAST('2000-02-28' AS DATE), 1)", "bigquery": f"SELECT DATE_ADD(CAST('2000-02-28' AS DATE), INTERVAL 1 {unit})", "duckdb": f"SELECT CAST('2000-02-28' AS DATE) + INTERVAL 1 {unit}", "presto": f"SELECT DATE_ADD('{unit}', 1, CAST('2000-02-28' AS DATE))", "redshift": f"SELECT DATEADD({unit}, 1, CAST('2000-02-28' AS DATE))", "snowflake": f"SELECT DATEADD({unit}, 1, CAST('2000-02-28' AS DATE))", "tsql": f"SELECT DATEADD({unit}, 1, CAST('2000-02-28' AS DATE))", }, ) self.validate_all( f"SELECT ADD_{unit}S('2000-02-28', -'1')", read={ "sqlite": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", "bigquery": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", "presto": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", "redshift": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", "snowflake": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", "tsql": f"SELECT DATE_SUB('2000-02-28', INTERVAL 1 {unit})", }, ) self.validate_all( "SELECT CAST(ADD_DAYS(ADD_MONTHS(DATE_TRUNC('MONTH', DATE '2008-11-25'), 1), -1) AS DATE)", read={ "snowflake": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE), MONTH)", "databricks": "SELECT LAST_DAY('2008-11-25')", "spark": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", }, ) with self.subTest(f"Testing {unit}S_BETWEEN"): self.validate_all( f"SELECT {unit}S_BETWEEN(TIMESTAMP '2000-02-28 00:00:00', CURRENT_TIMESTAMP)", write={ "exasol": f"SELECT {unit}S_BETWEEN(CAST('2000-02-28 00:00:00' AS TIMESTAMP), CURRENT_TIMESTAMP())", "bigquery": f"SELECT 
DATE_DIFF(CAST('2000-02-28 00:00:00' AS DATETIME), CURRENT_TIMESTAMP(), {unit})", "duckdb": f"SELECT DATE_DIFF('{unit}', CURRENT_TIMESTAMP, CAST('2000-02-28 00:00:00' AS TIMESTAMP))", "presto": f"SELECT DATE_DIFF('{unit}', CURRENT_TIMESTAMP, CAST('2000-02-28 00:00:00' AS TIMESTAMP))", "redshift": f"SELECT DATEDIFF({unit}, GETDATE(), CAST('2000-02-28 00:00:00' AS TIMESTAMP))", "snowflake": f"SELECT DATEDIFF({unit}, CURRENT_TIMESTAMP(), CAST('2000-02-28 00:00:00' AS TIMESTAMP))", "tsql": f"SELECT DATEDIFF({unit}, GETDATE(), CAST('2000-02-28 00:00:00' AS DATETIME2))", }, ) self.validate_all( "SELECT quarter('2016-08-31')", write={ "exasol": "SELECT CEIL(MONTH(TO_DATE('2016-08-31'))/3)", "databricks": "SELECT QUARTER('2016-08-31')", }, ) def test_number_functions(self): self.validate_identity("SELECT TRUNC(123.456, 2) AS TRUNC") self.validate_identity("SELECT DIV(1234, 2) AS DIV") # Numeric truncation identity self.validate_identity("TRUNC(123.456, 2)").assert_is(exp.Trunc) self.validate_identity("TRUNC(3.14159)").assert_is(exp.Trunc) # Date truncation with typed column and unit # (parse_one because DateTrunc generates as DATE_TRUNC, not TRUNC) self.parse_one("TRUNC(CAST(x AS DATE), 'MONTH')").assert_is(exp.DateTrunc) self.parse_one("TRUNC(CAST(x AS TIMESTAMP), 'MONTH')").assert_is(exp.DateTrunc) self.parse_one("TRUNC(CAST(x AS DATETIME), 'MONTH')").assert_is(exp.DateTrunc) # Fallback to Anonymous (Exasol requires unit for date truncation) self.validate_identity("TRUNC(CAST(x AS DATE))").assert_is(exp.Anonymous) # Cross-dialect numeric truncation transpilation self.validate_all( "TRUNC(price, 2)", write={ "exasol": "TRUNC(price, 2)", "oracle": "TRUNC(price, 2)", "postgres": "TRUNC(price, 2)", "mysql": "TRUNCATE(price, 2)", "tsql": "ROUND(price, 2, 1)", }, ) # Date truncation with various units (Exasol-specific unit names) for unit in ("YYYY", "MM", "DD", "HH", "MI", "SS", "WW"): with self.subTest(f"Date/time TRUNC with {unit}"): self.validate_all( f"TRUNC(CAST(x AS 
TIMESTAMP), '{unit}')", write={ "exasol": f"DATE_TRUNC('{unit}', x)", "oracle": f"TRUNC(CAST(x AS TIMESTAMP), '{unit}')", }, ) # Q gets normalized to QUARTER self.validate_all( "TRUNC(CAST(x AS TIMESTAMP), 'Q')", write={ "exasol": "DATE_TRUNC('QUARTER', x)", "oracle": "TRUNC(CAST(x AS TIMESTAMP), 'QUARTER')", }, ) def test_scalar(self): self.validate_all( "SELECT CURRENT_USER", read={ "exasol": "SELECT USER", "spark": "SELECT CURRENT_USER()", "trino": "SELECT CURRENT_USER", "snowflake": "SELECT CURRENT_USER()", }, write={ "exasol": "SELECT CURRENT_USER", "spark": "SELECT CURRENT_USER()", "trino": "SELECT CURRENT_USER", "snowflake": "SELECT CURRENT_USER()", }, ) self.validate_all( 'CREATE OR REPLACE VIEW "schema"."v" ("col" COMMENT IS \'desc\') AS SELECT "src_col" AS "col"', write={ "databricks": "CREATE OR REPLACE VIEW `schema`.`v` (`col` COMMENT 'desc') AS SELECT `src_col` AS `col`", "exasol": 'CREATE OR REPLACE VIEW "schema"."v" ("col" COMMENT IS \'desc\') AS SELECT "src_col" AS "col"', }, ) self.validate_all( "HASH_SHA(x)", read={ "clickhouse": "SHA1(x)", "exasol": "HASH_SHA1(x)", "presto": "SHA1(x)", "trino": "SHA1(x)", }, write={ "exasol": "HASH_SHA(x)", "clickhouse": "SHA1(x)", "bigquery": "SHA1(x)", "": "SHA(x)", "presto": "SHA1(x)", "trino": "SHA1(x)", }, ) self.validate_all( "HASH_MD5(x)", write={ "exasol": "HASH_MD5(x)", "": "MD5(x)", "bigquery": "TO_HEX(MD5(x))", "clickhouse": "LOWER(HEX(MD5(x)))", "hive": "MD5(x)", "presto": "LOWER(TO_HEX(MD5(x)))", "spark": "MD5(x)", "trino": "LOWER(TO_HEX(MD5(x)))", }, ) self.validate_all( "HASHTYPE_MD5(x)", write={ "exasol": "HASHTYPE_MD5(x)", "": "MD5_DIGEST(x)", "bigquery": "MD5(x)", "clickhouse": "MD5(x)", "hive": "UNHEX(MD5(x))", "presto": "MD5(x)", "spark": "UNHEX(MD5(x))", "trino": "MD5(x)", }, ) self.validate_all( "HASH_SHA256(x)", read={ "clickhouse": "SHA256(x)", "presto": "SHA256(x)", "trino": "SHA256(x)", "postgres": "SHA256(x)", "duckdb": "SHA256(x)", }, write={ "exasol": "HASH_SHA256(x)", "bigquery": 
"SHA256(x)", "spark2": "SHA2(x, 256)", "clickhouse": "SHA256(x)", "postgres": "SHA256(x)", "presto": "SHA256(x)", "redshift": "SHA2(x, 256)", "trino": "SHA256(x)", "duckdb": "SHA256(x)", "snowflake": "SHA2(x, 256)", }, ) self.validate_all( "HASH_SHA512(x)", read={ "clickhouse": "SHA512(x)", "presto": "SHA512(x)", "trino": "SHA512(x)", }, write={ "exasol": "HASH_SHA512(x)", "clickhouse": "SHA512(x)", "bigquery": "SHA512(x)", "spark2": "SHA2(x, 512)", "presto": "SHA512(x)", "trino": "SHA512(x)", }, ) self.validate_all( "SELECT NULLIFZERO(1) NIZ1", write={ "exasol": "SELECT IF 1 = 0 THEN NULL ELSE 1 ENDIF AS NIZ1", "snowflake": "SELECT IFF(1 = 0, NULL, 1) AS NIZ1", "sqlite": "SELECT IIF(1 = 0, NULL, 1) AS NIZ1", "presto": "SELECT IF(1 = 0, NULL, 1) AS NIZ1", "spark": "SELECT IF(1 = 0, NULL, 1) AS NIZ1", "hive": "SELECT IF(1 = 0, NULL, 1) AS NIZ1", "duckdb": "SELECT CASE WHEN 1 = 0 THEN NULL ELSE 1 END AS NIZ1", }, ) self.validate_all( "SELECT ZEROIFNULL(NULL) NIZ1", write={ "exasol": "SELECT IF NULL IS NULL THEN 0 ELSE NULL ENDIF AS NIZ1", "snowflake": "SELECT IFF(NULL IS NULL, 0, NULL) AS NIZ1", "sqlite": "SELECT IIF(NULL IS NULL, 0, NULL) AS NIZ1", "presto": "SELECT IF(NULL IS NULL, 0, NULL) AS NIZ1", "spark": "SELECT IF(NULL IS NULL, 0, NULL) AS NIZ1", "hive": "SELECT IF(NULL IS NULL, 0, NULL) AS NIZ1", "duckdb": "SELECT CASE WHEN NULL IS NULL THEN 0 ELSE NULL END AS NIZ1", }, ) self.validate_identity( "SELECT name, age, IF age < 18 THEN 'underaged' ELSE 'adult' ENDIF AS LEGALITY FROM persons" ) self.validate_identity("SELECT HASHTYPE_MD5(a, b, c, d)") def test_odbc_date_literals(self): self.validate_identity("SELECT {d'2024-01-01'}", "SELECT TO_DATE('2024-01-01')") self.validate_identity( "SELECT {ts'2024-01-01 12:00:00'}", "SELECT TO_TIMESTAMP('2024-01-01 12:00:00')", ) def test_local_prefix_for_alias(self): self.validate_identity( 'SELECT ID FROM local WHERE "LOCAL".ID IS NULL', 'SELECT ID FROM "LOCAL" WHERE "LOCAL".ID IS NULL', ) self.validate_identity( 'SELECT 
YEAR(a_date) AS "a_year" FROM MY_SUMMARY_TABLE GROUP BY LOCAL."a_year"', ) self.validate_identity( 'SELECT a_year AS a_year FROM "LOCAL" GROUP BY "LOCAL".a_year', ) test_cases = [ ( "GROUP BY alias", "SELECT YEAR(a_date) AS a_year FROM my_table GROUP BY LOCAL.a_year", "SELECT YEAR(a_date) AS a_year FROM my_table GROUP BY a_year", ), ( "HAVING alias", "SELECT SUM(amount) AS total FROM my_table HAVING LOCAL.total > 10000", "SELECT SUM(amount) AS total FROM my_table HAVING total > 10000", ), ( "WHERE alias", "SELECT YEAR(a_date) AS a_year FROM my_table WHERE LOCAL.a_year > 2020", "SELECT YEAR(a_date) AS a_year FROM my_table WHERE a_year > 2020", ), ( "Multiple aliases", "SELECT YEAR(a_date) AS a_year, MONTH(a_date) AS a_month FROM my_table WHERE LOCAL.a_year > 2020 AND LOCAL.a_month < 6", "SELECT YEAR(a_date) AS a_year, MONTH(a_date) AS a_month FROM my_table WHERE a_year > 2020 AND a_month < 6", ), ( "Select list aliases", "SELECT YR AS THE_YEAR, ID AS YR, LOCAL.THE_YEAR + 1 AS NEXT_YEAR FROM my_table", "SELECT YR AS THE_YEAR, ID AS YR, THE_YEAR + 1 AS NEXT_YEAR FROM my_table", ), ( "Select list aliases without Local keyword", "SELECT YEAR(CURRENT_DATE) AS current_year, LOCAL.current_year + 1 AS next_year", "SELECT YEAR(CURRENT_DATE) AS current_year, current_year + 1 AS next_year", ), ] for title, exasol_sql, dbx_sql in test_cases: with self.subTest(clause=title): self.validate_all( exasol_sql, write={"exasol": exasol_sql, "databricks": dbx_sql}, ) def test_regexp_like(self): # Exasol uses binary predicate syntax: col REGEXP_LIKE pattern self.validate_identity("SELECT x REGEXP_LIKE '.*pattern.*'") # Cross-dialect: partial match semantics from other dialects get .* wrapping self.validate_all( "SELECT a REGEXP_LIKE '.*x.*'", read={ "hive": "SELECT a RLIKE 'x'", "presto": "SELECT REGEXP_LIKE(a, 'x')", }, write={ "exasol": "SELECT a REGEXP_LIKE '.*x.*'", "hive": "SELECT a RLIKE '.*x.*'", "presto": "SELECT REGEXP_LIKE(a, '.*x.*')", }, ) def test_json(self): 
self.validate_identity("""SELECT JSON_VALUE('{"d":"a"}', '$.d' NULL ON ERROR) AS x""") self.validate_all( """SELECT JSON_VALUE('{"d":"a"}', '$.d' NULL ON ERROR) AS x""", write={ "exasol": """SELECT JSON_VALUE('{"d":"a"}', '$.d' NULL ON ERROR) AS x""", "trino": """SELECT JSON_VALUE('{"d":"a"}', '$.d' NULL ON ERROR) AS x""", }, ) self.validate_identity( """SELECT JSON_EXTRACT('{"firstname" : "Ann", "surname" : "Smith", "age" : 29}', '$.firstname', '$.surname', '$.age') EMITS (firstname VARCHAR(100), surname VARCHAR(100), age INT)""" ) def test_group_by_all(self): self.validate_all( "SELECT id, city, COUNT(*) FROM dealer GROUP BY ALL", write={ "exasol": "SELECT id, city, COUNT(*) FROM dealer GROUP BY 1, 2", "databricks": "SELECT id, city, COUNT(*) FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT car_model, COUNT(DISTINCT city) FROM dealer GROUP BY ALL", write={ "exasol": "SELECT car_model, COUNT(DISTINCT city) FROM dealer GROUP BY 1", "databricks": "SELECT car_model, COUNT(DISTINCT city) FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT car_model, city FROM dealer GROUP BY ALL", write={ "exasol": "SELECT car_model, city FROM dealer GROUP BY 1, 2", "databricks": "SELECT car_model, city FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT COUNT(*) FROM dealer GROUP BY ALL", write={ "exasol": "SELECT COUNT(*) FROM dealer", "databricks": "SELECT COUNT(*) FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT UPPER(city), COUNT(*) FROM dealer GROUP BY ALL", write={ "exasol": "SELECT UPPER(city), COUNT(*) FROM dealer GROUP BY 1", "databricks": "SELECT UPPER(city), COUNT(*) FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT city AS c, COUNT(*) + 1 FROM dealer GROUP BY ALL", write={ "exasol": "SELECT city AS c, COUNT(*) + 1 FROM dealer GROUP BY 1", "databricks": "SELECT city AS c, COUNT(*) + 1 FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT city, COUNT(*) OVER () FROM dealer GROUP BY ALL", write={ "exasol": "SELECT city, 
COUNT(*) OVER () FROM dealer GROUP BY 1", "databricks": "SELECT city, COUNT(*) OVER () FROM dealer GROUP BY ALL", }, ) self.validate_all( "SELECT * FROM t GROUP BY ALL", write={ "exasol": "SELECT DISTINCT * FROM t", "databricks": "SELECT * FROM t GROUP BY ALL", }, ) with self.assertRaises(UnsupportedError): transpile( "SELECT *, COUNT(*) FROM t GROUP BY ALL", write="exasol", unsupported_level=ErrorLevel.RAISE, ) ================================================ FILE: tests/dialects/test_fabric.py ================================================ from tests.dialects.test_dialect import Validator class TestFabric(Validator): dialect = "fabric" maxDiff = None def test_type_mappings(self): """Test that types are correctly mapped to their alternatives""" self.validate_identity("CAST(x AS BOOLEAN)", "CAST(x AS BIT)") self.validate_identity("CAST(x AS DATE)", "CAST(x AS DATE)") self.validate_identity("CAST(x AS DATETIME)", "CAST(x AS DATETIME2(6))") self.validate_identity("CAST(x AS DECIMAL)", "CAST(x AS DECIMAL)") self.validate_identity("CAST(x AS DOUBLE)", "CAST(x AS FLOAT)") self.validate_identity("CAST(x AS IMAGE)", "CAST(x AS VARBINARY)") self.validate_identity("CAST(x AS INT)", "CAST(x AS INT)") self.validate_identity("CAST(x AS JSON)", "CAST(x AS VARCHAR)") self.validate_identity("CAST(x AS MONEY)", "CAST(x AS DECIMAL)") self.validate_identity("CAST(x AS NCHAR)", "CAST(x AS CHAR)") self.validate_identity("CAST(x AS NVARCHAR)", "CAST(x AS VARCHAR)") self.validate_identity("CAST(x AS ROWVERSION)", "CAST(x AS ROWVERSION)") self.validate_identity("CAST(x AS SMALLDATETIME)", "CAST(x AS DATETIME2(6))") self.validate_identity("CAST(x AS SMALLMONEY)", "CAST(x AS DECIMAL)") self.validate_identity("CAST(x AS TEXT)", "CAST(x AS VARCHAR(MAX))") self.validate_identity("CAST(x AS TIMESTAMP)", "CAST(x AS DATETIME2(6))") self.validate_identity("CAST(x AS TIMESTAMPNTZ)", "CAST(x AS DATETIME2(6))") self.validate_identity("CAST(x AS TINYINT)", "CAST(x AS SMALLINT)") 
self.validate_identity("CAST(x AS UTINYINT)", "CAST(x AS SMALLINT)") self.validate_identity("CAST(x AS UUID)", "CAST(x AS UNIQUEIDENTIFIER)") self.validate_identity("CAST(x AS VARIANT)", "CAST(x AS SQL_VARIANT)") self.validate_identity("CAST(x AS XML)", "CAST(x AS VARCHAR)") def test_precision_capping(self): """Test that TIME, DATETIME2 & DATETIMEOFFSET precision is capped at 6 digits""" # Default precision should be 6 self.validate_identity("CAST(x AS TIME)", "CAST(x AS TIME(6))") self.validate_identity("CAST(x AS DATETIME2)", "CAST(x AS DATETIME2(6))") # Precision <= 6 should be preserved self.validate_identity("CAST(x AS TIME(3))", "CAST(x AS TIME(3))") self.validate_identity("CAST(x AS DATETIME2(3))", "CAST(x AS DATETIME2(3))") self.validate_identity("CAST(x AS TIME(6))", "CAST(x AS TIME(6))") self.validate_identity("CAST(x AS DATETIME2(6))", "CAST(x AS DATETIME2(6))") # Precision > 6 should be capped at 6 self.validate_identity("CAST(x AS TIME(7))", "CAST(x AS TIME(6))") self.validate_identity("CAST(x AS DATETIME2(7))", "CAST(x AS DATETIME2(6))") self.validate_identity("CAST(x AS TIME(9))", "CAST(x AS TIME(6))") self.validate_identity("CAST(x AS DATETIME2(9))", "CAST(x AS DATETIME2(6))") def test_timestamptz_without_at_time_zone(self): # TIMESTAMPTZ should be cast to TIMESTAMP when not in an AT TIME ZONE self.validate_identity( "CAST(x AS TIMESTAMPTZ)", "CAST(x AS DATETIME2(6))", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ(3))", "CAST(x AS DATETIME2(3))", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ(6))", "CAST(x AS DATETIME2(6))", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ(9))", "CAST(x AS DATETIME2(6))", ) def test_timestamptz_with_at_time_zone(self): # TIMESTAMPTZ should be DATETIMEOFFSET when in an AT TIME ZONE expression and then cast to TIMESTAMP self.validate_identity( "CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'", "CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))", ) 
self.validate_identity( "CAST(x AS TIMESTAMPTZ(3)) AT TIME ZONE 'Pacific Standard Time'", "CAST(CAST(x AS DATETIMEOFFSET(3)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(3))", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ(6)) AT TIME ZONE 'Pacific Standard Time'", "CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ(9)) AT TIME ZONE 'Pacific Standard Time'", "CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))", ) def test_unix_to_time(self): """Test UnixToTime transformation to DATEADD with microseconds""" self.validate_identity( "UNIX_TO_TIME(column)", "DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))", ) def test_varchar_precision_inference(self): # Test VARCHAR without precision conversion to VARCHAR(1) self.validate_identity( "CREATE TABLE t (col VARCHAR)", "CREATE TABLE t (col VARCHAR(1))", ) # Test VARCHAR with existing precision should remain unchanged self.validate_identity("CREATE TABLE t (col VARCHAR(50))") # Test CHAR without precision conversion to CHAR(1) self.validate_identity( "CREATE TABLE t (col CHAR)", "CREATE TABLE t (col CHAR(1))", ) # Test CHAR with existing precision should remain unchanged self.validate_identity("CREATE TABLE t (col CHAR(10))") # Test cross-dialect conversion: non-TSQL VARCHAR -> TSQL VARCHAR(MAX) self.validate_all( "CREATE TABLE t (col VARCHAR(MAX))", read={ "postgres": "CREATE TABLE t (col VARCHAR)", "tsql": "CREATE TABLE t (col VARCHAR(MAX))", }, ) # Test cross-dialect conversion: non-TSQL CHAR -> TSQL CHAR(MAX) self.validate_all( "CREATE TABLE t (col CHAR(MAX))", read={ "postgres": "CREATE TABLE t (col CHAR)", "tsql": "CREATE TABLE t (col CHAR(MAX))", }, ) ================================================ FILE: tests/dialects/test_hive.py ================================================ from tests.dialects.test_dialect import Validator from 
sqlglot import exp


# Transpilation tests for the "hive" dialect, driven by helpers inherited from Validator
# (validate_all, validate_identity, parse_one, assert_duckdb_sql), which are defined
# outside this file.
# NOTE(review): presumably validate_all treats its first argument as SQL in `dialect`,
# `read` as {source dialect: SQL that should parse to the same thing} and `write` as
# {target dialect: expected generated SQL} -- confirm in tests/dialects/test_dialect.py.
class TestHive(Validator):
    dialect = "hive"

    # Bitwise operators (&, ~, |, <<, >>) against duckdb / presto / spark spellings.
    def test_bits(self):
        self.validate_all(
            "x & 1",
            read={
                "duckdb": "x & 1",
                "presto": "BITWISE_AND(x, 1)",
                "spark": "x & 1",
            },
            write={
                "duckdb": "x & 1",
                "hive": "x & 1",
                "presto": "BITWISE_AND(x, 1)",
                "spark": "x & 1",
            },
        )
        self.validate_all(
            "x & 1 > 0",
            read={
                "duckdb": "x & 1 > 0",
                "presto": "BITWISE_AND(x, 1) > 0",
                "spark": "x & 1 > 0",
            },
            write={
                "duckdb": "x & 1 > 0",
                "presto": "BITWISE_AND(x, 1) > 0",
                "hive": "x & 1 > 0",
                "spark": "x & 1 > 0",
            },
        )
        self.validate_all(
            "~x",
            read={
                "duckdb": "~x",
                "presto": "BITWISE_NOT(x)",
                "spark": "~x",
            },
            write={
                "duckdb": "~x",
                "hive": "~x",
                "presto": "BITWISE_NOT(x)",
                "spark": "~x",
            },
        )
        self.validate_all(
            "x | 1",
            read={
                "duckdb": "x | 1",
                "presto": "BITWISE_OR(x, 1)",
                "spark": "x | 1",
            },
            write={
                "duckdb": "x | 1",
                "hive": "x | 1",
                "presto": "BITWISE_OR(x, 1)",
                "spark": "x | 1",
            },
        )
        self.validate_all(
            "x << 1",
            read={
                "spark": "SHIFTLEFT(x, 1)",
            },
            write={
                "duckdb": "x << 1",
                "presto": "BITWISE_ARITHMETIC_SHIFT_LEFT(x, 1)",
                "hive": "x << 1",
                "spark": "SHIFTLEFT(x, 1)",
            },
        )
        self.validate_all(
            "x >> 1",
            read={
                "spark": "SHIFTRIGHT(x, 1)",
            },
            write={
                "duckdb": "x >> 1",
                "presto": "BITWISE_ARITHMETIC_SHIFT_RIGHT(x, 1)",
                "hive": "x >> 1",
                "spark": "SHIFTRIGHT(x, 1)",
            },
        )

    # Typed numeric literal suffixes (s/S, Y, L, bd) and plain CAST; the expected
    # duckdb/presto outputs use TRY_CAST while hive/spark use CAST.
    def test_cast(self):
        self.validate_all(
            "1s",
            write={
                "duckdb": "TRY_CAST(1 AS SMALLINT)",
                "presto": "TRY_CAST(1 AS SMALLINT)",
                "hive": "CAST(1 AS SMALLINT)",
                "spark": "CAST(1 AS SMALLINT)",
            },
        )
        self.validate_all(
            "1S",
            write={
                "duckdb": "TRY_CAST(1 AS SMALLINT)",
                "presto": "TRY_CAST(1 AS SMALLINT)",
                "hive": "CAST(1 AS SMALLINT)",
                "spark": "CAST(1 AS SMALLINT)",
            },
        )
        self.validate_all(
            "1Y",
            write={
                "duckdb": "TRY_CAST(1 AS TINYINT)",
                "presto": "TRY_CAST(1 AS TINYINT)",
                "hive": "CAST(1 AS TINYINT)",
                "spark": "CAST(1 AS TINYINT)",
            },
        )
        self.validate_all(
            "1L",
            write={
                "duckdb": "TRY_CAST(1 AS BIGINT)",
                "presto": "TRY_CAST(1 AS BIGINT)",
                "hive": "CAST(1 AS BIGINT)",
                "spark": "CAST(1 AS BIGINT)",
            },
        )
        self.validate_all(
            "1.0bd",
            write={
                "duckdb": "TRY_CAST(1.0 AS DECIMAL)",
                "presto": "TRY_CAST(1.0 AS DECIMAL)",
                "hive": "CAST(1.0 AS DECIMAL)",
                "spark": "CAST(1.0 AS DECIMAL)",
            },
        )
        self.validate_all(
            "CAST(1 AS INT)",
            read={
                "presto": "TRY_CAST(1 AS INT)",
            },
            write={
                "duckdb": "TRY_CAST(1 AS INT)",
                "presto": "TRY_CAST(1 AS INTEGER)",
                "hive": "CAST(1 AS INT)",
                "spark": "CAST(1 AS INT)",
            },
        )

    # DDL: CREATE TABLE (partitioning, storage formats, properties, struct columns),
    # ALTER TABLE / ALTER VIEW variants.
    def test_ddl(self):
        self.validate_all(
            "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
            write={
                "duckdb": "CREATE TABLE x (w TEXT)",  # Partition columns should exist in table
                "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])",
                "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)",
                "spark": "CREATE TABLE x (w STRING, y INT, z INT) PARTITIONED BY (y, z)",
            },
        )
        self.validate_all(
            "CREATE TABLE test STORED AS parquet TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
            write={
                "duckdb": "CREATE TABLE test AS SELECT 1",
                "presto": "CREATE TABLE test WITH (format='parquet', x='1', Z='2') AS SELECT 1",
                "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
                "spark": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('x'='1', 'Z'='2') AS SELECT 1",
            },
        )
        self.validate_all(
            "CREATE TABLE test STORED AS INPUTFORMAT 'foo1' OUTPUTFORMAT 'foo2'",
            write={
                "hive": "CREATE TABLE test STORED AS INPUTFORMAT 'foo1' OUTPUTFORMAT 'foo2'",
                "spark": "CREATE TABLE test STORED AS INPUTFORMAT 'foo1' OUTPUTFORMAT 'foo2'",
                "databricks": "CREATE TABLE test STORED AS INPUTFORMAT 'foo1' OUTPUTFORMAT 'foo2'",
            },
        )

        self.validate_identity("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)")
        self.validate_identity(
            "ALTER TABLE x CHANGE a a VARCHAR(10)",
            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
        )

        # Hive's CHANGE COLUMN is expected to become ALTER COLUMN / RENAME COLUMN for spark.
        self.validate_all(
            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
            write={
                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)",
                "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
            },
        )
        self.validate_all(
            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
            write={
                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) COMMENT 'comment'",
                "spark": "ALTER TABLE x ALTER COLUMN a COMMENT 'comment'",
            },
        )
        self.validate_all(
            "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
            write={
                "hive": "ALTER TABLE x CHANGE COLUMN a b VARCHAR(10)",
                "spark": "ALTER TABLE x RENAME COLUMN a TO b",
            },
        )
        self.validate_all(
            "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
            write={
                "hive": "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10) CASCADE",
                "spark": "ALTER TABLE x ALTER COLUMN a TYPE VARCHAR(10)",
            },
        )

        self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING)")
        self.validate_identity("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE")

        self.validate_identity(
            """CREATE EXTERNAL TABLE x (y INT) ROW FORMAT SERDE 'serde' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' WITH SERDEPROPERTIES ('input.regex'='')""",
        )
        # NOTE(review): `ARRAY` below has no element type; it looks like an `ARRAY<...>`
        # parameter was lost from this copy of the file -- confirm against upstream.
        self.validate_identity(
            """CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')"""
        )
        self.validate_identity("CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'")
        self.validate_identity("ALTER VIEW v1 AS SELECT x, UPPER(s) AS s FROM t2")
        self.validate_identity("ALTER VIEW v1 (c1, c2) AS SELECT x, UPPER(s) AS s FROM t2")
        self.validate_identity(
            "ALTER VIEW v7 (c1 COMMENT 'Comment for c1', c2) AS SELECT t1.c1, t1.c2 FROM t1"
        )
        self.validate_identity("ALTER VIEW db1.v1 RENAME TO db2.v2")
        self.validate_identity("ALTER VIEW v1 SET TBLPROPERTIES ('tblp1'='1', 'tblp2'='2')")
        self.validate_identity(
            "ALTER VIEW v1 UNSET TBLPROPERTIES ('tblp1', 'tblp2')", check_command_warning=True
        )

        # NOTE(review): the STRUCT literals in the next three cases carry no field list and
        # some end in an unbalanced '>' ("struct>)", "STRUCT>)"), while the duckdb/presto
        # expectations do list fields. The `<...>` parts appear to have been stripped from
        # this copy -- restore from upstream before relying on these cases.
        self.validate_identity("CREATE TABLE foo (col STRUCT)")

        self.validate_all(
            "CREATE TABLE db.example_table (col_a struct)",
            write={
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT)",
            },
        )

        self.validate_all(
            "CREATE TABLE db.example_table (col_a struct>)",
            write={
                "bigquery": "CREATE TABLE db.example_table (col_a STRUCT>)",
                "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))",
                "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))",
                "hive": "CREATE TABLE db.example_table (col_a STRUCT>)",
                "spark": "CREATE TABLE db.example_table (col_a STRUCT>)",
            },
        )

        self.validate_all(
            "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
            read={
                "spark2": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
                "spark": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
                "databricks": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
            },
            write={
                "hive": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
                "spark2": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
                "spark": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
                "databricks": "ALTER TABLE db.example_table ADD PARTITION(col_a = 'a') LOCATION 'b'",
            },
        )

    # LATERAL VIEW EXPLODE / POSEXPLODE; the expected presto/duckdb output uses
    # CROSS JOIN UNNEST, with a "pos - 1" projection for POSEXPLODE.
    def test_lateral_view(self):
        self.validate_all(
            "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
            write={
                "presto": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a) CROSS JOIN UNNEST(z) AS u(b)",
                "duckdb": "SELECT a, b FROM x CROSS JOIN UNNEST(y) AS t(a) CROSS JOIN UNNEST(z) AS u(b)",
                "hive": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
                "spark": "SELECT a, b FROM x LATERAL VIEW EXPLODE(y) t AS a LATERAL VIEW EXPLODE(z) u AS b",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "duckdb": "SELECT a FROM x CROSS JOIN UNNEST(y) AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(y) t AS a",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, col",
            write={
                "presto": "SELECT a FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, col FROM UNNEST(y) WITH ORDINALITY AS t(col, pos))",
                "trino": "SELECT a FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, col FROM UNNEST(y) WITH ORDINALITY AS t(col, pos))",
                "duckdb": "SELECT a FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, col FROM UNNEST(y) WITH ORDINALITY AS t(col, pos))",
                "hive": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, col",
                "spark": "SELECT a FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, col",
            },
        )
        self.validate_all(
            "SELECT * FROM x LATERAL VIEW POSEXPLODE(MAP(col, 'val')) t AS pos, key, value",
            write={
                "presto": "SELECT * FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, key, value FROM UNNEST(MAP(ARRAY[col], ARRAY['val'])) WITH ORDINALITY AS t(key, value, pos))",
                "trino": "SELECT * FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, key, value FROM UNNEST(MAP(ARRAY[col], ARRAY['val'])) WITH ORDINALITY AS t(key, value, pos))",
                "hive": "SELECT * FROM x LATERAL VIEW POSEXPLODE(MAP(col, 'val')) t AS pos, key, value",
                "spark": "SELECT * FROM x LATERAL VIEW POSEXPLODE(MAP(col, 'val')) t AS pos, key, value",
            },
        )
        self.validate_all(
            "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            write={
                "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)",
                "duckdb": "SELECT a FROM x CROSS JOIN UNNEST([y]) AS t(a)",
                "hive": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
                "spark": "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
            },
        )

    # String-literal quoting: backslash escapes on the hive/spark side versus doubled
    # single quotes in the duckdb/presto expectations.
    def test_quotes(self):
        self.validate_all(
            "'\\''",
            write={
                "duckdb": "''''",
                "presto": "''''",
                "hive": "'\\''",
                "spark": "'\\''",
            },
        )
        self.validate_all(
            "'\"x\"'",
            write={
                "duckdb": "'\"x\"'",
                "presto": "'\"x\"'",
                "hive": "'\"x\"'",
                "spark": "'\"x\"'",
            },
        )
        self.validate_all(
            "\"'x'\"",
            write={
                "duckdb": "'''x'''",
                "presto": "'''x'''",
                "hive": "'\\'x\\''",
                "spark": "'\\'x\\''",
            },
        )
        self.validate_all(
            "'\\\\\\\\a'",
            read={
                "drill": "'\\\\\\\\a'",
                "duckdb": "'\\\\a'",
                "presto": "'\\\\a'",
            },
            write={
                "drill": "'\\\\\\\\a'",
                "duckdb": "'\\\\a'",
                "hive": "'\\\\\\\\a'",
                "presto": "'\\\\a'",
                "spark": "'\\\\\\\\a'",
            },
        )

    # RLIKE and REGEXP are both expected to come out as RLIKE for hive/spark.
    def test_regex(self):
        self.validate_all(
            "a RLIKE 'x'",
            write={
                "duckdb": "REGEXP_MATCHES(a, 'x')",
                "exasol": "a REGEXP_LIKE '.*x.*'",
                "presto": "REGEXP_LIKE(a, 'x')",
                "hive": "a RLIKE 'x'",
                "spark": "a RLIKE 'x'",
            },
        )
        self.validate_all(
            "a REGEXP 'x'",
            write={
                "duckdb": "REGEXP_MATCHES(a, 'x')",
                "exasol": "a REGEXP_LIKE '.*x.*'",
                "presto": "REGEXP_LIKE(a, 'x')",
                "hive": "a RLIKE 'x'",
                "spark": "a RLIKE 'x'",
            },
        )

    # Date/time functions: DATE_DIFF from presto, DATEDIFF, FROM_UNIXTIME, DATE_FORMAT,
    # DATE_ADD / DATE_SUB, UNIX_TIMESTAMP and DAY/MONTH/YEAR extraction.
    def test_time(self):
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) * 1000",
            read={
                "presto": "DATE_DIFF('millisecond', x, y)",
            },
        )
        self.validate_all(
            "UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)",
            read={
                "presto": "DATE_DIFF('second', x, y)",
            },
        )
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 60",
            read={
                "presto": "DATE_DIFF('minute', x, y)",
            },
        )
        self.validate_all(
            "(UNIX_TIMESTAMP(y) - UNIX_TIMESTAMP(x)) / 3600",
            read={
                "presto": "DATE_DIFF('hour', x, y)",
            },
        )
        self.validate_all(
            "DATEDIFF(a, b)",
            write={
                "duckdb": "DATE_DIFF('DAY', CAST(b AS DATE), CAST(a AS DATE))",
                "presto": "DATE_DIFF('DAY', CAST(CAST(b AS TIMESTAMP) AS DATE), CAST(CAST(a AS TIMESTAMP) AS DATE))",
                "hive": "DATEDIFF(a, b)",
                "spark": "DATEDIFF(a, b)",
                "": "DATEDIFF(CAST(a AS DATE), CAST(b AS DATE))",
            },
        )
        self.validate_all(
            """from_unixtime(x, "yyyy-MM-dd'T'HH")""",
            write={
                "duckdb": "STRFTIME(TO_TIMESTAMP(x), '%Y-%m-%d''T''%H')",
                "presto": "DATE_FORMAT(FROM_UNIXTIME(x), '%Y-%m-%d''T''%H')",
                "hive": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
                "spark": "FROM_UNIXTIME(x, 'yyyy-MM-dd\\'T\\'HH')",
            },
        )
        self.validate_all(
            "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
            write={
                "bigquery": "FORMAT_DATE('%F %T', CAST('2020-01-01' AS DATETIME))",
                "duckdb": "STRFTIME(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')",
                "presto": "DATE_FORMAT(CAST('2020-01-01' AS TIMESTAMP), '%Y-%m-%d %T')",
                "hive": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
                "spark": "DATE_FORMAT('2020-01-01', 'yyyy-MM-dd HH:mm:ss')",
            },
        )
        self.validate_all(
            "DATE_ADD('2020-01-01', 1)",
            write={
                "": "TS_OR_DS_ADD('2020-01-01', 1, DAY)",
                "bigquery": "DATE_ADD(CAST(CAST('2020-01-01' AS DATETIME) AS DATE), INTERVAL 1 DAY)",
                "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY",
                "hive": "DATE_ADD('2020-01-01', 1)",
                "presto": "DATE_ADD('DAY', 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "redshift": "DATEADD(DAY, 1, '2020-01-01')",
                "snowflake": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "spark": "DATE_ADD('2020-01-01', 1)",
                "tsql": "DATEADD(DAY, 1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
            },
        )
        # DATE_SUB is expected to be written as DATE_ADD with the amount multiplied by -1.
        self.validate_all(
            "DATE_SUB('2020-01-01', 1)",
            write={
                "": "TS_OR_DS_ADD('2020-01-01', 1 * -1, DAY)",
                "bigquery": "DATE_ADD(CAST(CAST('2020-01-01' AS DATETIME) AS DATE), INTERVAL (1 * -1) DAY)",
                "duckdb": "CAST('2020-01-01' AS DATE) + INTERVAL (1 * -1) DAY",
                "hive": "DATE_ADD('2020-01-01', 1 * -1)",
                "presto": "DATE_ADD('DAY', 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "redshift": "DATEADD(DAY, 1 * -1, '2020-01-01')",
                "snowflake": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE))",
                "spark": "DATE_ADD('2020-01-01', 1 * -1)",
                "tsql": "DATEADD(DAY, 1 * -1, CAST(CAST('2020-01-01' AS DATETIME2) AS DATE))",
            },
        )
        self.validate_all("DATE_ADD('2020-01-01', -1)", read={"": "DATE_SUB('2020-01-01', 1)"})
        self.validate_all("DATE_ADD(a, b * -1)", read={"": "DATE_SUB(a, b)"})
        self.validate_all(
            "ADD_MONTHS('2020-01-01', -2)", read={"": "DATE_SUB('2020-01-01', 2, month)"}
        )
        self.validate_all(
            "DATEDIFF(TO_DATE(y), x)",
            write={
                "duckdb": "DATE_DIFF('DAY', CAST(x AS DATE), TRY_CAST(y AS DATE))",
                "presto": "DATE_DIFF('DAY', CAST(CAST(x AS TIMESTAMP) AS DATE), CAST(CAST(CAST(CAST(y AS TIMESTAMP) AS DATE) AS TIMESTAMP) AS DATE))",
                "hive": "DATEDIFF(TO_DATE(y), x)",
                "spark": "DATEDIFF(TO_DATE(y), x)",
                "": "DATEDIFF(TRY_CAST(y AS DATE), CAST(x AS DATE))",
            },
        )
        self.validate_all(
            "UNIX_TIMESTAMP(x)",
            write={
                "duckdb": "EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))",
                "presto": "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))",
                "hive": "UNIX_TIMESTAMP(x)",
                "spark": "UNIX_TIMESTAMP(x)",
                "": "STR_TO_UNIX(x, '%Y-%m-%d %H:%M:%S')",
            },
        )

        # The same expectation shape is checked for each date-part function.
        for unit in ("DAY", "MONTH", "YEAR"):
            self.validate_all(
                f"{unit}(x)",
                write={
                    "duckdb": f"{unit}(CAST(x AS DATE))",
                    "presto": f"{unit}(CAST(CAST(x AS TIMESTAMP) AS DATE))",
                    "hive": f"{unit}(x)",
                    "spark": f"{unit}(x)",
                },
            )

    # ORDER BY null ordering: the duckdb/presto expectations drop "NULLS LAST" after ASC
    # and add "NULLS FIRST" to the bare `lname` key.
    def test_order_by(self):
        self.validate_all(
            "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            write={
                "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
                "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST",
                "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
                "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname",
            },
        )

    # General Hive syntax and function coverage (identity round-trips first, then
    # cross-dialect expectations).
    def test_hive(self):
        self.validate_identity("TO_DATE(TO_DATE(x))")
        self.validate_identity("DAY(TO_DATE(x))")
        self.validate_identity("SELECT * FROM t WHERE col IN ('stream')")
        self.validate_identity("SET hiveconf:some_var = 5", check_command_warning=True)
        self.validate_identity("(VALUES (1 AS a, 2 AS b, 3))")
        self.validate_identity("SELECT * FROM my_table TIMESTAMP AS OF DATE_ADD(CURRENT_DATE, -1)")
        self.validate_identity("SELECT * FROM my_table VERSION AS OF DATE_ADD(CURRENT_DATE, -1)")
        self.validate_identity(
            "SELECT WEEKOFYEAR('2024-05-22'), DAYOFMONTH('2024-05-22'), DAYOFWEEK('2024-05-22')"
        )
        self.validate_identity(
            "SELECT ROW() OVER (DISTRIBUTE BY x SORT BY y)",
            "SELECT ROW() OVER (PARTITION BY x ORDER BY y)",
        )
        self.validate_identity("SELECT transform")
        self.validate_identity("SELECT * FROM test DISTRIBUTE BY y SORT BY x DESC ORDER BY l")
        self.validate_identity(
            "SELECT * FROM test WHERE RAND() <= 0.1 DISTRIBUTE BY RAND() SORT BY RAND()"
        )
        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z")
        self.validate_identity("(SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x")
        self.validate_identity("(SELECT 1 UNION SELECT 2) CLUSTER BY y DESC")
        self.validate_identity("SELECT * FROM test CLUSTER BY y")
        self.validate_identity("(SELECT 1 UNION SELECT 2) SORT BY z")
        self.validate_identity(
            "INSERT OVERWRITE TABLE zipcodes PARTITION(state = '0') VALUES (896, 'US', 'TAMPA', 33607)"
        )
        self.validate_identity(
            "INSERT OVERWRITE TABLE zipcodes PARTITION(state = 0) VALUES (896, 'US', 'TAMPA', 33607)"
        )
        self.validate_identity(
            "INSERT OVERWRITE DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' LINES TERMINATED BY '' STORED AS TEXTFILE SELECT * FROM `a`.`b`"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b, GROUPING SETS ((a, b), a)"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, b, GROUPING SETS ((t.a, b), a)"
        )
        self.validate_identity(
            "SELECT a, b, SUM(c) FROM tabl AS t GROUP BY a, FOO(b), GROUPING SETS ((a, FOO(b)), a)"
        )
        self.validate_identity(
            "SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH CUBE"
        )
        self.validate_identity(
            "SELECT key, value, GROUPING__ID, COUNT(*) FROM T1 GROUP BY key, value WITH ROLLUP"
        )
        self.validate_identity(
            "TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address = 'abc')"
        )
        self.validate_identity(
            "SELECT * FROM t1, t2",
            "SELECT * FROM t1 CROSS JOIN t2",
        )

        self.validate_all(
            "SELECT ${hiveconf:some_var}",
            write={
                "hive": "SELECT ${hiveconf:some_var}",
                "spark": "SELECT ${hiveconf:some_var}",
            },
        )
        # Identifiers that begin with a digit are expected to stay unquoted for spark
        # (and to be quoted for trino in the one case that targets it).
        self.validate_all(
            "SELECT A.1a AS b FROM test_a AS A",
            write={
                "spark": "SELECT A.1a AS b FROM test_a AS A",
            },
        )
        self.validate_all(
            "SELECT 1_a AS a FROM test_table",
            write={
                "spark": "SELECT 1_a AS a FROM test_table",
                "trino": 'SELECT "1_a" AS a FROM test_table',
            },
        )
        self.validate_all(
            "SELECT a_b AS 1_a FROM test_table",
            write={
                "spark": "SELECT a_b AS 1_a FROM test_table",
            },
        )
        self.validate_all(
            "SELECT 1a_1a FROM test_a",
            write={
                "spark": "SELECT 1a_1a FROM test_a",
            },
        )
        self.validate_all(
            "SELECT 1a AS 1a_1a FROM test_a",
            write={
                "spark": "SELECT 1a AS 1a_1a FROM test_a",
            },
        )
        self.validate_all(
            "CREATE TABLE test_table (1a STRING)",
            write={
                "spark": "CREATE TABLE test_table (1a STRING)",
            },
        )
        self.validate_all(
            "CREATE TABLE test_table2 (1a_1a STRING)",
            write={
                "spark": "CREATE TABLE test_table2 (1a_1a STRING)",
            },
        )
        self.validate_all(
            "PERCENTILE_APPROX(x, 0.5)",
            read={
                "hive": "PERCENTILE_APPROX(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "duckdb": "APPROX_QUANTILE(x, 0.5)",
                "spark": "PERCENTILE_APPROX(x, 0.5)",
            },
            write={
                "hive": "PERCENTILE_APPROX(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "duckdb": "APPROX_QUANTILE(x, 0.5)",
                "spark": "PERCENTILE_APPROX(x, 0.5)",
            },
        )
        # The ALL quantifier in the inputs is absent from the expected output.
        self.validate_all(
            "PERCENTILE_APPROX(x, 0.5)",
            read={
                "hive": "PERCENTILE_APPROX(ALL x, 0.5)",
                "spark2": "PERCENTILE_APPROX(ALL x, 0.5)",
                "spark": "PERCENTILE_APPROX(ALL x, 0.5)",
                "databricks": "PERCENTILE_APPROX(ALL x, 0.5)",
            },
        )
        self.validate_all(
            "PERCENTILE_APPROX(x, 0.5, 200)",
            read={
                "hive": "PERCENTILE_APPROX(ALL x, 0.5, 200)",
                "spark2": "PERCENTILE_APPROX(ALL x, 0.5, 200)",
                "spark": "PERCENTILE_APPROX(ALL x, 0.5, 200)",
                "databricks": "PERCENTILE_APPROX(ALL x, 0.5, 200)",
            },
        )
        self.validate_all(
            "APPROX_COUNT_DISTINCT(a)",
            write={
                "bigquery": "APPROX_COUNT_DISTINCT(a)",
                "duckdb": "APPROX_COUNT_DISTINCT(a)",
                "presto": "APPROX_DISTINCT(a)",
                "hive": "APPROX_COUNT_DISTINCT(a)",
                "snowflake": "APPROX_COUNT_DISTINCT(a)",
                "spark": "APPROX_COUNT_DISTINCT(a)",
            },
        )
        self.validate_all(
            "ARRAY_CONTAINS(x, 1)",
            read={
                "duckdb": "LIST_HAS(x, 1)",
                "snowflake": "ARRAY_CONTAINS(1, x)",
            },
            write={
                "duckdb": "ARRAY_CONTAINS(x, 1)",
                "presto": "CONTAINS(x, 1)",
                "hive": "ARRAY_CONTAINS(x, 1)",
                "spark": "ARRAY_CONTAINS(x, 1)",
                "snowflake": "ARRAY_CONTAINS(CAST(1 AS VARIANT), x)",
            },
        )
        self.validate_all(
            "SIZE(x)",
            write={
                "duckdb": "ARRAY_LENGTH(x)",
                "presto": "CARDINALITY(x)",
                "hive": "SIZE(x)",
                "spark": "SIZE(x)",
            },
        )
        self.validate_all(
            "LOCATE('a', x)",
            write={
                "duckdb": "STRPOS(x, 'a')",
                "presto": "STRPOS(x, 'a')",
                "hive": "LOCATE('a', x)",
                "spark": "LOCATE('a', x)",
            },
        )
        self.validate_all(
            "LOCATE('a', x, 3)",
            write={
                "duckdb": "CASE WHEN STRPOS(SUBSTRING(x, 3), 'a') = 0 THEN 0 ELSE STRPOS(SUBSTRING(x, 3), 'a') + 3 - 1 END",
                "presto": "IF(STRPOS(SUBSTRING(x, 3), 'a') = 0, 0, STRPOS(SUBSTRING(x, 3), 'a') + 3 - 1)",
                "hive": "LOCATE('a', x, 3)",
                "spark": "LOCATE('a', x, 3)",
            },
        )
        self.validate_all(
            "INITCAP('new york')",
            write={
                "hive": "INITCAP('new york')",
                "spark": "INITCAP('new york')",
            },
        )
        # NOTE(review): assert_duckdb_sql is defined elsewhere; presumably it checks that the
        # generated DuckDB SQL contains the `includes` fragments and the `chr_chars`
        # characters -- confirm against the helper.
        expression = self.parse_one("INITCAP('new york')")
        self.assert_duckdb_sql(
            expression,
            includes=("REGEXP_MATCHES(", "ARRAY_TO_STRING("),
            chr_chars=("\u000b", "\u001c", "\u001d", "\u001e", "\u001f"),
        )
        self.validate_all(
            "SELECT * FROM x.z TABLESAMPLE(10 PERCENT) y",
            write={
                "hive": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
                "spark": "SELECT * FROM x.z TABLESAMPLE (10 PERCENT) AS y",
            },
        )
        self.validate_all(
            "SELECT SORT_ARRAY(x, FALSE)",
            read={
                "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
                "spark": "SELECT SORT_ARRAY(x, FALSE)",
            },
            write={
                "duckdb": "SELECT ARRAY_REVERSE_SORT(x)",
                "presto": "SELECT ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)",
                "hive": "SELECT SORT_ARRAY(x, FALSE)",
                "spark": "SELECT SORT_ARRAY(x, FALSE)",
            },
        )
        self.validate_all(
            "GET_JSON_OBJECT(x, '$.name')",
            write={
                "presto": "JSON_EXTRACT_SCALAR(x, '$.name')",
                "hive": "GET_JSON_OBJECT(x, '$.name')",
                "spark": "GET_JSON_OBJECT(x, '$.name')",
            },
        )
        self.validate_all(
            "MAP(a, b, c, d)",
            read={
                "": "VAR_MAP(a, b, c, d)",
                "clickhouse": "map(a, b, c, d)",
                "duckdb": "MAP([a, c], [b, d])",
                "hive": "MAP(a, b, c, d)",
                "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
                "spark": "MAP(a, b, c, d)",
            },
            write={
                "": "MAP(ARRAY(a, c), ARRAY(b, d))",
                "clickhouse": "map(a, b, c, d)",
                "duckdb": "MAP([a, c], [b, d])",
                "presto": "MAP(ARRAY[a, c], ARRAY[b, d])",
                "hive": "MAP(a, b, c, d)",
                "spark": "MAP(a, b, c, d)",
                "snowflake": "OBJECT_CONSTRUCT(a, b, c, d)",
            },
        )
        self.validate_all(
            "MAP(a, b)",
            write={
                "duckdb": "MAP([a], [b])",
                "presto": "MAP(ARRAY[a], ARRAY[b])",
                "hive": "MAP(a, b)",
                "spark": "MAP(a, b)",
                "snowflake": "OBJECT_CONSTRUCT(a, b)",
            },
        )
        # Single-argument LOG is expected to be written as LN for every listed dialect.
        self.validate_all(
            "LOG(10)",
            write={
                "duckdb": "LN(10)",
                "presto": "LN(10)",
                "hive": "LN(10)",
                "spark": "LN(10)",
            },
        )
        # Double-quoted input is expected to come out as a single-quoted string literal.
        self.validate_all(
            'ds = "2020-01-01"',
            write={
                "duckdb": "ds = '2020-01-01'",
                "presto": "ds = '2020-01-01'",
                "hive": "ds = '2020-01-01'",
                "spark": "ds = '2020-01-01'",
            },
        )
        self.validate_all(
            "ds = \"1''2\"",
            write={
                "duckdb": "ds = '1''''2'",
                "presto": "ds = '1''''2'",
                "hive": "ds = '1\\'\\'2'",
                "spark": "ds = '1\\'\\'2'",
            },
        )
        self.validate_all(
            "x == 1",
            write={
                "duckdb": "x = 1",
                "presto": "x = 1",
                "hive": "x = 1",
                "spark": "x = 1",
            },
        )
        self.validate_all(
            "x DIV y",
            read={
                "databricks": "x DIV y",
                "duckdb": "x // y",
                "hive": "x DIV y",
                "spark2": "x DIV y",
                "spark": "x DIV y",
            },
            write={
                "duckdb": "x // y",
                "databricks": "x DIV y",
                "presto": "CAST(CAST(x AS DOUBLE) / y AS INTEGER)",
                "spark2": "x DIV y",
                "spark": "x DIV y",
            },
        )
        self.validate_all(
            "COLLECT_LIST(x)",
            read={
                "presto": "ARRAY_AGG(x)",
            },
            write={
                "duckdb": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
                "presto": "ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)",
                "hive": "COLLECT_LIST(x)",
                "spark": "COLLECT_LIST(x)",
            },
        )
        self.validate_all(
            "COLLECT_SET(x)",
            read={
                "doris": "COLLECT_SET(x)",
                "presto": "SET_AGG(x)",
                "snowflake": "ARRAY_UNIQUE_AGG(x)",
            },
            write={
                "doris": "COLLECT_SET(x)",
                "hive": "COLLECT_SET(x)",
                "presto": "SET_AGG(x)",
                "snowflake": "ARRAY_UNIQUE_AGG(x)",
                "spark": "COLLECT_SET(x)",
                "trino": "ARRAY_AGG(DISTINCT x)",
            },
        )
        self.validate_all(
            "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
            read={
                "presto": "SELECT * FROM x AS foo TABLESAMPLE BERNOULLI (1)",
                "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
            },
            write={
                "hive": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
                "snowflake": "SELECT * FROM x AS foo TABLESAMPLE (1)",
                "spark": "SELECT * FROM x TABLESAMPLE (1 PERCENT) AS foo",
            },
        )
        self.validate_all(
            "SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(b, 'yyyy'), GROUPING SETS ((a, DATE_FORMAT(b, 'yyyy')), a)",
            write={
                "hive": "SELECT a, SUM(c) FROM t GROUP BY a, DATE_FORMAT(b, 'yyyy'), GROUPING SETS ((a, DATE_FORMAT(b, 'yyyy')), a)",
            },
        )
        self.validate_all(
            "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH')",
            read={
                "hive": "SELECT TRUNC(CAST(ds AS TIMESTAMP), 'MONTH')",
                "presto": "SELECT DATE_TRUNC('MONTH', CAST(ds AS TIMESTAMP))",
            },
            write={
                "presto": "SELECT DATE_TRUNC('MONTH', TRY_CAST(ds AS TIMESTAMP))",
            },
        )
        # Hive TRUNC is date-only, should parse to TimestampTrunc (not numeric Trunc)
        self.validate_identity("TRUNC(date_col, 'MM')").assert_is(exp.TimestampTrunc)
        # Numeric TRUNC from other dialects - Hive has no native support, uses CAST to BIGINT
        self.validate_all(
            "CAST(3.14159 AS BIGINT)",
            read={"postgres": "TRUNC(3.14159, 2)"},
        )
        # Without an explicit group index, the presto/trino/duckdb expectations add ", 1".
        self.validate_all(
            "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
            read={
                "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
            },
            write={
                "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)')",
                "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
                "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
                "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 1)",
            },
        )

        self.validate_identity("EXISTS(col, x -> x % 2 = 0)").assert_is(exp.Exists)

        self.validate_all(
            "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
            read={
                "hive": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
                "spark2": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
                "spark": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
                "databricks": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
            },
            write={
                "spark2": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
                "spark": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
                "databricks": "SELECT EXISTS(ARRAY(2, 3), x -> x % 2 = 0)",
            },
        )

        self.validate_identity("SELECT 1_2")

        self.validate_all(
            "SELECT MAP(*), STRUCT(*) FROM t",
            read={
                "hive": "SELECT MAP(*), STRUCT(*) FROM t",
                "spark2": "SELECT MAP(*), STRUCT(*) FROM t",
                "spark": "SELECT MAP(*), STRUCT(*) FROM t",
                "databricks": "SELECT MAP(*), STRUCT(*) FROM t",
            },
            write={
                "spark2": "SELECT MAP(*), STRUCT(*) FROM t",
                "spark": "SELECT MAP(*), STRUCT(*) FROM t",
                "databricks": "SELECT MAP(*), STRUCT(*) FROM t",
            },
        )

        # FIRST/LAST[_VALUE](col, TRUE): the spark/databricks expectations use IGNORE NULLS,
        # while hive/spark2 keep the two-argument form.
        self.validate_all(
            "SELECT FIRST(sample_col, TRUE)",
            read={
                "spark": "SELECT FIRST(sample_col, TRUE)",
                "databricks": "SELECT FIRST(sample_col, TRUE)",
            },
            write={
                "hive": "SELECT FIRST(sample_col, TRUE)",
                "spark2": "SELECT FIRST(sample_col, TRUE)",
                "spark": "SELECT FIRST(sample_col) IGNORE NULLS",
                "databricks": "SELECT FIRST(sample_col) IGNORE NULLS",
                "duckdb": "SELECT ANY_VALUE(sample_col)",
            },
        )
        self.validate_all(
            "SELECT FIRST_VALUE(sample_col, TRUE)",
            read={
                "spark": "SELECT FIRST_VALUE(sample_col, TRUE)",
                "databricks": "SELECT FIRST_VALUE(sample_col, TRUE)",
            },
            write={
                "hive": "SELECT FIRST_VALUE(sample_col, TRUE)",
                "spark2": "SELECT FIRST_VALUE(sample_col, TRUE)",
                "spark": "SELECT FIRST_VALUE(sample_col) IGNORE NULLS",
                "databricks": "SELECT FIRST_VALUE(sample_col) IGNORE NULLS",
                "duckdb": "SELECT FIRST_VALUE(sample_col IGNORE NULLS)",
            },
        )
        self.validate_all(
            "SELECT LAST_VALUE(sample_col, TRUE)",
            read={
                "spark": "SELECT LAST_VALUE(sample_col, TRUE)",
                "databricks": "SELECT LAST_VALUE(sample_col, TRUE)",
            },
            write={
                "hive": "SELECT LAST_VALUE(sample_col, TRUE)",
                "spark2": "SELECT LAST_VALUE(sample_col, TRUE)",
                "spark": "SELECT LAST_VALUE(sample_col) IGNORE NULLS",
                "databricks": "SELECT LAST_VALUE(sample_col) IGNORE NULLS",
                "duckdb": "SELECT LAST_VALUE(sample_col IGNORE NULLS)",
            },
        )
        self.validate_all(
            "SELECT LAST(sample_col, TRUE)",
            read={
                "spark": "SELECT LAST(sample_col, TRUE)",
                "databricks": "SELECT LAST(sample_col, TRUE)",
            },
            write={
                "hive": "SELECT LAST(sample_col, TRUE)",
                "spark2": "SELECT LAST(sample_col, TRUE)",
                "spark": "SELECT LAST(sample_col) IGNORE NULLS",
                "databricks": "SELECT LAST(sample_col) IGNORE NULLS",
            },
        )
        self.validate_identity(
            "DATE_SUB(CURRENT_DATE, 1 + 1)", "DATE_ADD(CURRENT_DATE, (1 + 1) * -1)"
        )

        self.validate_identity("SELECT ELT(2, 'foo', 'bar', 'baz') AS Result")

        self.validate_all(
            """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT GET_JSON_OBJECT(c, '$.x-y') FROM t""",
            write={
                "hive": """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT GET_JSON_OBJECT(c, '$.x-y') FROM t""",
                "spark2": """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT GET_JSON_OBJECT(c, '$.x-y') FROM t""",
                "spark": """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT GET_JSON_OBJECT(c, '$.x-y') FROM t""",
                "databricks": """WITH t AS (SELECT '{"x-y": "z"}' AS c) SELECT c:["x-y"] FROM t""",
            },
        )

    # Newline and backslash escapes inside string literals. The Python escapes matter here:
    # "'\n'" holds a real newline character, "'\\n'" holds a backslash followed by "n".
    def test_escapes(self) -> None:
        self.validate_identity("'\n'", "'\\n'")
        self.validate_identity("'\\n'")
        self.validate_identity("'\\\n'", "'\\\\\\n'")
        self.validate_identity("'\\\\n'")
        self.validate_identity("''")
        self.validate_identity("'\\\\'")
        self.validate_identity("'\\\\z'")

    # CAST to BIT is expected to be written as BOOLEAN for hive.
    def test_data_type(self):
        self.validate_all(
            "CAST(a AS BIT)",
            write={
                "hive": "CAST(a AS BOOLEAN)",
            },
        )

    # Joins written without an ON clause are expected to gain "ON TRUE"; each join kind
    # runs as its own subTest so one failure does not hide the others.
    def test_joins_without_on(self):
        for join in ("FULL OUTER", "LEFT", "RIGHT", "LEFT OUTER", "RIGHT OUTER", "INNER"):
            with self.subTest(f"Testing transpilation of {join} without ON"):
                self.validate_all(
                    f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                    read={
                        "hive": f"SELECT * FROM t1 {join} JOIN t2",
                        "spark2": f"SELECT * FROM t1 {join} JOIN t2",
                        "spark": f"SELECT * FROM t1 {join} JOIN t2",
                        "databricks": f"SELECT * FROM t1 {join} JOIN t2",
                        "sqlite": f"SELECT * FROM t1 {join} JOIN t2",
                    },
                    write={
                        "hive": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                        "spark2": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                        "spark": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                        "databricks": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                        "sqlite": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                        "duckdb": f"SELECT * FROM t1 {join} JOIN t2 ON TRUE",
                    },
                )

    # PERCENTILE with and without DISTINCT / ALL, plus checks on the parsed node types.
    def test_percentile(self):
        self.validate_all(
            "PERCENTILE(x, 0.5)",
            write={
                "duckdb": "QUANTILE(x, 0.5)",
                "presto": "APPROX_PERCENTILE(x, 0.5)",
                "hive": "PERCENTILE(x, 0.5)",
                "spark2": "PERCENTILE(x, 0.5)",
                "spark": "PERCENTILE(x, 0.5)",
                "databricks": "PERCENTILE(x, 0.5)",
            },
        )
        self.validate_all(
            "PERCENTILE(DISTINCT x, 0.5)",
            read={
                "hive": "PERCENTILE(DISTINCT x, 0.5)",
                "spark": "PERCENTILE(DISTINCT x, 0.5)",
                "databricks": "PERCENTILE(DISTINCT x, 0.5)",
            },
            write={
                "spark": "PERCENTILE(DISTINCT x, 0.5)",
                "databricks": "PERCENTILE(DISTINCT x, 0.5)",
            },
        )
        self.validate_all(
            "PERCENTILE(x, 0.5)",
            read={
                "hive": "PERCENTILE(ALL x, 0.5)",
                "spark2": "PERCENTILE(ALL x, 0.5)",
                "spark": "PERCENTILE(ALL x, 0.5)",
                "databricks": "PERCENTILE(ALL x, 0.5)",
            },
        )

        # With DISTINCT the Quantile node's `this` is a Distinct wrapper ...
        quantile_expr = self.validate_identity("PERCENTILE(DISTINCT x, 0.5)")
        quantile_expr.assert_is(exp.Quantile)
        quantile_expr.this.assert_is(exp.Distinct)
        quantile_expr.args.get("quantile").assert_is(exp.Literal)

        # ... while with ALL it is the bare Column, and ALL is gone from the output SQL.
        quantile_expr = self.validate_identity("PERCENTILE(ALL x, 0.5)", "PERCENTILE(x, 0.5)")
        quantile_expr.assert_is(exp.Quantile)
        quantile_expr.this.assert_is(exp.Column)
        quantile_expr.args.get("quantile").assert_is(exp.Literal)

================================================ FILE: tests/dialects/test_materialize.py ================================================ from
tests.dialects.test_dialect import Validator


# Transpilation tests for the "materialize" dialect, driven by the validate_all /
# validate_identity helpers inherited from Validator (defined outside this file).
class TestMaterialize(Validator):
    dialect = "materialize"

    # Single test covering DDL/DML features dropped for materialize, followed by
    # identity round-trips for now-functions, mz_timestamp, LIST and MAP syntax.
    def test_materialize(self):
        # PRIMARY KEY is absent from the expected materialize output but kept for postgres.
        self.validate_all(
            "CREATE TABLE example (id INT PRIMARY KEY, name TEXT)",
            write={
                "materialize": "CREATE TABLE example (id INT, name TEXT)",
                "postgres": "CREATE TABLE example (id INT PRIMARY KEY, name TEXT)",
            },
        )
        # ON CONFLICT is likewise absent from the expected materialize output.
        self.validate_all(
            "INSERT INTO example (id, name) VALUES (1, 'Alice') ON CONFLICT(id) DO NOTHING",
            write={
                "materialize": "INSERT INTO example (id, name) VALUES (1, 'Alice')",
                "postgres": "INSERT INTO example (id, name) VALUES (1, 'Alice') ON CONFLICT(id) DO NOTHING",
            },
        )
        # SERIAL and AUTO_INCREMENT are both expected to become plain INT NOT NULL.
        self.validate_all(
            "CREATE TABLE example (id SERIAL, name TEXT)",
            write={
                "materialize": "CREATE TABLE example (id INT NOT NULL, name TEXT)",
                "postgres": "CREATE TABLE example (id INT GENERATED BY DEFAULT AS IDENTITY NOT NULL, name TEXT)",
            },
        )
        self.validate_all(
            "CREATE TABLE example (id INT AUTO_INCREMENT, name TEXT)",
            write={
                "materialize": "CREATE TABLE example (id INT NOT NULL, name TEXT)",
                "postgres": "CREATE TABLE example (id INT GENERATED BY DEFAULT AS IDENTITY NOT NULL, name TEXT)",
            },
        )
        self.validate_all(
            'SELECT JSON_EXTRACT_PATH_TEXT(\'{ "farm": {"barn": { "color": "red", "feed stocked": true }}}\', \'farm\', \'barn\', \'color\')',
            write={
                "materialize": 'SELECT JSON_EXTRACT_PATH_TEXT(\'{ "farm": {"barn": { "color": "red", "feed stocked": true }}}\', \'farm\', \'barn\', \'color\')',
                "postgres": 'SELECT JSON_EXTRACT_PATH_TEXT(\'{ "farm": {"barn": { "color": "red", "feed stocked": true }}}\', \'farm\', \'barn\', \'color\')',
            },
        )
        self.validate_all(
            "SELECT MAP['a' => 1]",
            write={
                "duckdb": "SELECT MAP {'a': 1}",
                "materialize": "SELECT MAP['a' => 1]",
            },
        )

        # Test now functions.
        self.validate_identity("CURRENT_TIMESTAMP")
        self.validate_identity("NOW()", write_sql="CURRENT_TIMESTAMP")
        self.validate_identity("MZ_NOW()")

        # Test custom timestamp type.
        self.validate_identity("SELECT CAST(1 AS mz_timestamp)")

        # Test DDL.
        self.validate_identity("CREATE TABLE example (id INT, name LIST)")

        # Test list types.
        self.validate_identity("SELECT LIST[]")
        self.validate_identity("SELECT LIST[1, 2, 3]")
        self.validate_identity("SELECT LIST[LIST[1], LIST[2], NULL]")
        self.validate_identity("SELECT CAST(LIST[1, 2, 3] AS INT LIST)")
        self.validate_identity("SELECT CAST(NULL AS INT LIST)")
        self.validate_identity("SELECT CAST(NULL AS INT LIST LIST LIST)")
        self.validate_identity("SELECT LIST(SELECT 1)")

        # Test map types.
        self.validate_identity("SELECT MAP[]")
        self.validate_identity("SELECT MAP['a' => MAP['b' => 'c']]")
        self.validate_identity("SELECT CAST(MAP['a' => 1] AS MAP[TEXT => INT])")
        self.validate_identity("SELECT CAST(NULL AS MAP[TEXT => INT])")
        self.validate_identity("SELECT CAST(NULL AS MAP[TEXT => MAP[TEXT => INT]])")
        self.validate_identity("SELECT MAP(SELECT 'a', 1)")

================================================ FILE: tests/dialects/test_mysql.py ================================================ import unittest import sys from sqlglot import UnsupportedError, expressions as exp from sqlglot.dialects.mysql import MySQL from tests.dialects.test_dialect import Validator class TestMySQL(Validator): dialect = "mysql" def test_ddl(self): for t in ("BIGINT", "INT", "MEDIUMINT", "SMALLINT", "TINYINT"): self.validate_identity(f"CREATE TABLE t (id {t} UNSIGNED)") self.validate_identity(f"CREATE TABLE t (id {t}(10) UNSIGNED)") self.validate_identity("CREATE TABLE bar (abacate DOUBLE(10, 2) UNSIGNED)") self.validate_identity("CREATE TABLE t (id DECIMAL(20, 4) UNSIGNED)") self.validate_identity("CREATE TABLE foo (a BIGINT, UNIQUE (b) USING BTREE)") self.validate_identity("CREATE TABLE foo (a VARCHAR(32) NOT NULL UNIQUE COMMENT 'test')") self.validate_identity("CREATE TABLE foo (id BIGINT)") self.validate_identity("CREATE TABLE 00f (1d BIGINT)") self.validate_identity("CREATE TABLE temp (id SERIAL PRIMARY KEY)") self.validate_identity("UPDATE items SET items.price = 0 WHERE items.id >= 5 LIMIT 10")
self.validate_identity("DELETE FROM t WHERE a <= 10 LIMIT 10") self.validate_identity("DELETE FROM t FORCE INDEX (idx) WHERE a > 5 ORDER BY id") self.validate_identity("CREATE TABLE foo (a BIGINT, INDEX USING BTREE (b))") self.validate_identity("CREATE TABLE foo (a BIGINT, FULLTEXT INDEX (b))") self.validate_identity("CREATE TABLE foo (a BIGINT, SPATIAL INDEX (b))") self.validate_identity("CREATE TABLE foo (a INT UNSIGNED ZEROFILL)") self.validate_identity("ALTER TABLE t1 ADD COLUMN x INT, ALGORITHM=INPLACE, LOCK=EXCLUSIVE") self.validate_identity("ALTER TABLE t ADD INDEX `i` (`c`)") self.validate_identity("ALTER TABLE t ADD UNIQUE `i` (`c`)") self.validate_identity("ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT") self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo") self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100") self.validate_identity( "ALTER ALGORITHM = MERGE VIEW v AS SELECT * FROM foo", check_command_warning=True ) self.validate_identity( "ALTER DEFINER = 'admin'@'localhost' VIEW v AS SELECT * FROM foo", check_command_warning=True, ) self.validate_identity( "CREATE SQL SECURITY INVOKER VIEW id_test (id, foo) AS SELECT 0, foo FROM test" ) self.validate_identity( "CREATE SQL SECURITY DEFINER VIEW id_test (id, foo) AS SELECT 0, foo FROM test" ) self.validate_identity( "ALTER SQL SECURITY = DEFINER VIEW v AS SELECT * FROM foo", check_command_warning=True ) self.validate_identity( "INSERT INTO things (a, b) VALUES (1, 2) AS new_data ON DUPLICATE KEY UPDATE id = LAST_INSERT_ID(id), a = new_data.a, b = new_data.b" ) self.validate_identity( "CREATE TABLE `oauth_consumer` (`key` VARCHAR(32) NOT NULL, UNIQUE `OAUTH_CONSUMER_KEY` (`key`))" ) self.validate_identity( "CREATE TABLE `x` (`username` VARCHAR(200), PRIMARY KEY (`username`(16)))" ) self.validate_identity( "UPDATE items SET items.price = 0 WHERE items.id >= 5 ORDER BY items.id LIMIT 10" ) self.validate_identity( "CREATE TABLE foo (a BIGINT, INDEX b USING HASH 
(c) COMMENT 'd' VISIBLE ENGINE_ATTRIBUTE = 'e' WITH PARSER foo)" ) self.validate_identity( "DELETE t1 FROM t1 LEFT JOIN t2 ON t1.id = t2.id WHERE t2.id IS NULL" ) self.validate_identity( "DELETE t1, t2 FROM t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.id = t2.id AND t2.id = t3.id" ) self.validate_identity( "DELETE FROM t1, t2 USING t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.id = t2.id AND t2.id = t3.id" ) self.validate_identity( "INSERT IGNORE INTO subscribers (email) VALUES ('john.doe@gmail.com'), ('jane.smith@ibm.com')" ) self.validate_identity( "INSERT INTO t1 (a, b, c) VALUES (1, 2, 3), (4, 5, 6) ON DUPLICATE KEY UPDATE c = VALUES(a) + VALUES(b)" ) self.validate_identity( "INSERT INTO t1 (a, b) SELECT c, d FROM t2 UNION SELECT e, f FROM t3 ON DUPLICATE KEY UPDATE b = b + c" ) self.validate_identity( "INSERT INTO t1 (a, b, c) VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = c + 1" ) self.validate_identity( "INSERT INTO x VALUES (1, 'a', 2.0) ON DUPLICATE KEY UPDATE x.id = 1" ) self.validate_identity( "CREATE OR REPLACE VIEW my_view AS SELECT column1 AS `boo`, column2 AS `foo` FROM my_table WHERE column3 = 'some_value' UNION SELECT q.* FROM fruits_table, JSON_TABLE(Fruits, '$[*]' COLUMNS(id VARCHAR(255) PATH '$.$id', value VARCHAR(255) PATH '$.value')) AS q", ) self.validate_identity( "CREATE TABLE test_table (id INT AUTO_INCREMENT, PRIMARY KEY (id) USING BTREE)" ) self.validate_identity( "CREATE TABLE test_table (id INT AUTO_INCREMENT, PRIMARY KEY (id) USING HASH)" ) self.validate_identity("CREATE TABLE test (id INT, PRIMARY KEY pk_name (id))") self.validate_identity("CREATE TABLE test (id INT, PRIMARY KEY `pk_name` (id))") self.validate_identity( 'CREATE TABLE test (id INT, PRIMARY KEY "pk_name" (id))', "CREATE TABLE test (id INT, PRIMARY KEY `pk_name` (id))", ) self.validate_identity("CREATE TABLE test (id INT, CONSTRAINT pk_name PRIMARY KEY (id))") self.validate_identity( "CREATE TABLE test (a INT, b INT GENERATED ALWAYS AS (a + a) STORED)" ) self.validate_identity( 
"CREATE TABLE test (a INT, b INT GENERATED ALWAYS AS (a + a) VIRTUAL)" ) self.validate_identity( "CREATE TABLE test (a INT, b INT AS (a + a) STORED)", "CREATE TABLE test (a INT, b INT GENERATED ALWAYS AS (a + a) STORED)", ) self.validate_identity( "CREATE TABLE test (a INT, b INT AS (a + a) VIRTUAL)", "CREATE TABLE test (a INT, b INT GENERATED ALWAYS AS (a + a) VIRTUAL)", ) self.validate_identity( "/*left*/ EXPLAIN SELECT /*hint*/ col FROM t1 /*right*/", "/* left */ DESCRIBE /* hint */ SELECT col FROM t1 /* right */", ) self.validate_identity( "CREATE TABLE t (name VARCHAR)", "CREATE TABLE t (name TEXT)", ) self.validate_identity( "ALTER TABLE t ADD KEY `i` (`c`)", "ALTER TABLE t ADD INDEX `i` (`c`)", ) self.validate_identity( "CREATE TABLE `foo` (`id` char(36) NOT NULL DEFAULT (uuid()), PRIMARY KEY (`id`), UNIQUE KEY `id` (`id`))", "CREATE TABLE `foo` (`id` CHAR(36) NOT NULL DEFAULT (UUID()), PRIMARY KEY (`id`), UNIQUE `id` (`id`))", ) self.validate_identity( "CREATE TABLE IF NOT EXISTS industry_info (a BIGINT(20) NOT NULL AUTO_INCREMENT, b BIGINT(20) NOT NULL, c VARCHAR(1000), PRIMARY KEY (a), UNIQUE KEY d (b), KEY e (b))", "CREATE TABLE IF NOT EXISTS industry_info (a BIGINT(20) NOT NULL AUTO_INCREMENT, b BIGINT(20) NOT NULL, c VARCHAR(1000), PRIMARY KEY (a), UNIQUE d (b), INDEX e (b))", ) self.validate_identity( "CREATE TABLE test (ts TIMESTAMP, ts_tz TIMESTAMPTZ, ts_ltz TIMESTAMPLTZ)", "CREATE TABLE test (ts TIMESTAMP, ts_tz TIMESTAMP, ts_ltz TIMESTAMP)", ) self.validate_identity( "ALTER TABLE test_table ALTER COLUMN test_column SET DATA TYPE LONGTEXT", "ALTER TABLE test_table MODIFY COLUMN test_column LONGTEXT", ) self.validate_identity( "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP) DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC", "CREATE TABLE t (c DATETIME DEFAULT CURRENT_TIMESTAMP() ON UPDATE CURRENT_TIMESTAMP()) DEFAULT CHARACTER SET=utf8 ROW_FORMAT=DYNAMIC", ) self.validate_identity( "CREATE TABLE `foo` (a VARCHAR(10), KEY 
idx_a (a DESC))", "CREATE TABLE `foo` (a VARCHAR(10), INDEX idx_a (a DESC))", ) self.validate_identity( "CREATE TABLE `foo` (a VARCHAR(10), UNIQUE INDEX idx_a (a))", "CREATE TABLE `foo` (a VARCHAR(10), UNIQUE idx_a (a))", ) self.validate_all( "insert into t(i) values (default)", write={ "duckdb": "INSERT INTO t (i) VALUES (DEFAULT)", "mysql": "INSERT INTO t (i) VALUES (DEFAULT)", }, ) self.validate_all( "CREATE TABLE t (id INT UNSIGNED)", write={ "duckdb": "CREATE TABLE t (id UINTEGER)", "mysql": "CREATE TABLE t (id INT UNSIGNED)", }, ) self.validate_all( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", write={ "duckdb": "CREATE TABLE z (a INT)", "mysql": "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'", "spark": "CREATE TABLE z (a INT) COMMENT 'x'", "sqlite": "CREATE TABLE z (a INTEGER)", }, ) self.validate_all( "CREATE TABLE x (id int not null auto_increment, primary key (id))", write={ "mysql": "CREATE TABLE x (id INT NOT NULL AUTO_INCREMENT, PRIMARY KEY (id))", "sqlite": "CREATE TABLE x (id INTEGER NOT NULL AUTOINCREMENT PRIMARY KEY)", }, ) self.validate_identity("ALTER TABLE t ALTER INDEX i INVISIBLE") self.validate_identity("ALTER TABLE t ALTER INDEX i VISIBLE") self.validate_identity("ALTER TABLE t ALTER COLUMN c SET INVISIBLE") self.validate_identity("ALTER TABLE t ALTER COLUMN c SET VISIBLE") self.validate_identity( "UPDATE foo JOIN bar ON TRUE SET foo.a = bar.a WHERE foo.id = bar.id" ) # PARTITION BY RANGE - simple column self.validate_identity( "CREATE TABLE t (id INT, created_at DATE) PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (10), PARTITION p1 VALUES LESS THAN (20), PARTITION p2 VALUES LESS THAN (MAXVALUE))" ) self.validate_identity( "CREATE TABLE t (id INT, name VARCHAR(50)) PARTITION BY RANGE (id) (PARTITION p0 VALUES LESS THAN (100))" ) # PARTITION BY RANGE - with expression self.validate_identity( "CREATE TABLE 
orders (id INT, order_date DATE) PARTITION BY RANGE (YEAR(order_date)) (PARTITION p2023 VALUES LESS THAN (2024), PARTITION p2024 VALUES LESS THAN (2025), PARTITION pmax VALUES LESS THAN (MAXVALUE))" ) self.validate_identity( "CREATE TABLE sales (id INT, sale_date DATE) PARTITION BY RANGE (MONTH(sale_date)) (PARTITION q1 VALUES LESS THAN (4), PARTITION q2 VALUES LESS THAN (7), PARTITION q3 VALUES LESS THAN (10), PARTITION q4 VALUES LESS THAN (13))" ) # PARTITION BY LIST - simple column self.validate_identity( "CREATE TABLE t (id INT, region VARCHAR(10)) PARTITION BY LIST (id) (PARTITION p_east VALUES IN (1, 2, 3), PARTITION p_west VALUES IN (4, 5, 6))" ) self.validate_identity( "CREATE TABLE t (id INT) PARTITION BY LIST (id) (PARTITION p0 VALUES IN (1, 2))" ) self.validate_identity( "CREATE TABLE employees (id INT, store_id INT) PARTITION BY LIST (store_id) (PARTITION pNorth VALUES IN (3, 5, 6), PARTITION pSouth VALUES IN (1, 2, 10))" ) self.validate_identity( "CREATE FUNCTION f () RETURNS VARCHAR LANGUAGE SQL SQL SECURITY INVOKER SELECT 'abc'", "CREATE FUNCTION f() RETURNS TEXT LANGUAGE SQL SQL SECURITY INVOKER AS SELECT 'abc'", ) def test_identity(self): self.validate_identity("SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_CALC_FOUND_ROWS * FROM t") self.validate_identity("SELECT CAST(COALESCE(`id`, 'NULL') AS CHAR CHARACTER SET binary)") self.validate_identity("SELECT e.* FROM e STRAIGHT_JOIN p ON e.x = p.y") self.validate_identity("ALTER TABLE test_table ALTER COLUMN test_column SET DEFAULT 1") self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d %H:%i:00.0000')") self.validate_identity("SELECT @var1 := 1, @var2") self.validate_identity("UNLOCK TABLES") self.validate_identity("LOCK TABLES `app_fields` WRITE", check_command_warning=True) self.validate_identity("SELECT 1 XOR 0") self.validate_identity("SELECT 1 && 0", "SELECT 1 AND 0") self.validate_identity("SELECT /*+ BKA(t1) NO_BKA(t2) */ * FROM t1 INNER JOIN t2") self.validate_identity("SELECT /*+ MERGE(dt) */ 
* FROM (SELECT * FROM t1) AS dt") self.validate_identity("SELECT /*+ INDEX(t, i) */ c1 FROM t WHERE c2 = 'value'") self.validate_identity("SELECT @a MEMBER OF(@c), @b MEMBER OF(@c)") self.validate_identity("SELECT JSON_ARRAY(4, 5) MEMBER OF('[[3,4],[4,5]]')") self.validate_identity("SELECT CAST('[4,5]' AS JSON) MEMBER OF('[[3,4],[4,5]]')") self.validate_identity("""SELECT 'ab' MEMBER OF('[23, "abc", 17, "ab", 10]')""") self.validate_identity("""SELECT * FROM foo WHERE 'ab' MEMBER OF(content)""") self.validate_identity("SELECT CURRENT_TIMESTAMP(6)") self.validate_identity("SELECT CURRENT_ROLE()") self.validate_identity("SELECT CURTIME()", "SELECT CURRENT_TIME()") self.validate_identity("x ->> '$.name'") self.validate_identity("SELECT CAST(`a`.`b` AS CHAR) FROM foo") self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ')") self.validate_identity("SELECT TRIM(TRAILING 'bla' FROM ' XXX ')") self.validate_identity("SELECT TRIM(BOTH 'bla' FROM ' XXX ')") self.validate_identity("SELECT TRIM('bla' FROM ' XXX ')") self.validate_identity("@@GLOBAL.max_connections") self.validate_identity("CREATE TABLE A LIKE B") self.validate_identity("SELECT * FROM t1, t2 FOR SHARE OF t1, t2 SKIP LOCKED") self.validate_identity("SELECT a || b", "SELECT a OR b") self.validate_identity( "SELECT * FROM source, JSON_TABLE(source.links, '$.org[*]' COLUMNS(row_id FOR ORDINALITY, link VARCHAR(255) PATH '$.link')) AS links" ) self.validate_identity( "SELECT * FROM x ORDER BY BINARY a", "SELECT * FROM x ORDER BY CAST(a AS BINARY)" ) self.validate_identity( """SELECT * FROM foo WHERE 3 MEMBER OF(JSON_EXTRACT(info, '$.value'))""" ) self.validate_identity( "SELECT * FROM t1, t2, t3 FOR SHARE OF t1 NOWAIT FOR UPDATE OF t2, t3 SKIP LOCKED" ) self.validate_identity( "REPLACE INTO table SELECT id FROM table2 WHERE cnt > 100", check_command_warning=True ) self.validate_identity( "CAST(x AS VARCHAR)", "CAST(x AS CHAR)", ) self.validate_identity( """SELECT * FROM foo WHERE 3 MEMBER 
OF(info->'$.value')""", """SELECT * FROM foo WHERE 3 MEMBER OF(JSON_EXTRACT(info, '$.value'))""", ) self.validate_identity( "SELECT 1 AS row", "SELECT 1 AS `row`", ) # Index hints self.validate_identity( "SELECT * FROM table1 USE INDEX (col1_index, col2_index) WHERE col1 = 1 AND col2 = 2 AND col3 = 3" ) self.validate_identity( "SELECT * FROM table1 IGNORE INDEX (col3_index) WHERE col1 = 1 AND col2 = 2 AND col3 = 3" ) self.validate_identity( "SELECT * FROM t1 USE INDEX (i1) IGNORE INDEX FOR ORDER BY (i2) ORDER BY a" ) self.validate_identity("SELECT * FROM t1 USE INDEX (i1) USE INDEX (i1, i1)") self.validate_identity("SELECT * FROM t1 USE INDEX FOR JOIN (i1) FORCE INDEX FOR JOIN (i2)") self.validate_identity( "SELECT * FROM t1 USE INDEX () IGNORE INDEX (i2) USE INDEX (i1) USE INDEX (i2)" ) # SET Commands self.validate_identity("SET @var_name = expr") self.validate_identity("SET @name = 43") self.validate_identity("SET @total_tax = (SELECT SUM(tax) FROM taxable_transactions)") self.validate_identity("SET GLOBAL max_connections = 1000") self.validate_identity("SET @@GLOBAL.max_connections = 1000") self.validate_identity("SET SESSION sql_mode = 'TRADITIONAL'") self.validate_identity("SET LOCAL sql_mode = 'TRADITIONAL'") self.validate_identity("SET @@SESSION.sql_mode = 'TRADITIONAL'") self.validate_identity("SET @@LOCAL.sql_mode = 'TRADITIONAL'") self.validate_identity("SET @@sql_mode = 'TRADITIONAL'") self.validate_identity("SET sql_mode = 'TRADITIONAL'") self.validate_identity("SET PERSIST max_connections = 1000") self.validate_identity("SET @@PERSIST.max_connections = 1000") self.validate_identity("SET PERSIST_ONLY back_log = 100") self.validate_identity("SET @@PERSIST_ONLY.back_log = 100") self.validate_identity("SET @@SESSION.max_join_size = DEFAULT") self.validate_identity("SET @@SESSION.max_join_size = @@GLOBAL.max_join_size") self.validate_identity("SET @x = 1, SESSION sql_mode = ''") self.validate_identity("SET GLOBAL max_connections = 1000, sort_buffer_size = 
1000000") self.validate_identity("SET @@GLOBAL.sort_buffer_size = 50000, sort_buffer_size = 1000000") self.validate_identity("SET CHARACTER SET 'utf8'") self.validate_identity("SET CHARACTER SET utf8") self.validate_identity("SET CHARACTER SET DEFAULT") self.validate_identity("SET NAMES 'utf8'") self.validate_identity("SET NAMES DEFAULT") self.validate_identity("SET NAMES 'utf8' COLLATE 'utf8_unicode_ci'") self.validate_identity("SET NAMES utf8 COLLATE utf8_unicode_ci") self.validate_identity("SET autocommit = ON") self.validate_identity("SET GLOBAL TRANSACTION ISOLATION LEVEL SERIALIZABLE") self.validate_identity("SET TRANSACTION READ ONLY") self.validate_identity("SET GLOBAL TRANSACTION ISOLATION LEVEL REPEATABLE READ, READ WRITE") self.validate_identity("DATABASE()", "SCHEMA()") self.validate_identity( "SET GLOBAL sort_buffer_size = 1000000, SESSION sort_buffer_size = 1000000" ) self.validate_identity( "SET @@GLOBAL.sort_buffer_size = 1000000, @@LOCAL.sort_buffer_size = 1000000" ) self.validate_identity("INTERVAL '1' YEAR") self.validate_identity("DATE_ADD(x, INTERVAL '1' YEAR)") self.validate_identity("CHAR(0)") self.validate_identity("CHAR(77, 121, 83, 81, '76')") self.validate_identity("CHAR(77, 77.3, '77.3' USING utf8mb4)") self.validate_identity("SELECT * FROM t1 PARTITION(p0)") self.validate_identity("SELECT @var1 := 1, @var2") self.validate_identity("SELECT @var1, @var2 := @var1") self.validate_identity("SELECT @var1 := COUNT(*) FROM t1") self.validate_identity("SET @var1 := 1", "SET @var1 = 1") self.validate_identity( "SELECT DISTINCTROW tbl.col FROM tbl", "SELECT DISTINCT tbl.col FROM tbl" ) self.validate_identity("ATAN(y, x)") self.validate_identity( "SELECT 'foo' SOUNDS LIKE 'bar'", "SELECT SOUNDEX('foo') = SOUNDEX('bar')" ) self.validate_identity( "SELECT 'foo' NOT SOUNDS LIKE 'bar'", "SELECT NOT SOUNDEX('foo') = SOUNDEX('bar')" ) self.validate_identity("SELECT SUBSTR(1 FROM 2 FOR 3)", "SELECT SUBSTRING(1, 2, 3)") self.validate_identity("SELECT 
ELT(2, 'foo', 'bar', 'baz') AS Result") self.validate_identity("SELECT CHARSET(CHAR(100 USING utf8))") self.validate_identity("SELECT VERSION()") def test_types(self): for char_type in MySQL.Generator.CHAR_CAST_MAPPING: with self.subTest(f"MySQL cast into {char_type}"): self.validate_identity(f"CAST(x AS {char_type.value})", "CAST(x AS CHAR)") for signed_type in MySQL.Generator.SIGNED_CAST_MAPPING: with self.subTest(f"MySQL cast into {signed_type}"): self.validate_identity(f"CAST(x AS {signed_type.value})", "CAST(x AS SIGNED)") self.validate_identity("CAST(x AS ENUM('a', 'b'))") self.validate_identity("CAST(x AS SET('a', 'b'))") self.validate_identity( "CAST(x AS MEDIUMINT) + CAST(y AS YEAR(4))", "CAST(x AS SIGNED) + CAST(y AS YEAR(4))", ) self.validate_identity( "CAST(x AS TIMESTAMP)", "TIMESTAMP(x)", ) self.validate_identity( "CAST(x AS TIMESTAMPTZ)", "TIMESTAMP(x)", ) self.validate_identity( "CAST(x AS TIMESTAMPLTZ)", "TIMESTAMP(x)", ) self.validate_all( "CAST(x AS MEDIUMTEXT) + CAST(y AS LONGTEXT) + CAST(z AS TINYTEXT)", write={ "mysql": "CAST(x AS CHAR) + CAST(y AS CHAR) + CAST(z AS CHAR)", "spark": "CAST(x AS TEXT) + CAST(y AS TEXT) + CAST(z AS TEXT)", }, ) self.validate_all( "CAST(x AS MEDIUMBLOB) + CAST(y AS LONGBLOB) + CAST(z AS TINYBLOB)", write={ "mysql": "CAST(x AS CHAR) + CAST(y AS CHAR) + CAST(z AS CHAR)", "spark": "CAST(x AS BLOB) + CAST(y AS BLOB) + CAST(z AS BLOB)", }, ) def test_canonical_functions(self): self.validate_identity("SELECT LEFT('str', 2)", "SELECT LEFT('str', 2)") self.validate_identity("SELECT INSTR('str', 'substr')", "SELECT LOCATE('substr', 'str')") self.validate_identity("SELECT UCASE('foo')", "SELECT UPPER('foo')") self.validate_identity("SELECT LCASE('foo')", "SELECT LOWER('foo')") self.validate_identity( "SELECT DAY_OF_MONTH('2023-01-01')", "SELECT DAYOFMONTH('2023-01-01')" ) self.validate_identity("SELECT DAY_OF_WEEK('2023-01-01')", "SELECT DAYOFWEEK('2023-01-01')") self.validate_identity("SELECT DAY_OF_YEAR('2023-01-01')", 
"SELECT DAYOFYEAR('2023-01-01')") self.validate_identity( "SELECT WEEK_OF_YEAR('2023-01-01')", "SELECT WEEKOFYEAR('2023-01-01')" ) self.validate_all( "CHAR(10)", write={ "mysql": "CHAR(10)", "presto": "CHR(10)", "sqlite": "CHAR(10)", "tsql": "CHAR(10)", }, ) self.validate_identity("CREATE TABLE t (foo VARBINARY(5))") self.validate_all( "CREATE TABLE t (foo BLOB)", write={ "mysql": "CREATE TABLE t (foo BLOB)", "oracle": "CREATE TABLE t (foo BLOB)", "postgres": "CREATE TABLE t (foo BYTEA)", "tsql": "CREATE TABLE t (foo VARBINARY)", "sqlite": "CREATE TABLE t (foo BLOB)", "duckdb": "CREATE TABLE t (foo VARBINARY)", "hive": "CREATE TABLE t (foo BINARY)", "bigquery": "CREATE TABLE t (foo BYTES)", "redshift": "CREATE TABLE t (foo VARBYTE)", "clickhouse": "CREATE TABLE t (foo Nullable(String))", }, ) def test_escape(self): self.validate_identity("""'"abc"'""") self.validate_identity( r"'\'a'", "'''a'", ) self.validate_identity( '''"'abc'"''', "'''abc'''", ) self.validate_all( r"'a \' b '' '", write={ "mysql": r"'a '' b '' '", "spark": r"'a \' b \' '", }, ) self.validate_identity( r"'\"'", """\'"\'""", ) self.validate_identity("'\\\\\"a'") self.validate_identity( "'\t'", "'\\t'", ) self.validate_identity( r"'\j'", "'j'", ) def test_introducers(self): self.validate_all( "_utf8mb4 'hola'", read={ "mysql": "_utf8mb4'hola'", }, write={ "mysql": "_utf8mb4 'hola'", }, ) self.validate_all( "N'some text'", read={ "mysql": "n'some text'", }, write={ "mysql": "N'some text'", }, ) self.validate_all( "_latin1 x'4D7953514C'", read={ "mysql": "_latin1 X'4D7953514C'", }, write={ "mysql": "_latin1 x'4D7953514C'", }, ) def test_hexadecimal_literal(self): write_CC = { "bigquery": "SELECT FROM_HEX('CC')", "clickhouse": UnsupportedError, "databricks": "SELECT X'CC'", "drill": "SELECT 204", "duckdb": "SELECT UNHEX('CC')", "hive": "SELECT 204", "mysql": "SELECT x'CC'", "oracle": "SELECT 204", "postgres": "SELECT x'CC'", "presto": "SELECT x'CC'", "redshift": "SELECT 204", "snowflake": "SELECT 
x'CC'", "spark": "SELECT X'CC'", "sqlite": "SELECT x'CC'", "starrocks": "SELECT x'CC'", "tableau": "SELECT 204", "teradata": "SELECT X'CC'", "trino": "SELECT x'CC'", "tsql": "SELECT 0xCC", } write_CC_with_leading_zeros = { "bigquery": "SELECT FROM_HEX('0000CC')", "clickhouse": UnsupportedError, "databricks": "SELECT X'0000CC'", "drill": "SELECT 204", "duckdb": "SELECT UNHEX('0000CC')", "hive": "SELECT 204", "mysql": "SELECT x'0000CC'", "oracle": "SELECT 204", "postgres": "SELECT x'0000CC'", "presto": "SELECT x'0000CC'", "redshift": "SELECT 204", "snowflake": "SELECT x'0000CC'", "spark": "SELECT X'0000CC'", "sqlite": "SELECT x'0000CC'", "starrocks": "SELECT x'0000CC'", "tableau": "SELECT 204", "teradata": "SELECT X'0000CC'", "trino": "SELECT x'0000CC'", "tsql": "SELECT 0x0000CC", } self.validate_all("SELECT X'1A'", write={"mysql": "SELECT x'1A'"}) self.validate_all("SELECT 0xz", write={"mysql": "SELECT `0xz`"}) self.validate_all("SELECT 0xCC", write=write_CC) self.validate_all("SELECT 0xCC ", write=write_CC) self.validate_all("SELECT x'CC'", write=write_CC) self.validate_all("SELECT 0x0000CC", write=write_CC_with_leading_zeros) self.validate_all("SELECT x'0000CC'", write=write_CC_with_leading_zeros) def test_bits_literal(self): write_1011 = { "bigquery": "SELECT 11", "clickhouse": "SELECT 0b1011", "databricks": "SELECT 11", "drill": "SELECT 11", "hive": "SELECT 11", "mysql": "SELECT b'1011'", "oracle": "SELECT 11", "postgres": "SELECT b'1011'", "presto": "SELECT 11", "redshift": "SELECT 11", "snowflake": "SELECT 11", "spark": "SELECT 11", "sqlite": "SELECT 11", "tableau": "SELECT 11", "teradata": "SELECT 11", "trino": "SELECT 11", "tsql": "SELECT 11", } self.validate_all("SELECT 0b1011", write=write_1011) self.validate_all("SELECT b'1011'", write=write_1011) def test_string_literals(self): self.validate_all( 'SELECT "2021-01-01" + INTERVAL 1 MONTH', write={ "mysql": "SELECT '2021-01-01' + INTERVAL '1' MONTH", }, ) def test_convert(self): self.validate_all( 
"CONVERT(x USING latin1)", write={ "mysql": "CAST(x AS CHAR CHARACTER SET latin1)", }, ) self.validate_all( "CAST(x AS CHAR CHARACTER SET latin1)", write={ "mysql": "CAST(x AS CHAR CHARACTER SET latin1)", }, ) self.validate_identity( "CONVERT('a' USING binary)", "CAST('a' AS CHAR CHARACTER SET binary)" ) def test_match_against(self): self.validate_all( "MATCH(col1, col2, col3) AGAINST('abc')", read={ "": "MATCH(col1, col2, col3) AGAINST('abc')", "mysql": "MATCH(col1, col2, col3) AGAINST('abc')", }, write={ "": "MATCH(col1, col2, col3) AGAINST('abc')", "mysql": "MATCH(col1, col2, col3) AGAINST('abc')", "postgres": "(col1 @@ 'abc' OR col2 @@ 'abc' OR col3 @@ 'abc')", # not quite correct because it's not ts_query }, ) self.validate_all( "MATCH(col1, col2) AGAINST('abc' IN NATURAL LANGUAGE MODE)", write={"mysql": "MATCH(col1, col2) AGAINST('abc' IN NATURAL LANGUAGE MODE)"}, ) self.validate_all( "MATCH(col1, col2) AGAINST('abc' IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)", write={ "mysql": "MATCH(col1, col2) AGAINST('abc' IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)" }, ) self.validate_all( "MATCH(col1, col2) AGAINST('abc' IN BOOLEAN MODE)", write={"mysql": "MATCH(col1, col2) AGAINST('abc' IN BOOLEAN MODE)"}, ) self.validate_all( "MATCH(col1, col2) AGAINST('abc' WITH QUERY EXPANSION)", write={"mysql": "MATCH(col1, col2) AGAINST('abc' WITH QUERY EXPANSION)"}, ) self.validate_all( "MATCH(a.b) AGAINST('abc')", write={"mysql": "MATCH(a.b) AGAINST('abc')"}, ) def test_date_format(self): self.validate_all( "SELECT DATE_FORMAT('2017-06-15', '%Y')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15', '%Y')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'yyyy')", "exasol": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'YYYY')", }, ) self.validate_all( "SELECT DATE_FORMAT('2017-06-15', '%m')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15', '%m')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'mm')", }, ) self.validate_all( "SELECT 
DATE_FORMAT('2017-06-15', '%d')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15', '%d')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'DD')", }, ) self.validate_all( "SELECT DATE_FORMAT('2017-06-15', '%Y-%m-%d')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15', '%Y-%m-%d')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'yyyy-mm-DD')", "exasol": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'YYYY-MM-DD')", }, ) self.validate_all( "SELECT DATE_FORMAT('2017-06-15 22:23:34', '%H')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15 22:23:34', '%H')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15 22:23:34' AS TIMESTAMP), 'hh24')", }, ) self.validate_all( "SELECT DATE_FORMAT('2017-06-15', '%w')", write={ "mysql": "SELECT DATE_FORMAT('2017-06-15', '%w')", "snowflake": "SELECT TO_CHAR(CAST('2017-06-15' AS TIMESTAMP), 'dy')", }, ) self.validate_all( "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')", write={ "mysql": "SELECT DATE_FORMAT('2024-08-22 14:53:12', '%a')", "snowflake": "SELECT TO_CHAR(CAST('2024-08-22 14:53:12' AS TIMESTAMP), 'DY')", }, ) self.validate_all( "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')", write={ "mysql": "SELECT DATE_FORMAT('2009-10-04 22:23:00', '%a %M %Y')", "snowflake": "SELECT TO_CHAR(CAST('2009-10-04 22:23:00' AS TIMESTAMP), 'DY mmmm yyyy')", }, ) self.validate_all( "SELECT DATE_FORMAT('2007-10-04 22:23:00', '%H:%i:%s')", write={ "mysql": "SELECT DATE_FORMAT('2007-10-04 22:23:00', '%T')", "snowflake": "SELECT TO_CHAR(CAST('2007-10-04 22:23:00' AS TIMESTAMP), 'hh24:mi:ss')", "exasol": "SELECT TO_CHAR(CAST('2007-10-04 22:23:00' AS TIMESTAMP), 'HH:MI:SS')", }, ) self.validate_all( "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')", write={ "mysql": "SELECT DATE_FORMAT('1900-10-04 22:23:00', '%d %y %a %d %m %b')", "snowflake": "SELECT TO_CHAR(CAST('1900-10-04 22:23:00' AS TIMESTAMP), 'DD yy DY DD mm mon')", }, ) def test_mysql_time(self): 
self.validate_identity("TIME_STR_TO_UNIX(x)", "UNIX_TIMESTAMP(x)") self.validate_identity("SELECT FROM_UNIXTIME(1711366265, '%Y %D %M')") self.validate_all( "SELECT TO_DAYS(x)", write={ "mysql": "SELECT (DATEDIFF(x, '0000-01-01') + 1)", "presto": "SELECT (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)", }, ) self.validate_all( "SELECT DATEDIFF(x, y)", read={ "exasol": "SELECT DAYS_BETWEEN(x, y)", "presto": "SELECT DATE_DIFF('DAY', y, x)", "redshift": "SELECT DATEDIFF(DAY, y, x)", }, write={ "exasol": "SELECT DAYS_BETWEEN(x, y)", "mysql": "SELECT DATEDIFF(x, y)", "presto": "SELECT DATE_DIFF('DAY', y, x)", "redshift": "SELECT DATEDIFF(DAY, y, x)", }, ) self.validate_all( "DAYOFYEAR(x)", write={ "mysql": "DAYOFYEAR(x)", "": "DAY_OF_YEAR(CAST(x AS DATE))", }, ) self.validate_all( "DAYOFMONTH(x)", write={"mysql": "DAYOFMONTH(x)", "": "DAY_OF_MONTH(CAST(x AS DATE))"}, ) self.validate_all( "DAYOFWEEK(x)", write={"mysql": "DAYOFWEEK(x)", "": "DAY_OF_WEEK(CAST(x AS DATE))"}, ) self.validate_all( "WEEKOFYEAR(x)", write={"mysql": "WEEKOFYEAR(x)", "": "WEEK_OF_YEAR(CAST(x AS DATE))"}, ) self.validate_all( "DAY(x)", write={"mysql": "DAY(x)", "": "DAY(CAST(x AS DATE))"}, ) self.validate_all( "WEEK(x)", write={"mysql": "WEEK(x)", "": "WEEK(CAST(x AS DATE))"}, ) self.validate_all( "YEAR(x)", write={"mysql": "YEAR(x)", "": "YEAR(CAST(x AS DATE))"}, ) self.validate_all( "DATE(x)", read={"": "TS_OR_DS_TO_DATE(x)"}, ) self.validate_all( "STR_TO_DATE(x, '%M')", read={"": "TS_OR_DS_TO_DATE(x, '%B')"}, ) self.validate_all( "STR_TO_DATE(x, '%Y-%m-%d')", write={"presto": "CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)"}, ) self.validate_all( "STR_TO_DATE(x, '%Y-%m-%dT%T')", write={"presto": "DATE_PARSE(x, '%Y-%m-%dT%T')"} ) self.validate_all( "SELECT FROM_UNIXTIME(col)", read={ "postgres": "SELECT TO_TIMESTAMP(col)", }, write={ "mysql": "SELECT FROM_UNIXTIME(col)", "postgres": "SELECT TO_TIMESTAMP(col)", "redshift": "SELECT (TIMESTAMP 
'epoch' + col * INTERVAL '1 SECOND')", }, ) # No timezone, make sure DATETIME captures the correct precision self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123456+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.123456+00:00' AS DATETIME(6))", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.123+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.123+00:00' AS DATETIME(3))", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15+00:00' AS DATETIME)", ) # With timezone, make sure the TIMESTAMP constructor is used # also TIMESTAMP doesnt have the subsecond precision truncation issue that DATETIME does so we dont need to TIMESTAMP(6) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')", write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15-08:00', 'America/Los_Angeles')", write_sql="SELECT TIMESTAMP('2023-01-01 13:14:15-08:00')", ) @unittest.skipUnless( sys.version_info >= (3, 11), "Python 3.11 relaxed datetime.fromisoformat() parsing with regards to microseconds", ) def test_mysql_time_python311(self): self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12345+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.12345+00:00' AS DATETIME(6))", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1234+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.1234+00:00' AS DATETIME(6))", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.12+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.12+00:00' AS DATETIME(3))", ) self.validate_identity( "SELECT TIME_STR_TO_TIME('2023-01-01 13:14:15.1+00:00')", write_sql="SELECT CAST('2023-01-01 13:14:15.1+00:00' AS DATETIME(3))", ) def test_mysql(self): for func in ("CHAR_LENGTH", "CHARACTER_LENGTH"): with 
self.subTest(f"Testing MySQL's {func}"): self.validate_all( f"SELECT {func}('foo')", write={ "duckdb": "SELECT LENGTH('foo')", "mysql": "SELECT CHAR_LENGTH('foo')", "postgres": "SELECT LENGTH('foo')", }, ) self.validate_all( "CURDATE()", write={ "mysql": "CURRENT_DATE", "postgres": "CURRENT_DATE", }, ) self.validate_all( "SELECT CONCAT('11', '22')", read={ "postgres": "SELECT '11' || '22'", }, write={ "mysql": "SELECT CONCAT('11', '22')", "postgres": "SELECT '11' || '22'", }, ) self.validate_all( "SELECT department, GROUP_CONCAT(name) AS employee_names FROM data GROUP BY department", read={ "postgres": "SELECT department, array_agg(name) AS employee_names FROM data GROUP BY department", }, ) self.validate_all( "SELECT UNIX_TIMESTAMP(CAST('2024-04-29 12:00:00' AS DATETIME))", read={ "mysql": "SELECT UNIX_TIMESTAMP(CAST('2024-04-29 12:00:00' AS DATETIME))", "postgres": "SELECT EXTRACT(epoch FROM TIMESTAMP '2024-04-29 12:00:00')", }, ) self.validate_all( "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]')", read={ "sqlite": "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]')", }, write={ "mysql": "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]')", "sqlite": "SELECT '[10, 20, [30, 40]]' -> '$[1]'", }, ) self.validate_all( "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]', '$[0]')", read={ "sqlite": "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]', '$[0]')", }, write={ "mysql": "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]', '$[0]')", "sqlite": "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[1]', '$[0]')", }, ) self.validate_all( "SELECT * FROM x LEFT JOIN y ON x.id = y.id UNION ALL SELECT * FROM x RIGHT JOIN y ON x.id = y.id WHERE NOT EXISTS(SELECT 1 FROM x WHERE x.id = y.id) ORDER BY 1 LIMIT 0", read={ "postgres": "SELECT * FROM x FULL JOIN y ON x.id = y.id ORDER BY 1 LIMIT 0", }, ) self.validate_all( # MySQL doesn't support FULL OUTER joins "SELECT * FROM t1 LEFT OUTER JOIN t2 ON t1.x = t2.x UNION ALL SELECT * FROM t1 RIGHT OUTER JOIN t2 ON t1.x = t2.x WHERE NOT 
EXISTS(SELECT 1 FROM t1 WHERE t1.x = t2.x)", read={ "postgres": "SELECT * FROM t1 FULL OUTER JOIN t2 ON t1.x = t2.x", }, ) self.validate_all( "SELECT * FROM t1 LEFT OUTER JOIN t2 USING (x) UNION ALL SELECT * FROM t1 RIGHT OUTER JOIN t2 USING (x) WHERE NOT EXISTS(SELECT 1 FROM t1 WHERE t1.x = t2.x)", read={ "postgres": "SELECT * FROM t1 FULL OUTER JOIN t2 USING (x) ", }, ) self.validate_all( "SELECT * FROM t1 LEFT OUTER JOIN t2 USING (x, y) UNION ALL SELECT * FROM t1 RIGHT OUTER JOIN t2 USING (x, y) WHERE NOT EXISTS(SELECT 1 FROM t1 WHERE t1.x = t2.x AND t1.y = t2.y)", read={ "postgres": "SELECT * FROM t1 FULL OUTER JOIN t2 USING (x, y) ", }, ) self.validate_all( "a XOR b", read={ "mysql": "a XOR b", "snowflake": "BOOLXOR(a, b)", }, write={ "duckdb": "(a AND (NOT b)) OR ((NOT a) AND b)", "mysql": "a XOR b", "postgres": "(a AND (NOT b)) OR ((NOT a) AND b)", "snowflake": "BOOLXOR(a, b)", "trino": "(a AND (NOT b)) OR ((NOT a) AND b)", }, ) self.validate_all( "SELECT * FROM test LIMIT 0 + 1, 0 + 1", write={ "mysql": "SELECT * FROM test LIMIT 1 OFFSET 1", "postgres": "SELECT * FROM test LIMIT 0 + 1 OFFSET 0 + 1", "presto": "SELECT * FROM test OFFSET 1 LIMIT 1", "snowflake": "SELECT * FROM test LIMIT 1 OFFSET 1", "trino": "SELECT * FROM test OFFSET 1 LIMIT 1", "bigquery": "SELECT * FROM test LIMIT 1 OFFSET 1", }, ) self.validate_all( "CAST(x AS TEXT)", write={ "mysql": "CAST(x AS CHAR)", "presto": "CAST(x AS VARCHAR)", "starrocks": "CAST(x AS STRING)", }, ) self.validate_all("CAST(x AS SIGNED)", write={"mysql": "CAST(x AS SIGNED)"}) self.validate_all("CAST(x AS SIGNED INTEGER)", write={"mysql": "CAST(x AS SIGNED)"}) self.validate_all("CAST(x AS UNSIGNED)", write={"mysql": "CAST(x AS UNSIGNED)"}) self.validate_all("CAST(x AS UNSIGNED INTEGER)", write={"mysql": "CAST(x AS UNSIGNED)"}) self.validate_all("TIME_STR_TO_TIME(x)", write={"mysql": "CAST(x AS DATETIME)"}) self.validate_all( """SELECT 17 MEMBER OF('[23, "abc", 17, "ab", 10]')""", write={ "": """SELECT 
JSON_ARRAY_CONTAINS(17, '[23, "abc", 17, "ab", 10]')""", "mysql": """SELECT 17 MEMBER OF('[23, "abc", 17, "ab", 10]')""", }, ) self.validate_all( "SELECT DATE_ADD('2023-06-23 12:00:00', INTERVAL 2 * 2 MONTH) FROM foo", write={ "mysql": "SELECT DATE_ADD('2023-06-23 12:00:00', INTERVAL (2 * 2) MONTH) FROM foo", }, ) self.validate_all( "SELECT * FROM t LOCK IN SHARE MODE", write={"mysql": "SELECT * FROM t FOR SHARE"} ) self.validate_all( "SELECT DATE(DATE_SUB(`dt`, INTERVAL DAYOFMONTH(`dt`) - 1 DAY)) AS __timestamp FROM tableT", write={ "mysql": "SELECT DATE(DATE_SUB(`dt`, INTERVAL (DAYOFMONTH(`dt`) - 1) DAY)) AS __timestamp FROM tableT", }, ) self.validate_identity("SELECT name FROM temp WHERE name = ? FOR UPDATE") self.validate_all( "SELECT a FROM tbl FOR UPDATE", write={ "": "SELECT a FROM tbl", "mysql": "SELECT a FROM tbl FOR UPDATE", "oracle": "SELECT a FROM tbl FOR UPDATE", "postgres": "SELECT a FROM tbl FOR UPDATE", "redshift": "SELECT a FROM tbl", "tsql": "SELECT a FROM tbl", }, ) self.validate_all( "SELECT a FROM tbl FOR SHARE", write={ "": "SELECT a FROM tbl", "mysql": "SELECT a FROM tbl FOR SHARE", "oracle": "SELECT a FROM tbl FOR SHARE", "postgres": "SELECT a FROM tbl FOR SHARE", "tsql": "SELECT a FROM tbl", }, ) self.validate_all( "GROUP_CONCAT(DISTINCT x ORDER BY y DESC)", write={ "mysql": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR ',')", "sqlite": "GROUP_CONCAT(DISTINCT x)", "tsql": "STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y DESC)", "databricks": "LISTAGG(DISTINCT x, ',') WITHIN GROUP (ORDER BY y DESC)", "postgres": "STRING_AGG(DISTINCT x, ',' ORDER BY y DESC NULLS LAST)", }, ) self.validate_all( "GROUP_CONCAT(x ORDER BY y SEPARATOR z)", write={ "mysql": "GROUP_CONCAT(x ORDER BY y SEPARATOR z)", "sqlite": "GROUP_CONCAT(x, z)", "tsql": "STRING_AGG(x, z) WITHIN GROUP (ORDER BY y)", "databricks": "LISTAGG(x, z) WITHIN GROUP (ORDER BY y)", "postgres": "STRING_AGG(x, z ORDER BY y NULLS FIRST)", }, ) self.validate_all( "GROUP_CONCAT(DISTINCT x ORDER 
BY y DESC SEPARATOR '')", write={ "mysql": "GROUP_CONCAT(DISTINCT x ORDER BY y DESC SEPARATOR '')", "sqlite": "GROUP_CONCAT(DISTINCT x, '')", "tsql": "STRING_AGG(x, '') WITHIN GROUP (ORDER BY y DESC)", "databricks": "LISTAGG(DISTINCT x, '') WITHIN GROUP (ORDER BY y DESC)", "postgres": "STRING_AGG(DISTINCT x, '' ORDER BY y DESC NULLS LAST)", }, ) self.validate_all( "GROUP_CONCAT(a, b, c SEPARATOR ',')", write={ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR ',')", "sqlite": "GROUP_CONCAT(a || b || c, ',')", "tsql": "STRING_AGG(a + b + c, ',')", "postgres": "STRING_AGG(a || b || c, ',')", "databricks": "LISTAGG(CONCAT(a, b, c), ',')", "presto": "ARRAY_JOIN(ARRAY_AGG(CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR), CAST(c AS VARCHAR))), ',')", }, ) self.validate_all( "GROUP_CONCAT(a, b, c SEPARATOR '')", write={ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) SEPARATOR '')", "sqlite": "GROUP_CONCAT(a || b || c, '')", "tsql": "STRING_AGG(a + b + c, '')", "databricks": "LISTAGG(CONCAT(a, b, c), '')", "postgres": "STRING_AGG(a || b || c, '')", }, ) self.validate_all( "GROUP_CONCAT(DISTINCT a, b, c SEPARATOR '')", write={ "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) SEPARATOR '')", "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", "tsql": "STRING_AGG(a + b + c, '')", "databricks": "LISTAGG(DISTINCT CONCAT(a, b, c), '')", "postgres": "STRING_AGG(DISTINCT a || b || c, '')", }, ) self.validate_all( "GROUP_CONCAT(a, b, c ORDER BY d SEPARATOR '')", write={ "mysql": "GROUP_CONCAT(CONCAT(a, b, c) ORDER BY d SEPARATOR '')", "sqlite": "GROUP_CONCAT(a || b || c, '')", "tsql": "STRING_AGG(a + b + c, '') WITHIN GROUP (ORDER BY d)", "databricks": "LISTAGG(CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", "postgres": "STRING_AGG(a || b || c, '' ORDER BY d NULLS FIRST)", }, ) self.validate_all( "GROUP_CONCAT(DISTINCT a, b, c ORDER BY d SEPARATOR '')", write={ "mysql": "GROUP_CONCAT(DISTINCT CONCAT(a, b, c) ORDER BY d SEPARATOR '')", "sqlite": "GROUP_CONCAT(DISTINCT a || b || c, '')", "tsql": 
"STRING_AGG(a + b + c, '') WITHIN GROUP (ORDER BY d)", "databricks": "LISTAGG(DISTINCT CONCAT(a, b, c), '') WITHIN GROUP (ORDER BY d)", "postgres": "STRING_AGG(DISTINCT a || b || c, '' ORDER BY d NULLS FIRST)", }, ) self.validate_identity( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" ) self.validate_identity( "CREATE TABLE z (a INT) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" ) self.validate_identity( "CREATE TABLE z (a INT DEFAULT NULL, PRIMARY KEY (a)) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='x'" ) self.validate_all( """ CREATE TABLE `t_customer_account` ( `id` int(11) NOT NULL AUTO_INCREMENT, `customer_id` int(11) DEFAULT NULL COMMENT '客户id', `bank` varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', `account_no` varchar(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表' """, write={ "mysql": """CREATE TABLE `t_customer_account` ( `id` INT(11) NOT NULL AUTO_INCREMENT, `customer_id` INT(11) DEFAULT NULL COMMENT '客户id', `bank` VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '行别', `account_no` VARCHAR(100) COLLATE utf8_bin DEFAULT NULL COMMENT '账号', PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARACTER SET=utf8 COLLATE=utf8_bin COMMENT='客户账户表'""" }, pretty=True, ) def test_show_simple(self): for key, write_key in [ ("BINARY LOGS", "BINARY LOGS"), ("MASTER LOGS", "BINARY LOGS"), ("STORAGE ENGINES", "ENGINES"), ("ENGINES", "ENGINES"), ("EVENTS", "EVENTS"), ("MASTER STATUS", "MASTER STATUS"), ("PLUGINS", "PLUGINS"), ("PRIVILEGES", "PRIVILEGES"), ("PROFILES", "PROFILES"), ("REPLICAS", "REPLICAS"), ("SLAVE HOSTS", "REPLICAS"), ]: show = self.validate_identity(f"SHOW {key}", f"SHOW {write_key}") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, write_key) def 
test_show_events(self): for key in ["BINLOG", "RELAYLOG"]: show = self.validate_identity(f"SHOW {key} EVENTS") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, f"{key} EVENTS") show = self.validate_identity(f"SHOW {key} EVENTS IN 'log' FROM 1 LIMIT 2, 3") self.assertEqual(show.text("log"), "log") self.assertEqual(show.text("position"), "1") self.assertEqual(show.text("limit"), "3") self.assertEqual(show.text("offset"), "2") show = self.validate_identity(f"SHOW {key} EVENTS LIMIT 1") self.assertEqual(show.text("limit"), "1") self.assertIsNone(show.args.get("offset")) def test_show_like_or_where(self): for key, write_key in [ ("CHARSET", "CHARACTER SET"), ("CHARACTER SET", "CHARACTER SET"), ("COLLATION", "COLLATION"), ("DATABASES", "DATABASES"), ("SCHEMAS", "DATABASES"), ("FUNCTION STATUS", "FUNCTION STATUS"), ("PROCEDURE STATUS", "PROCEDURE STATUS"), ("GLOBAL STATUS", "GLOBAL STATUS"), ("SESSION STATUS", "STATUS"), ("STATUS", "STATUS"), ("GLOBAL VARIABLES", "GLOBAL VARIABLES"), ("SESSION VARIABLES", "VARIABLES"), ("VARIABLES", "VARIABLES"), ]: expected_name = write_key.strip("GLOBAL").strip() template = "SHOW {}" show = self.validate_identity(template.format(key), template.format(write_key)) self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, expected_name) template = "SHOW {} LIKE '%foo%'" show = self.validate_identity(template.format(key), template.format(write_key)) self.assertIsInstance(show, exp.Show) self.assertIsInstance(show.args["like"], exp.Literal) self.assertEqual(show.text("like"), "%foo%") template = "SHOW {} WHERE Column_name LIKE '%foo%'" show = self.validate_identity(template.format(key), template.format(write_key)) self.assertIsInstance(show, exp.Show) self.assertIsInstance(show.args["where"], exp.Where) self.assertEqual(show.args["where"].sql(), "WHERE Column_name LIKE '%foo%'") def test_show_columns(self): show = self.validate_identity("SHOW COLUMNS FROM tbl_name") self.assertIsInstance(show, exp.Show) 
self.assertEqual(show.name, "COLUMNS") self.assertEqual(show.text("target"), "tbl_name") self.assertFalse(show.args["full"]) show = self.validate_identity("SHOW FULL COLUMNS FROM tbl_name FROM db_name LIKE '%foo%'") self.assertIsInstance(show, exp.Show) self.assertEqual(show.text("target"), "tbl_name") self.assertTrue(show.args["full"]) self.assertEqual(show.text("db"), "db_name") self.assertIsInstance(show.args["like"], exp.Literal) self.assertEqual(show.text("like"), "%foo%") def test_show_name(self): for key in [ "CREATE DATABASE", "CREATE EVENT", "CREATE FUNCTION", "CREATE PROCEDURE", "CREATE TABLE", "CREATE TRIGGER", "CREATE VIEW", "FUNCTION CODE", "PROCEDURE CODE", ]: show = self.validate_identity(f"SHOW {key} foo") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, key) self.assertEqual(show.text("target"), "foo") def test_show_grants(self): show = self.validate_identity("SHOW GRANTS FOR foo") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "GRANTS") self.assertEqual(show.text("target"), "foo") def test_show_engine(self): show = self.validate_identity("SHOW ENGINE foo STATUS") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "ENGINE") self.assertEqual(show.text("target"), "foo") self.assertFalse(show.args["mutex"]) show = self.validate_identity("SHOW ENGINE foo MUTEX") self.assertEqual(show.name, "ENGINE") self.assertEqual(show.text("target"), "foo") self.assertTrue(show.args["mutex"]) def test_show_errors(self): for key in ["ERRORS", "WARNINGS"]: show = self.validate_identity(f"SHOW {key}") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, key) show = self.validate_identity(f"SHOW {key} LIMIT 2, 3") self.assertEqual(show.text("limit"), "3") self.assertEqual(show.text("offset"), "2") def test_show_index(self): show = self.validate_identity("SHOW INDEX FROM foo") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "INDEX") self.assertEqual(show.text("target"), "foo") show = 
self.validate_identity("SHOW INDEX FROM foo FROM bar") self.assertEqual(show.text("db"), "bar") self.validate_all( "SHOW INDEX FROM bar.foo", write={"mysql": "SHOW INDEX FROM foo FROM bar"} ) def test_show_db_like_or_where_sql(self): for key in [ "OPEN TABLES", "TABLE STATUS", "TRIGGERS", ]: show = self.validate_identity(f"SHOW {key}") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, key) show = self.validate_identity(f"SHOW {key} FROM db_name") self.assertEqual(show.name, key) self.assertEqual(show.text("db"), "db_name") show = self.validate_identity(f"SHOW {key} LIKE '%foo%'") self.assertEqual(show.name, key) self.assertIsInstance(show.args["like"], exp.Literal) self.assertEqual(show.text("like"), "%foo%") show = self.validate_identity(f"SHOW {key} WHERE Column_name LIKE '%foo%'") self.assertEqual(show.name, key) self.assertIsInstance(show.args["where"], exp.Where) self.assertEqual(show.args["where"].sql(), "WHERE Column_name LIKE '%foo%'") def test_show_processlist(self): show = self.validate_identity("SHOW PROCESSLIST") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "PROCESSLIST") self.assertFalse(show.args["full"]) show = self.validate_identity("SHOW FULL PROCESSLIST") self.assertEqual(show.name, "PROCESSLIST") self.assertTrue(show.args["full"]) def test_show_profile(self): show = self.validate_identity("SHOW PROFILE") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "PROFILE") show = self.validate_identity("SHOW PROFILE BLOCK IO") self.assertEqual(show.args["types"][0].name, "BLOCK IO") show = self.validate_identity( "SHOW PROFILE BLOCK IO, PAGE FAULTS FOR QUERY 1 OFFSET 2 LIMIT 3" ) self.assertEqual(show.args["types"][0].name, "BLOCK IO") self.assertEqual(show.args["types"][1].name, "PAGE FAULTS") self.assertEqual(show.text("query"), "1") self.assertEqual(show.text("offset"), "2") self.assertEqual(show.text("limit"), "3") def test_show_replica_status(self): show = self.validate_identity("SHOW REPLICA 
STATUS") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "REPLICA STATUS") show = self.validate_identity("SHOW SLAVE STATUS", "SHOW REPLICA STATUS") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "REPLICA STATUS") show = self.validate_identity("SHOW REPLICA STATUS FOR CHANNEL channel_name") self.assertEqual(show.text("channel"), "channel_name") def test_show_tables(self): show = self.validate_identity("SHOW TABLES") self.assertIsInstance(show, exp.Show) self.assertEqual(show.name, "TABLES") show = self.validate_identity("SHOW FULL TABLES FROM db_name LIKE '%foo%'") self.assertTrue(show.args["full"]) self.assertEqual(show.text("db"), "db_name") self.assertIsInstance(show.args["like"], exp.Literal) self.assertEqual(show.text("like"), "%foo%") def test_set_variable(self): cmd = self.parse_one("SET SESSION x = 1") item = cmd.expressions[0] self.assertEqual(item.text("kind"), "SESSION") self.assertIsInstance(item.this, exp.EQ) self.assertEqual(item.this.left.name, "x") self.assertEqual(item.this.right.name, "1") cmd = self.parse_one("SET @@GLOBAL.x = @@GLOBAL.y") item = cmd.expressions[0] self.assertEqual(item.text("kind"), "") self.assertIsInstance(item.this, exp.EQ) self.assertIsInstance(item.this.left, exp.SessionParameter) self.assertIsInstance(item.this.right, exp.SessionParameter) cmd = self.parse_one("SET NAMES 'charset_name' COLLATE 'collation_name'") item = cmd.expressions[0] self.assertEqual(item.text("kind"), "NAMES") self.assertEqual(item.name, "charset_name") self.assertEqual(item.text("collate"), "collation_name") cmd = self.parse_one("SET CHARSET DEFAULT") item = cmd.expressions[0] self.assertEqual(item.text("kind"), "CHARACTER SET") self.assertEqual(item.this.name, "DEFAULT") cmd = self.parse_one("SET x = 1, y = 2") self.assertEqual(len(cmd.expressions), 2) def test_json_object(self): self.validate_identity("SELECT JSON_OBJECT('id', 87, 'name', 'carrot')") def test_is_null(self): self.validate_all( "SELECT ISNULL(x)", 
write={"": "SELECT (x IS NULL)", "mysql": "SELECT (x IS NULL)"} ) def test_monthname(self): self.validate_all( "MONTHNAME(x)", write={ "": "TIME_TO_STR(CAST(x AS DATE), '%B')", "mysql": "DATE_FORMAT(x, '%M')", }, ) def test_safe_div(self): self.validate_all( "a / b", write={ "bigquery": "a / NULLIF(b, 0)", "clickhouse": "a / b", "databricks": "a / NULLIF(b, 0)", "duckdb": "a / b", "hive": "a / b", "mysql": "a / b", "oracle": "a / NULLIF(b, 0)", "snowflake": "a / NULLIF(b, 0)", "spark": "a / b", "starrocks": "a / b", "drill": "CAST(a AS DOUBLE) / NULLIF(b, 0)", "postgres": "CAST(a AS DOUBLE PRECISION) / NULLIF(b, 0)", "presto": "CAST(a AS DOUBLE) / NULLIF(b, 0)", "redshift": "CAST(a AS DOUBLE PRECISION) / NULLIF(b, 0)", "sqlite": "CAST(a AS REAL) / b", "teradata": "CAST(a AS DOUBLE PRECISION) / NULLIF(b, 0)", "trino": "CAST(a AS DOUBLE) / NULLIF(b, 0)", "tsql": "CAST(a AS FLOAT) / NULLIF(b, 0)", }, ) def test_timestamp_trunc(self): hive_dialects = ("spark", "databricks") for dialect in ("postgres", "snowflake", *hive_dialects): for unit in ( "SECOND", "DAY", "MONTH", "YEAR", ): with self.subTest(f"MySQL -> {dialect} Timestamp Trunc with unit {unit}: "): cast = ( "TIMESTAMP('2001-02-16 20:38:40')" if dialect in hive_dialects else "CAST('2001-02-16 20:38:40' AS DATETIME)" ) self.validate_all( f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', {cast})) {unit})", read={ dialect: f"DATE_TRUNC({unit}, TIMESTAMP '2001-02-16 20:38:40')", }, write={ "mysql": f"DATE_ADD('0000-01-01 00:00:00', INTERVAL (TIMESTAMPDIFF({unit}, '0000-01-01 00:00:00', {cast})) {unit})", }, ) def test_at_time_zone(self): with self.assertLogs() as cm: # Check AT TIME ZONE doesnt discard the column name and also raises a warning self.validate_identity( "SELECT foo AT TIME ZONE 'UTC'", write_sql="SELECT foo", ) assert "AT TIME ZONE is not supported" in cm.output[0] def test_json_value(self): json_doc = """'{"item": "shoes", "price": "49.95"}'""" 
self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, '$.price')""") self.validate_identity( f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2))""" ) for on_option in ("NULL", "ERROR", "DEFAULT 1"): self.validate_identity( f"""SELECT JSON_VALUE({json_doc}, '$.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price""" ) def test_grant(self): grant_cmds = [ "GRANT 'role1', 'role2' TO 'user1'@'localhost', 'user2'@'localhost'", "GRANT SELECT ON world.* TO 'role3'", "GRANT SELECT ON db2.invoice TO 'jeffrey'@'localhost'", "GRANT INSERT ON `d%`.* TO u", "GRANT ALL ON test.* TO ''@'localhost'", "GRANT SELECT (col1), INSERT (col1, col2) ON mydb.mytbl TO 'someuser'@'somehost'", "GRANT SELECT, INSERT, UPDATE ON *.* TO u2", ] for sql in grant_cmds: with self.subTest(f"Testing MySQL's GRANT command statement: {sql}"): self.validate_identity(sql, check_command_warning=True) def test_revoke(self): revoke_cmds = [ "REVOKE 'role1', 'role2' FROM 'user1'@'localhost', 'user2'@'localhost'", "REVOKE SELECT ON world.* FROM 'role3'", "REVOKE SELECT ON db2.invoice FROM 'jeffrey'@'localhost'", "REVOKE INSERT ON `d%`.* FROM u", "REVOKE ALL ON test.* FROM ''@'localhost'", "REVOKE SELECT (col1), INSERT (col1, col2) ON mydb.mytbl FROM 'someuser'@'somehost'", "REVOKE SELECT, INSERT, UPDATE ON *.* FROM u2", ] for sql in revoke_cmds: with self.subTest(f"Testing MySQL's REVOKE command statement: {sql}"): self.validate_identity(sql, check_command_warning=True) def test_explain(self): self.validate_identity( "EXPLAIN ANALYZE SELECT * FROM t", "DESCRIBE ANALYZE SELECT * FROM t" ) expression = self.parse_one("EXPLAIN ANALYZE SELECT * FROM t") self.assertIsInstance(expression, exp.Describe) self.assertEqual(expression.text("style"), "ANALYZE") for format in ("JSON", "TRADITIONAL", "TREE"): self.validate_identity(f"DESCRIBE FORMAT={format} UPDATE test SET test_col = 'abc'") def test_number_format(self): self.validate_all( "SELECT FORMAT(12332.123456, 4)", 
write={ "duckdb": "SELECT FORMAT('{:,.4f}', 12332.123456)", "mysql": "SELECT FORMAT(12332.123456, 4)", }, ) self.validate_all( "SELECT FORMAT(12332.1, 4)", write={ "duckdb": "SELECT FORMAT('{:,.4f}', 12332.1)", "mysql": "SELECT FORMAT(12332.1, 4)", }, ) self.validate_all( "SELECT FORMAT(12332.2, 0)", write={ "duckdb": "SELECT FORMAT('{:,.0f}', 12332.2)", "mysql": "SELECT FORMAT(12332.2, 0)", }, ) self.validate_all( "SELECT FORMAT(12332.2, 2, 'de_DE')", write={ "duckdb": UnsupportedError, "mysql": "SELECT FORMAT(12332.2, 2, 'de_DE')", }, ) def test_analyze(self): self.validate_identity("ANALYZE LOCAL TABLE tbl") self.validate_identity("ANALYZE NO_WRITE_TO_BINLOG TABLE tbl") self.validate_identity("ANALYZE tbl UPDATE HISTOGRAM ON col1") self.validate_identity("ANALYZE tbl UPDATE HISTOGRAM ON col1 USING DATA 'json_data'") self.validate_identity("ANALYZE tbl UPDATE HISTOGRAM ON col1 WITH 5 BUCKETS") self.validate_identity("ANALYZE tbl UPDATE HISTOGRAM ON col1 WITH 5 BUCKETS AUTO UPDATE") self.validate_identity("ANALYZE tbl UPDATE HISTOGRAM ON col1 WITH 5 BUCKETS MANUAL UPDATE") self.validate_identity("ANALYZE tbl DROP HISTOGRAM ON col1") def test_utc_time(self): self.validate_identity("UTC_TIME()").assert_is(exp.UtcTime) self.validate_identity("UTC_TIME(6)").assert_is(exp.UtcTime) self.validate_identity("UTC_TIMESTAMP()").assert_is(exp.UtcTimestamp) self.validate_identity("UTC_TIMESTAMP(6)").assert_is(exp.UtcTimestamp) def test_mod(self): self.validate_identity("x % y").assert_is(exp.Mod) self.validate_identity("x MOD y", "x % y").assert_is(exp.Mod) self.validate_identity("MOD(x, y)", "x % y").assert_is(exp.Mod) def test_numeric_trunc(self): # MySQL uses TRUNCATE for numeric truncation self.validate_identity("TRUNCATE(3.14159, 2)").assert_is(exp.Trunc) self.validate_identity("TRUNCATE(price, 0)").assert_is(exp.Trunc) # TRUNC alias normalizes to TRUNCATE in MySQL self.validate_identity("TRUNC(3.14159, 2)", "TRUNCATE(3.14159, 2)").assert_is(exp.Trunc) # Cross-dialect 
numeric truncation transpilation self.validate_all( "TRUNCATE(3.14159, 2)", write={ "mysql": "TRUNCATE(3.14159, 2)", "oracle": "TRUNC(3.14159, 2)", "postgres": "TRUNC(3.14159, 2)", "snowflake": "TRUNC(3.14159, 2)", "tsql": "ROUND(3.14159, 2, 1)", }, ) def test_valid_interval_units(self): for unit in ( "SECOND_MICROSECOND", "MINUTE_MICROSECOND", "MINUTE_SECOND", "HOUR_MICROSECOND", "HOUR_SECOND", "HOUR_MINUTE", "DAY_MICROSECOND", "DAY_SECOND", "DAY_MINUTE", "DAY_HOUR", "YEAR_MONTH", ): with self.subTest(f"Testing INTERVAL unit: {unit}"): self.validate_identity(f"DATE_ADD(base_date, INTERVAL day_interval {unit})") def test_create_trigger(self): """Test that MySQL CREATE TRIGGER statements fall back to Command parsing.""" self.validate_identity( "CREATE TRIGGER check_age BEFORE INSERT ON users FOR EACH ROW BEGIN SET NEW.created_at = NOW() END", check_command_warning=True, ) self.validate_identity( "CREATE TRIGGER audit_update AFTER UPDATE ON accounts FOR EACH ROW BEGIN INSERT INTO audit_log (user_id, old_balance, new_balance, changed_at) VALUES (OLD.user_id, OLD.balance, NEW.balance, NOW()) END", check_command_warning=True, ) self.validate_identity( "CREATE TRIGGER track_deletes BEFORE DELETE ON orders FOR EACH ROW BEGIN UPDATE statistics SET delete_count = delete_count + 1 WHERE table_name = 'orders' END", check_command_warning=True, ) ================================================ FILE: tests/dialects/test_oracle.py ================================================ from sqlglot import exp, UnsupportedError, ParseError, parse, parse_one from tests.dialects.test_dialect import Validator from sqlglot.optimizer.qualify import qualify class TestOracle(Validator): dialect = "oracle" def test_oracle(self): self.validate_identity("1 /* /* */", "1 /* / * */") self.validate_all( "SELECT CONNECT_BY_ROOT x y", write={ "": "SELECT CONNECT_BY_ROOT x AS y", "oracle": "SELECT CONNECT_BY_ROOT x AS y", }, ) self.parse_one("ALTER TABLE tbl_name DROP FOREIGN KEY 
fk_symbol").assert_is(exp.Alter) self.validate_identity("XMLELEMENT(EVALNAME foo + bar)") self.validate_identity("SELECT BITMAP_BUCKET_NUMBER(32769)") self.validate_identity("SELECT BITMAP_CONSTRUCT_AGG(value)") self.validate_identity("DBMS_RANDOM.NORMAL") self.validate_identity("DBMS_RANDOM.VALUE(low, high)").assert_is(exp.Rand) self.validate_identity("DBMS_RANDOM.VALUE()").assert_is(exp.Rand) self.validate_identity("CAST(value AS NUMBER DEFAULT 0 ON CONVERSION ERROR)") self.validate_identity("SYSDATE") self.validate_identity("CREATE GLOBAL TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("CREATE PRIVATE TEMPORARY TABLE t AS SELECT * FROM orders") self.validate_identity("REGEXP_REPLACE('source', 'search')") self.validate_identity("TIMESTAMP(3) WITH TIME ZONE") self.validate_identity("SYSTIMESTAMP").assert_is(exp.Systimestamp) self.validate_identity("SELECT SYSTIMESTAMP AT TIME ZONE 'UTC'") self.validate_identity("CURRENT_TIMESTAMP(precision)") self.validate_identity("ALTER TABLE tbl_name DROP FOREIGN KEY fk_symbol") self.validate_identity("ALTER TABLE Payments ADD Stock NUMBER NOT NULL") self.validate_identity("SELECT x FROM t WHERE cond FOR UPDATE") self.validate_identity("SELECT JSON_OBJECT(k1: v1 FORMAT JSON, k2: v2 FORMAT JSON)") self.validate_identity("SELECT JSON_OBJECT('name': first_name || ' ' || last_name) FROM t") self.validate_identity("COALESCE(c1, c2, c3)") self.validate_identity("SELECT * FROM TABLE(foo)") self.validate_identity("SELECT a$x#b") self.validate_identity("SELECT :OBJECT") self.validate_identity("SELECT * FROM t FOR UPDATE") self.validate_identity("SELECT * FROM t FOR UPDATE WAIT 5") self.validate_identity("SELECT * FROM t FOR UPDATE NOWAIT") self.validate_identity("SELECT * FROM t FOR UPDATE SKIP LOCKED") self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v") self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v NOWAIT") self.validate_identity("SELECT * FROM t FOR UPDATE OF s.t.c, s.t.v SKIP 
LOCKED") self.validate_identity("SELECT STANDARD_HASH('hello')") self.validate_identity("SELECT STANDARD_HASH('hello', 'MD5')") self.validate_identity("SELECT * FROM table_name@dblink_name.database_link_domain") self.validate_identity("SELECT * FROM table_name SAMPLE (25) s") self.validate_identity("SELECT COUNT(*) * 10 FROM orders SAMPLE (10) SEED (1)") self.validate_identity("SELECT * FROM V$SESSION") self.validate_identity("SELECT TO_DATE('January 15, 1989, 11:00 A.M.')") self.validate_identity("SELECT INSTR(haystack, needle)") self.validate_identity( "SELECT (TIMESTAMP '2025-12-30 20:00:00' - TIMESTAMP '2025-12-29 14:30:00') DAY TO SECOND", "SELECT (TO_TIMESTAMP('2025-12-30 20:00:00', 'YYYY-MM-DD HH24:MI:SS.FF6') - TO_TIMESTAMP('2025-12-29 14:30:00', 'YYYY-MM-DD HH24:MI:SS.FF6')) DAY TO SECOND", ) self.validate_identity("SELECT (SYSTIMESTAMP - order_date) DAY(9) TO SECOND FROM orders") self.validate_identity("SELECT (SYSTIMESTAMP - order_date) DAY(9) TO SECOND(3) FROM orders") self.validate_identity( "SELECT * FROM consumer LEFT JOIN groceries ON consumer.groceries_id = consumer.id PIVOT(MAX(type_id) FOR consumer_type IN (1, 2, 3, 4))" ) self.validate_identity( "SELECT * FROM test UNPIVOT INCLUDE NULLS (value FOR Description IN (col AS 'PREFIX ' || CHR(38) || ' SUFFIX'))" ) self.validate_identity( "SELECT last_name, employee_id, manager_id, LEVEL FROM employees START WITH employee_id = 100 CONNECT BY PRIOR employee_id = manager_id ORDER SIBLINGS BY last_name" ) self.validate_identity( "ALTER TABLE Payments ADD (Stock NUMBER NOT NULL, dropid VARCHAR2(500) NOT NULL)" ) self.validate_identity( "SELECT JSON_ARRAYAGG(JSON_OBJECT('RNK': RNK, 'RATING_CODE': RATING_CODE, 'DATE_VALUE': DATE_VALUE, 'AGENT_ID': AGENT_ID RETURNING CLOB) RETURNING CLOB) AS JSON_DATA FROM tablename" ) self.validate_identity( "SELECT JSON_ARRAY(FOO() FORMAT JSON, BAR() NULL ON NULL RETURNING CLOB STRICT)" ) self.validate_identity( "SELECT JSON_ARRAYAGG(FOO() FORMAT JSON ORDER BY bar NULL ON 
NULL RETURNING CLOB STRICT)" ) self.validate_identity( "SELECT COUNT(1) INTO V_Temp FROM TABLE(CAST(somelist AS data_list)) WHERE col LIKE '%contact'" ) self.validate_identity( "SELECT * FROM t WHERE c LIKE (:v)", ) self.validate_identity( "SELECT department_id INTO v_department_id FROM departments FETCH FIRST 1 ROWS ONLY" ) self.validate_identity( "SELECT department_id BULK COLLECT INTO v_department_ids FROM departments" ) self.validate_identity( "SELECT department_id, department_name BULK COLLECT INTO v_department_ids, v_department_names FROM departments" ) self.validate_identity( "SELECT MIN(column_name) KEEP (DENSE_RANK FIRST ORDER BY column_name DESC) FROM table_name" ) self.validate_identity( 'XMLELEMENT("ImageID", image.id)', 'XMLELEMENT(NAME "ImageID", image.id)', ) self.validate_identity( "SELECT CAST('January 15, 1989, 11:00 A.M.' AS DATE DEFAULT NULL ON CONVERSION ERROR, 'Month dd, YYYY, HH:MI A.M.') FROM DUAL", "SELECT TO_DATE('January 15, 1989, 11:00 A.M.', 'Month dd, YYYY, HH12:MI A.M.') FROM DUAL", ) self.validate_identity( "SELECT TRUNC(SYSDATE)", "SELECT TRUNC(SYSDATE, 'DD')", ) self.validate_identity( """SELECT JSON_OBJECT(KEY 'key1' IS emp.column1, KEY 'key2' IS emp.column1) "emp_key" FROM emp""", """SELECT JSON_OBJECT('key1': emp.column1, 'key2': emp.column1) AS "emp_key" FROM emp""", ) self.validate_identity( "SELECT JSON_OBJECTAGG(KEY department_name VALUE department_id) FROM dep WHERE id <= 30", "SELECT JSON_OBJECTAGG(department_name: department_id) FROM dep WHERE id <= 30", ) self.validate_identity( "SELECT last_name, department_id, salary, MIN(salary) KEEP (DENSE_RANK FIRST ORDER BY commission_pct) " 'OVER (PARTITION BY department_id) AS "Worst", MAX(salary) KEEP (DENSE_RANK LAST ORDER BY commission_pct) ' 'OVER (PARTITION BY department_id) AS "Best" FROM employees ORDER BY department_id, salary, last_name' ) self.validate_identity( "SELECT UNIQUE col1, col2 FROM table", "SELECT DISTINCT col1, col2 FROM table", ) self.validate_identity( 
"SELECT * FROM T ORDER BY I OFFSET NVL(:variable1, 10) ROWS FETCH NEXT NVL(:variable2, 10) ROWS ONLY", ) self.validate_identity( "SELECT * FROM t SAMPLE (.25)", "SELECT * FROM t SAMPLE (0.25)", ) self.validate_identity("SELECT TO_CHAR(-100, 'L99', 'NL_CURRENCY = '' AusDollars '' ')") self.validate_identity( "SELECT * FROM t START WITH col CONNECT BY NOCYCLE PRIOR col1 = col2" ) self.validate_all( "SELECT DBMS_RANDOM.VALUE()", read={ "oracle": "SELECT DBMS_RANDOM.VALUE", "postgres": "SELECT RANDOM()", }, write={ "oracle": "SELECT DBMS_RANDOM.VALUE()", "postgres": "SELECT RANDOM()", }, ) self.validate_all( "SELECT TRIM('|' FROM '||Hello ||| world||')", write={ "clickhouse": "SELECT TRIM(BOTH '|' FROM '||Hello ||| world||')", "oracle": "SELECT TRIM('|' FROM '||Hello ||| world||')", }, ) self.validate_all( "SELECT department_id, department_name INTO v_department_id, v_department_name FROM departments FETCH FIRST 1 ROWS ONLY", write={ "oracle": "SELECT department_id, department_name INTO v_department_id, v_department_name FROM departments FETCH FIRST 1 ROWS ONLY", "postgres": UnsupportedError, "tsql": UnsupportedError, }, ) self.validate_all( "SELECT * FROM test WHERE MOD(col1, 4) = 3", read={ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", }, write={ "duckdb": "SELECT * FROM test WHERE col1 % 4 = 3", "oracle": "SELECT * FROM test WHERE MOD(col1, 4) = 3", }, ) self.validate_all( "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')", read={ "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'yyyy/mm/dd') AND TO_DATE(f.C_EDATE, 'yyyy/mm/dd')", }, write={ "oracle": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')", "postgres": "CURRENT_TIMESTAMP BETWEEN TO_DATE(f.C_SDATE, 'YYYY/MM/DD') AND TO_DATE(f.C_EDATE, 'YYYY/MM/DD')", }, ) self.validate_all( "TO_CHAR(x)", write={ "doris": "CAST(x AS STRING)", "oracle": "TO_CHAR(x)", }, ) self.validate_all( "TO_NUMBER(expr, fmt, 
nlsparam)", read={ "teradata": "TO_NUMBER(expr, fmt, nlsparam)", }, write={ "oracle": "TO_NUMBER(expr, fmt, nlsparam)", "teradata": "TO_NUMBER(expr, fmt, nlsparam)", }, ) self.validate_all( "TO_NUMBER(x)", write={ "bigquery": "CAST(x AS FLOAT64)", "doris": "CAST(x AS DOUBLE)", "drill": "CAST(x AS DOUBLE)", "duckdb": "CAST(x AS DOUBLE)", "hive": "CAST(x AS DOUBLE)", "mysql": "CAST(x AS DOUBLE)", "oracle": "TO_NUMBER(x)", "postgres": "CAST(x AS DOUBLE PRECISION)", "presto": "CAST(x AS DOUBLE)", "redshift": "CAST(x AS DOUBLE PRECISION)", "snowflake": "TO_NUMBER(x)", "spark": "CAST(x AS DOUBLE)", "spark2": "CAST(x AS DOUBLE)", "starrocks": "CAST(x AS DOUBLE)", "tableau": "CAST(x AS DOUBLE)", "teradata": "TO_NUMBER(x)", }, ) self.validate_all( "TO_NUMBER(x, fmt)", read={ "databricks": "TO_NUMBER(x, fmt)", "drill": "TO_NUMBER(x, fmt)", "postgres": "TO_NUMBER(x, fmt)", "snowflake": "TO_NUMBER(x, fmt)", "spark": "TO_NUMBER(x, fmt)", "redshift": "TO_NUMBER(x, fmt)", "teradata": "TO_NUMBER(x, fmt)", }, write={ "databricks": "TO_NUMBER(x, fmt)", "drill": "TO_NUMBER(x, fmt)", "oracle": "TO_NUMBER(x, fmt)", "postgres": "TO_NUMBER(x, fmt)", "snowflake": "TO_NUMBER(x, fmt)", "spark": "TO_NUMBER(x, fmt)", "redshift": "TO_NUMBER(x, fmt)", "teradata": "TO_NUMBER(x, fmt)", }, ) self.validate_all( "SELECT CAST(NULL AS VARCHAR2(2328 CHAR)) AS COL1", write={ "oracle": "SELECT CAST(NULL AS VARCHAR2(2328 CHAR)) AS COL1", "spark": "SELECT CAST(NULL AS VARCHAR(2328)) AS COL1", }, ) self.validate_all( "SELECT CAST(NULL AS VARCHAR2(2328 BYTE)) AS COL1", write={ "oracle": "SELECT CAST(NULL AS VARCHAR2(2328 BYTE)) AS COL1", "spark": "SELECT CAST(NULL AS VARCHAR(2328)) AS COL1", }, ) self.validate_all( "DATE '2022-01-01'", write={ "": "DATE_STR_TO_DATE('2022-01-01')", "mysql": "CAST('2022-01-01' AS DATE)", "oracle": "TO_DATE('2022-01-01', 'YYYY-MM-DD')", "postgres": "CAST('2022-01-01' AS DATE)", }, ) self.validate_all( "x::binary_double", write={ "oracle": "CAST(x AS DOUBLE PRECISION)", "": 
"CAST(x AS DOUBLE)", }, ) self.validate_all( "x::binary_float", write={ "oracle": "CAST(x AS FLOAT)", "": "CAST(x AS FLOAT)", }, ) self.validate_all( "CAST(x AS sch.udt)", read={ "postgres": "CAST(x AS sch.udt)", }, write={ "oracle": "CAST(x AS sch.udt)", "postgres": "CAST(x AS sch.udt)", }, ) self.validate_all( "SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')", write={ "oracle": "SELECT TO_TIMESTAMP('2024-12-12 12:12:12.000000', 'YYYY-MM-DD HH24:MI:SS.FF6')", "duckdb": "SELECT STRPTIME('2024-12-12 12:12:12.000000', '%Y-%m-%d %H:%M:%S.%f')", }, ) self.validate_all( "SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')", write={ "oracle": "SELECT TO_DATE('2024-12-12', 'YYYY-MM-DD')", "duckdb": "SELECT CAST(STRPTIME('2024-12-12', '%Y-%m-%d') AS DATE)", }, ) self.validate_identity( """SELECT * FROM t ORDER BY a ASC NULLS LAST, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC NULLS FIRST""", """SELECT * FROM t ORDER BY a ASC, b ASC NULLS FIRST, c DESC NULLS LAST, d DESC""", ) self.validate_all( "NVL(NULL, 1)", write={ "oracle": "NVL(NULL, 1)", "": "COALESCE(NULL, 1)", "clickhouse": "COALESCE(NULL, 1)", }, ) self.validate_all( "TRIM(BOTH 'h' FROM 'Hello World')", write={ "oracle": "TRIM(BOTH 'h' FROM 'Hello World')", "clickhouse": "TRIM(BOTH 'h' FROM 'Hello World')", }, ) self.validate_identity( "SELECT /*+ ORDERED */* FROM tbl", "SELECT /*+ ORDERED */ * FROM tbl" ) self.validate_identity( "SELECT /* test */ /*+ ORDERED */* FROM tbl", "/* test */ SELECT /*+ ORDERED */ * FROM tbl", ) self.validate_identity( "SELECT /*+ ORDERED */*/* test */ FROM tbl", "SELECT /*+ ORDERED */ * /* test */ FROM tbl", ) self.validate_all( "SELECT * FROM t FETCH FIRST 10 ROWS ONLY", write={ "oracle": "SELECT * FROM t FETCH FIRST 10 ROWS ONLY", "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 0 ROWS FETCH FIRST 10 ROWS ONLY", }, ) self.validate_identity("CREATE OR REPLACE FORCE VIEW foo1.foo2") self.validate_identity("TO_TIMESTAMP('foo')") self.validate_identity( "SELECT 
TO_TIMESTAMP('05 Dec 2000 10:00 AM', 'DD Mon YYYY HH12:MI AM')" ) self.validate_identity( "SELECT TO_TIMESTAMP('05 Dec 2000 10:00 PM', 'DD Mon YYYY HH12:MI PM')" ) self.validate_identity( "SELECT TO_TIMESTAMP('05 Dec 2000 10:00 A.M.', 'DD Mon YYYY HH12:MI A.M.')" ) self.validate_identity( "SELECT TO_TIMESTAMP('05 Dec 2000 10:00 P.M.', 'DD Mon YYYY HH12:MI P.M.')" ) self.validate_identity( "SELECT CUME_DIST(15, 0.05) WITHIN GROUP (ORDER BY col1, col2) FROM t" ) self.validate_identity( "SELECT DENSE_RANK(15, 0.05) WITHIN GROUP (ORDER BY col1, col2) FROM t" ) self.validate_identity("SELECT RANK(15, 0.05) WITHIN GROUP (ORDER BY col1, col2) FROM t") self.validate_identity( "SELECT PERCENT_RANK(15, 0.05) WITHIN GROUP (ORDER BY col1, col2) FROM t" ) self.validate_identity("L2_DISTANCE(x, y)") self.validate_identity("BITMAP_OR_AGG(x)") def test_join_marker(self): self.validate_identity("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y (+) = e2.y") self.validate_all( "SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)", write={"": UnsupportedError}, ) self.validate_all( "SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)", write={ "": "SELECT e1.x, e2.x FROM e AS e1, e AS e2 WHERE e1.y = e2.y", "oracle": "SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)", }, ) def test_hints(self): self.validate_identity("SELECT /*+ USE_NL(A B) */ A.COL_TEST FROM TABLE_A A, TABLE_B B") self.validate_identity( "SELECT /*+ INDEX(v.j jhist_employee_ix (employee_id start_date)) */ * FROM v" ) self.validate_identity( "SELECT /*+ USE_NL(A B C) */ A.COL_TEST FROM TABLE_A A, TABLE_B B, TABLE_C C" ) self.validate_identity( "SELECT /*+ NO_INDEX(employees emp_empid) */ employee_id FROM employees WHERE employee_id > 200" ) self.validate_identity( "SELECT /*+ NO_INDEX_FFS(items item_order_ix) */ order_id FROM order_items items" ) self.validate_identity( "SELECT /*+ LEADING(e j) */ * FROM employees e, departments d, job_history j WHERE e.department_id = d.department_id AND e.hire_date = 
j.start_date" ) self.validate_identity("INSERT /*+ APPEND */ INTO IAP_TBL (id, col1) VALUES (2, 'test2')") self.validate_identity("INSERT /*+ APPEND_VALUES */ INTO dest_table VALUES (i, 'Value')") self.validate_identity("INSERT /*+ APPEND(d) */ INTO dest d VALUES (i, 'Value')") self.validate_identity( "INSERT /*+ APPEND(d) */ INTO dest d (i, value) SELECT 1, 'value' FROM dual" ) self.validate_identity( "SELECT /*+ LEADING(departments employees) USE_NL(employees) */ * FROM employees JOIN departments ON employees.department_id = departments.department_id", """SELECT /*+ LEADING(departments employees) USE_NL(employees) */ * FROM employees JOIN departments ON employees.department_id = departments.department_id""", pretty=True, ) self.validate_identity( "SELECT /*+ USE_NL(bbbbbbbbbbbbbbbbbbbbbbbb) LEADING(aaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbbb cccccccccccccccccccccccc dddddddddddddddddddddddd) INDEX(cccccccccccccccccccccccc) */ * FROM aaaaaaaaaaaaaaaaaaaaaaaa JOIN bbbbbbbbbbbbbbbbbbbbbbbb ON aaaaaaaaaaaaaaaaaaaaaaaa.id = bbbbbbbbbbbbbbbbbbbbbbbb.a_id JOIN cccccccccccccccccccccccc ON bbbbbbbbbbbbbbbbbbbbbbbb.id = cccccccccccccccccccccccc.b_id JOIN dddddddddddddddddddddddd ON cccccccccccccccccccccccc.id = dddddddddddddddddddddddd.c_id", ) self.validate_identity( "SELECT /*+ USE_NL(bbbbbbbbbbbbbbbbbbbbbbbb) LEADING(aaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbbb cccccccccccccccccccccccc dddddddddddddddddddddddd) INDEX(cccccccccccccccccccccccc) */ * FROM aaaaaaaaaaaaaaaaaaaaaaaa JOIN bbbbbbbbbbbbbbbbbbbbbbbb ON aaaaaaaaaaaaaaaaaaaaaaaa.id = bbbbbbbbbbbbbbbbbbbbbbbb.a_id JOIN cccccccccccccccccccccccc ON bbbbbbbbbbbbbbbbbbbbbbbb.id = cccccccccccccccccccccccc.b_id JOIN dddddddddddddddddddddddd ON cccccccccccccccccccccccc.id = dddddddddddddddddddddddd.c_id", """SELECT /*+ USE_NL(bbbbbbbbbbbbbbbbbbbbbbbb) LEADING( aaaaaaaaaaaaaaaaaaaaaaaa bbbbbbbbbbbbbbbbbbbbbbbb cccccccccccccccccccccccc dddddddddddddddddddddddd ) INDEX(cccccccccccccccccccccccc) */ * FROM 
aaaaaaaaaaaaaaaaaaaaaaaa JOIN bbbbbbbbbbbbbbbbbbbbbbbb ON aaaaaaaaaaaaaaaaaaaaaaaa.id = bbbbbbbbbbbbbbbbbbbbbbbb.a_id JOIN cccccccccccccccccccccccc ON bbbbbbbbbbbbbbbbbbbbbbbb.id = cccccccccccccccccccccccc.b_id JOIN dddddddddddddddddddddddd ON cccccccccccccccccccccccc.id = dddddddddddddddddddddddd.c_id""", pretty=True, ) # Test that parsing error with keywords like select where etc falls back self.validate_identity( "SELECT /*+ LEADING(departments employees) USE_NL(employees) select where group by is order by */ * FROM employees JOIN departments ON employees.department_id = departments.department_id", """SELECT /*+ LEADING(departments employees) USE_NL(employees) select where group by is order by */ * FROM employees JOIN departments ON employees.department_id = departments.department_id""", pretty=True, ) # Test that parsing error with , inside hint function falls back self.validate_identity( "SELECT /*+ LEADING(departments, employees) */ * FROM employees JOIN departments ON employees.department_id = departments.department_id" ) # Test that parsing error with keyword inside hint function falls back self.validate_identity( "SELECT /*+ LEADING(departments select) */ * FROM employees JOIN departments ON employees.department_id = departments.department_id" ) def test_xml_table(self): self.validate_identity("XMLTABLE('x')") self.validate_identity("XMLTABLE('x' RETURNING SEQUENCE BY REF)") self.validate_identity("XMLTABLE('x' PASSING y)") self.validate_identity("XMLTABLE('x' PASSING y RETURNING SEQUENCE BY REF)") self.validate_identity( "XMLTABLE('x' RETURNING SEQUENCE BY REF COLUMNS a VARCHAR2, b FLOAT)" ) self.validate_identity( "SELECT x.* FROM example t, XMLTABLE(XMLNAMESPACES(DEFAULT 'http://example.com/default', 'http://example.com/ns1' AS \"ns1\"), '/root/data' PASSING t.xml COLUMNS id NUMBER PATH '@id', value VARCHAR2(100) PATH 'ns1:value/text()') x" ) self.validate_all( """SELECT warehouse_name warehouse, warehouse2."Water", warehouse2."Rail" FROM warehouses, 
def test_match_recognize(self):
    """Run a pretty-printed identity check on a MATCH_RECOGNIZE query.

    The query combines PARTITION BY, ORDER BY, MEASURES, ONE ROW PER MATCH,
    AFTER MATCH SKIP TO LAST, PATTERN and DEFINE, and aliases the clause as
    ``MR``.
    """
    # NOTE(review): the triple-quoted literal below appears on a single line in
    # this copy of the file. Because the check runs with pretty=True, the
    # expected text presumably spans several indented lines upstream -- restore
    # the original line breaks before relying on this test.
    self.validate_identity(
        """SELECT * FROM sales_history MATCH_RECOGNIZE ( PARTITION BY product ORDER BY tstamp MEASURES STRT.tstamp AS start_tstamp, LAST(UP.tstamp) AS peak_tstamp, LAST(DOWN.tstamp) AS end_tstamp, MATCH_NUMBER() AS mno ONE ROW PER MATCH AFTER MATCH SKIP TO LAST DOWN PATTERN (STRT UP+ FLAT* DOWN+) DEFINE UP AS UP.units_sold > PREV(UP.units_sold), FLAT AS FLAT.units_sold = PREV(FLAT.units_sold), DOWN AS DOWN.units_sold < PREV(DOWN.units_sold) ) MR""",
        pretty=True,
    )
def test_connect_by(self):
    """Check that START WITH / CONNECT BY are accepted in either order.

    Both orderings of the two clauses are appended to the same SELECT body and
    each is validated against the same pretty-printed expectation.
    """
    start = "START WITH last_name = 'King'"
    connect = "CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4"

    # NOTE(review): both triple-quoted literals appear on a single line in this
    # copy of the file; the pretty expectation presumably spans several lines
    # upstream -- confirm before relying on it.
    body = """ SELECT last_name "Employee", LEVEL, SYS_CONNECT_BY_PATH(last_name, '/') "Path" FROM employees WHERE level <= 3 AND department_id = 80 """
    pretty = """SELECT last_name AS "Employee", LEVEL, SYS_CONNECT_BY_PATH(last_name, '/') AS "Path" FROM employees WHERE level <= 3 AND department_id = 80 START WITH last_name = 'King' CONNECT BY PRIOR employee_id = manager_id AND LEVEL <= 4"""

    for query in (f"{body}{start}{connect}", f"{body}{connect}{start}"):
        self.validate_identity(query, pretty, pretty=True)


def test_query_restrictions(self):
    """Check ``WITH READ ONLY`` / ``WITH CHECK OPTION`` query restrictions.

    Every restriction is tried with and without a ``CONSTRAINT name`` suffix,
    both on a bare SELECT and inside a CREATE VIEW statement.
    """
    for restriction in ("READ ONLY", "CHECK OPTION"):
        for constraint_name in (" CONSTRAINT name", ""):
            # The label includes the constraint suffix so that the two
            # sub-tests of one restriction are distinguishable in a failure
            # report (previously both were reported under the same label).
            with self.subTest(f"Restriction: {restriction}{constraint_name}"):
                self.validate_identity(f"SELECT * FROM tbl WITH {restriction}{constraint_name}")
                self.validate_identity(
                    f"CREATE VIEW view AS SELECT * FROM tbl WITH {restriction}{constraint_name}"
                )
(id, day, val) VALUES (id, 'mon', mon_val) " "INTO pivot_dest (id, day, val) VALUES (id, 'tue', tue_val) " "INTO pivot_dest (id, day, val) VALUES (id, 'wed', wed_val) " "INTO pivot_dest (id, day, val) VALUES (id, 'thu', thu_val) " "INTO pivot_dest (id, day, val) VALUES (id, 'fri', fri_val) " "SELECT * " "FROM pivot_source" ) self.validate_identity( "INSERT ALL " "WHEN id <= 3 THEN " "INTO dest_tab1 (id, description) VALUES (id, description) " "WHEN id BETWEEN 4 AND 7 THEN " "INTO dest_tab2 (id, description) VALUES (id, description) " "WHEN id >= 8 THEN " "INTO dest_tab3 (id, description) VALUES (id, description) " "SELECT id, description " "FROM source_tab" ) self.validate_identity( "INSERT ALL " "WHEN id <= 3 THEN " "INTO dest_tab1 (id, description) VALUES (id, description) " "WHEN id BETWEEN 4 AND 7 THEN " "INTO dest_tab2 (id, description) VALUES (id, description) " "WHEN 1 = 1 THEN " "INTO dest_tab3 (id, description) VALUES (id, description) " "SELECT id, description " "FROM source_tab" ) self.validate_identity( "INSERT FIRST " "WHEN id <= 3 THEN " "INTO dest_tab1 (id, description) VALUES (id, description) " "WHEN id <= 5 THEN " "INTO dest_tab2 (id, description) VALUES (id, description) " "ELSE " "INTO dest_tab3 (id, description) VALUES (id, description) " "SELECT id, description " "FROM source_tab" ) self.validate_identity( "INSERT FIRST " "WHEN id <= 3 THEN " "INTO dest_tab1 (id, description) VALUES (id, description) " "ELSE " "INTO dest_tab2 (id, description) VALUES (id, description) " "INTO dest_tab3 (id, description) VALUES (id, description) " "SELECT id, description " "FROM source_tab" ) self.validate_identity( "/* COMMENT */ INSERT FIRST " "WHEN salary > 4000 THEN INTO emp2 " "WHEN salary > 5000 THEN INTO emp3 " "WHEN salary > 6000 THEN INTO emp4 " "SELECT salary FROM employees" ) def test_json_functions(self): for format_json in ("", " FORMAT JSON"): for on_cond in ( "", " TRUE ON ERROR", " NULL ON EMPTY", " DEFAULT 1 ON ERROR TRUE ON EMPTY", ): for 
def test_grant(self):
    """Check GRANT statements.

    The statements in ``grant_cmds`` are validated with
    ``check_command_warning=True``; the remaining ones use a plain identity
    check.
    """
    grant_cmds = [
        "GRANT purchases_reader_role TO george, maria",
        "GRANT USAGE ON TYPE price TO finance_role",
        "GRANT USAGE ON DERBY AGGREGATE types.maxPrice TO sales_role",
    ]

    for sql in grant_cmds:
        # Label typo fixed ("Oracles's" -> "Oracle's") to match test_revoke.
        with self.subTest(f"Testing Oracle's GRANT command statement: {sql}"):
            self.validate_identity(sql, check_command_warning=True)

    self.validate_identity("GRANT SELECT ON TABLE t TO maria, harry")
    self.validate_identity("GRANT SELECT ON TABLE s.v TO PUBLIC")
    self.validate_identity("GRANT SELECT ON TABLE t TO purchases_reader_role")
    self.validate_identity("GRANT UPDATE, TRIGGER ON TABLE t TO anita, zhi")
    self.validate_identity("GRANT EXECUTE ON PROCEDURE p TO george")
    self.validate_identity("GRANT USAGE ON SEQUENCE order_id TO sales_role")


def test_revoke(self):
    """Check REVOKE statements, mirroring the cases in ``test_grant``.

    The statements in ``revoke_cmds`` are validated with
    ``check_command_warning=True``; the remaining ones use a plain identity
    check.
    """
    revoke_cmds = [
        "REVOKE purchases_reader_role FROM george, maria",
        "REVOKE USAGE ON TYPE price FROM finance_role",
        "REVOKE USAGE ON DERBY AGGREGATE types.maxPrice FROM sales_role",
    ]

    for sql in revoke_cmds:
        with self.subTest(f"Testing Oracle's REVOKE command statement: {sql}"):
            self.validate_identity(sql, check_command_warning=True)

    self.validate_identity("REVOKE SELECT ON TABLE t FROM maria, harry")
    self.validate_identity("REVOKE SELECT ON TABLE s.v FROM PUBLIC")
    self.validate_identity("REVOKE SELECT ON TABLE t FROM purchases_reader_role")
    self.validate_identity("REVOKE UPDATE, TRIGGER ON TABLE t FROM anita, zhi")
    self.validate_identity("REVOKE EXECUTE ON PROCEDURE p FROM george")
    self.validate_identity("REVOKE USAGE ON SEQUENCE order_id FROM sales_role")
def test_trunc_type_inference(self):
    """Check which expression node ``TRUNC(a, b)`` is parsed into.

    Covers the argument combinations the TRUNC builder discriminates on
    (temporal+?, ?+string, numeric+?, ?+int, ?+?).
    """
    # Parsed only (no round-trip check): every case must yield DateTrunc.
    date_trunc_inputs = (
        "TRUNC(CAST(x AS DATE), 'MONTH')",  # temporal + string (explicit cast)
        "TRUNC(SYSDATE, 'MONTH')",  # temporal + string (SYSDATE)
        "TRUNC(col, 'MONTH')",  # untyped + string
    )
    for sql in date_trunc_inputs:
        self.parse_one(sql).assert_is(exp.DateTrunc)

    # Round-tripped through validate_identity, then the node type is checked.
    identity_cases = (
        ("TRUNC(3.14159, 2)", exp.Trunc),  # numeric literal + int
        ("TRUNC(price, 0)", exp.Trunc),  # untyped + int
        ("TRUNC(foo, bar)", exp.Anonymous),  # neither argument typed: fallback
    )
    for sql, node_type in identity_cases:
        self.validate_identity(sql).assert_is(node_type)
"MONTH", "QUARTER", "YEAR"): with self.subTest(f"Date TRUNC with {unit}"): self.validate_all( f"TRUNC(CAST(x AS DATE), '{unit}')", write={ "oracle": f"TRUNC(CAST(x AS DATE), '{unit}')", "snowflake": f"DATE_TRUNC('{unit}', CAST(x AS DATE))", "postgres": f"DATE_TRUNC('{unit}', CAST(x AS DATE))", "bigquery": f"DATE_TRUNC(CAST(x AS DATE), {unit})", "duckdb": f"DATE_TRUNC('{unit}', CAST(x AS DATE))", "tsql": f"DATE_TRUNC('{unit}', CAST(x AS DATE))", "spark": f"TRUNC(CAST(x AS DATE), '{unit}')", }, ) # Timestamp truncation with various units for unit in ("HOUR", "MINUTE", "SECOND", "DAY", "MONTH", "YEAR"): with self.subTest(f"Timestamp TRUNC with {unit}"): self.validate_all( f"TRUNC(CAST(x AS TIMESTAMP), '{unit}')", write={ "oracle": f"TRUNC(CAST(x AS TIMESTAMP), '{unit}')", "snowflake": f"DATE_TRUNC('{unit}', CAST(x AS TIMESTAMP))", "postgres": f"DATE_TRUNC('{unit}', CAST(x AS TIMESTAMP))", "duckdb": f"DATE_TRUNC('{unit}', CAST(x AS TIMESTAMP))", "tsql": f"DATE_TRUNC('{unit}', CAST(x AS DATETIME2))", "spark": f"TRUNC(CAST(x AS TIMESTAMP), '{unit}')", }, ) def test_analyze(self): self.validate_identity("ANALYZE TABLE tbl") self.validate_identity("ANALYZE INDEX ndx") self.validate_identity("ANALYZE TABLE db.tbl PARTITION(foo = 'foo', bar = 'bar')") self.validate_identity("ANALYZE TABLE db.tbl SUBPARTITION(foo = 'foo', bar = 'bar')") self.validate_identity("ANALYZE INDEX db.ndx PARTITION(foo = 'foo', bar = 'bar')") self.validate_identity("ANALYZE INDEX db.ndx PARTITION(part1)") self.validate_identity("ANALYZE CLUSTER db.cluster") self.validate_identity("ANALYZE TABLE tbl VALIDATE REF UPDATE") self.validate_identity("ANALYZE LIST CHAINED ROWS") self.validate_identity("ANALYZE LIST CHAINED ROWS INTO tbl") self.validate_identity("ANALYZE DELETE STATISTICS") self.validate_identity("ANALYZE DELETE SYSTEM STATISTICS") self.validate_identity("ANALYZE VALIDATE REF UPDATE") self.validate_identity("ANALYZE VALIDATE REF UPDATE SET DANGLING TO NULL") self.validate_identity("ANALYZE 
def test_prior(self):
    """Check PRIOR inside a CONNECT BY query and reject it as a bare statement."""
    hierarchical_sql = (
        "SELECT id, PRIOR name AS parent_name, name FROM tree "
        "CONNECT BY NOCYCLE PRIOR id = parent_id"
    )
    self.validate_identity(hierarchical_sql)

    # PRIOR followed directly by an alias must not parse under the Oracle reader.
    self.assertRaises(ParseError, parse_one, "PRIOR as foo", read="oracle")


def test_utc_time(self):
    """Check UTC_TIME / UTC_TIMESTAMP, with and without an argument."""
    cases = (
        ("UTC_TIME()", exp.UtcTime),
        ("UTC_TIME(6)", exp.UtcTime),
        ("UTC_TIMESTAMP()", exp.UtcTimestamp),
        ("UTC_TIMESTAMP(6)", exp.UtcTimestamp),
    )
    for sql, node_type in cases:
        self.validate_identity(sql).assert_is(node_type)


def test_merge_builder_alias(self):
    """Build a MERGE via ``exp.merge`` and compare the generated Oracle SQL."""
    when_matched = "WHEN MATCHED THEN UPDATE SET my_table.col1 = source_table.col1"
    when_not_matched = "WHEN NOT MATCHED THEN INSERT (my_table.id, my_table.col1) VALUES (source_table.id, source_table.col1)"

    built = exp.merge(
        when_matched,
        when_not_matched,
        into="my_table",
        using="(SELECT * FROM something) source_table",
        on="my_table.id = source_table.id",
        dialect="oracle",
    )

    expected_sql = "MERGE INTO my_table USING (SELECT * FROM something) source_table ON my_table.id = source_table.id WHEN MATCHED THEN UPDATE SET my_table.col1 = source_table.col1 WHEN NOT MATCHED THEN INSERT (my_table.id, my_table.col1) VALUES (source_table.id, source_table.col1)"
    self.assertEqual(built.sql("oracle"), expected_sql)


def test_pseudocolumns(self):
    """Check that Pseudocolumn nodes are present only after qualification."""
    tree = self.validate_identity(
        "WITH t AS (SELECT 1 AS COL) SELECT col, ROWID FROM t WHERE ROWNUM = 1"
    )

    # The freshly parsed tree must not contain a Pseudocolumn node yet; this
    # check has to run before qualify() is called on the tree.
    self.assertIsNone(tree.find(exp.Pseudocolumn))

    qualified_tree = qualify(tree, dialect="oracle")
    self.assertIsNotNone(qualified_tree.find(exp.Pseudocolumn))

    expected_sql = 'WITH "T" AS (SELECT 1 AS "COL") SELECT "T"."COL" AS "COL", ROWID AS "ROWID" FROM "T" "T" WHERE ROWNUM = 1'
    self.assertEqual(qualified_tree.sql(dialect="oracle"), expected_sql)
def test_full_procedure(self):
    """Parse complete CREATE PROCEDURE bodies and compare the regenerated SQL.

    Each source text is parsed with the Oracle reader; the resulting
    statements are paired with the expected strings and compared one by one.
    """
    # NOTE(review): the triple-quoted sources appear on a single line in this
    # copy of the file; upstream they presumably span several lines.
    cases = (
        (
            """ CREATE OR REPLACE PROCEDURE query_emp( p_id IN VARCHAR2, p_name OUT VARCHAR2, p_salary OUT NUMBER ) AS BEGIN SELECT last_name, salary INTO p_name, p_salary FROM employees WHERE employee_id = p_id; END; """,
            [
                "CREATE OR REPLACE PROCEDURE query_emp(p_id IN VARCHAR2, p_name OUT VARCHAR2, p_salary OUT NUMBER) AS BEGIN SELECT last_name, salary INTO p_name, p_salary FROM employees WHERE employee_id = p_id; END",
            ],
        ),
        (
            """ CREATE OR REPLACE PROCEDURE test_proc ( a NUMBER, b IN NUMBER, c IN OUT NUMBER, d OUT NUMBER ) AS BEGIN c := c + a + b; d := 42 + c; END; """,
            [
                "CREATE OR REPLACE PROCEDURE test_proc(a NUMBER, b IN NUMBER, c IN OUT NUMBER, d OUT NUMBER) AS BEGIN c := c + a + b; d := 42 + c; END",
            ],
        ),
    )

    for sql, expected_sqls in cases:
        # zip() stops at the shorter sequence, so only paired-up statements
        # are compared.
        for expr, expected_sql in zip(parse(sql, read="oracle"), expected_sqls):
            self.assertEqual(expr.sql(dialect="oracle"), expected_sql)


def test_create_trigger(self):
    """Test that Oracle CREATE TRIGGER statements fall back to Command parsing."""
    trigger_statements = (
        "CREATE TRIGGER check_salary BEFORE INSERT ON employees FOR EACH ROW BEGIN :NEW.status := 'PENDING' END",
        "CREATE TRIGGER audit_trigger AFTER UPDATE ON accounts FOR EACH ROW BEGIN INSERT INTO audit_log (user_id, old_balance, new_balance, changed_at) VALUES (:OLD.id, :OLD.balance, :NEW.balance, SYSDATE) END",
        "CREATE TRIGGER view_insert INSTEAD OF INSERT ON employee_view FOR EACH ROW BEGIN INSERT INTO employees (id, name, dept_id) VALUES (:NEW.id, :NEW.name, :NEW.dept_id) END",
    )
    for statement in trigger_statements:
        self.validate_identity(statement, check_command_warning=True)
tests.dialects.test_dialect import Validator class TestPipeSyntax(Validator): def test_select(self): self.validate_identity("FROM x", "SELECT * FROM x") self.validate_identity( "FROM x |> SELECT x1, x2", "WITH __tmp1 AS (SELECT x1, x2 FROM x) SELECT * FROM __tmp1" ) self.validate_identity( "FROM x |> SELECT x.x1, x.x2", "WITH __tmp1 AS (SELECT x.x1, x.x2 FROM x) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> SELECT x1 as c1, x2 as c2", "WITH __tmp1 AS (SELECT x1 AS c1, x2 AS c2 FROM x) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> SELECT x1 + 1 as x1_a, x2 - 1 as x2_a |> WHERE x1_a > 1", "WITH __tmp1 AS (SELECT x1 + 1 AS x1_a, x2 - 1 AS x2_a FROM x) SELECT * FROM __tmp1 WHERE x1_a > 1", ) self.validate_identity( "FROM x |> SELECT x1 + 1 as x1_a, x2 - 1 as x2_a |> WHERE x1_a > 1 |> SELECT x2_a", "WITH __tmp1 AS (SELECT x1 + 1 AS x1_a, x2 - 1 AS x2_a FROM x), __tmp2 AS (SELECT x2_a FROM __tmp1 WHERE x1_a > 1) SELECT * FROM __tmp2", ) self.validate_identity( "FROM x |> WHERE x1 > 0 OR x2 > 0 |> WHERE x3 > 1 AND x4 > 1 |> SELECT x1, x4", "WITH __tmp1 AS (SELECT x1, x4 FROM x WHERE (x1 > 0 OR x2 > 0) AND (x3 > 1 AND x4 > 1)) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> WHERE x1 > 1 |> WHERE x2 > 2 |> SELECT x1 as gt1, x2 as gt2", "WITH __tmp1 AS (SELECT x1 AS gt1, x2 AS gt2 FROM x WHERE x1 > 1 AND x2 > 2) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> WHERE x1 > 1 AND x2 > 2 |> SELECT x1 as gt1, x2 as gt2 |> SELECT gt1 * 2 + gt2 * 2 AS gt2_2", "WITH __tmp1 AS (SELECT x1 AS gt1, x2 AS gt2 FROM x WHERE x1 > 1 AND x2 > 2), __tmp2 AS (SELECT gt1 * 2 + gt2 * 2 AS gt2_2 FROM __tmp1) SELECT * FROM __tmp2", ) self.validate_identity( "SELECT 1 AS y, 2 AS x |> SELECT x, y", "WITH __tmp1 AS (SELECT x, y FROM (SELECT 1 AS y, 2 AS x)) SELECT * FROM __tmp1", ) self.validate_identity( "SELECT x1, x2, x3 FROM x |> AS a_x |> WHERE a_x.x1 > 0", "WITH a_x AS (SELECT x1, x2, x3 FROM x) SELECT * FROM a_x WHERE a_x.x1 > 0", ) 
self.validate_identity( "SELECT x,y FROM (SELECT 1 as x, 2 as y) |> SELECT x, y", "WITH __tmp1 AS (SELECT x, y FROM (SELECT 1 AS x, 2 AS y)) SELECT * FROM __tmp1", ) self.validate_identity( "SELECT 'foo1' AS item1, 2 AS item2 UNION ALL SELECT 'foo2' AS item1, 5 AS item2 |> EXTEND SUM(item2) OVER() AS item2_sum", "WITH __tmp1 AS (SELECT *, SUM(item2) OVER () AS item2_sum FROM (SELECT 'foo1' AS item1, 2 AS item2 UNION ALL SELECT 'foo2' AS item1, 5 AS item2)) SELECT * FROM __tmp1", ) self.validate_identity( "SELECT x, x1 FROM (FROM (SELECT 1 as x, 2 as x1) |> AGGREGATE SUM(x1) as xx GROUP BY x,x1) |> SELECT x", "WITH __tmp2 AS (SELECT x FROM (SELECT * FROM (WITH __tmp1 AS (SELECT SUM(x1) AS xx, x, x1 FROM (SELECT 1 AS x, 2 AS x1) GROUP BY x, x1) SELECT * FROM __tmp1))) SELECT * FROM __tmp2", ) self.validate_identity( "FROM (SELECT 1 as x1) AS x |> SELECT x.x1 |> UNION ALL (FROM (SELECT 1 AS c) |> SELECT c) |> SELECT x1", "SELECT * FROM (WITH __tmp1 AS (SELECT x.x1 FROM (SELECT 1 AS x1) AS x), __tmp3 AS (SELECT * FROM __tmp1), __tmp4 AS (SELECT * FROM __tmp3 UNION ALL SELECT * FROM (WITH __tmp2 AS (SELECT c FROM (SELECT 1 AS c)) SELECT * FROM __tmp2)), __tmp5 AS (SELECT x1 FROM __tmp4) SELECT * FROM __tmp5)", ) self.validate_identity( "FROM (SELECT x1 FROM (SELECT 1 as x1) |> SELECT x1) |> SELECT x1", "SELECT * FROM (WITH __tmp2 AS (SELECT x1 FROM ((WITH __tmp1 AS (SELECT x1 FROM (SELECT 1 AS x1)) SELECT * FROM __tmp1))) SELECT * FROM __tmp2)", ) self.validate_identity( "SELECT * FROM (FROM t2 |> SELECT id)", "SELECT * FROM (WITH __tmp1 AS (SELECT id FROM t2) SELECT * FROM __tmp1)", ) self.validate_identity( "SELECT * FROM t1 LEFT JOIN (FROM t2 |> SELECT id) ON TRUE", "SELECT * FROM t1 LEFT JOIN (WITH __tmp1 AS (SELECT id FROM t2) SELECT * FROM __tmp1) ON TRUE", ) def test_order_by(self): self.validate_identity("FROM x |> ORDER BY x1", "SELECT * FROM x ORDER BY x1") self.validate_identity( "FROM x |> ORDER BY x1 |> ORDER BY x2", "SELECT * FROM x ORDER BY x2" ) 
def test_limit(self):
    """Check the pipe-syntax LIMIT operator, with and without OFFSET.

    Each ``validate_identity`` call passes a pipe-syntax query followed by the
    SQL it is expected to produce.
    """
    for option in ("LIMIT 1", "LIMIT 1 OFFSET 2"):
        with self.subTest(f"Testing pipe syntax LIMIT and OFFSET option: {option}"):
            # LIMIT directly after FROM. (This assertion used to be issued
            # twice with identical arguments; the duplicate was dropped.)
            self.validate_identity(f"FROM x |> {option}", f"SELECT * FROM x {option}")

            # LIMIT before the projection.
            self.validate_identity(
                f"FROM x |> {option} |> SELECT x1, x2 |> WHERE x1 > 0 |> WHERE x2 > 0 |> ORDER BY x1, x2",
                f"WITH __tmp1 AS (SELECT x1, x2 FROM x {option}) SELECT * FROM __tmp1 WHERE x1 > 0 AND x2 > 0 ORDER BY x1, x2",
            )

            # LIMIT as the final operator.
            self.validate_identity(
                f"FROM x |> SELECT x1, x2 |> WHERE x1 > 0 |> WHERE x2 > 0 |> ORDER BY x1, x2 |> {option}",
                f"WITH __tmp1 AS (SELECT x1, x2 FROM x) SELECT * FROM __tmp1 WHERE x1 > 0 AND x2 > 0 ORDER BY x1, x2 {option}",
            )

    # The remaining cases do not depend on ``option``: two consecutive LIMIT
    # operators are expected to collapse into a single LIMIT clause.
    self.validate_identity(
        "FROM x |> SELECT x1, x2 |> LIMIT 2 |> LIMIT 4",
        "WITH __tmp1 AS (SELECT x1, x2 FROM x) SELECT * FROM __tmp1 LIMIT 2",
    )
    self.validate_identity(
        "FROM x |> SELECT x1, x2 |> LIMIT 2 OFFSET 2 |> LIMIT 4 OFFSET 2",
        "WITH __tmp1 AS (SELECT x1, x2 FROM x) SELECT * FROM __tmp1 LIMIT 2 OFFSET 4",
    )
__tmp1 AS (SELECT SUM(x1), MAX(x2), MIN(x3) FROM x) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1) AS s_x1 |> SELECT s_x1", "WITH __tmp1 AS (SELECT SUM(x1) AS s_x1 FROM x), __tmp2 AS (SELECT s_x1 FROM __tmp1) SELECT * FROM __tmp2", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1), MAX(x2), MIN(x3) GROUP BY x4, x5", "WITH __tmp1 AS (SELECT SUM(x1), MAX(x2), MIN(x3), x4, x5 FROM x GROUP BY x4, x5) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1), MAX(x2), MIN(x3) GROUP BY x4 AS a_x4, x5 AS a_x5", "WITH __tmp1 AS (SELECT SUM(x1), MAX(x2), MIN(x3), x4 AS a_x4, x5 AS a_x5 FROM x GROUP BY a_x4, a_x5) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1) as s_x1 GROUP BY x1 |> SELECT s_x1, x1 as ss_x1", "WITH __tmp1 AS (SELECT SUM(x1) AS s_x1, x1 FROM x GROUP BY x1), __tmp2 AS (SELECT s_x1, x1 AS ss_x1 FROM __tmp1) SELECT * FROM __tmp2", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1) GROUP", "WITH __tmp1 AS (SELECT SUM(x1) AS GROUP FROM x) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> AGGREGATE SUM(x1) as s_x1 GROUP BY x2 as g_x2 |> WHERE s_x1 > 0", "WITH __tmp1 AS (SELECT SUM(x1) AS s_x1, x2 AS g_x2 FROM x GROUP BY g_x2) SELECT * FROM __tmp1 WHERE s_x1 > 0", ) for order_option in ("ASC", "DESC", "ASC NULLS LAST", "DESC NULLS FIRST"): with self.subTest(f"Testing pipe syntax AGGREGATE for order option: {order_option}"): self.validate_all( f"WITH __tmp1 AS (SELECT SUM(x1) AS x_s FROM x ORDER BY x_s {order_option}) SELECT * FROM __tmp1", read={ "bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s {order_option}", }, ) self.validate_all( f"WITH __tmp1 AS (SELECT SUM(x1) AS x_s, x1 AS g_x1 FROM x GROUP BY g_x1 ORDER BY x_s {order_option}) SELECT * FROM __tmp1", read={ "bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s {order_option} GROUP BY x1 AS g_x1", }, ) with self.subTest( f"Testing pipe syntax AGGREGATE with GROUP AND ORDER BY for order option: {order_option}" ): 
self.validate_all( f"WITH __tmp1 AS (SELECT SUM(x1) AS x_s, x1 AS g_x1 FROM x GROUP BY g_x1 ORDER BY g_x1 {order_option}), __tmp2 AS (SELECT g_x1, x_s FROM __tmp1) SELECT * FROM __tmp2", read={ "bigquery": f"FROM x |> AGGREGATE SUM(x1) AS x_s GROUP AND ORDER BY x1 AS g_x1 {order_option} |> SELECT g_x1, x_s", }, ) def test_set_operators(self): self.validate_identity( "FROM x |> SELECT x.x1 |> UNION ALL (SELECT 1 AS c)", "WITH __tmp1 AS (SELECT x.x1 FROM x), __tmp2 AS (SELECT * FROM __tmp1), __tmp3 AS (SELECT * FROM __tmp2 UNION ALL SELECT 1 AS c) SELECT * FROM __tmp3", ) for op_operator in ( "UNION ALL", "UNION DISTINCT", "INTERSECT DISTINCT", "EXCEPT DISTINCT", ): with self.subTest(f"Testing pipe syntax SET OPERATORS: {op_operator}"): self.validate_all( f"FROM x|> {op_operator} (SELECT y1 FROM y), (SELECT z1 FROM z)", write={ "bigquery": f"WITH __tmp1 AS (SELECT * FROM x), __tmp2 AS (SELECT * FROM __tmp1 {op_operator} SELECT y1 FROM y {op_operator} SELECT z1 FROM z) SELECT * FROM __tmp2" }, ) for op_prefix in ("LEFT OUTER", "FULL OUTER"): for op_operator in ( "UNION ALL", "UNION DISTINCT", "INTERSECT DISTINCT", "EXCEPT DISTINCT", ): with self.subTest(f"Testing pipe syntax SET OPERATORS: {op_prefix} {op_operator}"): self.validate_all( f"FROM x|> SELECT x1, x2 |> {op_prefix} {op_operator} BY NAME (SELECT y1, y2 FROM y), (SELECT z1, z2 FROM z)", write={ "bigquery": f"WITH __tmp1 AS (SELECT x1, x2 FROM x), __tmp2 AS (SELECT * FROM __tmp1), __tmp3 AS (SELECT * FROM __tmp2 {op_prefix} {op_operator} BY NAME SELECT y1, y2 FROM y {op_prefix} {op_operator} BY NAME SELECT z1, z2 FROM z) SELECT * FROM __tmp3", }, ) self.validate_identity( "FROM d.x |> SELECT x.x1 |> UNION (SELECT 2 AS a1) |> SELECT x1 |> UNION (SELECT 3 as a2) |> SELECT x1 |> WHERE x1 > 100", """WITH __tmp1 AS ( SELECT x.x1 FROM d.x ), __tmp2 AS ( SELECT * FROM __tmp1 ), __tmp3 AS ( SELECT * FROM __tmp2 UNION SELECT 2 AS a1 ), __tmp4 AS ( SELECT x1 FROM __tmp3 ), __tmp5 AS ( SELECT * FROM __tmp4 ), __tmp6 AS ( 
SELECT * FROM __tmp5 UNION SELECT 3 AS a2 ), __tmp7 AS ( SELECT x1 FROM __tmp6 ) SELECT * FROM __tmp7 WHERE x1 > 100""", pretty=True, ) self.validate_identity( "FROM c.x |> UNION ALL (SELECT 2 AS a1, '2' as a2) |> AGGREGATE AVG(x1) as m_x1 |> SELECT * |> UNION ALL (SELECT y1 FROM c.y) |> SELECT m_x1", """WITH __tmp1 AS ( SELECT * FROM c.x ), __tmp2 AS ( SELECT * FROM __tmp1 UNION ALL SELECT 2 AS a1, '2' AS a2 ), __tmp3 AS ( SELECT AVG(x1) AS m_x1 FROM __tmp2 ), __tmp4 AS ( SELECT * FROM __tmp3 ), __tmp5 AS ( SELECT * FROM __tmp4 ), __tmp6 AS ( SELECT * FROM __tmp5 UNION ALL SELECT y1 FROM c.y ), __tmp7 AS ( SELECT m_x1 FROM __tmp6 ) SELECT * FROM __tmp7""", pretty=True, ) self.validate_identity( "FROM c.x |> UNION ALL (SELECT 2 AS a1, '2' as a2) |> UNION ALL (SELECT y1 FROM c.y) |> WHERE x > 200", """WITH __tmp1 AS ( SELECT * FROM c.x ), __tmp2 AS ( SELECT * FROM __tmp1 UNION ALL SELECT 2 AS a1, '2' AS a2 ), __tmp3 AS ( SELECT * FROM __tmp2 ), __tmp4 AS ( SELECT * FROM __tmp3 UNION ALL SELECT y1 FROM c.y ) SELECT * FROM __tmp4 WHERE x > 200""", pretty=True, ) def test_join(self): self.validate_identity("FROM x |> CROSS JOIN y", "SELECT * FROM x CROSS JOIN y") for join_type in ( "JOIN", "INNER JOIN", "FULL JOIN", "FULL OUTER JOIN", "LEFT JOIN", "LEFT OUTER JOIN", "RIGHT JOIN", "RIGHT OUTER JOIN", ): with self.subTest(f"Testing pipe syntax no projecton with JOIN : {join_type}"): self.validate_identity( f"FROM x |> {join_type} y ON x.id = y.id", f"SELECT * FROM x {join_type} y ON x.id = y.id", ) with self.subTest(f"Testing pipe syntax projection with JOIN: {join_type}"): self.validate_identity( f"FROM x |> SELECT id |> {join_type} y ON x.id = y.id", f"WITH __tmp1 AS (SELECT id FROM x) SELECT * FROM __tmp1 {join_type} y ON x.id = y.id", ) with self.subTest(f"Testing pipe syntax complex queries with JOIN: {join_type}"): self.validate_identity( f"FROM x |> {join_type} y ON x.id = y.id |> SELECT x1 as a_x1, x2 |> UNION ALL (SELECT 1, 2) |> WHERE a_x1 > 0", f"""WITH __tmp1 
def test_pivot_unpivot(self):
    """Check the pipe-syntax PIVOT and UNPIVOT operators, alone and after a JOIN."""
    # (pipe-syntax input, expected SQL)
    cases = (
        (
            "FROM x |> PIVOT(SUM(x1) FOR quarter IN ('foo1', 'foo2'))",
            "WITH __tmp1 AS (SELECT * FROM x PIVOT(SUM(x1) FOR quarter IN ('foo1', 'foo2'))) SELECT * FROM __tmp1",
        ),
        (
            "FROM x |> JOIN y on x.id = y.id |> PIVOT(SUM(x1) FOR quarter IN ('foo1', 'foo2'))",
            "WITH __tmp1 AS (SELECT * FROM x PIVOT(SUM(x1) FOR quarter IN ('foo1', 'foo2')) JOIN y ON x.id = y.id) SELECT * FROM __tmp1",
        ),
        (
            "FROM x |> UNPIVOT(col FOR item IN (foo1, foo2))",
            "WITH __tmp1 AS (SELECT * FROM x UNPIVOT(col FOR item IN (foo1, foo2))) SELECT * FROM __tmp1",
        ),
        (
            "FROM x |> JOIN y on x.id = y.id |> UNPIVOT(col FOR item IN (foo1, foo2))",
            "WITH __tmp1 AS (SELECT * FROM x UNPIVOT(col FOR item IN (foo1, foo2)) JOIN y ON x.id = y.id) SELECT * FROM __tmp1",
        ),
    )
    for pipe_sql, expected_sql in cases:
        self.validate_identity(pipe_sql, expected_sql)


def test_as(self):
    """Check the pipe-syntax AS operator, which names the intermediate result."""
    # (pipe-syntax input, expected SQL)
    cases = (
        (
            "FROM x |> AS a_x |> WHERE a_x.x1 > 0",
            "WITH a_x AS (SELECT * FROM x) SELECT * FROM a_x WHERE a_x.x1 > 0",
        ),
        (
            "FROM x AS t |> AGGREGATE SUM(x1) AS s_x1 GROUP BY id, x2 |> AS t1 |> JOIN y AS t2 ON t1.id = t2.id |> SELECT t2.id, s_x1",
            "WITH __tmp1 AS (SELECT SUM(x1) AS s_x1, id, x2 FROM x AS t GROUP BY id, x2), t1 AS (SELECT * FROM __tmp1), __tmp2 AS (SELECT t2.id, s_x1 FROM t1 JOIN y AS t2 ON t1.id = t2.id) SELECT * FROM __tmp2",
        ),
        (
            "FROM x |> JOIN y ON x.x1 = y.y1 |> AS a |> WHERE a.x2 > 1",
            "WITH a AS (SELECT * FROM x JOIN y ON x.x1 = y.y1) SELECT * FROM a WHERE a.x2 > 1",
        ),
    )
    for pipe_sql, expected_sql in cases:
        self.validate_identity(pipe_sql, expected_sql)
EXTEND x1 + 1 as x1_1, x2 + 1 as x2_1 |> WHERE x1_1 > 0 AND x2_1 > 0", "WITH __tmp1 AS (SELECT x.x1, x.x2 FROM x), __tmp2 AS (SELECT *, x1 + 1 AS x1_1, x2 + 1 AS x2_1 FROM __tmp1) SELECT * FROM __tmp2 WHERE x1_1 > 0 AND x2_1 > 0", ) self.validate_identity( "FROM (SELECT 'foo1' AS item1, 2 AS item2 UNION ALL SELECT 'foo2' AS item1, 5 AS item2) |> EXTEND SUM(item2) OVER() AS item2_sum", "SELECT * FROM (WITH __tmp1 AS (SELECT *, SUM(item2) OVER () AS item2_sum FROM (SELECT 'foo1' AS item1, 2 AS item2 UNION ALL SELECT 'foo2' AS item1, 5 AS item2)) SELECT * FROM __tmp1)", ) def test_tablesample(self): self.validate_identity( "FROM x |> TABLESAMPLE SYSTEM (1 PERCENT)", "SELECT * FROM x TABLESAMPLE SYSTEM (1 PERCENT)", ) self.validate_identity( "FROM x |> SELECT x.x1 |> TABLESAMPLE SYSTEM (1 PERCENT)", "WITH __tmp1 AS (SELECT x.x1 FROM x TABLESAMPLE SYSTEM (1 PERCENT)) SELECT * FROM __tmp1", ) self.validate_identity( "FROM x |> TABLESAMPLE SYSTEM (1 PERCENT) |> WHERE x.x1 > 0 |> SELECT x1, x2", "WITH __tmp1 AS (SELECT x1, x2 FROM x WHERE x.x1 > 0 TABLESAMPLE SYSTEM (1 PERCENT)) SELECT * FROM __tmp1", ) ================================================ FILE: tests/dialects/test_postgres.py ================================================ from sqlglot import ParseError, UnsupportedError, exp, transpile from sqlglot.helper import logger as helper_logger from tests.dialects.test_dialect import Validator class TestPostgres(Validator): maxDiff = None dialect = "postgres" def test_postgres(self): expr = self.parse_one("SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)") unnest = expr.args["joins"][0].this.this unnest.assert_is(exp.Unnest) alter_table_only = """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE NO ACTION ON UPDATE NO ACTION""" expr = self.parse_one(alter_table_only) self.assertIsInstance(expr, exp.Alter) self.assertEqual(expr.sql(dialect="postgres"), alter_table_only) sql 
= "ARRAY[x" + ",x" * 27 + "]" expected_sql = "ARRAY[\n x" + (",\n x" * 27) + "\n]" self.validate_identity(sql, expected_sql, pretty=True) self.validate_identity("SELECT GET_BIT(CAST(44 AS BIT(10)), 6)") self.validate_identity("SELECT * FROM t GROUP BY ROLLUP (a || '^' || b)") self.validate_identity("SELECT COSH(1.5)") self.validate_identity("SELECT EXP(1)") self.validate_identity( "SELECT MODE() WITHIN GROUP (ORDER BY status DESC) AS most_common FROM orders" ) self.validate_identity("SELECT ST_DISTANCE(gg1, gg2, FALSE) AS sphere_dist") self.validate_identity("SHA384(x)") self.validate_identity("1.x", "1. AS x") self.validate_identity("|/ x", "SQRT(x)") self.validate_identity("||/ x", "CBRT(x)") self.validate_identity("SELECT EXTRACT(QUARTER FROM CAST('2025-04-26' AS DATE))") self.validate_identity("SELECT DATE_TRUNC('QUARTER', CAST('2025-04-26' AS DATE))") self.validate_identity("STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')") self.validate_identity("SELECT x FROM t WHERE CAST($1 AS TEXT) = 'ok'") self.validate_identity("SELECT * FROM t TABLESAMPLE SYSTEM (50) REPEATABLE (55)") self.validate_identity("x @@ y") self.validate_identity("CAST(x AS MONEY)") self.validate_identity("CAST(x AS INT4RANGE)") self.validate_identity("CAST(x AS INT4MULTIRANGE)") self.validate_identity("CAST(x AS INT8RANGE)") self.validate_identity("CAST(x AS INT8MULTIRANGE)") self.validate_identity("CAST(x AS NUMRANGE)") self.validate_identity("CAST(x AS NUMMULTIRANGE)") self.validate_identity("CAST(x AS TSRANGE)") self.validate_identity("CAST(x AS TSMULTIRANGE)") self.validate_identity("CAST(x AS TSTZRANGE)") self.validate_identity("CAST(x AS TSTZMULTIRANGE)") self.validate_identity("CAST(x AS DATERANGE)") self.validate_identity("CAST(x AS DATEMULTIRANGE)") self.validate_identity("x$") self.validate_identity("LENGTH(x)") self.validate_identity("LENGTH(x, utf8)") self.validate_identity("CHAR_LENGTH(x)", "LENGTH(x)") self.validate_identity("CHARACTER_LENGTH(x)", "LENGTH(x)") 
self.validate_identity("SELECT ARRAY[1, 2, 3]") self.validate_identity("SELECT ARRAY(SELECT 1)") self.validate_identity("STRING_AGG(x, y)") self.validate_identity("STRING_AGG(x, ',' ORDER BY y)") self.validate_identity("STRING_AGG(x, ',' ORDER BY y DESC)") self.validate_identity("STRING_AGG(DISTINCT x, ',' ORDER BY y DESC)") self.validate_identity("SELECT CASE WHEN SUBSTRING('abcdefg') IN ('ab') THEN 1 ELSE 0 END") self.validate_identity("COMMENT ON TABLE mytable IS 'this'") self.validate_identity("COMMENT ON MATERIALIZED VIEW my_view IS 'this'") self.validate_identity("SELECT e'\\xDEADBEEF'") self.validate_identity("SELECT CAST(e'\\176' AS BYTEA)") self.validate_identity("SELECT * FROM x WHERE SUBSTRING('Thomas' FROM '...$') IN ('mas')") self.validate_identity("SELECT TRIM(' X' FROM ' XXX ')") self.validate_identity("SELECT TRIM(LEADING 'bla' FROM ' XXX ' COLLATE utf8_bin)") self.validate_identity("""SELECT * FROM JSON_TO_RECORDSET(z) AS y("rank" INT)""") self.validate_identity("SELECT ~x") self.validate_identity("x ~ 'y'") self.validate_identity("x ~* 'y'") self.validate_identity("SELECT * FROM r CROSS JOIN LATERAL UNNEST(ARRAY[1]) AS s(location)") self.validate_identity("CAST(1 AS DECIMAL) / CAST(2 AS DECIMAL) * -100") self.validate_identity("EXEC AS myfunc @id = 123", check_command_warning=True) self.validate_identity("SELECT CURRENT_SCHEMA") self.validate_identity("SELECT CURRENT_USER") self.validate_identity("SELECT CURRENT_ROLE") self.validate_identity("SELECT VERSION()") self.validate_identity("SELECT * FROM ONLY t1") self.validate_identity("SELECT INTERVAL '-1 MONTH'") self.validate_identity("SELECT INTERVAL '4.1 DAY'") self.validate_identity("SELECT INTERVAL '3.14159 HOUR'") self.validate_identity("SELECT INTERVAL '2.5 MONTH'") self.validate_identity("SELECT INTERVAL '-10.75 MINUTE'") self.validate_identity("SELECT INTERVAL '0.123456789 SECOND'") self.validate_identity( "SELECT SUM(x) OVER (PARTITION BY y ORDER BY interval ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW) - SUM(x) OVER (PARTITION BY y ORDER BY interval ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS total" ) self.validate_identity( "SELECT * FROM test_data, LATERAL JSONB_ARRAY_ELEMENTS(data) WITH ORDINALITY AS elem(value, ordinality)" ) self.validate_identity( "SELECT id, name FROM xml_data AS t, XMLTABLE('/root/user' PASSING t.xml COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()') AS x" ) self.validate_identity( "SELECT id, value FROM xml_content AS t, XMLTABLE(XMLNAMESPACES('http://example.com/ns1' AS ns1, 'http://example.com/ns2' AS ns2), '/root/data' PASSING t.xml COLUMNS id INT PATH '@ns1:id', value TEXT PATH 'ns2:value/text()') AS x" ) self.validate_identity( "SELECT * FROM t WHERE some_column >= CURRENT_DATE + INTERVAL '1 day 1 hour' AND some_another_column IS TRUE" ) self.validate_identity( """UPDATE "x" SET "y" = CAST('0 days 60.000000 seconds' AS INTERVAL) WHERE "x"."id" IN (2, 3)""" ) self.validate_identity( "WITH t1 AS MATERIALIZED (SELECT 1), t2 AS NOT MATERIALIZED (SELECT 2) SELECT * FROM t1, t2" ) self.validate_identity( """LAST_VALUE("col1") OVER (ORDER BY "col2" RANGE BETWEEN INTERVAL '1 DAY' PRECEDING AND '1 month' FOLLOWING)""" ) self.validate_identity( """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE CASCADE""" ) self.validate_identity( """ALTER TABLE ONLY "Album" ADD CONSTRAINT "FK_AlbumArtistId" FOREIGN KEY ("ArtistId") REFERENCES "Artist" ("ArtistId") ON DELETE RESTRICT""" ) self.validate_identity( "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY" ) self.validate_identity( "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json" ) self.validate_identity( "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json(a, b)" ) self.validate_identity( "SELECT SUM(x) OVER a, SUM(y) OVER b FROM c WINDOW a AS (PARTITION BY d), b AS (PARTITION BY e)" ) 
self.validate_identity( "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1) IN ('ab') THEN 1 ELSE 0 END" ) self.validate_identity( "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1 FOR 2) IN ('ab') THEN 1 ELSE 0 END" ) self.validate_identity( 'SELECT * FROM "x" WHERE SUBSTRING("x"."foo" FROM 1 FOR 2) IN (\'mas\')' ) self.validate_identity( "SELECT * FROM x WHERE SUBSTRING('Thomas' FROM '%#\"o_a#\"_' FOR '#') IN ('mas')" ) self.validate_identity( "SELECT SUBSTRING('bla' + 'foo' || 'bar' FROM 3 - 1 + 5 FOR 4 + SOME_FUNC(arg1, arg2))" ) self.validate_identity( "SELECT TO_TIMESTAMP(1284352323.5), TO_TIMESTAMP('05 Dec 2000', 'DD Mon YYYY')" ) self.validate_identity( "SELECT TO_TIMESTAMP('05 Dec 2000 10:00 AM', 'DD Mon YYYY HH:MI AM')" ) self.validate_identity( "SELECT TO_TIMESTAMP('05 Dec 2000 10:00 PM', 'DD Mon YYYY HH:MI PM')" ) self.validate_identity( "SELECT * FROM foo, LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id) AS ss" ) self.validate_identity( "SELECT c.oid, n.nspname, c.relname " "FROM pg_catalog.pg_class AS c " "LEFT JOIN pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace " "WHERE c.relname OPERATOR(pg_catalog.~) '^(courses)$' COLLATE pg_catalog.default AND " "pg_catalog.PG_TABLE_IS_VISIBLE(c.oid) " "ORDER BY 2, 3" ) self.validate_identity( "SELECT e'foo \\' bar'", "SELECT e'foo '' bar'", ) self.validate_identity("SELECT e'\\n'") self.validate_identity("SELECT e'\\t'") self.validate_identity( "SELECT e'update table_name set a = \\'foo\\' where 1 = 0' AS x FROM tab", "SELECT e'update table_name set a = ''foo'' where 1 = 0' AS x FROM tab", ) self.validate_identity( "select count() OVER(partition by a order by a range offset preceding exclude current row)", "SELECT COUNT() OVER (PARTITION BY a ORDER BY a range BETWEEN offset preceding AND CURRENT ROW EXCLUDE CURRENT ROW)", ) self.validate_identity( "x::JSON -> 'duration' ->> -1", "JSON_EXTRACT_PATH_TEXT(CAST(x AS JSON) -> 'duration', -1)", ).assert_is(exp.JSONExtractScalar).this.assert_is(exp.JSONExtract) 
self.validate_identity( "SELECT SUBSTRING('Thomas' FOR 3 FROM 2)", "SELECT SUBSTRING('Thomas' FROM 2 FOR 3)", ) self.validate_identity( "SELECT ARRAY[1, 2, 3] <@ ARRAY[1, 2]", "SELECT ARRAY[1, 2] @> ARRAY[1, 2, 3]", ) self.validate_identity( "SELECT DATE_PART('isodow'::varchar(6), current_date)", "SELECT EXTRACT(CAST('isodow' AS VARCHAR(6)) FROM CURRENT_DATE)", ) self.validate_identity( "END WORK AND NO CHAIN", "COMMIT AND NO CHAIN", ) self.validate_identity( "END AND CHAIN", "COMMIT AND CHAIN", ) self.validate_identity( """x ? 'x'""", "x ? 'x'", ) self.validate_identity( "SELECT $$a$$", "SELECT 'a'", ) self.validate_identity( "SELECT $$Dianne's horse$$", "SELECT 'Dianne''s horse'", ) self.validate_identity( "SELECT $$The price is $9.95$$ AS msg", "SELECT 'The price is $9.95' AS msg", ) self.validate_identity( "COMMENT ON TABLE mytable IS $$doc this$$", "COMMENT ON TABLE mytable IS 'doc this'" ) self.validate_identity( "UPDATE MYTABLE T1 SET T1.COL = 13", "UPDATE MYTABLE AS T1 SET T1.COL = 13", ) self.validate_identity( "x !~ 'y'", "NOT x ~ 'y'", ) self.validate_identity( "x !~* 'y'", "NOT x ~* 'y'", ) self.validate_identity( "x ~~ 'y'", "x LIKE 'y'", ) self.validate_identity( "x ~~* 'y'", "x ILIKE 'y'", ) self.validate_identity( "x !~~ 'y'", "NOT x LIKE 'y'", ) self.validate_identity( "x !~~* 'y'", "NOT x ILIKE 'y'", ) self.validate_identity( "'45 days'::interval day", "CAST('45 days' AS INTERVAL DAY)", ) self.validate_identity( "'x' 'y' 'z'", "CONCAT('x', 'y', 'z')", ) self.validate_identity( "x::cstring", "CAST(x AS CSTRING)", ) self.validate_identity( "x::oid", "CAST(x AS OID)", ) self.validate_identity( "x::regclass", "CAST(x AS REGCLASS)", ) self.validate_identity( "x::regcollation", "CAST(x AS REGCOLLATION)", ) self.validate_identity( "x::regconfig", "CAST(x AS REGCONFIG)", ) self.validate_identity( "x::regdictionary", "CAST(x AS REGDICTIONARY)", ) self.validate_identity( "x::regnamespace", "CAST(x AS REGNAMESPACE)", ) self.validate_identity( "x::regoper", 
"CAST(x AS REGOPER)", ) self.validate_identity( "x::regoperator", "CAST(x AS REGOPERATOR)", ) self.validate_identity( "x::regproc", "CAST(x AS REGPROC)", ) self.validate_identity( "x::regprocedure", "CAST(x AS REGPROCEDURE)", ) self.validate_identity( "x::regrole", "CAST(x AS REGROLE)", ) self.validate_identity( "x::regtype", "CAST(x AS REGTYPE)", ) self.validate_identity( "123::CHARACTER VARYING", "CAST(123 AS VARCHAR)", ) self.validate_identity( "TO_TIMESTAMP(123::DOUBLE PRECISION)", "TO_TIMESTAMP(CAST(123 AS DOUBLE PRECISION))", ) self.validate_identity( "SELECT to_timestamp(123)::time without time zone", "SELECT CAST(TO_TIMESTAMP(123) AS TIME)", ) self.validate_identity( "SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS 1 PRECEDING)", "SELECT SUM(x) OVER (PARTITION BY a ORDER BY d ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)", ) self.validate_identity( "SELECT SUBSTRING(2022::CHAR(4) || LPAD(3::CHAR(2), 2, '0') FROM 3 FOR 4)", "SELECT SUBSTRING(CAST(2022 AS CHAR(4)) || LPAD(CAST(3 AS CHAR(2)), 2, '0') FROM 3 FOR 4)", ) self.validate_identity( "SELECT m.name FROM manufacturers AS m LEFT JOIN LATERAL GET_PRODUCT_NAMES(m.id) pname ON TRUE WHERE pname IS NULL", "SELECT m.name FROM manufacturers AS m LEFT JOIN LATERAL GET_PRODUCT_NAMES(m.id) AS pname ON TRUE WHERE pname IS NULL", ) self.validate_identity( "SELECT p1.id, p2.id, v1, v2 FROM polygons AS p1, polygons AS p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE (v1 <-> v2) < 10 AND p1.id <> p2.id", "SELECT p1.id, p2.id, v1, v2 FROM polygons AS p1, polygons AS p2, LATERAL VERTICES(p1.poly) AS v1, LATERAL VERTICES(p2.poly) AS v2 WHERE (v1 <-> v2) < 10 AND p1.id <> p2.id", ) self.validate_identity( "SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE deleted NOTNULL", "SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE NOT deleted IS NULL", ) self.validate_identity( "SELECT id, email, CAST(deleted AS TEXT) FROM users WHERE NOT deleted ISNULL", "SELECT id, email, CAST(deleted AS TEXT) 
FROM users WHERE NOT deleted IS NULL", ) self.validate_identity( """'{"x": {"y": 1}}'::json->'x'->'y'""", """CAST('{"x": {"y": 1}}' AS JSON) -> 'x' -> 'y'""", ) self.validate_identity( """'[1,2,3]'::json->>2""", "CAST('[1,2,3]' AS JSON) ->> 2", ) self.validate_identity( """'{"a":1,"b":2}'::json->>'b'""", """CAST('{"a":1,"b":2}' AS JSON) ->> 'b'""", ) self.validate_identity( """'{"a":[1,2,3],"b":[4,5,6]}'::json#>'{a,2}'""", """CAST('{"a":[1,2,3],"b":[4,5,6]}' AS JSON) #> '{a,2}'""", ) self.validate_identity( """'{"a":[1,2,3],"b":[4,5,6]}'::json#>>'{a,2}'""", """CAST('{"a":[1,2,3],"b":[4,5,6]}' AS JSON) #>> '{a,2}'""", ) self.validate_identity( "'[1,2,3]'::json->2", "CAST('[1,2,3]' AS JSON) -> 2", ) self.validate_identity( """SELECT JSON_ARRAY_ELEMENTS((foo->'sections')::JSON) AS sections""", """SELECT JSON_ARRAY_ELEMENTS(CAST((foo -> 'sections') AS JSON)) AS sections""", ) self.validate_identity( "MERGE INTO x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET x.a = y.b WHEN NOT MATCHED THEN INSERT (a, b) VALUES (y.a, y.b)", "MERGE INTO x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b WHEN NOT MATCHED THEN INSERT (a, b) VALUES (y.a, y.b)", ) self.validate_identity( "SELECT * FROM t1*", "SELECT * FROM t1", ) self.validate_identity( "SELECT SUBSTRING('afafa' for 1)", "SELECT SUBSTRING('afafa' FROM 1 FOR 1)", ) self.validate_identity( "CAST(x AS INT8)", "CAST(x AS BIGINT)", ) self.validate_identity( """ WITH json_data AS (SELECT '{"field_id": [1, 2, 3]}'::JSON AS data), field_ids AS (SELECT 'field_id' AS field_id) SELECT JSON_ARRAY_ELEMENTS(json_data.data -> field_ids.field_id) AS element FROM json_data, field_ids """, """WITH json_data AS ( SELECT CAST('{"field_id": [1, 2, 3]}' AS JSON) AS data ), field_ids AS ( SELECT 'field_id' AS field_id ) SELECT JSON_ARRAY_ELEMENTS(JSON_EXTRACT_PATH(json_data.data, field_ids.field_id)) AS element FROM json_data, field_ids""", pretty=True, ) self.validate_all( "x ? 
y", write={ "": "JSONB_CONTAINS(x, y)", "postgres": "x ? y", }, ) self.validate_all( "SELECT CURRENT_TIMESTAMP + INTERVAL '-3 MONTH'", read={ "mysql": "SELECT DATE_ADD(CURRENT_TIMESTAMP, INTERVAL -1 QUARTER)", "postgres": "SELECT CURRENT_TIMESTAMP + INTERVAL '-3 MONTH'", "tsql": "SELECT DATEADD(QUARTER, -1, GETDATE())", }, ) self.validate_all( "SELECT ARRAY[]::INT[] AS foo", write={ "postgres": "SELECT CAST(ARRAY[] AS INT[]) AS foo", "duckdb": "SELECT CAST([] AS INT[]) AS foo", }, ) self.validate_all( "STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')", read={ "doris": "SPLIT_BY_STRING('xx~^~yy~^~zz', '~^~', 'yy')", }, write={ "doris": "SPLIT_BY_STRING('xx~^~yy~^~zz', '~^~', 'yy')", "postgres": "STRING_TO_ARRAY('xx~^~yy~^~zz', '~^~', 'yy')", }, ) self.validate_all( "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", read={ "duckdb": "SELECT [1, 2, 3] @> [1, 2]", }, write={ "duckdb": "SELECT [1, 2, 3] @> [1, 2]", "mysql": UnsupportedError, "postgres": "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", }, ) self.validate_all( "SELECT REGEXP_REPLACE('mr .', '[^a-zA-Z]', '', 'g')", write={ "duckdb": "SELECT REGEXP_REPLACE('mr .', '[^a-zA-Z]', '', 'g')", "postgres": "SELECT REGEXP_REPLACE('mr .', '[^a-zA-Z]', '', 'g')", }, ) self.validate_all( "CREATE TABLE t (c INT)", read={ "mysql": "CREATE TABLE t (c INT COMMENT 'comment 1') COMMENT = 'comment 2'", }, ) self.validate_all( 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', write={ "bigquery": "SELECT * FROM `test_table` ORDER BY RAND() NULLS LAST LIMIT 5", "duckdb": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', "postgres": 'SELECT * FROM "test_table" ORDER BY RANDOM() LIMIT 5', "tsql": "SELECT TOP 5 * FROM [test_table] ORDER BY RAND()", }, ) self.validate_all( "SELECT (data -> 'en-US') AS acat FROM my_table", write={ "duckdb": """SELECT (data -> '$."en-US"') AS acat FROM my_table""", "postgres": "SELECT (data -> 'en-US') AS acat FROM my_table", }, ) self.validate_all( "SELECT (data ->> 'en-US') AS acat FROM my_table", write={ 
"duckdb": """SELECT (data ->> '$."en-US"') AS acat FROM my_table""", "postgres": "SELECT (data ->> 'en-US') AS acat FROM my_table", }, ) self.validate_all( "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", read={ "clickhouse": "SELECT JSONExtractString(x, k1, k2, k3) FROM t", "redshift": "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", }, write={ "clickhouse": "SELECT JSONExtractString(x, k1, k2, k3) FROM t", "postgres": "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", "redshift": "SELECT JSON_EXTRACT_PATH_TEXT(x, k1, k2, k3) FROM t", }, ) self.validate_all( "x #> 'y'", read={ "": "JSONB_EXTRACT(x, 'y')", }, write={ "": "JSONB_EXTRACT(x, 'y')", "postgres": "x #> 'y'", }, ) self.validate_all( "x #>> 'y'", read={ "": "JSONB_EXTRACT_SCALAR(x, 'y')", }, write={ "": "JSONB_EXTRACT_SCALAR(x, 'y')", "postgres": "x #>> 'y'", }, ) self.validate_all( "x -> 'y' -> 0 -> 'z'", write={ "": "JSON_EXTRACT(JSON_EXTRACT(JSON_EXTRACT(x, '$.y'), '$[0]'), '$.z')", "postgres": "x -> 'y' -> 0 -> 'z'", }, ) self.validate_all( """JSON_EXTRACT_PATH('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}','f4')""", write={ "": """JSON_EXTRACT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4')""", "bigquery": """JSON_EXTRACT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4')""", "duckdb": """'{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}' -> '$.f4'""", "mysql": """JSON_EXTRACT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4')""", "postgres": """JSON_EXTRACT_PATH('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4')""", "presto": """JSON_EXTRACT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4')""", "redshift": """JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 'f4')""", "spark": """GET_JSON_OBJECT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4')""", "sqlite": """'{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}' -> '$.f4'""", "tsql": """ISNULL(JSON_QUERY('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', '$.f4'), JSON_VALUE('{"f2":{"f3":1},"f4":{"f5":99,"f6":"foo"}}', 
'$.f4'))""", }, ) self.validate_all( """JSON_EXTRACT_PATH_TEXT('{"farm": ["a", "b", "c"]}', 'farm', '0')""", read={ "duckdb": """'{"farm": ["a", "b", "c"]}' ->> '$.farm[0]'""", "redshift": """JSON_EXTRACT_PATH_TEXT('{"farm": ["a", "b", "c"]}', 'farm', '0')""", }, write={ "duckdb": """'{"farm": ["a", "b", "c"]}' ->> '$.farm[0]'""", "postgres": """JSON_EXTRACT_PATH_TEXT('{"farm": ["a", "b", "c"]}', 'farm', '0')""", "redshift": """JSON_EXTRACT_PATH_TEXT('{"farm": ["a", "b", "c"]}', 'farm', '0')""", }, ) self.validate_all( "JSON_EXTRACT_PATH(x, 'x', 'y', 'z')", read={ "duckdb": "x -> '$.x.y.z'", "postgres": "JSON_EXTRACT_PATH(x, 'x', 'y', 'z')", }, write={ "duckdb": "x -> '$.x.y.z'", "redshift": "JSON_EXTRACT_PATH_TEXT(x, 'x', 'y', 'z')", }, ) self.validate_all( "SELECT * FROM t TABLESAMPLE SYSTEM (50)", write={ "postgres": "SELECT * FROM t TABLESAMPLE SYSTEM (50)", "redshift": UnsupportedError, }, ) self.validate_all( "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount)", write={ "databricks": "SELECT PERCENTILE_APPROX(amount, 0.5)", "postgres": "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY amount)", "presto": "SELECT APPROX_PERCENTILE(amount, 0.5)", "spark": "SELECT PERCENTILE_APPROX(amount, 0.5)", "trino": "SELECT APPROX_PERCENTILE(amount, 0.5)", }, ) self.validate_all( "e'x'", write={ "mysql": "x", }, ) self.validate_all( "SELECT DATE_PART('minute', timestamp '2023-01-04 04:05:06.789')", write={ "postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", "redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", "snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATE_PART('month', date '20220502')", write={ "postgres": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "redshift": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "snowflake": "SELECT DATE_PART(month, CAST('20220502' AS DATE))", }, ) self.validate_all( 
"SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')", write={ "postgres": "SELECT (CAST('2016-01-10' AS DATE), CAST('2016-02-01' AS DATE)) OVERLAPS (CAST('2016-01-20' AS DATE), CAST('2016-02-10' AS DATE))", "tsql": "SELECT (CAST('2016-01-10' AS DATE), CAST('2016-02-01' AS DATE)) OVERLAPS (CAST('2016-01-20' AS DATE), CAST('2016-02-10' AS DATE))", }, ) self.validate_all( "SELECT DATE_PART('epoch', CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", read={ "": "SELECT TIME_TO_UNIX(TIMESTAMP '2023-01-04 04:05:06.789')", }, ) self.validate_all( "x ^ y", write={ "": "POWER(x, y)", "postgres": "POWER(x, y)", }, ) self.validate_all( "x # y", write={ "": "x ^ y", "postgres": "x # y", }, ) self.validate_all( "SELECT GENERATE_SERIES(1, 5)", write={ "bigquery": UnsupportedError, "postgres": "SELECT GENERATE_SERIES(1, 5)", }, ) self.validate_all( "WITH dates AS (SELECT GENERATE_SERIES('2020-01-01'::DATE, '2024-01-01'::DATE, '1 day'::INTERVAL) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", write={ "duckdb": "WITH dates AS (SELECT UNNEST(GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL))) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", "postgres": "WITH dates AS (SELECT GENERATE_SERIES(CAST('2020-01-01' AS DATE), CAST('2024-01-01' AS DATE), CAST('1 day' AS INTERVAL)) AS date), date_table AS (SELECT DISTINCT DATE_TRUNC('MONTH', date) AS date FROM dates) SELECT * FROM date_table", }, ) self.validate_all( "GENERATE_SERIES(a, b, ' 2 days ')", write={ "postgres": "GENERATE_SERIES(a, b, INTERVAL '2 DAYS')", "presto": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", "trino": "UNNEST(SEQUENCE(a, b, INTERVAL '2' DAY))", }, ) self.validate_all( "GENERATE_SERIES('2019-01-01'::TIMESTAMP, NOW(), '1day')", write={ "databricks": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), 
CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "hive": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "postgres": "GENERATE_SERIES(CAST('2019-01-01' AS TIMESTAMP), CURRENT_TIMESTAMP, INTERVAL '1 DAY')", "presto": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", "spark": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "spark2": "EXPLODE(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP() AS TIMESTAMP), INTERVAL '1' DAY))", "trino": "UNNEST(SEQUENCE(CAST('2019-01-01' AS TIMESTAMP), CAST(CURRENT_TIMESTAMP AS TIMESTAMP), INTERVAL '1' DAY))", }, ) self.validate_all( "SELECT * FROM GENERATE_SERIES(a, b)", read={ "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, write={ "databricks": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", "hive": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", "postgres": "SELECT * FROM GENERATE_SERIES(a, b)", "presto": "SELECT * FROM UNNEST(SEQUENCE(a, b))", "spark": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", "spark2": "SELECT * FROM EXPLODE(SEQUENCE(a, b))", "trino": "SELECT * FROM UNNEST(SEQUENCE(a, b))", "tsql": "SELECT * FROM GENERATE_SERIES(a, b)", }, ) self.validate_all( "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4)", write={ "postgres": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4)", "presto": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4))", "trino": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4))", "tsql": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4)", }, ) self.validate_all( "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", write={ "postgres": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", "presto": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4)) AS _u(s)", "trino": "SELECT * FROM t CROSS JOIN UNNEST(SEQUENCE(2, 4)) AS _u(s)", "tsql": "SELECT * FROM t CROSS JOIN GENERATE_SERIES(2, 4) AS s", }, ) 
self.validate_all( "SELECT * FROM x FETCH 1 ROW", write={ "postgres": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", "presto": "SELECT * FROM x FETCH FIRST 1 ROWS ONLY", "hive": "SELECT * FROM x LIMIT 1", "spark": "SELECT * FROM x LIMIT 1", "sqlite": "SELECT * FROM x LIMIT 1", }, ) self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname ASC, lname", "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname", "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST", "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST", }, ) self.validate_all( "SELECT CASE WHEN SUBSTRING('abcdefg' FROM 1 FOR 2) IN ('ab') THEN 1 ELSE 0 END", write={ "hive": "SELECT CASE WHEN SUBSTRING('abcdefg', 1, 2) IN ('ab') THEN 1 ELSE 0 END", "spark": "SELECT CASE WHEN SUBSTRING('abcdefg', 1, 2) IN ('ab') THEN 1 ELSE 0 END", }, ) self.validate_all( "SELECT * FROM x WHERE SUBSTRING(col1 FROM 3 + LENGTH(col1) - 10 FOR 10) IN (col2)", write={ "hive": "SELECT * FROM x WHERE SUBSTRING(col1, 3 + LENGTH(col1) - 10, 10) IN (col2)", "spark": "SELECT * FROM x WHERE SUBSTRING(col1, 3 + LENGTH(col1) - 10, 10) IN (col2)", }, ) self.validate_all( "SELECT TRIM(BOTH ' XXX ')", write={ "mysql": "SELECT TRIM(' XXX ')", "postgres": "SELECT TRIM(' XXX ')", "hive": "SELECT TRIM(' XXX ')", }, ) self.validate_all( "TRIM(LEADING FROM ' XXX ')", write={ "mysql": "LTRIM(' XXX ')", "postgres": "LTRIM(' XXX ')", "hive": "LTRIM(' XXX ')", "presto": "LTRIM(' XXX ')", }, ) self.validate_all( "TRIM(TRAILING FROM ' XXX ')", write={ "mysql": "RTRIM(' XXX ')", "postgres": "RTRIM(' XXX ')", "hive": "RTRIM(' XXX ')", "presto": "RTRIM(' XXX ')", }, ) self.validate_all( "TRIM(BOTH 'as' FROM 'as string as')", write={ "postgres": 
"TRIM(BOTH 'as' FROM 'as string as')", "spark": "TRIM(BOTH 'as' FROM 'as string as')", }, ) self.validate_identity( """SELECT TRIM(LEADING ' XXX ' COLLATE "de_DE")""", """SELECT LTRIM(' XXX ' COLLATE "de_DE")""", ) self.validate_identity( """SELECT TRIM(TRAILING ' XXX ' COLLATE "de_DE")""", """SELECT RTRIM(' XXX ' COLLATE "de_DE")""", ) self.validate_identity("LEVENSHTEIN(col1, col2)") self.validate_identity("LEVENSHTEIN_LESS_EQUAL(col1, col2, 1)") self.validate_identity("LEVENSHTEIN(col1, col2, 1, 2, 3)") self.validate_identity("LEVENSHTEIN_LESS_EQUAL(col1, col2, 1, 2, 3, 4)") self.validate_all( """'{"a":1,"b":2}'::json->'b'""", write={ "postgres": """CAST('{"a":1,"b":2}' AS JSON) -> 'b'""", "redshift": """JSON_EXTRACT_PATH_TEXT('{"a":1,"b":2}', 'b')""", }, ) self.validate_all( """merge into x as x using (select id) as y on a = b WHEN matched then update set X."A" = y.b""", write={ "postgres": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""", "trino": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET "A" = y.b""", "snowflake": """MERGE INTO x AS x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET X."A" = y.b""", }, ) self.validate_all( "merge into x as z using (select id) as y on a = b WHEN matched then update set X.a = y.b", write={ "postgres": "MERGE INTO x AS z USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b", "snowflake": "MERGE INTO x AS z USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET X.a = y.b", }, ) self.validate_all( "merge into x as z using (select id) as y on a = b WHEN matched then update set Z.a = y.b", write={ "postgres": "MERGE INTO x AS z USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b", "snowflake": "MERGE INTO x AS z USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET Z.a = y.b", }, ) self.validate_all( "merge into x using (select id) as y on a = b WHEN matched then update set x.a = y.b", 
write={ "postgres": "MERGE INTO x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b", "snowflake": "MERGE INTO x USING (SELECT id) AS y ON a = b WHEN MATCHED THEN UPDATE SET x.a = y.b", }, ) self.validate_all( "x / y ^ z", write={ "": "x / POWER(y, z)", "postgres": "x / POWER(y, z)", }, ) self.validate_all( "CAST(x AS NAME)", read={ "redshift": "CAST(x AS NAME)", }, write={ "postgres": "CAST(x AS NAME)", "redshift": "CAST(x AS NAME)", }, ) self.assertIsInstance(self.parse_one("id::UUID"), exp.Cast) self.validate_identity('1::"int"', "CAST(1 AS INT)").to.is_type(exp.DataType.Type.INT) self.validate_identity( '1::"udt"', 'CAST(1 AS "udt")' ).to.this == exp.DataType.Type.USERDEFINED self.validate_identity( "COPY tbl (col1, col2) FROM 'file' WITH (FORMAT format, HEADER MATCH, FREEZE TRUE)" ) self.validate_identity( "COPY tbl (col1, col2) TO 'file' WITH (FORMAT format, HEADER MATCH, FREEZE TRUE)" ) self.validate_identity( "COPY (SELECT * FROM t) TO 'file' WITH (FORMAT format, HEADER MATCH, FREEZE TRUE)" ) self.validate_identity("cast(a as FLOAT)", "CAST(a AS DOUBLE PRECISION)") self.validate_identity("cast(a as FLOAT8)", "CAST(a AS DOUBLE PRECISION)") self.validate_identity("cast(a as FLOAT4)", "CAST(a AS REAL)") self.validate_all( "1 / DIV(4, 2)", read={ "postgres": "1 / DIV(4, 2)", }, write={ "sqlite": "1 / CAST(CAST(CAST(4 AS REAL) / 2 AS INTEGER) AS REAL)", "duckdb": "1 / CAST(4 // 2 AS DECIMAL)", "bigquery": "1 / CAST(DIV(4, 2) AS NUMERIC)", }, ) self.validate_all( "CAST(DIV(4, 2) AS DECIMAL(5, 3))", read={ "duckdb": "CAST(4 // 2 AS DECIMAL(5, 3))", }, write={ "duckdb": "CAST(CAST(4 // 2 AS DECIMAL) AS DECIMAL(5, 3))", "postgres": "CAST(DIV(4, 2) AS DECIMAL(5, 3))", }, ) self.validate_all( "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')", write={ "duckdb": "SELECT CAST(STRPTIME('01/01/2000', '%m/%d/%Y') AS DATE)", "postgres": "SELECT TO_DATE('01/01/2000', 'MM/DD/YYYY')", }, ) self.validate_identity( 'SELECT js, js IS JSON AS "json?", js IS JSON VALUE 
AS "scalar?", js IS JSON SCALAR AS "scalar?", js IS JSON OBJECT AS "object?", js IS JSON ARRAY AS "array?" FROM t' ) self.validate_identity( 'SELECT js, js IS JSON ARRAY WITH UNIQUE KEYS AS "array w. UK?", js IS JSON ARRAY WITHOUT UNIQUE KEYS AS "array w/o UK?", js IS JSON ARRAY UNIQUE KEYS AS "array w UK 2?" FROM t' ) self.validate_identity( "MERGE INTO target_table USING source_table AS source ON target.id = source.id WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN DO NOTHING RETURNING MERGE_ACTION(), *" ) self.validate_identity( "SELECT 1 FROM ((VALUES (1)) AS vals(id) LEFT OUTER JOIN tbl ON vals.id = tbl.id)" ) self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1)") self.validate_identity("SELECT OVERLAY(a PLACING b FROM 1 FOR 1)") self.validate_identity("ARRAY[1, 2, 3] && ARRAY[1, 2]").assert_is(exp.ArrayOverlaps) self.validate_all( """SELECT JSONB_EXISTS('{"a": [1,2,3]}', 'a')""", write={ "postgres": """SELECT JSONB_EXISTS('{"a": [1,2,3]}', 'a')""", "duckdb": """SELECT JSON_EXISTS('{"a": [1,2,3]}', '$.a')""", }, ) self.validate_all( "WITH t AS (SELECT ARRAY[1, 2, 3] AS col) SELECT * FROM t WHERE 1 <= ANY(col) AND 2 = ANY(col)", write={ "postgres": "WITH t AS (SELECT ARRAY[1, 2, 3] AS col) SELECT * FROM t WHERE 1 <= ANY(col) AND 2 = ANY(col)", "hive": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)", "spark2": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)", "spark": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)", "databricks": "WITH t AS (SELECT ARRAY(1, 2, 3) AS col) SELECT * FROM t WHERE EXISTS(col, x -> 1 <= x) AND EXISTS(col, x -> 2 = x)", }, ) self.validate_identity( "/*+ some comment*/ SELECT b.foo, b.bar FROM baz AS b", "/* + some comment */ SELECT b.foo, b.bar FROM baz AS b", ) self.validate_identity( "SELECT PERCENTILE_CONT(0.5) WITHIN 
GROUP (ORDER BY a) FILTER(WHERE CAST(b AS BOOLEAN)) AS mean_value FROM (VALUES (0, 't')) AS fake_data(a, b)" ) self.validate_all( "SELECT JSON_OBJECT_AGG(k, v) FROM t", write={ "postgres": "SELECT JSON_OBJECT_AGG(k, v) FROM t", "duckdb": "SELECT JSON_GROUP_OBJECT(k, v) FROM t", }, ) self.validate_all( "SELECT JSONB_OBJECT_AGG(k, v) FROM t", write={ "postgres": "SELECT JSONB_OBJECT_AGG(k, v) FROM t", "duckdb": "SELECT JSON_GROUP_OBJECT(k, v) FROM t", }, ) self.validate_all( "SELECT DATE_BIN('30 days', timestamp_col, (SELECT MIN(TIMESTAMP) from table)) FROM table", write={ "postgres": "SELECT DATE_BIN('30 days', timestamp_col, (SELECT MIN(TIMESTAMP) FROM table)) FROM table", "duckdb": 'SELECT TIME_BUCKET(\'30 days\', timestamp_col, (SELECT MIN(TIMESTAMP) FROM "table")) FROM "table"', }, ) # Postgres introduced ANY_VALUE in version 16 self.validate_all( "SELECT ANY_VALUE(1) AS col", write={ "postgres": "SELECT ANY_VALUE(1) AS col", "postgres, version=16": "SELECT ANY_VALUE(1) AS col", "postgres, version=17.5": "SELECT ANY_VALUE(1) AS col", "postgres, version=15": "SELECT MAX(1) AS col", "postgres, version=13.9": "SELECT MAX(1) AS col", }, ) self.validate_identity("SELECT * FROM foo WHERE id = %s") self.validate_identity("SELECT * FROM foo WHERE id = %(id_param)s") self.validate_identity("SELECT * FROM foo WHERE id = ?") self.validate_identity("a ?| b").assert_is(exp.JSONBContainsAnyTopKeys) self.validate_identity( """SELECT '{"a":1, "b":2, "c":3}'::jsonb ?| array['b', 'c']""", """SELECT CAST('{"a":1, "b":2, "c":3}' AS JSONB) ?| ARRAY['b', 'c']""", ) self.validate_identity("a ?& b").assert_is(exp.JSONBContainsAllTopKeys) self.validate_identity( """SELECT '["a", "b"]'::jsonb ?& array['a', 'b']""", """SELECT CAST('["a", "b"]' AS JSONB) ?& ARRAY['a', 'b']""", ) self.validate_identity("a #- b").assert_is(exp.JSONBDeleteAtPath) self.validate_identity( """SELECT '["a", {"b":1}]'::jsonb #- '{1,b}'""", """SELECT CAST('["a", {"b":1}]' AS JSONB) #- '{1,b}'""", ) 
self.validate_identity("SELECT JSON_AGG(DISTINCT name) FROM users") self.validate_identity( "SELECT JSON_AGG(c1 ORDER BY c1) FROM (VALUES ('c'), ('b'), ('a')) AS t(c1)" ) self.validate_identity( "SELECT JSON_AGG(DISTINCT c1 ORDER BY c1) FROM (VALUES ('c'), ('b'), ('a')) AS t(c1)" ) self.validate_all( "SELECT REGEXP_REPLACE('aaa', 'a', 'b')", read={ "postgres": "SELECT REGEXP_REPLACE('aaa', 'a', 'b')", "duckdb": "SELECT REGEXP_REPLACE('aaa', 'a', 'b')", }, write={ "duckdb": "SELECT REGEXP_REPLACE('aaa', 'a', 'b')", }, ) self.validate_all( "SELECT TO_CHAR(foo, bar)", read={ "redshift": "SELECT TO_CHAR(foo, bar)", }, write={ "postgres": "SELECT TO_CHAR(foo, bar)", "redshift": "SELECT TO_CHAR(foo, bar)", }, ) self.validate_all( "CREATE TABLE table1 (a INT, b INT, PRIMARY KEY (a))", read={ "sqlite": "CREATE TABLE table1 (a INT, b INT, PRIMARY KEY (a))", "postgres": "CREATE TABLE table1 (a INT, b INT, PRIMARY KEY (a))", }, ) self.validate_identity("SELECT NUMRANGE(1.1, 2.2) -|- NUMRANGE(2.2, 3.3)") self.validate_identity( "SELECT SLOPE(point '(4,4)', point '(0,0)')", "SELECT SLOPE(CAST('(4,4)' AS POINT), CAST('(0,0)' AS POINT))", ) width_bucket = self.validate_identity("WIDTH_BUCKET(10, ARRAY[5, 15])") self.assertIsNotNone(width_bucket.args.get("threshold")) width_bucket = self.validate_identity("WIDTH_BUCKET(10, 5, 15, 25)") self.assertIsNone(width_bucket.args.get("threshold")) self.validate_all( "UPDATE foo SET a = bar.a, b = bar.b FROM bar WHERE foo.id = bar.id", write={ "postgres": "UPDATE foo SET a = bar.a, b = bar.b FROM bar WHERE foo.id = bar.id", "doris": "UPDATE foo SET a = bar.a, b = bar.b FROM bar WHERE foo.id = bar.id", "starrocks": "UPDATE foo SET a = bar.a, b = bar.b FROM bar WHERE foo.id = bar.id", "mysql": "UPDATE foo JOIN bar ON TRUE SET foo.a = bar.a, foo.b = bar.b WHERE foo.id = bar.id", "singlestore": "UPDATE foo JOIN bar ON TRUE SET foo.a = bar.a, foo.b = bar.b WHERE foo.id = bar.id", }, ) self.validate_identity("SELECT MLEAST(VARIADIC ARRAY[10, -1, 
def test_ddl(self):
    """Exercise PostgreSQL DDL parsing and generation, plus a few DML/TRUNCATE forms.

    Most checks go through ``validate_identity``: with one argument a single SQL
    string is passed, with two arguments an input and a second (normalized) string.
    ``validate_identity``/``validate_all`` are defined outside this chunk
    (presumably on the ``Validator`` base class); their exact assertion semantics
    are assumed here, not shown.
    """
    # Checks that user-defined types are parsed into DataType instead of Identifier
    self.parse_one("CREATE TABLE t (a udt)").this.expressions[0].args["kind"].assert_is(
        exp.DataType
    )

    # Checks that OID is parsed into a DataType (ObjectIdentifier)
    self.assertIsInstance(
        self.parse_one("CREATE TABLE p.t (c oid)").find(exp.DataType), exp.ObjectIdentifier
    )

    # A lower-case interval unit is parsed as part of the column type and
    # generated upper-cased.
    expr = self.parse_one("CREATE TABLE t (x INTERVAL day)")
    cdef = expr.find(exp.ColumnDef)
    cdef.args["kind"].assert_is(exp.DataType)
    self.assertEqual(expr.sql(dialect="postgres"), "CREATE TABLE t (x INTERVAL DAY)")

    # Single-string identity checks.
    self.validate_identity('ALTER INDEX "IX_Ratings_Column1" RENAME TO "IX_Ratings_Column2"')
    self.validate_identity('CREATE TABLE x (a TEXT COLLATE "de_DE")')
    self.validate_identity('CREATE TABLE x (a TEXT COLLATE pg_catalog."default")')
    self.validate_identity("CREATE TABLE t (col INT[3][5])")
    self.validate_identity("CREATE TABLE t (col INT[3])")
    self.validate_identity("CREATE INDEX IF NOT EXISTS ON t(c)")
    self.validate_identity("CREATE INDEX et_vid_idx ON et(vid) INCLUDE (fid)")
    self.validate_identity("CREATE INDEX idx_x ON x USING BTREE(x, y) WHERE (NOT y IS NULL)")
    self.validate_identity("CREATE TABLE test (elems JSONB[])")
    self.validate_identity("CREATE TABLE public.y (x TSTZRANGE NOT NULL)")
    self.validate_identity("CREATE TABLE test (foo HSTORE)")
    self.validate_identity("CREATE TABLE test (foo JSONB)")
    self.validate_identity("CREATE TABLE test (foo VARCHAR(64)[])")
    self.validate_identity("CREATE TABLE test (foo INT) PARTITION BY HASH(foo)")
    self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING a")
    self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING a, b")
    self.validate_identity("INSERT INTO x VALUES (1, 'a', 2.0) RETURNING *")
    self.validate_identity("UPDATE tbl_name SET foo = 123 RETURNING a")
    self.validate_identity("CREATE TABLE cities_partdef PARTITION OF cities DEFAULT")
    self.validate_identity("CREATE TABLE t (c CHAR(2) UNIQUE NOT NULL) INHERITS (t1)")
    self.validate_identity("CREATE TABLE s.t (c CHAR(2) UNIQUE NOT NULL) INHERITS (s.t1, s.t2)")
    self.validate_identity("CREATE FUNCTION x(INT) RETURNS INT SET search_path = 'public'")
    self.validate_identity("TRUNCATE TABLE t1 CONTINUE IDENTITY")
    self.validate_identity("TRUNCATE TABLE t1 RESTART IDENTITY")
    self.validate_identity("TRUNCATE TABLE t1 CASCADE")
    self.validate_identity("TRUNCATE TABLE t1 RESTRICT")
    self.validate_identity("TRUNCATE TABLE t1 CONTINUE IDENTITY CASCADE")
    self.validate_identity("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT")
    self.validate_identity("ALTER TABLE t1 SET LOGGED")
    self.validate_identity("ALTER TABLE t1 SET UNLOGGED")
    self.validate_identity("ALTER TABLE t1 SET WITHOUT CLUSTER")
    self.validate_identity("ALTER TABLE t1 SET WITHOUT OIDS")
    self.validate_identity("ALTER TABLE t1 SET ACCESS METHOD method")
    self.validate_identity("ALTER TABLE t1 SET TABLESPACE tablespace")
    self.validate_identity("ALTER TABLE t1 SET (fillfactor = 5, autovacuum_enabled = TRUE)")
    self.validate_identity(
        "INSERT INTO book (isbn, title) VALUES ($1, $2) ON CONFLICT(isbn) WHERE deleted_at IS NULL DO UPDATE SET title = EXCLUDED.title RETURNING id, isbn"
    )
    self.validate_identity(
        "INSERT INTO newtable AS t(a, b, c) VALUES (1, 2, 3) ON CONFLICT(c) DO UPDATE SET a = t.a + 1 WHERE t.a < 1"
    )
    self.validate_identity(
        "ALTER TABLE tested_table ADD CONSTRAINT unique_example UNIQUE (column_name) NOT VALID"
    )
    # Dollar-quoted function body containing embedded newlines.
    self.validate_identity(
        "CREATE FUNCTION pymax(a INT, b INT) RETURNS INT LANGUAGE plpython3u AS $$\n if a > b:\n return a\n return b\n$$",
    )
    # EXCLUDE / PRIMARY KEY ... INCLUDE table constraints.
    self.validate_identity(
        "CREATE TABLE t (vid INT NOT NULL, CONSTRAINT ht_vid_nid_fid_idx EXCLUDE (INT4RANGE(vid, nid) WITH &&, INT4RANGE(fid, fid, '[]') WITH &&))"
    )
    self.validate_identity("CREATE TABLE t (i INT, a TEXT, PRIMARY KEY (i) INCLUDE (a))")
    self.validate_identity(
        "CREATE TABLE t (i INT, PRIMARY KEY (i), EXCLUDE USING gist(col varchar_pattern_ops DESC NULLS LAST WITH &&) WITH (sp1=1, sp2=2))"
    )
    self.validate_identity(
        "CREATE TABLE t (i INT, EXCLUDE USING btree(INT4RANGE(vid, nid, '[]') ASC NULLS FIRST WITH &&) INCLUDE (col1, col2))"
    )
    self.validate_identity(
        "CREATE TABLE t (i INT, EXCLUDE USING gin(col1 WITH &&, col2 WITH ||) USING INDEX TABLESPACE tablespace WHERE (id > 5))"
    )
    self.validate_identity(
        "CREATE TABLE A (LIKE B INCLUDING CONSTRAINT INCLUDING COMPRESSION EXCLUDING COMMENTS)"
    )
    # PARTITION OF ... FOR VALUES variants.
    self.validate_identity(
        "CREATE TABLE cust_part3 PARTITION OF customers FOR VALUES WITH (MODULUS 3, REMAINDER 2)"
    )
    self.validate_identity(
        "CREATE TABLE measurement_y2016m07 PARTITION OF measurement (unitsales DEFAULT 0) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')"
    )
    self.validate_identity(
        "CREATE TABLE measurement_ym_older PARTITION OF measurement_year_month FOR VALUES FROM (MINVALUE, MINVALUE) TO (2016, 11)"
    )
    self.validate_identity(
        "CREATE TABLE measurement_ym_y2016m11 PARTITION OF measurement_year_month FOR VALUES FROM (2016, 11) TO (2016, 12)"
    )
    self.validate_identity(
        "CREATE TABLE cities_ab PARTITION OF cities (CONSTRAINT city_id_nonzero CHECK (city_id <> 0)) FOR VALUES IN ('a', 'b')"
    )
    self.validate_identity(
        "CREATE TABLE cities_ab PARTITION OF cities (CONSTRAINT city_id_nonzero CHECK (city_id <> 0)) FOR VALUES IN ('a', 'b') PARTITION BY RANGE(population)"
    )
    self.validate_identity(
        "CREATE INDEX foo ON bar.baz USING btree(col1 varchar_pattern_ops ASC, col2)"
    )
    self.validate_identity(
        "CREATE INDEX index_issues_on_title_trigram ON public.issues USING gin(title public.gin_trgm_ops)"
    )
    # INSERT ... ON CONFLICT variants.
    self.validate_identity(
        "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO NOTHING RETURNING *"
    )
    self.validate_identity(
        "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = 1 RETURNING *"
    )
    self.validate_identity(
        "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT(id) DO UPDATE SET x.id = excluded.id RETURNING *"
    )
    self.validate_identity(
        "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT ON CONSTRAINT pkey DO NOTHING RETURNING *"
    )
    self.validate_identity(
        "INSERT INTO x VALUES (1, 'a', 2.0) ON CONFLICT ON CONSTRAINT pkey DO UPDATE SET x.id = 1 RETURNING *"
    )
    self.validate_identity(
        "DELETE FROM event USING sales AS s WHERE event.eventid = s.eventid RETURNING a"
    )
    self.validate_identity(
        "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp"
    )
    self.validate_identity(
        "CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])",
        "CREATE TABLE test (x TIMESTAMP[][])",
    )
    self.validate_identity(
        "CREATE FUNCTION add(integer, integer) RETURNS INT LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT AS 'select $1 + $2;'",
    )
    self.validate_identity(
        "CREATE FUNCTION add(integer, integer) RETURNS INT LANGUAGE SQL IMMUTABLE STRICT AS 'select $1 + $2;'"
    )
    # The next three calls pass check_command_warning=True.
    # NOTE(review): presumably these statements fall back to a generic Command
    # and only a warning is checked -- confirm against Validator.validate_identity.
    self.validate_identity(
        "CREATE FUNCTION add(INT, INT) RETURNS INT SET search_path TO 'public' AS 'select $1 + $2;' LANGUAGE SQL IMMUTABLE",
        check_command_warning=True,
    )
    self.validate_identity(
        "CREATE FUNCTION x(INT) RETURNS INT SET foo FROM CURRENT",
        check_command_warning=True,
    )
    self.validate_identity(
        "CREATE FUNCTION add(integer, integer) RETURNS integer AS 'select $1 + $2;' LANGUAGE SQL IMMUTABLE CALLED ON NULL INPUT",
        check_command_warning=True,
    )
    self.validate_identity(
        "CREATE CONSTRAINT TRIGGER my_trigger AFTER INSERT OR DELETE OR UPDATE OF col_a, col_b ON public.my_table DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION DO_STH()"
    )
    self.validate_identity(
        "CREATE UNLOGGED TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp"
    )
    self.validate_identity(
        "ALTER TABLE foo ADD COLUMN id BIGINT NOT NULL PRIMARY KEY DEFAULT 1, ADD CONSTRAINT fk_orders_user FOREIGN KEY (id) REFERENCES foo (id)"
    )
    # Input / normalized-output pairs.
    self.validate_identity(
        "CREATE TABLE t (col integer ARRAY[3])",
        "CREATE TABLE t (col INT[3])",
    )
    self.validate_identity(
        "CREATE TABLE t (col integer ARRAY)",
        "CREATE TABLE t (col INT[])",
    )
    self.validate_identity(
        "CREATE FUNCTION x(INT) RETURNS INT SET search_path TO 'public'",
        "CREATE FUNCTION x(INT) RETURNS INT SET search_path = 'public'",
    )
    # NOTE(review): exact duplicate of the TIMESTAMP WITHOUT TIME ZONE[][] check
    # made earlier in this method.
    self.validate_identity(
        "CREATE TABLE test (x TIMESTAMP WITHOUT TIME ZONE[][])",
        "CREATE TABLE test (x TIMESTAMP[][])",
    )
    self.validate_identity(
        "CREATE OR REPLACE FUNCTION function_name (input_a character varying DEFAULT NULL::character varying)",
        "CREATE OR REPLACE FUNCTION function_name(input_a VARCHAR DEFAULT CAST(NULL AS VARCHAR))",
    )

    # Function parameter modes
    self.validate_identity("CREATE FUNCTION foo(a INT)")
    self.validate_identity("CREATE FUNCTION foo(IN a INT)")
    self.validate_identity("CREATE FUNCTION foo(OUT a INT)")
    self.validate_identity("CREATE FUNCTION foo(INOUT a INT)")
    self.validate_identity("CREATE FUNCTION foo(VARIADIC a INT[])")
    self.validate_identity("CREATE FUNCTION foo(out INT)")  # "out" as identifier
    self.validate_identity("CREATE FUNCTION foo(inout VARCHAR)")  # "inout" as identifier
    self.validate_identity("CREATE FUNCTION foo(variadic INT[])")  # "variadic" as identifier
    self.validate_identity(
        "CREATE FUNCTION foo(a INT, OUT b INT, INOUT c VARCHAR, VARIADIC d INT[])"
    )
    self.validate_identity("CREATE OR REPLACE FUNCTION foo(INOUT id UUID)")
    self.validate_identity(
        "CREATE OR REPLACE FUNCTION foo(id UUID, OUT created_at TIMESTAMPTZ)"
    )
    self.validate_identity("CREATE FUNCTION foo(OUT x INT DEFAULT 5)")
    self.validate_identity("CREATE FUNCTION foo(INOUT y VARCHAR DEFAULT 'test')")
    self.validate_identity("CREATE FUNCTION foo(IN a INT DEFAULT 0, OUT b INT)")
    self.validate_all(
        "CREATE FUNCTION foo(VARIADIC args INT[] DEFAULT ARRAY[]::INT[])",
        write={
            "postgres": "CREATE FUNCTION foo(VARIADIC args INT[] DEFAULT CAST(ARRAY[] AS INT[]))",
        },
    )
    self.validate_identity("CREATE FUNCTION foo(OUT result INT, IN input INT DEFAULT 10)")

    # Column and table constraints. In the next three calls the second argument
    # is character-for-character identical to the first.
    self.validate_identity(
        "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)",
        "CREATE TABLE products (product_no INT UNIQUE, name TEXT, price DECIMAL)",
    )
    self.validate_identity(
        "CREATE TABLE products (product_no INT CONSTRAINT must_be_different UNIQUE, name TEXT CONSTRAINT present NOT NULL, price DECIMAL)",
        "CREATE TABLE products (product_no INT CONSTRAINT must_be_different UNIQUE, name TEXT CONSTRAINT present NOT NULL, price DECIMAL)",
    )
    self.validate_identity(
        "CREATE TABLE products (product_no INT, name TEXT, price DECIMAL, UNIQUE (product_no, name))",
        "CREATE TABLE products (product_no INT, name TEXT, price DECIMAL, UNIQUE (product_no, name))",
    )
    # Adjacent string literals below are concatenated into one SQL statement.
    self.validate_identity(
        "CREATE TABLE products ("
        "product_no INT UNIQUE,"
        " name TEXT,"
        " price DECIMAL CHECK (price > 0),"
        " discounted_price DECIMAL CONSTRAINT positive_discount CHECK (discounted_price > 0),"
        " CHECK (product_no > 1),"
        " CONSTRAINT valid_discount CHECK (price > discounted_price))"
    )
    # NOTE(review): the whitespace inside the triple-quoted inputs in this method
    # is shown flattened in this view; the layout upstream may span several lines.
    self.validate_identity(
        """ CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree (commit_id, artifacts_expire_at, id) WHERE ( ((type)::text = 'Ci::Build'::text) AND ((retried = false) OR (retried IS NULL)) AND ((name)::text = ANY (ARRAY[ ('sast'::character varying)::text, ('dependency_scanning'::character varying)::text, ('sast:container'::character varying)::text, ('container_scanning'::character varying)::text, ('dast'::character varying)::text ])) ) """,
        "CREATE INDEX index_ci_builds_on_commit_id_and_artifacts_expireatandidpartial ON public.ci_builds USING btree(commit_id, artifacts_expire_at, id) WHERE ((CAST((type) AS TEXT) = CAST('Ci::Build' AS TEXT)) AND ((retried = FALSE) OR (retried IS NULL)) AND (CAST((name) AS TEXT) = ANY(ARRAY[CAST((CAST('sast' AS VARCHAR)) AS TEXT), CAST((CAST('dependency_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('sast:container' AS VARCHAR)) AS TEXT), CAST((CAST('container_scanning' AS VARCHAR)) AS TEXT), CAST((CAST('dast' AS VARCHAR)) AS TEXT)])))",
    )
    self.validate_identity(
        "CREATE INDEX index_ci_pipelines_on_project_idandrefandiddesc ON public.ci_pipelines USING btree(project_id, ref, id DESC)"
    )
    # The trailing "*" after table names is absent from the expected output.
    self.validate_identity(
        "TRUNCATE TABLE ONLY t1, t2*, ONLY t3, t4, t5* RESTART IDENTITY CASCADE",
        "TRUNCATE TABLE ONLY t1, t2, ONLY t3, t4, t5 RESTART IDENTITY CASCADE",
    )
    # UUID / BYTEA type names as written for other dialects.
    self.validate_all(
        "CREATE TABLE x (a UUID, b BYTEA)",
        write={
            "duckdb": "CREATE TABLE x (a UUID, b BLOB)",
            "presto": "CREATE TABLE x (a UUID, b VARBINARY)",
            "hive": "CREATE TABLE x (a UUID, b BINARY)",
            "spark": "CREATE TABLE x (a STRING, b BINARY)",
            "tsql": "CREATE TABLE x (a UNIQUEIDENTIFIER, b VARBINARY)",
        },
    )

    self.validate_identity("CREATE TABLE tbl (col INT UNIQUE NULLS NOT DISTINCT DEFAULT 9.99)")
    self.validate_identity("CREATE TABLE tbl (col UUID UNIQUE DEFAULT GEN_RANDOM_UUID())")
    self.validate_identity("CREATE TABLE tbl (col UUID, UNIQUE NULLS NOT DISTINCT (col))")
    self.validate_identity("CREATE TABLE tbl (col_a INT GENERATED ALWAYS AS (1 + 2) STORED)")
    self.validate_identity(
        "CREATE TABLE tbl (col_a INTERVAL GENERATED ALWAYS AS (a - b) STORED)"
    )

    self.validate_identity("CREATE INDEX CONCURRENTLY ix_table_id ON tbl USING btree(id)")
    self.validate_identity(
        "CREATE INDEX CONCURRENTLY IF NOT EXISTS ix_table_id ON tbl USING btree(id)"
    )
    self.validate_identity("DROP INDEX ix_table_id")
    self.validate_identity("DROP INDEX IF EXISTS ix_table_id")
    self.validate_identity("DROP INDEX CONCURRENTLY ix_table_id")
    self.validate_identity("DROP INDEX CONCURRENTLY IF EXISTS ix_table_id")

    # Foreign keys with MATCH FULL / PARTIAL / SIMPLE and referential actions.
    self.validate_identity(
        """ CREATE TABLE IF NOT EXISTS public.rental ( inventory_id INT NOT NULL, CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id) REFERENCES public.customer (customer_id) MATCH FULL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id) REFERENCES public.inventory (inventory_id) MATCH PARTIAL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id) REFERENCES public.staff (staff_id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE RESTRICT, INITIALLY IMMEDIATE ) """,
        "CREATE TABLE IF NOT EXISTS public.rental (inventory_id INT NOT NULL, CONSTRAINT rental_customer_id_fkey FOREIGN KEY (customer_id) REFERENCES public.customer (customer_id) MATCH FULL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_inventory_id_fkey FOREIGN KEY (inventory_id) REFERENCES public.inventory (inventory_id) MATCH PARTIAL ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT rental_staff_id_fkey FOREIGN KEY (staff_id) REFERENCES public.staff (staff_id) MATCH SIMPLE ON UPDATE CASCADE ON DELETE RESTRICT, INITIALLY IMMEDIATE)",
    )

    # One sub-test per operator allowed after WITH in an EXCLUDE element.
    for op in ("=", ">=", "<=", "<", ">", "&&", "||", "@>", "<@"):
        with self.subTest(f"Testing EXCLUDE with operator {op}"):
            self.validate_identity(
                f"CREATE TABLE circles (c circle, EXCLUDE USING gist(c WITH {op}))"
            )

    # CHECK without parentheses around its condition must raise ParseError.
    with self.assertRaises(ParseError):
        transpile("CREATE TABLE products (price DECIMAL CHECK price > 0)", read="postgres")
    with self.assertRaises(ParseError):
        transpile(
            "CREATE TABLE products (price DECIMAL, CHECK price > 1)",
            read="postgres",
        )
def test_unnest(self):
    """Check UNNEST in FROM position and as a projection written for other dialects."""
    self.validate_identity(
        "SELECT * FROM UNNEST(ARRAY[1, 2], ARRAY['foo', 'bar', 'baz']) AS x(a, b)"
    )

    # Each entry: (postgres input, expected SQL keyed by target dialect).
    projection_cases = (
        (
            "SELECT UNNEST(c) FROM t",
            {
                "hive": "SELECT EXPLODE(c) FROM t",
                "postgres": "SELECT UNNEST(c) FROM t",
                "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM t CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(c)))) AS _u(pos) CROSS JOIN UNNEST(c) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(c) AND _u_2.pos_2 = CARDINALITY(c))",
            },
        ),
        (
            "SELECT UNNEST(ARRAY[1])",
            {
                "hive": "SELECT EXPLODE(ARRAY(1))",
                "postgres": "SELECT UNNEST(ARRAY[1])",
                "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1]) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1]))",
            },
        ),
    )
    for sql, expected in projection_cases:
        self.validate_all(sql, write=expected)


def test_array_offset(self):
    """Check array subscripts written for other dialects and the log lines emitted."""
    targets = {
        "bigquery": "SELECT col[0]",
        "duckdb": "SELECT col[1]",
        "hive": "SELECT col[0]",
        "postgres": "SELECT col[1]",
        "presto": "SELECT col[1]",
    }
    expected_logs = [
        "INFO:sqlglot:Applying array index offset (-1)",
        "INFO:sqlglot:Applying array index offset (1)",
        "INFO:sqlglot:Applying array index offset (1)",
        "INFO:sqlglot:Applying array index offset (1)",
    ]

    with self.assertLogs(helper_logger) as captured:
        self.validate_all("SELECT col[1]", write=targets)
        self.assertEqual(captured.output, expected_logs)


def test_bool_or(self):
    """LOGICAL_OR in the input is expected to come back as BOOL_OR."""
    source_sql = "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a"
    expected_sql = "SELECT a, BOOL_OR(b) FROM table GROUP BY a"
    self.validate_identity(source_sql, expected_sql)


def test_string_concat(self):
    """Check CONCAT(...) and the || operator against several target dialects."""
    self.validate_identity("SELECT CONCAT('abcde', 2, NULL, 22)")

    concat_call_targets = {
        "": "CONCAT(COALESCE(a, ''), COALESCE(b, ''))",
        "clickhouse": "CONCAT(COALESCE(a, ''), COALESCE(b, ''))",
        "duckdb": "CONCAT(a, b)",
        "postgres": "CONCAT(a, b)",
        "presto": "CONCAT(COALESCE(CAST(a AS VARCHAR), ''), COALESCE(CAST(b AS VARCHAR), ''))",
    }
    self.validate_all("CONCAT(a, b)", write=concat_call_targets)

    pipe_operator_targets = {
        "": "a || b",
        "clickhouse": "a || b",
        "duckdb": "a || b",
        "postgres": "a || b",
        "presto": "CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR))",
    }
    self.validate_all("a || b", write=pipe_operator_targets)


def test_variance(self):
    """Check the variance function spellings and their expected postgres output."""
    identity_pairs = (
        ("VAR_SAMP(x)", "VAR_SAMP(x)"),
        ("VAR_POP(x)", "VAR_POP(x)"),
        ("VARIANCE(x)", "VAR_SAMP(x)"),
    )
    for source_sql, expected_sql in identity_pairs:
        self.validate_identity(source_sql, expected_sql)

    self.validate_all(
        "VAR_POP(x)",
        read={"": "VARIANCE_POP(x)"},
        write={"postgres": "VAR_POP(x)"},
    )


def test_corr(self):
    """CORR, with and without FILTER / OVER, is expected unchanged for duckdb and postgres."""
    corr_queries = (
        "SELECT CORR(a, b)",
        "SELECT CORR(a, b) OVER (PARTITION BY c)",
        "SELECT CORR(a, b) FILTER(WHERE c > 0)",
        "SELECT CORR(a, b) FILTER(WHERE c > 0) OVER (PARTITION BY d)",
    )
    for sql in corr_queries:
        # Both targets expect exactly the input text.
        self.validate_all(sql, write={"duckdb": sql, "postgres": sql})


def test_regexp_binary(self):
    """See https://github.com/tobymao/sqlglot/pull/2404 for details."""
    for sql in ("'thomas' ~ '.*thomas.*'", "'thomas' ~* '.*thomas.*'"):
        parsed = self.parse_one(sql)
        self.assertIsInstance(parsed, exp.Binary)


def test_unnest_json_array(self):
    """Check a Trino query that unnests JSON arrays against its postgres form."""
    # NOTE(review): the call below uses pretty=True, and the whitespace inside
    # both literals is shown flattened in this view -- confirm the exact line
    # layout of these strings against the upstream file.
    trino_sql = """ WITH t(boxcrate) AS ( SELECT JSON '[{"boxes": [{"name": "f1", "type": "plant", "color": "red"}]}]' ) SELECT JSON_EXTRACT_SCALAR(boxes,'$.name') AS name, JSON_EXTRACT_SCALAR(boxes,'$.type') AS type, JSON_EXTRACT_SCALAR(boxes,'$.color') AS color FROM t CROSS JOIN UNNEST(CAST(boxcrate AS array(json))) AS x(tbox) CROSS JOIN UNNEST(CAST(json_extract(tbox, '$.boxes') AS array(json))) AS y(boxes) """

    postgres_sql = """WITH t(boxcrate) AS ( SELECT CAST('[{"boxes": [{"name": "f1", "type": "plant", "color": "red"}]}]' AS JSON) ) SELECT JSON_EXTRACT_PATH_TEXT(boxes, 'name') AS name, JSON_EXTRACT_PATH_TEXT(boxes, 'type') AS type, JSON_EXTRACT_PATH_TEXT(boxes, 'color') AS color FROM t CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(boxcrate AS JSON)) AS x(tbox) CROSS JOIN JSON_ARRAY_ELEMENTS(CAST(JSON_EXTRACT_PATH(tbox, 'boxes') AS JSON)) AS y(boxes)"""

    self.validate_all(postgres_sql, read={"trino": trino_sql}, pretty=True)


def test_rows_from(self):
    """Check ROWS FROM (...) with column definitions and WITH ORDINALITY."""
    rows_from_queries = (
        """SELECT * FROM ROWS FROM (FUNC1(col1, col2))""",
        """SELECT * FROM ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT), FUNC2(col2) AS alias2("col2" INT)) WITH ORDINALITY""",
        """SELECT * FROM table1, ROWS FROM (FUNC1(col1) AS alias1("col1" TEXT)) WITH ORDINALITY AS alias3("col3" INT, "col4" TEXT)""",
    )
    for sql in rows_from_queries:
        self.validate_identity(sql)
def test_array_length(self):
    """Check ARRAY_LENGTH(arr, dim) against the array-size functions of other dialects."""
    self.validate_identity("SELECT ARRAY_LENGTH(ARRAY[1, 2, 3], 1)")

    # Dimension 1: each listed dialect has a single-argument spelling.
    read_from = {
        "bigquery": "ARRAY_LENGTH(arr)",
        "duckdb": "ARRAY_LENGTH(arr)",
        "presto": "CARDINALITY(arr)",
        "drill": "REPEATED_COUNT(arr)",
        "teradata": "CARDINALITY(arr)",
        "hive": "SIZE(arr)",
        "spark2": "SIZE(arr)",
        "spark": "SIZE(arr)",
        "databricks": "SIZE(arr)",
    }
    write_to = {
        "duckdb": "ARRAY_LENGTH(arr, 1)",
        "presto": "CARDINALITY(arr)",
        "teradata": "CARDINALITY(arr)",
        "bigquery": "ARRAY_LENGTH(arr)",
        "drill": "REPEATED_COUNT(arr)",
        "clickhouse": "LENGTH(arr)",
        "hive": "SIZE(arr)",
        "spark2": "SIZE(arr)",
        "spark": "SIZE(arr)",
        "databricks": "SIZE(arr)",
    }
    self.validate_all("ARRAY_LENGTH(arr, 1)", read=read_from, write=write_to)

    # Non-literal dimension: only duckdb gets SQL back, the rest map to UnsupportedError.
    unsupported_targets = dict.fromkeys(
        (
            "hive",
            "spark2",
            "spark",
            "databricks",
            "presto",
            "teradata",
            "bigquery",
            "drill",
            "clickhouse",
        ),
        UnsupportedError,
    )
    self.validate_all(
        "ARRAY_LENGTH(arr, foo)",
        write={"duckdb": "ARRAY_LENGTH(arr, foo)", **unsupported_targets},
    )


def test_xmlelement(self):
    """Check XMLELEMENT with attributes, quoted names and nested content."""
    xml_queries = (
        "SELECT XMLELEMENT(NAME foo)",
        "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar))",
        "SELECT XMLELEMENT(NAME test, XMLATTRIBUTES(a, b)) FROM test",
        "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES(CURRENT_DATE AS bar), 'cont', 'ent')",
        """SELECT XMLELEMENT(NAME "foo$bar", XMLATTRIBUTES('xyz' AS "a&b"))""",
        "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar), XMLELEMENT(NAME abc), XMLCOMMENT('test'), XMLELEMENT(NAME xyz))",
    )
    for sql in xml_queries:
        self.validate_identity(sql)


def test_analyze(self):
    """Check ANALYZE with an optional column list and options."""
    analyze_statements = (
        "ANALYZE TBL",
        "ANALYZE TBL(col1, col2)",
        "ANALYZE VERBOSE SKIP_LOCKED TBL(col1, col2)",
        "ANALYZE BUFFER_USAGE_LIMIT 1337 TBL",
    )
    for sql in analyze_statements:
        self.validate_identity(sql)
def test_recursive_cte(self):
    """Check the SEARCH ... FIRST BY and CYCLE clauses of recursive CTEs."""
    for kind in ("BREADTH", "DEPTH"):
        search_sql = f"WITH RECURSIVE search_tree(id, link, data) AS (SELECT t.id, t.link, t.data FROM tree AS t UNION ALL SELECT t.id, t.link, t.data FROM tree AS t, search_tree AS st WHERE t.id = st.link) SEARCH {kind} FIRST BY id SET ordercol SELECT * FROM search_tree ORDER BY ordercol"
        self.validate_identity(search_sql)

    cycle_sql = "WITH RECURSIVE search_graph(id, link, data, depth) AS (SELECT g.id, g.link, g.data, 1 FROM graph AS g UNION ALL SELECT g.id, g.link, g.data, sg.depth + 1 FROM graph AS g, search_graph AS sg WHERE g.id = sg.link) CYCLE id SET is_cycle USING path SELECT * FROM search_graph"
    self.validate_identity(cycle_sql)


def test_json_extract(self):
    """Check how -> and ->> are written out for integer and string right operands."""
    for arrow_op in ("->", "->>"):
        int_operand_sql = f"SELECT foo {arrow_op} 1"
        with self.subTest(f"Ensure {arrow_op} operator roundtrips int values as subscripts"):
            self.validate_all(
                int_operand_sql,
                write={
                    "postgres": int_operand_sql,
                    "duckdb": f"SELECT foo {arrow_op} '$[1]'",
                },
            )

        str_operand_sql = f"SELECT foo {arrow_op} '12'"
        with self.subTest(
            f"Ensure {arrow_op} operator roundtrips string values that represent integers as keys"
        ):
            self.validate_all(
                str_operand_sql,
                write={
                    "postgres": str_operand_sql,
                    "clickhouse": "SELECT JSONExtractString(foo, '12')",
                },
            )


def test_udt(self):
    """The target of a CAST to a user-defined type must be an exp.DataType."""
    udt_casts = (
        "CAST(5 AS MyType)",
        'CAST(5 AS "MyType")',
        "CAST(5 AS MySchema.MyType)",
        'CAST(5 AS "MySchema"."MyType")',
        'CAST(5 AS MySchema."MyType")',
        'CAST(5 AS "MyCatalog"."MySchema"."MyType")',
    )
    for sql in udt_casts:
        cast_expr = self.validate_identity(sql)
        cast_expr.to.assert_is(exp.DataType)


def test_round(self):
    """Check ROUND round-trips and the casts expected around its first argument."""
    round_calls = (
        "ROUND(x)",
        "ROUND(x, y)",
        "ROUND(CAST(x AS DOUBLE PRECISION))",
        "ROUND(CAST(x AS DECIMAL), 4)",
        "ROUND(CAST(x AS INT), 4)",
    )
    for sql in round_calls:
        self.validate_identity(sql)

    # The same input text is read from three dialects.
    double_sources = dict.fromkeys(("postgres", "hive", "bigquery"), "ROUND(x::DOUBLE, 4)")
    self.validate_all(
        "ROUND(CAST(CAST(x AS DOUBLE PRECISION) AS DECIMAL), 4)",
        read=double_sources,
    )

    decimal_sources = {"duckdb": "ROUND(x::DECIMAL, 4)"}
    self.validate_all("ROUND(CAST(x AS DECIMAL(18, 3)), 4)", read=decimal_sources)
def test_datatype(self):
    """XML can be built as a DataType and used as a column type."""
    xml_type = exp.DataType.build("XML", dialect="postgres")
    self.assertEqual(xml_type.sql("postgres"), "XML")
    self.validate_identity("CREATE TABLE foo (data XML)")


def test_locks(self):
    """Check each row-level lock clause followed by OF <alias>."""
    lock_clauses = ("FOR SHARE", "FOR UPDATE", "FOR NO KEY UPDATE", "FOR KEY SHARE")
    for key_type in lock_clauses:
        with self.subTest(f"Test lock type {key_type}"):
            self.validate_identity(f"SELECT 1 FROM foo AS x {key_type} OF x")


def test_grant(self):
    """Check GRANT statements, including forms validated with check_command_warning."""
    plain_grants = (
        "GRANT SELECT ON TABLE users TO role1",
        "GRANT INSERT, DELETE ON TABLE orders TO user1",
        "GRANT SELECT ON employees TO manager WITH GRANT OPTION",
        "GRANT USAGE ON SCHEMA finance TO user2",
        "GRANT ALL PRIVILEGES ON DATABASE mydb TO PUBLIC",
        "GRANT CREATE ON SCHEMA public TO developer",
        "GRANT CONNECT ON DATABASE testdb TO readonly_user",
        "GRANT TEMPORARY ON DATABASE testdb TO temp_user",
        "GRANT TRIGGER ON orders TO audit_role",
        "GRANT REFERENCES ON products TO foreign_key_user",
        "GRANT TRUNCATE ON logs TO admin_role",
        "GRANT UPDATE(salary) ON employees TO hr_manager",
        "GRANT SELECT(id, name), UPDATE(email) ON customers TO customer_service",
    )
    for sql in plain_grants:
        with self.subTest(f"Testing PostgreSQL's GRANT statement: {sql}"):
            self.validate_identity(sql)

    # The function name is expected back upper-cased.
    self.validate_identity(
        "GRANT EXECUTE ON FUNCTION calculate_bonus(integer) TO analyst",
        "GRANT EXECUTE ON FUNCTION CALCULATE_BONUS(integer) TO analyst",
    )

    command_grants = (
        "GRANT INSERT, DELETE ON ALL TABLES IN SCHEMA myschema TO user1",
        "GRANT developer_role TO john",
        "GRANT admin_role TO mary WITH ADMIN OPTION",
    )
    for sql in command_grants:
        with self.subTest(f"Testing PostgreSQL's advanced GRANT statement: {sql}"):
            self.validate_identity(sql, check_command_warning=True)


def test_revoke(self):
    """Check REVOKE statements, including forms validated with check_command_warning."""
    plain_revokes = (
        "REVOKE SELECT ON TABLE users FROM role1",
        "REVOKE INSERT, DELETE ON TABLE orders FROM user1",
        "REVOKE USAGE ON SCHEMA finance FROM user2",
        "REVOKE ALL PRIVILEGES ON DATABASE mydb FROM PUBLIC",
        "REVOKE CREATE ON SCHEMA public FROM developer",
        "REVOKE CONNECT ON DATABASE testdb FROM readonly_user",
        "REVOKE TEMPORARY ON DATABASE testdb FROM temp_user",
        "REVOKE TRIGGER ON orders FROM audit_role",
        "REVOKE REFERENCES ON products FROM foreign_key_user",
        "REVOKE TRUNCATE ON logs FROM admin_role",
        "REVOKE USAGE ON SCHEMA finance FROM user2 CASCADE",
        "REVOKE SELECT ON TABLE orders FROM user1 RESTRICT",
        "REVOKE GRANT OPTION FOR SELECT ON employees FROM manager",
        "REVOKE GRANT OPTION FOR SELECT ON employees FROM manager RESTRICT",
        "REVOKE UPDATE(salary) ON employees FROM hr_manager",
        "REVOKE SELECT(id, name), UPDATE(email) ON customers FROM customer_service",
    )
    for sql in plain_revokes:
        with self.subTest(f"Testing PostgreSQL's REVOKE statement: {sql}"):
            self.validate_identity(sql)

    # The function name is expected back upper-cased.
    self.validate_identity(
        "REVOKE EXECUTE ON FUNCTION calculate_bonus(integer) FROM analyst",
        "REVOKE EXECUTE ON FUNCTION CALCULATE_BONUS(integer) FROM analyst",
    )

    command_revokes = (
        "REVOKE INSERT, DELETE ON ALL TABLES IN SCHEMA myschema FROM user1",
        "REVOKE developer_role FROM john",
        "REVOKE admin_role FROM mary",
    )
    for sql in command_revokes:
        with self.subTest(f"Testing PostgreSQL's advanced REVOKE statement: {sql}"):
            self.validate_identity(sql, check_command_warning=True)
def test_begin_transaction(self):
    """Check BEGIN for other dialects and BEGIN TRANSACTION/WORK with transaction modes.

    For every keyword/mode pair the input repeats the mode twice, separated by a
    comma; the expected output drops the TRANSACTION/WORK keyword and the parsed
    node must be an ``exp.Transaction``.
    """
    self.validate_all(
        "BEGIN",
        write={
            "postgres": "BEGIN",
            "presto": "START TRANSACTION",
            "trino": "START TRANSACTION",
        },
    )

    for keyword in ("TRANSACTION", "WORK"):
        for level in (
            "ISOLATION LEVEL SERIALIZABLE",
            "ISOLATION LEVEL READ COMMITTED",
            # Spelled as in the PostgreSQL grammar ([ NOT ] DEFERRABLE); the
            # previous "NOT DEFFERABLE" exercised a keyword that does not exist.
            "NOT DEFERRABLE",
            "READ WRITE",
            "DEFERRABLE",
        ):
            with self.subTest(f"Testing Postgres's BEGIN {keyword} {level}"):
                self.validate_identity(
                    f"BEGIN {keyword} {level}, {level}", f"BEGIN {level}, {level}"
                ).assert_is(exp.Transaction)


def test_interval_span(self):
    """Check INTERVAL literals holding a day-time span with no explicit unit."""
    for time_str in ["1 01:", "1 01:00", "1.5 01:", "-0.25 01:"]:
        with self.subTest(f"Postgres INTERVAL span, omitted DAY TO MINUTE unit: {time_str}"):
            self.validate_identity(f"INTERVAL '{time_str}'")

    # "1 01:01:" used to be listed twice here; the duplicate added no coverage.
    for time_str in [
        "1 01:01:",
        "1 01:01:01",
        "1 01:01:01.01",
        "1.5 01:01:",
        "-0.25 01:01:",
    ]:
        with self.subTest(f"Postgres INTERVAL span, omitted DAY TO SECOND unit: {time_str}"):
            self.validate_identity(f"INTERVAL '{time_str}'")

    # Ensure AND is not consumed as a unit following an omitted-span interval
    with self.subTest("Postgres INTERVAL span, omitted unit with following AND"):
        day_time_str = "a > INTERVAL '1 00:00' AND TRUE"
        # The expected output equals the input, so no second argument is needed.
        self.validate_identity(day_time_str)
        self.assertIsInstance(self.parse_one(day_time_str), exp.And)
with self.subTest(f"Postgres INTERVAL span, omitted DAY TO MINUTE unit: {time_str}"): self.validate_identity(f"INTERVAL '{time_str}'") for time_str in [ "1 01:01:", "1 01:01:", "1 01:01:01", "1 01:01:01.01", "1.5 01:01:", "-0.25 01:01:", ]: with self.subTest(f"Postgres INTERVAL span, omitted DAY TO SECOND unit: {time_str}"): self.validate_identity(f"INTERVAL '{time_str}'") # Ensure AND is not consumed as a unit following an omitted-span interval with self.subTest("Postgres INTERVAL span, omitted unit with following AND"): day_time_str = "a > INTERVAL '1 00:00' AND TRUE" self.validate_identity(day_time_str, "a > INTERVAL '1 00:00' AND TRUE") self.assertIsInstance(self.parse_one(day_time_str), exp.And) def test_postgres_create_trigger(self): basic_triggers = [ "CREATE TRIGGER check_update BEFORE UPDATE ON accounts FOR EACH ROW EXECUTE FUNCTION CHECK_ACCOUNT_UPDATE()", "CREATE TRIGGER log_insert AFTER INSERT ON users FOR EACH ROW EXECUTE FUNCTION LOG_CHANGES()", "CREATE TRIGGER audit_changes AFTER INSERT OR UPDATE OR DELETE ON products FOR EACH ROW EXECUTE FUNCTION AUDIT_LOG()", "CREATE TRIGGER check_balance BEFORE UPDATE OF balance, status ON accounts FOR EACH ROW EXECUTE FUNCTION VALIDATE_BALANCE()", "CREATE TRIGGER conditional_trigger BEFORE UPDATE ON users FOR EACH ROW WHEN (OLD.id <> NEW.id) EXECUTE FUNCTION CHECK_ID_CHANGE()", "CREATE TRIGGER statement_trigger AFTER INSERT ON orders FOR EACH STATEMENT EXECUTE FUNCTION UPDATE_SUMMARY()", "CREATE TRIGGER instead_trigger INSTEAD OF INSERT ON user_view FOR EACH ROW EXECUTE FUNCTION HANDLE_INSERT()", "CREATE OR REPLACE TRIGGER replace_trigger BEFORE INSERT ON users FOR EACH ROW EXECUTE FUNCTION LOG_INSERT()", "CREATE TRIGGER param_trigger BEFORE INSERT ON users FOR EACH ROW EXECUTE FUNCTION LOG_WITH_PARAMS('insert', 'users')", "CREATE TRIGGER my_trigger BEFORE INSERT ON myschema.users FOR EACH ROW EXECUTE FUNCTION LOG_CHANGES()", "CREATE TRIGGER truncate_trigger BEFORE TRUNCATE ON users FOR EACH STATEMENT EXECUTE 
FUNCTION LOG_TRUNCATE()", "CREATE TRIGGER complex_when BEFORE UPDATE ON accounts FOR EACH ROW WHEN (OLD.balance IS DISTINCT FROM NEW.balance AND NEW.balance > 0) EXECUTE FUNCTION CHECK_BALANCE()", "CREATE TRIGGER emp_stamp BEFORE INSERT OR UPDATE ON emp FOR EACH ROW EXECUTE FUNCTION EMP_STAMP()", "CREATE TRIGGER view_insert INSTEAD OF INSERT ON my_view FOR EACH ROW EXECUTE FUNCTION VIEW_INSERT_ROW()", "CREATE TRIGGER check_update BEFORE UPDATE OF balance ON accounts FOR EACH ROW EXECUTE FUNCTION CHECK_ACCOUNT_UPDATE()", "CREATE TRIGGER restock AFTER UPDATE ON products FOR EACH ROW WHEN (OLD.count <> NEW.count) EXECUTE FUNCTION RESTOCK_ITEM()", "CREATE TRIGGER multi_col_update BEFORE UPDATE OF col1, col2, col3, col4 ON accounts FOR EACH ROW EXECUTE FUNCTION CHECK_COLUMNS()", "CREATE TRIGGER all_events AFTER INSERT OR UPDATE OR DELETE OR TRUNCATE ON audit_table FOR EACH STATEMENT EXECUTE FUNCTION LOG_ALL_CHANGES()", ] referencing_triggers = [ "CREATE TRIGGER track_new_rows AFTER INSERT ON users REFERENCING NEW TABLE AS new_data FOR EACH STATEMENT EXECUTE FUNCTION PROCESS_NEW_USERS()", "CREATE TRIGGER track_changes AFTER UPDATE ON accounts REFERENCING OLD TABLE AS old_data NEW TABLE AS new_data FOR EACH STATEMENT EXECUTE FUNCTION COMPARE_CHANGES()", "CREATE TRIGGER statistics_update AFTER UPDATE ON sales REFERENCING OLD TABLE AS old_sales NEW TABLE AS new_sales FOR EACH STATEMENT EXECUTE FUNCTION UPDATE_STATISTICS()", ] constraint_triggers = [ "CREATE CONSTRAINT TRIGGER check_integrity AFTER INSERT ON users FOR EACH ROW EXECUTE FUNCTION VALIDATE_USER()", "CREATE CONSTRAINT TRIGGER deferred_check AFTER INSERT ON orders DEFERRABLE FOR EACH ROW EXECUTE FUNCTION CHECK_ORDER()", "CREATE CONSTRAINT TRIGGER deferred_check AFTER INSERT ON orders DEFERRABLE INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION CHECK_ORDER()", "CREATE CONSTRAINT TRIGGER immediate_check AFTER INSERT ON orders NOT DEFERRABLE INITIALLY IMMEDIATE FOR EACH ROW EXECUTE FUNCTION CHECK_ORDER()", "CREATE 
def test_cast(self):
    """Check Presto CAST handling plus a few command/identity round-trips.

    The first statements cover commands that are only expected to round-trip
    (optionally with a command warning) and Presto-specific type names.  The
    ``validate_all`` calls then pin how casts to scalar, interval, array, map
    and time-zone-aware types are read from / written to other dialects.

    NOTE(review): several expected strings for bigquery, hive and spark had
    lost their ``<...>`` type parameters (e.g. ``ARRAY``, ``MAP`` and the
    unbalanced ``MAP>``).  They are restored below from the Presto input
    types; confirm against the generators' actual output.
    """
    self.validate_identity("DEALLOCATE PREPARE my_query", check_command_warning=True)
    self.validate_identity("DESCRIBE INPUT x", check_command_warning=True)
    self.validate_identity("DESCRIBE OUTPUT x", check_command_warning=True)
    self.validate_identity("SELECT * FROM x qualify", "SELECT * FROM x AS qualify")
    self.validate_identity("CAST(x AS IPADDRESS)")
    self.validate_identity("CAST(x AS IPPREFIX)")
    self.validate_identity("CAST(TDIGEST_AGG(1) AS TDIGEST)")
    self.validate_identity("CAST(x AS HYPERLOGLOG)")
    self.validate_identity(
        "RESET SESSION hive.optimized_reader_enabled", check_command_warning=True
    )
    self.validate_identity(
        "TIMESTAMP '2025-06-20 11:22:29 Europe/Prague'",
        "CAST('2025-06-20 11:22:29 Europe/Prague' AS TIMESTAMP WITH TIME ZONE)",
    )

    self.validate_all(
        "CAST(x AS BOOLEAN)",
        read={
            "tsql": "CAST(x AS BIT)",
        },
        write={
            "presto": "CAST(x AS BOOLEAN)",
            "tsql": "CAST(x AS BIT)",
        },
    )
    self.validate_all(
        "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
        write={
            "duckdb": "SELECT CAST('2020-05-11T11:15:05' AS TIMESTAMPTZ)",
            "presto": "SELECT FROM_ISO8601_TIMESTAMP('2020-05-11T11:15:05')",
        },
    )
    self.validate_all(
        "CAST(x AS INTERVAL YEAR TO MONTH)",
        write={
            "oracle": "CAST(x AS INTERVAL YEAR TO MONTH)",
            "presto": "CAST(x AS INTERVAL YEAR TO MONTH)",
        },
    )
    self.validate_all(
        "CAST(x AS INTERVAL DAY TO SECOND)",
        write={
            "oracle": "CAST(x AS INTERVAL DAY TO SECOND)",
            "presto": "CAST(x AS INTERVAL DAY TO SECOND)",
        },
    )
    self.validate_all(
        "SELECT CAST('10C' AS INTEGER)",
        read={
            "postgres": "SELECT CAST('10C' AS INTEGER)",
            "presto": "SELECT CAST('10C' AS INTEGER)",
            "redshift": "SELECT CAST('10C' AS INTEGER)",
        },
    )
    self.validate_all(
        "SELECT CAST('1970-01-01 00:00:00' AS TIMESTAMP)",
        read={"postgres": "SELECT 'epoch'::TIMESTAMP"},
    )
    self.validate_all(
        "FROM_BASE64(x)",
        read={
            "hive": "UNBASE64(x)",
        },
        write={
            "hive": "UNBASE64(x)",
            "presto": "FROM_BASE64(x)",
        },
    )
    self.validate_all(
        "TO_BASE64(x)",
        read={
            "hive": "BASE64(x)",
        },
        write={
            "hive": "BASE64(x)",
            "presto": "TO_BASE64(x)",
        },
    )
    self.validate_all(
        "CAST(a AS ARRAY(INT))",
        write={
            # Element types restored: bigquery/spark spell arrays as ARRAY<T>.
            "bigquery": "CAST(a AS ARRAY<INT64>)",
            "duckdb": "CAST(a AS INT[])",
            "presto": "CAST(a AS ARRAY(INTEGER))",
            "spark": "CAST(a AS ARRAY<INT>)",
            "snowflake": "CAST(a AS ARRAY(INT))",
        },
    )
    self.validate_all(
        "CAST(a AS VARCHAR)",
        write={
            "bigquery": "CAST(a AS STRING)",
            "duckdb": "CAST(a AS TEXT)",
            "presto": "CAST(a AS VARCHAR)",
            "spark": "CAST(a AS STRING)",
        },
    )
    self.validate_all(
        "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
        write={
            # Restored typed array literal (was the untyped "ARRAY[1, 2]").
            "bigquery": "ARRAY<INT64>[1, 2]",
            "duckdb": "CAST([1, 2] AS BIGINT[])",
            "presto": "CAST(ARRAY[1, 2] AS ARRAY(BIGINT))",
            "spark": "CAST(ARRAY(1, 2) AS ARRAY<BIGINT>)",
            "snowflake": "CAST([1, 2] AS ARRAY(BIGINT))",
        },
    )
    self.validate_all(
        "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INT))",
        write={
            "duckdb": "CAST(MAP(['key'], [1]) AS MAP(TEXT, INT))",
            "presto": "CAST(MAP(ARRAY['key'], ARRAY[1]) AS MAP(VARCHAR, INTEGER))",
            # Key/value types restored for the angle-bracket dialects.
            "hive": "CAST(MAP('key', 1) AS MAP<STRING, INT>)",
            "snowflake": "CAST(OBJECT_CONSTRUCT('key', 1) AS MAP(VARCHAR, INT))",
            "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('key'), ARRAY(1)) AS MAP<STRING, INT>)",
        },
    )
    self.validate_all(
        "CAST(MAP(ARRAY['a','b','c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INT)))",
        write={
            # The previous expectations ended in an unbalanced "MAP>".
            "bigquery": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP<STRING, ARRAY<INT64>>)",
            "duckdb": "CAST(MAP(['a', 'b', 'c'], [[1], [2], [3]]) AS MAP(TEXT, INT[]))",
            "presto": "CAST(MAP(ARRAY['a', 'b', 'c'], ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]]) AS MAP(VARCHAR, ARRAY(INTEGER)))",
            "hive": "CAST(MAP('a', ARRAY(1), 'b', ARRAY(2), 'c', ARRAY(3)) AS MAP<STRING, ARRAY<INT>>)",
            "spark": "CAST(MAP_FROM_ARRAYS(ARRAY('a', 'b', 'c'), ARRAY(ARRAY(1), ARRAY(2), ARRAY(3))) AS MAP<STRING, ARRAY<INT>>)",
            "snowflake": "CAST(OBJECT_CONSTRUCT('a', [1], 'b', [2], 'c', [3]) AS MAP(VARCHAR, ARRAY(INT)))",
        },
    )
    self.validate_all(
        "CAST(x AS TIME(5) WITH TIME ZONE)",
        write={
            "duckdb": "CAST(x AS TIMETZ)",
            "postgres": "CAST(x AS TIMETZ(5))",
            "presto": "CAST(x AS TIME(5) WITH TIME ZONE)",
            "redshift": "CAST(x AS TIME(5) WITH TIME ZONE)",
        },
    )
    self.validate_all(
        "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)",
        write={
            "bigquery": "CAST(x AS TIMESTAMP)",
            "duckdb": "CAST(x AS TIMESTAMPTZ)",
            "presto": "CAST(x AS TIMESTAMP(9) WITH TIME ZONE)",
            "hive": "CAST(x AS TIMESTAMP)",
            "spark": "CAST(x AS TIMESTAMP)",
        },
    )
"REPLACE(subject, pattern)", write={ "bigquery": "REPLACE(subject, pattern, '')", "duckdb": "REPLACE(subject, pattern, '')", "hive": "REPLACE(subject, pattern, '')", "snowflake": "REPLACE(subject, pattern, '')", "spark": "REPLACE(subject, pattern, '')", "presto": "REPLACE(subject, pattern, '')", }, ) self.validate_all( "REPLACE(subject, pattern, replacement)", read={ "bigquery": "REPLACE(subject, pattern, replacement)", "duckdb": "REPLACE(subject, pattern, replacement)", "hive": "REPLACE(subject, pattern, replacement)", "spark": "REPLACE(subject, pattern, replacement)", "presto": "REPLACE(subject, pattern, replacement)", }, write={ "bigquery": "REPLACE(subject, pattern, replacement)", "duckdb": "REPLACE(subject, pattern, replacement)", "hive": "REPLACE(subject, pattern, replacement)", "snowflake": "REPLACE(subject, pattern, replacement)", "spark": "REPLACE(subject, pattern, replacement)", "presto": "REPLACE(subject, pattern, replacement)", }, ) def test_regex(self): self.validate_all( "REGEXP_REPLACE('abcd', '[ab]')", write={ "presto": "REGEXP_REPLACE('abcd', '[ab]', '')", "spark": "REGEXP_REPLACE('abcd', '[ab]', '')", }, ) self.validate_all( "REGEXP_LIKE(a, 'x')", write={ "duckdb": "REGEXP_MATCHES(a, 'x')", "presto": "REGEXP_LIKE(a, 'x')", "hive": "a RLIKE 'x'", "spark": "a RLIKE 'x'", }, ) self.validate_all( "SPLIT(x, 'a.')", write={ "duckdb": "STR_SPLIT(x, 'a.')", "presto": "SPLIT(x, 'a.')", "hive": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))", "spark": "SPLIT(x, CONCAT('\\\\Q', 'a.', '\\\\E'))", }, ) self.validate_all( "REGEXP_SPLIT(x, 'a.')", write={ "duckdb": "STR_SPLIT_REGEX(x, 'a.')", "presto": "REGEXP_SPLIT(x, 'a.')", "hive": "SPLIT(x, 'a.')", "spark": "SPLIT(x, 'a.')", }, ) self.validate_all( "CARDINALITY(x)", write={ "duckdb": "ARRAY_LENGTH(x)", "presto": "CARDINALITY(x)", "hive": "SIZE(x)", "spark": "SIZE(x)", }, ) self.validate_all( "ARRAY_JOIN(x, '-', 'a')", write={ "hive": "CONCAT_WS('-', x)", "spark": "ARRAY_JOIN(x, '-', 'a')", }, ) self.validate_all( 
def test_interval_plural_to_singular(self):
    """Check that plural (and mixed-case) INTERVAL units are written singular.

    For every (input unit, expected unit) pair the statement
    ``SELECT INTERVAL '1' <unit>`` is validated against the same expected
    SQL for bigquery, presto, trino, mysql and doris.
    """
    # Microseconds, weeks and quarters are not supported in Presto/Trino INTERVAL literals
    plural_singular_pairs = (
        ("SeCoNds", "SECOND"),
        ("minutes", "MINUTE"),
        ("hours", "HOUR"),
        ("days", "DAY"),
        ("months", "MONTH"),
        ("years", "YEAR"),
    )
    target_dialects = ("bigquery", "presto", "trino", "mysql", "doris")

    for plural, singular in plural_singular_pairs:
        expected_sql = f"SELECT INTERVAL '1' {singular}"
        self.validate_all(
            f"SELECT INTERVAL '1' {plural}",
            # Every target dialect shares the same expected output.
            write=dict.fromkeys(target_dialects, expected_sql),
        )
"spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd HH:mm:ss')", }, ) self.validate_all( "DATE_PARSE(x, '%Y-%m-%d')", write={ "duckdb": "STRPTIME(x, '%Y-%m-%d')", "presto": "DATE_PARSE(x, '%Y-%m-%d')", "hive": "CAST(x AS TIMESTAMP)", "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd')", }, ) self.validate_all( "DATE_FORMAT(x, '%T')", write={ "hive": "DATE_FORMAT(x, 'HH:mm:ss')", }, ) self.validate_all( "DATE_PARSE(SUBSTR(x, 1, 10), '%Y-%m-%d')", write={ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')", "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')", "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)", "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')", }, ) self.validate_all( "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')", write={ "duckdb": "STRPTIME(SUBSTRING(x, 1, 10), '%Y-%m-%d')", "presto": "DATE_PARSE(SUBSTRING(x, 1, 10), '%Y-%m-%d')", "hive": "CAST(SUBSTRING(x, 1, 10) AS TIMESTAMP)", "spark": "TO_TIMESTAMP(SUBSTRING(x, 1, 10), 'yyyy-MM-dd')", }, ) self.validate_all( "FROM_UNIXTIME(x)", write={ "duckdb": "TO_TIMESTAMP(x)", "presto": "FROM_UNIXTIME(x)", "hive": "FROM_UNIXTIME(x)", "spark": "CAST(FROM_UNIXTIME(x) AS TIMESTAMP)", }, ) self.validate_all( "TO_UNIXTIME(x)", write={ "duckdb": "EPOCH(x)", "presto": "TO_UNIXTIME(x)", "hive": "UNIX_TIMESTAMP(x)", "spark": "UNIX_TIMESTAMP(x)", }, ) self.validate_all( "DATE_ADD('DAY', 1, x)", write={ "duckdb": "x + INTERVAL 1 DAY", "presto": "DATE_ADD('DAY', 1, x)", "hive": "DATE_ADD(x, 1)", "spark": "DATE_ADD(x, 1)", }, ) self.validate_all( "DATE_ADD('DAY', 1 * -1, x)", write={ "presto": "DATE_ADD('DAY', 1 * -1, x)", }, ) self.validate_all( "NOW()", write={ "presto": "CURRENT_TIMESTAMP", "hive": "CURRENT_TIMESTAMP()", }, ) self.validate_all( "SELECT DATE_ADD('DAY', 1, CAST(CURRENT_DATE AS TIMESTAMP))", read={ "redshift": "SELECT DATEADD(DAY, 1, CURRENT_DATE)", }, ) self.validate_all( "((DAY_OF_WEEK(CAST(CAST(TRY_CAST('2012-08-08 01:00:00' AS TIMESTAMP WITH TIME ZONE) AS TIMESTAMP) AS DATE)) % 7) + 1)", read={ "spark": 
"DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, ) self.validate_all( "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", read={ "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, write={ "spark": "((DAYOFWEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP)) % 7) + 1)", "presto": "DAY_OF_WEEK(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "duckdb": "ISODOW(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, ) self.validate_all( "DAY_OF_MONTH(timestamp '2012-08-08 01:00:00')", write={ "spark": "DAYOFMONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "presto": "DAY_OF_MONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "duckdb": "DAYOFMONTH(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, ) self.validate_all( "DAY_OF_YEAR(timestamp '2012-08-08 01:00:00')", write={ "spark": "DAYOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "presto": "DAY_OF_YEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "duckdb": "DAYOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, ) self.validate_all( "WEEK_OF_YEAR(timestamp '2012-08-08 01:00:00')", write={ "spark": "WEEKOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "presto": "WEEK_OF_YEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", "duckdb": "WEEKOFYEAR(CAST('2012-08-08 01:00:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT CAST('2012-10-31 00:00' AS TIMESTAMP) AT TIME ZONE 'America/Sao_Paulo'", write={ "spark": "SELECT FROM_UTC_TIMESTAMP(CAST('2012-10-31 00:00' AS TIMESTAMP), 'America/Sao_Paulo')", "presto": "SELECT AT_TIMEZONE(CAST('2012-10-31 00:00' AS TIMESTAMP), 'America/Sao_Paulo')", }, ) self.validate_all( "SELECT AT_TIMEZONE(CAST('2012-10-31 00:00' AS TIMESTAMP WITH TIME ZONE), 'America/Sao_Paulo')", read={ "spark": "SELECT FROM_UTC_TIMESTAMP(TIMESTAMP '2012-10-31 00:00', 'America/Sao_Paulo')", }, ) self.validate_all( "CAST(x AS TIMESTAMP)", write={"presto": "CAST(x AS TIMESTAMP)"}, read={"mysql": "CAST(x AS DATETIME)", "clickhouse": "CAST(x AS DATETIME64)"}, ) self.validate_all( "CAST(x AS TIMESTAMP)", 
read={"mysql": "TIMESTAMP(x)"}, ) # this case isn't really correct, but it's a fall back for mysql's version self.validate_all( "TIMESTAMP(x, '12:00:00')", write={ "duckdb": "TIMESTAMP(x, '12:00:00')", "presto": "TIMESTAMP(x, '12:00:00')", }, ) self.validate_all( "DATE_ADD('DAY', CAST(x AS BIGINT), y)", write={ "presto": "DATE_ADD('DAY', CAST(x AS BIGINT), y)", }, read={ "presto": "DATE_ADD('DAY', x, y)", }, ) self.validate_identity("DATE_ADD('DAY', 1, y)") self.validate_all( "SELECT DATE_ADD('MINUTE', 30, col)", write={ "presto": "SELECT DATE_ADD('MINUTE', 30, col)", "trino": "SELECT DATE_ADD('MINUTE', 30, col)", }, ) self.validate_identity("DATE_ADD('DAY', FLOOR(5), y)") self.validate_identity( """SELECT DATE_ADD('DAY', MOD(5, 2.5), y), DATE_ADD('DAY', CEIL(5.5), y)""", """SELECT DATE_ADD('DAY', CAST(5 % 2.5 AS BIGINT), y), DATE_ADD('DAY', CAST(CEIL(5.5) AS BIGINT), y)""", ) self.validate_all( "DATE_ADD('MINUTE', CAST(FLOOR(CAST(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP) AS DOUBLE) / NULLIF(30, 0)) * 30 AS BIGINT), col)", read={ "spark": "TIMESTAMPADD(MINUTE, FLOOR(EXTRACT(MINUTE FROM CURRENT_TIMESTAMP)/30)*30, col)", }, ) self.validate_all( "SELECT WEEK_OF_YEAR(y)", read={ "presto": "SELECT WEEK(y)", }, write={ "spark": "SELECT WEEKOFYEAR(y)", "presto": "SELECT WEEK_OF_YEAR(y)", "trino": "SELECT WEEK_OF_YEAR(y)", }, ) def test_ddl(self): self.validate_all( "CREATE TABLE test WITH (FORMAT = 'PARQUET') AS SELECT 1", write={ "duckdb": "CREATE TABLE test AS SELECT 1", "presto": "CREATE TABLE test WITH (format='PARQUET') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET AS SELECT 1", }, ) self.validate_all( "CREATE TABLE test STORED AS 'PARQUET' AS SELECT 1", write={ "duckdb": "CREATE TABLE test AS SELECT 1", "presto": "CREATE TABLE test WITH (format='PARQUET') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "spark": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", }, ) 
self.validate_all( "CREATE TABLE test WITH (FORMAT = 'PARQUET', X = '1', Z = '2') AS SELECT 1", write={ "duckdb": "CREATE TABLE test AS SELECT 1", "presto": "CREATE TABLE test WITH (format='PARQUET', X='1', Z='2') AS SELECT 1", "hive": "CREATE TABLE test STORED AS PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", "spark": "CREATE TABLE test USING PARQUET TBLPROPERTIES ('X'='1', 'Z'='2') AS SELECT 1", }, ) self.validate_all( "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", write={ "duckdb": "CREATE TABLE x (w TEXT, y INT, z INT)", "presto": "CREATE TABLE x (w VARCHAR, y INTEGER, z INTEGER) WITH (PARTITIONED_BY=ARRAY['y', 'z'])", "hive": "CREATE TABLE x (w STRING) PARTITIONED BY (y INT, z INT)", "spark": "CREATE TABLE x (w STRING, y INT, z INT) PARTITIONED BY (y, z)", }, ) self.validate_all( "CREATE TABLE x WITH (bucket_by = ARRAY['y'], bucket_count = 64) AS SELECT 1 AS y", write={ "duckdb": "CREATE TABLE x AS SELECT 1 AS y", "presto": "CREATE TABLE x WITH (bucket_by=ARRAY['y'], bucket_count=64) AS SELECT 1 AS y", "hive": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", "spark": "CREATE TABLE x TBLPROPERTIES ('bucket_by'=ARRAY('y'), 'bucket_count'=64) AS SELECT 1 AS y", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", write={ "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b VARCHAR))", "hive": "CREATE TABLE db.example_table (col_a STRUCT)", "spark": "CREATE TABLE db.example_table (col_a STRUCT)", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", write={ "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b 
def test_quotes(self):
    """Check how embedded single quotes are written per dialect.

    Each case pairs the input (also the expected duckdb/presto output, with
    doubled quotes) with the expected hive/spark output (backslash-escaped).
    """
    quote_cases = (
        ("''''", "'\\''"),
        ("'x'", "'x'"),
        ("'''x'''", "'\\'x\\''"),
        ("'''x'", "'\\'x'"),
        ("x IN ('a', 'a''b')", "x IN ('a', 'a\\'b')"),
    )

    for doubled, backslashed in quote_cases:
        self.validate_all(
            doubled,
            write={
                "duckdb": doubled,
                "presto": doubled,
                "hive": backslashed,
                "spark": backslashed,
            },
        )


def test_unnest(self):
    """Check CROSS JOIN UNNEST output for presto, hive and spark."""
    lateral_view = "SELECT a FROM x LATERAL VIEW EXPLODE(ARRAY(y)) t AS a"
    single_join_expected = {
        "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a)",
        "hive": lateral_view,
        "spark": lateral_view,
    }
    self.validate_all(
        "SELECT a FROM x CROSS JOIN UNNEST(ARRAY(y)) AS t (a)",
        write=single_join_expected,
    )

    # Same query followed by a second CROSS JOIN; only presto and hive are checked.
    trailing_join_expected = {
        "presto": "SELECT a FROM x CROSS JOIN UNNEST(ARRAY[y]) AS t(a) CROSS JOIN b",
        "hive": "SELECT a FROM x CROSS JOIN b LATERAL VIEW EXPLODE(ARRAY(y)) t AS a",
    }
    self.validate_all(
        "SELECT a FROM x CROSS JOIN UNNEST(ARRAY(y)) AS t (a) CROSS JOIN b",
        write=trailing_join_expected,
    )


def test_unicode_string(self):
    """Check U&'...' unicode strings, with and without a UESCAPE clause.

    Both the lower- and upper-case prefix are exercised; each prefix is
    validated first with the default backslash escape and then with a custom
    ``#`` escape character.
    """
    oracle_sql = "U'Hello winter \\2603 !'"
    escaped_sql = "'Hello winter \\u2603 !'"

    # (text following the prefix, expected presto output)
    variants = (
        ("'Hello winter \\2603 !'", "U&'Hello winter \\2603 !'"),
        ("'Hello winter #2603 !' UESCAPE '#'", "U&'Hello winter #2603 !' UESCAPE '#'"),
    )

    for prefix in ("u&", "U&"):
        for literal, presto_sql in variants:
            self.validate_all(
                f"{prefix}{literal}",
                write={
                    "oracle": oracle_sql,
                    "presto": presto_sql,
                    "snowflake": escaped_sql,
                    "spark": escaped_sql,
                },
            )
self.validate_identity( "SELECT * FROM example.testdb.customer_orders FOR VERSION AS OF 8954597067493422955" ) self.validate_identity( "SELECT * FROM example.testdb.customer_orders FOR TIMESTAMP AS OF CAST('2022-03-23 09:59:29.803 Europe/Vienna' AS TIMESTAMP)" ) self.validate_identity( "SELECT origin_state, destination_state, origin_zip, SUM(package_weight) FROM shipping GROUP BY ALL CUBE (origin_state, destination_state), ROLLUP (origin_state, origin_zip)" ) self.validate_identity( "SELECT origin_state, destination_state, origin_zip, SUM(package_weight) FROM shipping GROUP BY DISTINCT CUBE (origin_state, destination_state), ROLLUP (origin_state, origin_zip)" ) self.validate_identity( "SELECT JSON_EXTRACT_SCALAR(CAST(extra AS JSON), '$.value_b'), COUNT(*) FROM table_a GROUP BY DISTINCT (JSON_EXTRACT_SCALAR(CAST(extra AS JSON), '$.value_b'))" ) self.validate_all( "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", read={ "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", }, write={ "duckdb": "SELECT LAST_DAY(CAST('2008-11-25' AS DATE))", "presto": "SELECT LAST_DAY_OF_MONTH(CAST('2008-11-25' AS DATE))", }, ) self.validate_all( "SELECT MAX_BY(a.id, a.timestamp) FROM a", read={ "bigquery": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "clickhouse": "SELECT argMax(a.id, a.timestamp) FROM a", "duckdb": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "snowflake": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "spark": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "teradata": "SELECT MAX_BY(a.id, a.timestamp) FROM a", }, write={ "bigquery": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "clickhouse": "SELECT argMax(a.id, a.timestamp) FROM a", "duckdb": "SELECT ARG_MAX(a.id, a.timestamp) FROM a", "presto": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "snowflake": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "spark": "SELECT MAX_BY(a.id, a.timestamp) FROM a", "teradata": "SELECT MAX_BY(a.id, a.timestamp) FROM a", }, ) self.validate_all( "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a", 
write={ "clickhouse": "SELECT argMin(a.id, a.timestamp) FROM a", "duckdb": "SELECT ARG_MIN(a.id, a.timestamp, 3) FROM a", "presto": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a", "snowflake": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a", "spark": "SELECT MIN_BY(a.id, a.timestamp) FROM a", "teradata": "SELECT MIN_BY(a.id, a.timestamp, 3) FROM a", }, ) self.validate_all( """JSON '"foo"'""", write={ "bigquery": """PARSE_JSON('"foo"')""", "postgres": """CAST('"foo"' AS JSON)""", "presto": """JSON_PARSE('"foo"')""", "snowflake": """PARSE_JSON('"foo"')""", }, ) self.validate_all( "SELECT ROW(1, 2)", write={ "presto": "SELECT ROW(1, 2)", "spark": "SELECT STRUCT(1, 2)", }, ) self.validate_all( "ARBITRARY(x)", read={ "bigquery": "ANY_VALUE(x)", "clickhouse": "any(x)", "databricks": "ANY_VALUE(x)", "doris": "ANY_VALUE(x)", "drill": "ANY_VALUE(x)", "hive": "FIRST(x)", "mysql": "ANY_VALUE(x)", "oracle": "ANY_VALUE(x)", "redshift": "ANY_VALUE(x)", "snowflake": "ANY_VALUE(x)", "spark": "ANY_VALUE(x)", "spark2": "FIRST(x)", }, write={ "bigquery": "ANY_VALUE(x)", "clickhouse": "any(x)", "databricks": "ANY_VALUE(x)", "doris": "ANY_VALUE(x)", "drill": "ANY_VALUE(x)", "duckdb": "ANY_VALUE(x)", "hive": "FIRST(x)", "mysql": "ANY_VALUE(x)", "oracle": "ANY_VALUE(x)", "postgres": "ANY_VALUE(x)", "presto": "ARBITRARY(x)", "redshift": "ANY_VALUE(x)", "snowflake": "ANY_VALUE(x)", "spark": "ANY_VALUE(x)", "spark2": "FIRST(x)", "sqlite": "MAX(x)", "tsql": "MAX(x)", }, ) self.validate_all( "STARTS_WITH('abc', 'a')", read={"spark": "STARTSWITH('abc', 'a')"}, write={ "presto": "STARTS_WITH('abc', 'a')", "snowflake": "STARTSWITH('abc', 'a')", "spark": "STARTSWITH('abc', 'a')", }, ) self.validate_all( "IS_NAN(x)", read={ "spark": "ISNAN(x)", }, write={ "presto": "IS_NAN(x)", "spark": "ISNAN(x)", "spark2": "ISNAN(x)", }, ) self.validate_all("VALUES 1, 2, 3", write={"presto": "VALUES (1), (2), (3)"}) self.validate_all("INTERVAL '1 day'", write={"trino": "INTERVAL '1' DAY"}) self.validate_all("(5 * 
INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEK"}) self.validate_all("(5 * INTERVAL '7' DAY)", read={"": "INTERVAL '5' WEEKS"}) self.validate_all( "SELECT SUBSTRING(a, 1, 3), SUBSTRING(a, LENGTH(a) - (3 - 1))", read={ "redshift": "SELECT LEFT(a, 3), RIGHT(a, 3)", }, ) self.validate_all( "WITH RECURSIVE t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t", read={ "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) FROM t", }, ) self.validate_all( "WITH RECURSIVE t(n, k) AS (SELECT 1 AS n, 2 AS k) SELECT SUM(n) FROM t", read={ "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, 2 as k) SELECT SUM(n) FROM t", }, ) self.validate_all( "WITH RECURSIVE t1(n) AS (SELECT 1 AS n), t2(n) AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2", read={ "postgres": "WITH RECURSIVE t1 AS (SELECT 1 AS n), t2 AS (SELECT 2 AS n) SELECT SUM(t1.n), SUM(t2.n) FROM t1, t2", }, ) self.validate_all( "WITH RECURSIVE t(n, _c_0) AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t", read={ "postgres": "WITH RECURSIVE t AS (SELECT 1 AS n, (1 + 2)) SELECT * FROM t", }, ) self.validate_all( 'WITH RECURSIVE t(n, "1") AS (SELECT n, 1 FROM tbl) SELECT * FROM t', read={ "postgres": "WITH RECURSIVE t AS (SELECT n, 1 FROM tbl) SELECT * FROM t", }, ) self.validate_all( "SELECT JSON_OBJECT(KEY 'key1' VALUE 1, KEY 'key2' VALUE TRUE)", write={ "presto": "SELECT JSON_OBJECT('key1': 1, 'key2': TRUE)", }, ) self.validate_all( "ARRAY_AGG(x ORDER BY y DESC)", write={ "hive": "COLLECT_LIST(x)", "presto": "ARRAY_AGG(x ORDER BY y DESC)", "spark": "COLLECT_LIST(x)", "trino": "ARRAY_AGG(x ORDER BY y DESC)", }, ) self.validate_all( 'SELECT a."b" FROM "foo"', write={ "duckdb": 'SELECT a."b" FROM "foo"', "presto": 'SELECT a."b" FROM "foo"', "spark": "SELECT a.`b` FROM `foo`", }, ) self.validate_all( "SELECT ARRAY[1, 2]", write={ "bigquery": "SELECT [1, 2]", "duckdb": "SELECT [1, 2]", "presto": "SELECT ARRAY[1, 2]", "spark": 
"SELECT ARRAY(1, 2)", }, ) self.validate_all( "SELECT APPROX_DISTINCT(a) FROM foo", write={ "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "presto": "SELECT APPROX_DISTINCT(a) FROM foo", "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", }, ) self.validate_all( "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", write={ "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "spark": "SELECT APPROX_COUNT_DISTINCT(a, 0.1) FROM foo", }, ) self.validate_all( "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", write={ "presto": "SELECT APPROX_DISTINCT(a, 0.1) FROM foo", "hive": UnsupportedError, "spark": "SELECT APPROX_COUNT_DISTINCT(a, 0.1) FROM foo", }, ) self.validate_all( "SELECT JSON_EXTRACT(x, '$.name')", write={ "presto": "SELECT JSON_EXTRACT(x, '$.name')", "hive": "SELECT GET_JSON_OBJECT(x, '$.name')", "spark": "SELECT GET_JSON_OBJECT(x, '$.name')", }, ) self.validate_all( "SELECT JSON_EXTRACT_SCALAR(x, '$.name')", write={ "presto": "SELECT JSON_EXTRACT_SCALAR(x, '$.name')", "hive": "SELECT GET_JSON_OBJECT(x, '$.name')", "spark": "SELECT GET_JSON_OBJECT(x, '$.name')", }, ) self.validate_all( "'\u6bdb'", write={ "presto": "'\u6bdb'", "hive": "'\u6bdb'", "spark": "'\u6bdb'", }, ) self.validate_all( "SELECT ARRAY_SORT(x, (left, right) -> -1)", write={ "duckdb": "SELECT ARRAY_SORT(x)", "presto": 'SELECT ARRAY_SORT(x, ("left", "right") -> -1)', "hive": "SELECT SORT_ARRAY(x)", "spark": "SELECT ARRAY_SORT(x, (left, right) -> -1)", }, ) self.validate_all( "SELECT ARRAY_SORT(x)", write={ "presto": "SELECT ARRAY_SORT(x)", "hive": "SELECT SORT_ARRAY(x)", "spark": "SELECT ARRAY_SORT(x)", }, ) self.validate_all( "SELECT ARRAY_SORT(x, (left, right) -> -1)", write={ "hive": UnsupportedError, }, ) self.validate_all( "MAP(a, b)", write={ "hive": UnsupportedError, "spark": "MAP_FROM_ARRAYS(a, b)", "snowflake": UnsupportedError, }, ) 
self.validate_all( "MAP(ARRAY(a, b), ARRAY(c, d))", write={ "hive": "MAP(a, c, b, d)", "presto": "MAP(ARRAY[a, b], ARRAY[c, d])", "spark": "MAP_FROM_ARRAYS(ARRAY(a, b), ARRAY(c, d))", "snowflake": "OBJECT_CONSTRUCT(a, c, b, d)", }, ) self.validate_all( "MAP(ARRAY('a'), ARRAY('b'))", write={ "hive": "MAP('a', 'b')", "presto": "MAP(ARRAY['a'], ARRAY['b'])", "spark": "MAP_FROM_ARRAYS(ARRAY('a'), ARRAY('b'))", "snowflake": "OBJECT_CONSTRUCT('a', 'b')", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x", write={ "bigquery": "SELECT * FROM UNNEST(['7', '14'])", "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x", "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x", "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x", }, ) self.validate_all( "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)", write={ "bigquery": "SELECT * FROM UNNEST(['7', '14']) AS y", "presto": "SELECT * FROM UNNEST(ARRAY['7', '14']) AS x(y)", "hive": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)", "spark": "SELECT * FROM EXPLODE(ARRAY('7', '14')) AS x(y)", }, ) self.validate_all( "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n+1 FROM t WHERE n < 100 ) SELECT SUM(n) FROM t", write={ "presto": "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n + 1 FROM t WHERE n < 100) SELECT SUM(n) FROM t", "spark": "WITH RECURSIVE t(n) AS (VALUES (1) UNION ALL SELECT n + 1 FROM t WHERE n < 100) SELECT SUM(n) FROM t", }, ) self.validate_all( "SELECT a, b, c, d, sum(y) FROM z GROUP BY CUBE(a) ROLLUP(a), GROUPING SETS((b, c)), d", write={ "presto": "SELECT a, b, c, d, SUM(y) FROM z GROUP BY d, GROUPING SETS ((b, c)), CUBE (a), ROLLUP (a)", "hive": "SELECT a, b, c, d, SUM(y) FROM z GROUP BY d, GROUPING SETS ((b, c)), CUBE (a), ROLLUP (a)", }, ) self.validate_all( "JSON_FORMAT(CAST(MAP_FROM_ENTRIES(ARRAY[('action_type', 'at')]) AS JSON))", write={ "presto": "JSON_FORMAT(CAST(MAP_FROM_ENTRIES(ARRAY[('action_type', 'at')]) AS JSON))", "spark": 
"TO_JSON(MAP_FROM_ENTRIES(ARRAY(('action_type', 'at'))))", }, ) self.validate_all( "JSON_FORMAT(x)", write={ "bigquery": "TO_JSON_STRING(x)", "duckdb": "CAST(TO_JSON(x) AS TEXT)", "presto": "JSON_FORMAT(x)", "spark": "TO_JSON(x)", }, ) self.validate_all( """JSON_FORMAT(JSON '"x"')""", write={ "bigquery": """TO_JSON_STRING(PARSE_JSON('"x"'))""", "duckdb": """CAST(TO_JSON(JSON('"x"')) AS TEXT)""", "presto": """JSON_FORMAT(JSON_PARSE('"x"'))""", "spark": """REGEXP_EXTRACT(TO_JSON(FROM_JSON('["x"]', SCHEMA_OF_JSON('["x"]'))), '^.(.*).$', 1)""", }, ) self.validate_all( """SELECT JSON_FORMAT(JSON '{"a": 1, "b": "c"}')""", write={ "spark": """SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[{"a": 1, "b": "c"}]', SCHEMA_OF_JSON('[{"a": 1, "b": "c"}]'))), '^.(.*).$', 1)""", }, ) self.validate_all( """SELECT JSON_FORMAT(JSON '[1, 2, 3]')""", write={ "spark": "SELECT REGEXP_EXTRACT(TO_JSON(FROM_JSON('[[1, 2, 3]]', SCHEMA_OF_JSON('[[1, 2, 3]]'))), '^.(.*).$', 1)", }, ) self.validate_all( "REGEXP_EXTRACT('abc', '(a)(b)(c)')", read={ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')", }, write={ "presto": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "trino": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "duckdb": "REGEXP_EXTRACT('abc', '(a)(b)(c)')", "snowflake": "REGEXP_SUBSTR('abc', '(a)(b)(c)')", "hive": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)", "spark2": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)", "spark": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)", "databricks": "REGEXP_EXTRACT('abc', '(a)(b)(c)', 0)", }, ) self.validate_all( "CURRENT_USER", read={ "presto": "CURRENT_USER", "trino": "CURRENT_USER", "snowflake": "CURRENT_USER()", # Although the ANSI standard is CURRENT_USER }, write={ "trino": "CURRENT_USER", "snowflake": "CURRENT_USER()", }, ) self.validate_identity( "SELECT id, FIRST_VALUE(is_deleted) OVER (PARTITION BY id) AS first_is_deleted, 
NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted, LAST_VALUE(is_deleted) OVER (PARTITION BY id) AS last_is_deleted FROM my_table" ) self.validate_all( "SELECT NULLABLE FROM system.jdbc.types", read={ "presto": "SELECT NULLABLE FROM system.jdbc.types", "trino": "SELECT NULLABLE FROM system.jdbc.types", }, ) self.validate_identity( "SELECT * FROM foo FOR TIMESTAMP AS OF CAST('2020-01-01 00:00:00' AS TIMESTAMP) AS bar" ) def test_encode_decode(self): self.validate_identity("FROM_UTF8(x, y)") self.validate_all( "TO_UTF8(x)", read={ "duckdb": "ENCODE(x)", "spark": "ENCODE(x, 'utf-8')", }, write={ "duckdb": "ENCODE(x)", "presto": "TO_UTF8(x)", "spark": "ENCODE(x, 'utf-8')", }, ) self.validate_all( "FROM_UTF8(x)", read={ "duckdb": "DECODE(x)", "spark": "DECODE(x, 'utf-8')", }, write={ "duckdb": "DECODE(x)", "presto": "FROM_UTF8(x)", "spark": "DECODE(x, 'utf-8')", }, ) self.validate_all( "ENCODE(x, 'invalid')", write={ "presto": UnsupportedError, "duckdb": UnsupportedError, }, ) self.validate_all( "DECODE(x, 'invalid')", write={ "presto": UnsupportedError, "duckdb": UnsupportedError, }, ) def test_hex_unhex(self): self.validate_all( "TO_HEX(x)", write={ "spark": "HEX(x)", }, ) self.validate_all( "FROM_HEX(x)", write={ "spark": "UNHEX(x)", }, ) self.validate_all( "HEX(x)", write={ "presto": "TO_HEX(x)", }, ) self.validate_all( "UNHEX(x)", write={ "presto": "FROM_HEX(x)", }, ) def test_json(self): with self.assertLogs(helper_logger): self.validate_all( """SELECT JSON_EXTRACT_SCALAR(TRY(FILTER(CAST(JSON_EXTRACT('{"k1": [{"k2": "{\\"k3\\": 1}", "k4": "v"}]}', '$.k1') AS ARRAY(MAP(VARCHAR, VARCHAR))), x -> x['k4'] = 'v')[1]['k2']), '$.k3')""", write={ "presto": """SELECT JSON_EXTRACT_SCALAR(TRY(FILTER(CAST(JSON_EXTRACT('{"k1": [{"k2": "{\\"k3\\": 1}", "k4": "v"}]}', '$.k1') AS ARRAY(MAP(VARCHAR, VARCHAR))), x -> x['k4'] = 'v')[1]['k2']), '$.k3')""", "spark": """SELECT GET_JSON_OBJECT(FILTER(FROM_JSON(GET_JSON_OBJECT('{"k1": [{"k2": "{\\\\"k3\\\\": 1}", "k4": 
"v"}]}', '$.k1'), 'ARRAY>'), x -> x['k4'] = 'v')[0]['k2'], '$.k3')""", }, ) self.validate_all( "SELECT CAST(JSON '[1,23,456]' AS ARRAY(INTEGER))", write={ "spark": "SELECT FROM_JSON('[1,23,456]', 'ARRAY')", "presto": "SELECT CAST(JSON_PARSE('[1,23,456]') AS ARRAY(INTEGER))", }, ) self.validate_all( """SELECT CAST(JSON '{"k1":1,"k2":23,"k3":456}' AS MAP(VARCHAR, INTEGER))""", write={ "spark": 'SELECT FROM_JSON(\'{"k1":1,"k2":23,"k3":456}\', \'MAP\')', "presto": 'SELECT CAST(JSON_PARSE(\'{"k1":1,"k2":23,"k3":456}\') AS MAP(VARCHAR, INTEGER))', }, ) self.validate_all( "SELECT CAST(ARRAY [1, 23, 456] AS JSON)", write={ "spark": "SELECT TO_JSON(ARRAY(1, 23, 456))", "presto": "SELECT CAST(ARRAY[1, 23, 456] AS JSON)", }, ) def test_match_recognize(self): self.validate_identity( """SELECT * FROM orders MATCH_RECOGNIZE ( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price, LAST(B.totalprice) AS bottom_price, LAST(C.totalprice) AS top_price ONE ROW PER MATCH AFTER MATCH SKIP PAST LAST ROW PATTERN (A B+ C+ D+) DEFINE B AS totalprice < PREV(totalprice), C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice, D AS totalprice > PREV(totalprice), E AS MAX(foo) >= NEXT(bar) )""", pretty=True, ) def test_to_char(self): self.validate_all( "TO_CHAR(ts, 'dd')", write={ "bigquery": "FORMAT_DATE('%d', ts)", "presto": "DATE_FORMAT(ts, '%d')", }, ) self.validate_all( "TO_CHAR(ts, 'hh')", write={ "bigquery": "FORMAT_DATE('%H', ts)", "presto": "DATE_FORMAT(ts, '%H')", }, ) self.validate_all( "TO_CHAR(ts, 'hh24')", write={ "bigquery": "FORMAT_DATE('%H', ts)", "presto": "DATE_FORMAT(ts, '%H')", }, ) self.validate_all( "TO_CHAR(ts, 'mi')", write={ "bigquery": "FORMAT_DATE('%M', ts)", "presto": "DATE_FORMAT(ts, '%i')", }, ) self.validate_all( "TO_CHAR(ts, 'mm')", write={ "bigquery": "FORMAT_DATE('%m', ts)", "presto": "DATE_FORMAT(ts, '%m')", }, ) self.validate_all( "TO_CHAR(ts, 'ss')", write={ "bigquery": "FORMAT_DATE('%S', ts)", "presto": "DATE_FORMAT(ts, 
'%s')", }, ) self.validate_all( "TO_CHAR(ts, 'yyyy')", write={ "bigquery": "FORMAT_DATE('%Y', ts)", "presto": "DATE_FORMAT(ts, '%Y')", }, ) self.validate_all( "TO_CHAR(ts, 'yy')", write={ "bigquery": "FORMAT_DATE('%y', ts)", "presto": "DATE_FORMAT(ts, '%y')", }, ) def test_signum(self): self.validate_all( "SIGN(x)", read={ "presto": "SIGN(x)", "spark": "SIGNUM(x)", "starrocks": "SIGN(x)", }, write={ "presto": "SIGN(x)", "spark": "SIGN(x)", "starrocks": "SIGN(x)", }, ) def test_json_vs_row_extract(self): for dialect in ("trino", "presto"): s = parse_one('SELECT col:x:y."special string"', read="snowflake") dialect_json_extract_setting = f"{dialect}, variant_extract_is_json_extract=True" dialect_row_access_setting = f"{dialect}, variant_extract_is_json_extract=False" # By default, Snowflake VARIANT will generate JSON_EXTRACT() in Presto/Trino json_extract_result = """SELECT JSON_EXTRACT(col, '$.x.y["special string"]')""" self.assertEqual(s.sql(dialect), json_extract_result) self.assertEqual(s.sql(dialect_json_extract_setting), json_extract_result) # If the setting is overriden to False, then generate ROW access (dot notation) self.assertEqual(s.sql(dialect_row_access_setting), 'SELECT col.x.y."special string"') def test_analyze(self): self.validate_identity("ANALYZE tbl") self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)") def test_bit_aggs(self): self.validate_all( "BITWISE_AND_AGG(x)", read={ "presto": "BITWISE_AND_AGG(x)", "trino": "BITWISE_AND_AGG(x)", "oracle": "BITWISE_AND_AGG(x)", }, ) self.validate_all( "BITWISE_OR_AGG(x)", read={ "presto": "BITWISE_OR_AGG(x)", "trino": "BITWISE_OR_AGG(x)", "oracle": "BITWISE_OR_AGG(x)", }, ) self.validate_all( "BITWISE_XOR_AGG(x)", read={ "presto": "BITWISE_XOR_AGG(x)", "trino": "BITWISE_XOR_AGG(x)", "oracle": "BITWISE_XOR_AGG(x)", }, ) def test_initcap(self): self.validate_all( "INITCAP(col)", write={ "presto": "REGEXP_REPLACE(col, '(\\w)(\\w*)', x -> UPPER(x[1]) || LOWER(x[2]))", }, ) 
================================================ FILE: tests/dialects/test_prql.py ================================================ from tests.dialects.test_dialect import Validator class TestPRQL(Validator): dialect = "prql" def test_prql(self): self.validate_all( "from x", write={ "": "SELECT * FROM x", }, ) self.validate_all( "from x derive a + 1", write={ "": "SELECT *, a + 1 FROM x", }, ) self.validate_all( "from x derive x = a + 1", write={ "": "SELECT *, a + 1 AS x FROM x", }, ) self.validate_all( "from x derive {a + 1}", write={ "": "SELECT *, a + 1 FROM x", }, ) self.validate_all( "from x derive {x = a + 1, b}", write={ "": "SELECT *, a + 1 AS x, b FROM x", }, ) self.validate_all( "from x derive {x = a + 1, b} select {y = x, 2}", write={"": "SELECT a + 1 AS y, 2 FROM x"}, ) self.validate_all( "from x take 10", write={ "": "SELECT * FROM x LIMIT 10", }, ) self.validate_all( "from x take 10 take 5", write={ "": "SELECT * FROM x LIMIT 5", }, ) self.validate_all( "from x filter age > 25", write={ "": "SELECT * FROM x WHERE age > 25", }, ) self.validate_all( "from x derive {x = a + 1, b} filter age > 25", write={ "": "SELECT *, a + 1 AS x, b FROM x WHERE age > 25", }, ) self.validate_all( "from x filter dept != 'IT'", write={ "": "SELECT * FROM x WHERE dept <> 'IT'", }, ) self.validate_all( "from x filter p == 'product' select { a, b }", write={"": "SELECT a, b FROM x WHERE p = 'product'"}, ) self.validate_all( "from x filter age > 25 filter age < 27", write={"": "SELECT * FROM x WHERE age > 25 AND age < 27"}, ) self.validate_all( "from x filter (age > 25 && age < 27)", write={"": "SELECT * FROM x WHERE (age > 25 AND age < 27)"}, ) self.validate_all( "from x filter (age > 25 || age < 27)", write={"": "SELECT * FROM x WHERE (age > 25 OR age < 27)"}, ) self.validate_all( "from x filter (age > 25 || age < 22) filter age > 26 filter age < 27", write={ "": "SELECT * FROM x WHERE ((age > 25 OR age < 22) AND age > 26) AND age < 27", }, ) self.validate_all( "from x 
sort age", write={ "": "SELECT * FROM x ORDER BY age", }, ) self.validate_all( "from x sort {-age}", write={ "": "SELECT * FROM x ORDER BY age DESC", }, ) self.validate_all( "from x sort {age, name}", write={ "": "SELECT * FROM x ORDER BY age, name", }, ) self.validate_all( "from x sort {-age, +name}", write={ "": "SELECT * FROM x ORDER BY age DESC, name", }, ) self.validate_all( "from x append y", write={ "": "SELECT * FROM x UNION ALL SELECT * FROM y", }, ) self.validate_all( "from x remove y", write={ "": "SELECT * FROM x EXCEPT ALL SELECT * FROM y", }, ) self.validate_all( "from x intersect y", write={"": "SELECT * FROM x INTERSECT ALL SELECT * FROM y"}, ) self.validate_all( "from x filter a == null filter null != b", write={ "": "SELECT * FROM x WHERE a IS NULL AND NOT b IS NULL", }, ) self.validate_all( "from x filter (a > 1 || null != b || c != null)", write={ "": "SELECT * FROM x WHERE (a > 1 OR NOT b IS NULL OR NOT c IS NULL)", }, ) self.validate_all( "from a aggregate { average x }", write={ "": "SELECT AVG(x) FROM a", }, ) self.validate_all( "from a aggregate { average x, min y, ct = sum z }", write={ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) AS ct FROM a", }, ) self.validate_all( "from a aggregate { average x, min y, sum z }", write={ "": "SELECT AVG(x), MIN(y), COALESCE(SUM(z), 0) FROM a", }, ) self.validate_all( "from a aggregate { min y, b = stddev x, max z }", write={ "": "SELECT MIN(y), STDDEV(x) AS b, MAX(z) FROM a", }, ) ================================================ FILE: tests/dialects/test_redshift.py ================================================ from sqlglot import exp, ParseError, parse_one, transpile from sqlglot.optimizer.annotate_types import annotate_types from tests.dialects.test_dialect import Validator class TestRedshift(Validator): dialect = "redshift" def test_redshift(self): self.validate_identity("SELECT COSH(1.5)") self.validate_identity( "ROUND(CAST(a AS DOUBLE PRECISION) / CAST(b AS DOUBLE PRECISION), 2)" ) 
self.validate_all( "SELECT SPLIT_TO_ARRAY('12,345,6789')", write={ "postgres": "SELECT STRING_TO_ARRAY('12,345,6789', ',')", "redshift": "SELECT SPLIT_TO_ARRAY('12,345,6789', ',')", }, ) self.validate_all( "GETDATE()", read={ "duckdb": "CURRENT_TIMESTAMP", }, write={ "duckdb": "CURRENT_TIMESTAMP", "redshift": "GETDATE()", }, ) self.validate_all( """SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""", write={ "bigquery": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""", "databricks": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}':farm.barn.color""", "duckdb": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}' ->> '$.farm.barn.color'""", "postgres": """SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""", "presto": """SELECT JSON_EXTRACT_SCALAR('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""", "redshift": """SELECT JSON_EXTRACT_PATH_TEXT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', 'farm', 'barn', 'color')""", "spark": """SELECT GET_JSON_OBJECT('{ "farm": {"barn": { "color": "red", "feed stocked": true }}}', '$.farm.barn.color')""", "sqlite": """SELECT '{ "farm": {"barn": { "color": "red", "feed stocked": true }}}' ->> '$.farm.barn.color'""", }, ) self.validate_all( "LISTAGG(sellerid, ', ')", read={ "duckdb": "STRING_AGG(sellerid, ', ')", "databricks": "STRING_AGG(sellerid, ', ')", }, write={ # GROUP_CONCAT, LISTAGG and STRING_AGG are aliases in DuckDB "duckdb": "LISTAGG(sellerid, ', ')", "redshift": "LISTAGG(sellerid, ', ')", "spark, version=3.0.0": "ARRAY_JOIN(COLLECT_LIST(sellerid), ', ')", "spark, version=4.0.0": "LISTAGG(sellerid, ', ')", "spark": "LISTAGG(sellerid, ', ')", "databricks": "LISTAGG(sellerid, ', ')", }, ) self.validate_all( "SELECT 
APPROXIMATE COUNT(DISTINCT y)", read={ "spark": "SELECT APPROX_COUNT_DISTINCT(y)", }, write={ "redshift": "SELECT APPROXIMATE COUNT(DISTINCT y)", "spark": "SELECT APPROX_COUNT_DISTINCT(y)", }, ) self.validate_all( "x ~* 'pat'", write={ "redshift": "x ~* 'pat'", "snowflake": "REGEXP_LIKE(x, 'pat', 'i')", }, ) self.validate_all( "SELECT CAST('01:03:05.124' AS TIME(2) WITH TIME ZONE)", read={ "postgres": "SELECT CAST('01:03:05.124' AS TIMETZ(2))", }, write={ "postgres": "SELECT CAST('01:03:05.124' AS TIMETZ(2))", "redshift": "SELECT CAST('01:03:05.124' AS TIME(2) WITH TIME ZONE)", }, ) self.validate_all( "SELECT CAST('2020-02-02 01:03:05.124' AS TIMESTAMP(2) WITH TIME ZONE)", read={ "postgres": "SELECT CAST('2020-02-02 01:03:05.124' AS TIMESTAMPTZ(2))", }, write={ "postgres": "SELECT CAST('2020-02-02 01:03:05.124' AS TIMESTAMPTZ(2))", "redshift": "SELECT CAST('2020-02-02 01:03:05.124' AS TIMESTAMP(2) WITH TIME ZONE)", }, ) self.validate_all( "SELECT INTERVAL '5 DAYS'", read={ "": "SELECT INTERVAL '5' days", }, ) self.validate_all( "SELECT ADD_MONTHS('2008-03-31', 1)", write={ "bigquery": "SELECT DATE_ADD(CAST('2008-03-31' AS DATETIME), INTERVAL 1 MONTH)", "duckdb": "SELECT CAST('2008-03-31' AS TIMESTAMP) + INTERVAL 1 MONTH", "redshift": "SELECT DATEADD(MONTH, 1, '2008-03-31')", "trino": "SELECT DATE_ADD('MONTH', 1, CAST('2008-03-31' AS TIMESTAMP))", "tsql": "SELECT DATEADD(MONTH, 1, CAST('2008-03-31' AS DATETIME2))", }, ) self.validate_all( "SELECT STRTOL('abc', 16)", read={ "trino": "SELECT FROM_BASE('abc', 16)", }, write={ "redshift": "SELECT STRTOL('abc', 16)", "trino": "SELECT FROM_BASE('abc', 16)", }, ) self.validate_all( "SELECT SNAPSHOT, type", write={ "": "SELECT SNAPSHOT, type", "redshift": 'SELECT "SNAPSHOT", "type"', }, ) self.validate_all( "x is true", write={ "redshift": "x IS TRUE", "presto": "x", }, ) self.validate_all( "x is false", write={ "redshift": "x IS FALSE", "presto": "NOT x", }, ) self.validate_all( "x is not false", write={ "redshift": "NOT x 
IS FALSE", "presto": "NOT NOT x", }, ) self.validate_all( "LEN(x)", write={ "redshift": "LENGTH(x)", "presto": "LENGTH(x)", }, ) self.validate_all( "x LIKE 'abc' || '%'", read={ "duckdb": "STARTS_WITH(x, 'abc')", }, write={ "redshift": "x LIKE 'abc' || '%'", }, ) self.validate_all( "SELECT SYSDATE", write={ "": "SELECT CURRENT_TIMESTAMP()", "postgres": "SELECT CURRENT_TIMESTAMP", "redshift": "SELECT SYSDATE", }, ) self.validate_all( "SELECT DATE_PART(minute, timestamp '2023-01-04 04:05:06.789')", write={ "postgres": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", "redshift": "SELECT EXTRACT(minute FROM CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", "snowflake": "SELECT DATE_PART(minute, CAST('2023-01-04 04:05:06.789' AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATE_PART(month, date '20220502')", write={ "postgres": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "redshift": "SELECT EXTRACT(month FROM CAST('20220502' AS DATE))", "snowflake": "SELECT DATE_PART(month, CAST('20220502' AS DATE))", }, ) self.validate_all( 'create table "group" ("col" char(10))', write={ "redshift": 'CREATE TABLE "group" ("col" CHAR(10))', "mysql": "CREATE TABLE `group` (`col` CHAR(10))", }, ) self.validate_all( 'create table if not exists city_slash_id("city/id" integer not null, state char(2) not null)', write={ "redshift": 'CREATE TABLE IF NOT EXISTS city_slash_id ("city/id" INTEGER NOT NULL, state CHAR(2) NOT NULL)', "presto": 'CREATE TABLE IF NOT EXISTS city_slash_id ("city/id" INTEGER NOT NULL, state CHAR(2) NOT NULL)', }, ) self.validate_all( "SELECT ST_AsEWKT(ST_GeomFromEWKT('SRID=4326;POINT(10 20)')::geography)", write={ "redshift": "SELECT ST_ASEWKT(CAST(ST_GEOMFROMEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", "bigquery": "SELECT ST_AsEWKT(CAST(ST_GeomFromEWKT('SRID=4326;POINT(10 20)') AS GEOGRAPHY))", }, ) self.validate_all( "SELECT ST_AsEWKT(ST_GeogFromText('LINESTRING(110 40, 2 3, -10 80, -7 9)')::geometry)", write={ "redshift": 
"SELECT ST_ASEWKT(CAST(ST_GEOGFROMTEXT('LINESTRING(110 40, 2 3, -10 80, -7 9)') AS GEOMETRY))", }, ) self.validate_all( "SELECT 'abc'::BINARY", write={ "redshift": "SELECT CAST('abc' AS VARBYTE)", }, ) self.validate_all( "CREATE TABLE a (b BINARY VARYING(10))", write={ "redshift": "CREATE TABLE a (b VARBYTE(10))", }, ) self.validate_all( "SELECT 'abc'::CHARACTER", write={ "redshift": "SELECT CAST('abc' AS CHAR)", }, ) self.validate_all( "SELECT DISTINCT ON (a) a, b FROM x ORDER BY c DESC", write={ "bigquery": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "databricks": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "drill": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "hive": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "mysql": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", "oracle": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) _t WHERE _row_number = 1", "presto": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "redshift": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", "snowflake": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE 
_row_number = 1", "spark": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "sqlite": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "starrocks": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", "tableau": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "teradata": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "trino": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC NULLS FIRST) AS _row_number FROM x) AS _t WHERE _row_number = 1", "tsql": "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY CASE WHEN c IS NULL THEN 1 ELSE 0 END DESC, c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", }, ) self.validate_all( "DECODE(x, a, b, c, d)", write={ "": "DECODE(x, a, b, c, d)", "duckdb": "CASE WHEN x = a OR (x IS NULL AND a IS NULL) THEN b WHEN x = c OR (x IS NULL AND c IS NULL) THEN d END", "oracle": "DECODE(x, a, b, c, d)", "redshift": "DECODE(x, a, b, c, d)", "snowflake": "DECODE(x, a, b, c, d)", "spark": "DECODE(x, a, b, c, d)", }, ) self.validate_all( "NVL(a, b, c, d)", write={ "redshift": "COALESCE(a, b, c, d)", "mysql": "COALESCE(a, b, c, d)", "postgres": "COALESCE(a, b, c, d)", }, ) self.validate_identity( "DATEDIFF(days, a, b)", "DATEDIFF(DAY, a, b)", ) self.validate_all( "DATEDIFF('day', a, b)", write={ "bigquery": "DATE_DIFF(CAST(b AS DATETIME), CAST(a AS DATETIME), DAY)", "duckdb": "DATE_DIFF('DAY', CAST(a 
AS TIMESTAMP), CAST(b AS TIMESTAMP))", "hive": "DATEDIFF(b, a)", "redshift": "DATEDIFF(DAY, a, b)", "presto": "DATE_DIFF('DAY', CAST(a AS TIMESTAMP), CAST(b AS TIMESTAMP))", }, ) self.validate_all( "SELECT DATEADD(month, 18, '2008-02-28')", write={ "bigquery": "SELECT DATE_ADD(CAST('2008-02-28' AS DATETIME), INTERVAL 18 MONTH)", "duckdb": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL 18 MONTH", "hive": "SELECT ADD_MONTHS('2008-02-28', 18)", "mysql": "SELECT DATE_ADD('2008-02-28', INTERVAL 18 MONTH)", "postgres": "SELECT CAST('2008-02-28' AS TIMESTAMP) + INTERVAL '18 MONTH'", "presto": "SELECT DATE_ADD('MONTH', 18, CAST('2008-02-28' AS TIMESTAMP))", "redshift": "SELECT DATEADD(MONTH, 18, '2008-02-28')", "snowflake": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS TIMESTAMP))", "tsql": "SELECT DATEADD(MONTH, 18, CAST('2008-02-28' AS DATETIME2))", "spark": "SELECT DATE_ADD(MONTH, 18, '2008-02-28')", "spark2": "SELECT ADD_MONTHS('2008-02-28', 18)", "databricks": "SELECT DATE_ADD(MONTH, 18, '2008-02-28')", }, ) self.validate_all( "SELECT DATEDIFF(week, '2009-01-01', '2009-12-31')", write={ "bigquery": "SELECT DATE_DIFF(CAST('2009-12-31' AS DATETIME), CAST('2009-01-01' AS DATETIME), WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", "hive": "SELECT CAST(DATEDIFF('2009-12-31', '2009-01-01') / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST('2009-12-31' AS TIMESTAMP) - CAST('2009-01-01' AS TIMESTAMP))) / 7 AS BIGINT)", "presto": "SELECT DATE_DIFF('WEEK', CAST('2009-01-01' AS TIMESTAMP), CAST('2009-12-31' AS TIMESTAMP))", "redshift": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", "snowflake": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", "tsql": "SELECT DATEDIFF(WEEK, '2009-01-01', '2009-12-31')", }, ) self.validate_all( "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)", write={ "snowflake": "SELECT DATE_PART(EPOCH, CURRENT_DATE)", "redshift": "SELECT EXTRACT(EPOCH FROM CURRENT_DATE)", }, ) 
self.validate_identity("SELECT VERSION()") def test_identity(self): self.validate_identity("SELECT GETBIT(FROM_HEX('4d'), 2)") self.validate_identity("SELECT EXP(1)") self.validate_identity("ALTER TABLE table_name ALTER COLUMN bla TYPE VARCHAR") self.validate_identity("SELECT CAST(value AS FLOAT(8))") self.validate_identity("1 div", "1 AS div") self.validate_identity("LISTAGG(DISTINCT foo, ', ')") self.validate_identity("CREATE MATERIALIZED VIEW orders AUTO REFRESH YES AS SELECT 1") self.validate_identity("SELECT DATEADD(DAY, 1, 'today')") self.validate_identity("SELECT * FROM #x") self.validate_identity("SELECT INTERVAL '5 DAY'") self.validate_identity("foo$") self.validate_identity("CAST('bla' AS SUPER)") self.validate_identity("CREATE TABLE real1 (realcol REAL)") self.validate_identity("CAST('foo' AS HLLSKETCH)") self.validate_identity("'abc' SIMILAR TO '(b|c)%'") self.validate_identity("CREATE TABLE datetable (start_date DATE, end_date DATE)") self.validate_identity("SELECT APPROXIMATE AS y") self.validate_identity("CREATE TABLE t (c BIGINT IDENTITY(0, 1))") self.validate_identity( "COPY test_staging_tbl FROM 's3://your/bucket/prefix/here' IAM_ROLE default FORMAT AS AVRO 'auto'" ) self.validate_identity( "COPY test_staging_tbl FROM 's3://your/bucket/prefix/here' IAM_ROLE default FORMAT AS JSON 's3://jsonpaths_file'" ) self.validate_identity( "SELECT * FROM venue WHERE (venuecity, venuestate) IN (('Miami', 'FL'), ('Tampa', 'FL')) ORDER BY venueid" ) self.validate_identity( """SELECT tablename, "column" FROM pg_table_def WHERE "column" LIKE '%start\\\\_%' LIMIT 5""" ) self.validate_identity( """SELECT JSON_EXTRACT_PATH_TEXT('{"f2":{"f3":1},"f4":{"f5":99,"f6":"star"}', 'f4', 'f6', TRUE)""" ) self.validate_identity( 'DATE_PART(year, "somecol")', 'EXTRACT(year FROM "somecol")', ).this.assert_is(exp.Var) self.validate_identity( "SELECT CONCAT('abc', 'def')", "SELECT 'abc' || 'def'", ) self.validate_identity( "SELECT CONCAT_WS('DELIM', 'abc', 'def', 'ghi')", "SELECT 
'abc' || 'DELIM' || 'def' || 'DELIM' || 'ghi'", ) self.validate_identity( "SELECT TOP 1 x FROM y", "SELECT x FROM y LIMIT 1", ) self.validate_identity( "SELECT DATE_DIFF('month', CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", "SELECT DATEDIFF(MONTH, CAST('2020-02-29 00:00:00' AS TIMESTAMP), CAST('2020-03-02 00:00:00' AS TIMESTAMP))", ) self.validate_identity( "SELECT * FROM x WHERE y = DATEADD('month', -1, DATE_TRUNC('month', (SELECT y FROM #temp_table)))", "SELECT * FROM x WHERE y = DATEADD(MONTH, -1, DATE_TRUNC('MONTH', (SELECT y FROM #temp_table)))", ) self.validate_identity( "SELECT 'a''b'", "SELECT 'a\\'b'", ) self.validate_identity( "CREATE TABLE t (c BIGINT GENERATED BY DEFAULT AS IDENTITY (0, 1))", "CREATE TABLE t (c BIGINT IDENTITY(0, 1))", ) self.validate_identity( "SELECT DATEADD(HOUR, 0, CAST('2020-02-02 01:03:05.124' AS TIMESTAMP))" ) self.validate_identity( "SELECT DATEDIFF(SECOND, '2020-02-02 00:00:00.000', '2020-02-02 01:03:05.124')" ) self.validate_identity( "CREATE OR REPLACE VIEW v1 AS SELECT id, AVG(average_metric1) AS m1, AVG(average_metric2) AS m2 FROM t GROUP BY id WITH NO SCHEMA BINDING" ) self.validate_identity( "SELECT caldate + INTERVAL '1 SECOND' AS dateplus FROM date WHERE caldate = '12-31-2008'" ) self.validate_identity( "SELECT COUNT(*) FROM event WHERE eventname LIKE '%Ring%' OR eventname LIKE '%Die%'" ) self.validate_identity( "CREATE TABLE SOUP (LIKE other_table) DISTKEY(soup1) SORTKEY(soup2) DISTSTYLE ALL" ) self.validate_identity( "CREATE TABLE sales (salesid INTEGER NOT NULL) DISTKEY(listid) COMPOUND SORTKEY(listid, sellerid) DISTSTYLE AUTO" ) self.validate_identity( "COPY customer FROM 's3://mybucket/customer' IAM_ROLE 'arn:aws:iam::0123456789012:role/MyRedshiftRole' REGION 'us-east-1' FORMAT orc", ) self.validate_identity( "COPY customer FROM 's3://mybucket/mydata' CREDENTIALS 'aws_iam_role=arn:aws:iam:::role/;master_symmetric_key=' emptyasnull blanksasnull timeformat 'YYYY-MM-DD 
HH:MI:SS'" ) self.validate_identity( "UNLOAD ('select * from venue') TO 's3://mybucket/unload/' IAM_ROLE 'arn:aws:iam::0123456789012:role/MyRedshiftRole'", check_command_warning=True, ) self.validate_identity( "CREATE TABLE SOUP (SOUP1 VARCHAR(50) NOT NULL ENCODE ZSTD, SOUP2 VARCHAR(70) NULL ENCODE DELTA)" ) self.validate_identity( "SELECT DATEADD('day', ndays, caldate)", "SELECT DATEADD(DAY, ndays, caldate)", ) self.validate_identity( "CONVERT(INT, x)", "CAST(x AS INTEGER)", ) self.validate_identity( "SELECT DATE_ADD('day', 1, DATE('2023-01-01'))", "SELECT DATEADD(DAY, 1, DATE('2023-01-01'))", ) self.validate_identity( """SELECT c_name, orders.o_orderkey AS orderkey, index AS orderkey_index FROM customer_orders_lineitem AS c, c.c_orders AS orders AT index ORDER BY orderkey_index""", pretty=True, ) self.validate_identity( "SELECT attr AS attr, JSON_TYPEOF(val) AS value_type FROM customer_orders_lineitem AS c, UNPIVOT c.c_orders[0] WHERE c_custkey = 9451" ) self.validate_identity( "SELECT attr AS attr, JSON_TYPEOF(val) AS value_type FROM customer_orders_lineitem AS c, UNPIVOT c.c_orders AS val AT attr WHERE c_custkey = 9451" ) self.validate_identity("SELECT JSON_PARSE('[]')") self.validate_identity("SELECT ARRAY(1, 2, 3)") self.validate_identity("SELECT ARRAY[1, 2, 3]") self.validate_identity( """SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')""", """SELECT CONVERT_TIMEZONE('UTC', 'America/New_York', '2024-08-06 09:10:00.000')""", ) self.validate_all( "SELECT *, 4 AS col4 EXCLUDE (col2, col3) FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", write={ "redshift": "SELECT *, 4 AS col4 EXCLUDE (col2, col3) FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", "duckdb": "SELECT * EXCLUDE (col2, col3) FROM (SELECT *, 4 AS col4 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", "snowflake": "SELECT * EXCLUDE (col2, col3) FROM (SELECT *, 4 AS col4 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", }, ) self.validate_all( "SELECT *, 4 AS col4 EXCLUDE col2, col3 
FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", write={ "redshift": "SELECT *, 4 AS col4 EXCLUDE (col2, col3) FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", "duckdb": "SELECT * EXCLUDE (col2, col3) FROM (SELECT *, 4 AS col4 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", "snowflake": "SELECT * EXCLUDE (col2, col3) FROM (SELECT *, 4 AS col4 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", }, ) self.validate_all( "SELECT col1, *, col2 EXCLUDE(col3) FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", write={ "redshift": "SELECT col1, *, col2 EXCLUDE (col3) FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3)", "duckdb": "SELECT * EXCLUDE (col3) FROM (SELECT col1, *, col2 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", "snowflake": "SELECT * EXCLUDE (col3) FROM (SELECT col1, *, col2 FROM (SELECT 1 AS col1, 2 AS col2, 3 AS col3))", }, ) self.validate_identity("SELECT 1 EXCLUDE", "SELECT 1 AS EXCLUDE") self.validate_identity("SELECT 1 EXCLUDE FROM t", "SELECT 1 AS EXCLUDE FROM t") self.validate_identity("SELECT 1 AS EXCLUDE") self.validate_identity("SELECT * FROM (SELECT 1 AS EXCLUDE) AS t") self.validate_identity("SELECT 1 AS EXCLUDE, 2 AS foo") def test_values(self): # Test crazy-sized VALUES clause to UNION ALL conversion to ensure we don't get RecursionError values = [str(v) for v in range(0, 10000)] values_query = f"SELECT * FROM (VALUES {', '.join('(' + v + ')' for v in values)})" union_query = f"SELECT * FROM ({' UNION ALL '.join('SELECT ' + v for v in values)})" self.assertEqual(transpile(values_query, write="redshift")[0], union_query) values_sql = transpile("SELECT * FROM (VALUES (1), (2))", write="redshift", pretty=True)[0] self.assertEqual( values_sql, """SELECT * FROM ( SELECT 1 UNION ALL SELECT 2 )""", ) self.validate_identity("INSERT INTO t (a) VALUES (1), (2), (3)") self.validate_identity("INSERT INTO t (a, b) VALUES (1, 2), (3, 4)") self.validate_all( "SELECT * FROM (SELECT 1, 2) AS t", read={ "": "SELECT * FROM (VALUES (1, 2)) AS t", }, write={ "mysql": "SELECT 
* FROM (SELECT 1, 2) AS t", "presto": "SELECT * FROM (SELECT 1, 2) AS t", }, ) self.validate_all( "SELECT * FROM (SELECT 1 AS id) AS t1 CROSS JOIN (SELECT 1 AS id) AS t2", read={ "": "SELECT * FROM (VALUES (1)) AS t1(id) CROSS JOIN (VALUES (1)) AS t2(id)", }, ) self.validate_all( "SELECT a, b FROM (SELECT 1 AS a, 2 AS b) AS t", read={ "": "SELECT a, b FROM (VALUES (1, 2)) AS t (a, b)", }, ) self.validate_all( 'SELECT a, b FROM (SELECT 1 AS a, 2 AS b UNION ALL SELECT 3, 4) AS "t"', read={ "": 'SELECT a, b FROM (VALUES (1, 2), (3, 4)) AS "t" (a, b)', }, ) self.validate_all( "SELECT a, b FROM (SELECT 1 AS a, 2 AS b UNION ALL SELECT 3, 4 UNION ALL SELECT 5, 6 UNION ALL SELECT 7, 8) AS t", read={ "": "SELECT a, b FROM (VALUES (1, 2), (3, 4), (5, 6), (7, 8)) AS t (a, b)", }, ) self.validate_all( "INSERT INTO t (a, b) SELECT a, b FROM (SELECT 1 AS a, 2 AS b UNION ALL SELECT 3, 4) AS t", read={ "": "INSERT INTO t(a, b) SELECT a, b FROM (VALUES (1, 2), (3, 4)) AS t (a, b)", }, ) self.validate_identity("CREATE TABLE table_backup BACKUP NO AS SELECT * FROM event") self.validate_identity("CREATE TABLE table_backup BACKUP YES AS SELECT * FROM event") self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP NO") self.validate_identity("CREATE TABLE table_backup (i INTEGER, b VARCHAR) BACKUP YES") self.validate_identity( "select foo, bar from table_1 minus select foo, bar from table_2", "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2", ) def test_create_table_like(self): self.validate_identity( "CREATE TABLE SOUP (LIKE other_table) DISTKEY(soup1) SORTKEY(soup2) DISTSTYLE ALL" ) self.validate_all( "CREATE TABLE t1 (LIKE t2)", write={ "postgres": "CREATE TABLE t1 (LIKE t2)", "presto": "CREATE TABLE t1 (LIKE t2)", "redshift": "CREATE TABLE t1 (LIKE t2)", "trino": "CREATE TABLE t1 (LIKE t2)", }, ) self.validate_all( "CREATE TABLE t1 (col VARCHAR, LIKE t2)", write={ "postgres": "CREATE TABLE t1 (col VARCHAR, LIKE t2)", "presto": "CREATE TABLE 
t1 (col VARCHAR, LIKE t2)", "redshift": "CREATE TABLE t1 (col VARCHAR, LIKE t2)", "trino": "CREATE TABLE t1 (col VARCHAR, LIKE t2)", }, ) def test_alter_table(self): self.validate_identity("ALTER TABLE s.t ALTER SORTKEY (c)") self.validate_identity("ALTER TABLE t ALTER SORTKEY AUTO") self.validate_identity("ALTER TABLE t ALTER SORTKEY NONE") self.validate_identity("ALTER TABLE t ALTER SORTKEY (c1, c2)") self.validate_identity("ALTER TABLE t ALTER SORTKEY (c1, c2)") self.validate_identity("ALTER TABLE t ALTER COMPOUND SORTKEY (c1, c2)") self.validate_identity("ALTER TABLE t ALTER DISTSTYLE ALL") self.validate_identity("ALTER TABLE t ALTER DISTSTYLE EVEN") self.validate_identity("ALTER TABLE t ALTER DISTSTYLE AUTO") self.validate_identity("ALTER TABLE t ALTER DISTSTYLE KEY DISTKEY c") self.validate_identity("ALTER TABLE t SET TABLE PROPERTIES ('a' = '5', 'b' = 'c')") self.validate_identity("ALTER TABLE t SET LOCATION 's3://bucket/folder/'") self.validate_identity("ALTER TABLE t SET FILE FORMAT AVRO") self.validate_identity( "ALTER TABLE t ALTER DISTKEY c", "ALTER TABLE t ALTER DISTSTYLE KEY DISTKEY c", ) self.validate_all( "ALTER TABLE db.t1 RENAME TO db.t2", write={ "spark": "ALTER TABLE db.t1 RENAME TO db.t2", "redshift": "ALTER TABLE db.t1 RENAME TO t2", }, ) def test_varchar_max(self): self.validate_all( 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', read={ "redshift": "CREATE TABLE TEST (cola VARCHAR(max))", "tsql": "CREATE TABLE TEST (cola VARCHAR(max))", }, write={ "redshift": 'CREATE TABLE "TEST" ("cola" VARCHAR(MAX))', }, identify=True, ) def test_no_schema_binding(self): self.validate_all( "CREATE OR REPLACE VIEW v1 AS SELECT cola, colb FROM t1 WITH NO SCHEMA BINDING", write={ "redshift": "CREATE OR REPLACE VIEW v1 AS SELECT cola, colb FROM t1 WITH NO SCHEMA BINDING", }, ) def test_column_unnesting(self): self.validate_identity("SELECT c.*, o FROM bloo AS c, c.c_orders AS o") self.validate_identity( "SELECT c.*, o, l FROM bloo AS c, c.c_orders AS o, 
o.o_lineitems AS l" ) ast = parse_one("SELECT * FROM t.t JOIN t.c1 ON c1.c2 = t.c3", read="redshift") ast.args["from_"].this.assert_is(exp.Table) ast.args["joins"][0].this.assert_is(exp.Table) self.assertEqual(ast.sql("redshift"), "SELECT * FROM t.t JOIN t.c1 ON c1.c2 = t.c3") ast = parse_one("SELECT * FROM t AS t CROSS JOIN t.c1", read="redshift") ast.args["from_"].this.assert_is(exp.Table) ast.args["joins"][0].this.assert_is(exp.Unnest) self.assertEqual(ast.sql("redshift"), "SELECT * FROM t AS t CROSS JOIN t.c1") ast = parse_one( "SELECT * FROM x AS a, a.b AS c, c.d.e AS f, f.g.h.i.j.k AS l", read="redshift" ) joins = ast.args["joins"] ast.args["from_"].this.assert_is(exp.Table) joins[0].this.assert_is(exp.Unnest) joins[1].this.assert_is(exp.Unnest) joins[2].this.assert_is(exp.Unnest).expressions[0].assert_is(exp.Dot) self.assertEqual( ast.sql("redshift"), "SELECT * FROM x AS a, a.b AS c, c.d.e AS f, f.g.h.i.j.k AS l" ) def test_join_markers(self): self.validate_identity( "select a.foo, b.bar, a.baz from a, b where a.baz = b.baz (+)", "SELECT a.foo, b.bar, a.baz FROM a, b WHERE a.baz = b.baz (+)", ) def test_time(self): self.validate_all( "TIME_TO_STR(a, '%Y-%m-%d %H:%M:%S.%f')", write={"redshift": "TO_CHAR(a, 'YYYY-MM-DD HH24:MI:SS.US')"}, ) def test_grant(self): grant_cmds = [ "GRANT SELECT ON ALL TABLES IN SCHEMA qa_tickit TO fred", "GRANT USAGE ON DATASHARE salesshare TO NAMESPACE '13b8833d-17c6-4f16-8fe4-1a018f5ed00d'", "GRANT USAGE FOR SCHEMAS IN DATABASE Sales_db TO ROLE Sales", "GRANT EXECUTE FOR FUNCTIONS IN SCHEMA Sales_schema TO bob", "GRANT SELECT FOR TABLES IN DATABASE Sales_db TO alice WITH GRANT OPTION", "GRANT ALL FOR TABLES IN SCHEMA ShareSchema DATABASE ShareDb TO ROLE Sales", "GRANT ASSUMEROLE ON 'arn:aws:iam::123456789012:role/Redshift-Exfunc' TO reg_user1 FOR EXTERNAL FUNCTION", "GRANT ROLE sample_role1 TO ROLE sample_role2", ] for sql in grant_cmds: with self.subTest(f"Testing Redshift's GRANT command statement: {sql}"): 
self.validate_identity(sql, check_command_warning=True)

        # NOTE(review): indentation is flattened in this dump; the calls below are
        # assumed to run once, after the loop over grant_cmds - confirm against the file.
        # These GRANT statements are validated without check_command_warning.
        self.validate_identity("GRANT SELECT ON TABLE sales TO fred")
        self.validate_identity("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users")
        self.validate_identity("GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users")
        self.validate_identity(
            "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users"
        )
        self.validate_identity("GRANT ALL ON view_date TO view_user")
        self.validate_identity(
            "GRANT SELECT(cust_name, cust_phone), UPDATE(cust_contact_preference) ON cust_profile TO GROUP sales_group"
        )
        self.validate_identity(
            "GRANT ALL(cust_name, cust_phone, cust_contact_preference) ON cust_profile TO GROUP sales_admin"
        )
        self.validate_identity("GRANT USAGE ON DATABASE sales_db TO Bob")
        self.validate_identity("GRANT USAGE ON SCHEMA sales_schema TO ROLE Analyst_role")
        self.validate_identity("GRANT SELECT ON sales_db.sales_schema.tickit_sales_redshift TO Bob")

    def test_revoke(self):
        # REVOKE statements validated with check_command_warning=True.
        # NOTE(review): presumably these are still handled as generic commands that
        # emit a warning - confirm in Validator.validate_identity (not shown here).
        revoke_cmds = [
            "REVOKE SELECT ON ALL TABLES IN SCHEMA qa_tickit FROM fred",
            "REVOKE USAGE ON DATASHARE salesshare FROM NAMESPACE '13b8833d-17c6-4f16-8fe4-1a018f5ed00d'",
            "REVOKE USAGE FOR SCHEMAS IN DATABASE Sales_db FROM ROLE Sales",
            "REVOKE EXECUTE FOR FUNCTIONS IN SCHEMA Sales_schema FROM bob",
            "REVOKE SELECT FOR TABLES IN DATABASE Sales_db FROM alice",
            "REVOKE ROLE sample_role1 FROM ROLE sample_role2",
        ]

        # One subTest per statement, so a failure is reported together with its SQL
        # and the remaining statements still run.
        for sql in revoke_cmds:
            with self.subTest(f"Testing Redshift's REVOKE command statement: {sql}"):
                self.validate_identity(sql, check_command_warning=True)

        # NOTE(review): assumed to sit after the loop (indentation is flattened) - confirm.
        # These REVOKE statements are validated without check_command_warning.
        self.validate_identity("REVOKE SELECT ON TABLE sales FROM fred")
        self.validate_identity("REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users")
        self.validate_identity("REVOKE USAGE ON DATABASE sales_db FROM Bob")
        self.validate_identity("REVOKE USAGE ON SCHEMA sales_schema FROM ROLE Analyst_role")

    def test_analyze(self):
        # ANALYZE variants, each passed to validate_identity with no separate expected SQL.
        self.validate_identity("ANALYZE TBL(col1, col2)")
        self.validate_identity("ANALYZE VERBOSE TBL")
        self.validate_identity("ANALYZE TBL PREDICATE COLUMNS")
self.validate_identity("ANALYZE TBL ALL COLUMNS")

    def test_cast(self):
        # NOTE(review): validate_identity / validate_all are inherited from Validator,
        # which is not visible in this chunk; comments here describe only the
        # arguments that are passed.
        # A quoted built-in type name ('"int"') is paired with CAST(1 AS INTEGER).
        # NOTE(review): the value returned by is_type() is discarded - if it returns a
        # bool (as the name suggests) this line can never fail on the type; consider
        # wrapping it in self.assertTrue(...).
        self.validate_identity('1::"int"', "CAST(1 AS INTEGER)").to.is_type(exp.DataType.Type.INT)

        # A quoted name that is not a known type ('"udt"') must fail to parse.
        with self.assertRaises(ParseError):
            parse_one('1::"udt"', read="redshift")

    def test_fetch_to_limit(self):
        # FETCH FIRST 1 ROWS ONLY is paired with LIMIT 1 for redshift and with the
        # unchanged FETCH clause for postgres.
        self.validate_all(
            "SELECT * FROM t FETCH FIRST 1 ROWS ONLY",
            write={
                "redshift": "SELECT * FROM t LIMIT 1",
                "postgres": "SELECT * FROM t FETCH FIRST 1 ROWS ONLY",
            },
        )

    def test_to_timestamp(self):
        # Redshift's TO_TIMESTAMP returns TIMESTAMPTZ
        # https://docs.aws.amazon.com/redshift/latest/dg/r_TO_TIMESTAMP.html
        expr = annotate_types(
            parse_one("SELECT TO_TIMESTAMP('2023-01-01', 'YYYY-MM-DD')", dialect="redshift"),
            dialect="redshift",
        )
        self.assertEqual(expr.expressions[0].type.this, exp.DataType.Type.TIMESTAMPTZ)

        # NOTE(review): the LAG ... IGNORE/RESPECT NULLS checks below are unrelated to
        # TO_TIMESTAMP; consider moving them into their own test method.
        # The in-parenthesis spelling is paired with the after-parenthesis spelling.
        self.validate_identity("SELECT LAG(x) IGNORE NULLS OVER (PARTITION BY y ORDER BY z)")
        self.validate_identity("SELECT LAG(x) RESPECT NULLS OVER (PARTITION BY y ORDER BY z)")
        self.validate_identity(
            "SELECT LAG(x IGNORE NULLS) OVER (PARTITION BY y ORDER BY z)",
            "SELECT LAG(x) IGNORE NULLS OVER (PARTITION BY y ORDER BY z)",
        )
        self.validate_identity(
            "SELECT LAG(x RESPECT NULLS) OVER (PARTITION BY y ORDER BY z)",
            "SELECT LAG(x) RESPECT NULLS OVER (PARTITION BY y ORDER BY z)",
        )

    def test_regexp_extract(self):
        # The third REGEXP_SUBSTR argument (2) is paired with SUBSTRING(abc, 2) in the
        # duckdb output; the table name "table" is quoted in both outputs.
        self.validate_all(
            "SELECT REGEXP_SUBSTR(abc, 'pattern(group)', 2) FROM table",
            write={
                "redshift": '''SELECT REGEXP_SUBSTR(abc, 'pattern(group)', 2) FROM "table"''',
                "duckdb": '''SELECT REGEXP_EXTRACT(SUBSTRING(abc, 2), 'pattern(group)') FROM "table"''',
            },
        )


================================================
FILE: tests/dialects/test_risingwave.py
================================================
from tests.dialects.test_dialect import Validator


class TestRisingWave(Validator):
    dialect = "risingwave"
    # unittest setting: None removes the length limit on assertion diffs.
    maxDiff = None

    def test_risingwave(self):
        # A default-dialect query with FOR UPDATE is paired with the plain SELECT.
        self.validate_all(
            "SELECT a FROM tbl",
            read={
                "": "SELECT a FROM tbl FOR UPDATE",
            },
        )
        # CREATE SOURCE / CREATE SINK with connector options and FORMAT ... ENCODE
        # clauses, passed with no separate expected SQL.
        self.validate_identity(
            "CREATE SOURCE from_kafka (*, gen_i32_field INT AS int32_field + 2, gen_i64_field INT AS int64_field + 2, WATERMARK FOR time_col AS time_col - INTERVAL '5 SECOND') INCLUDE header foo VARCHAR AS myheader INCLUDE key AS mykey WITH (connector='kafka', topic='my_topic') FORMAT PLAIN ENCODE PROTOBUF (A=1, B=2) KEY ENCODE PROTOBUF (A=3, B=4)"
        )
        self.validate_identity(
            "CREATE SINK my_sink AS SELECT * FROM A WITH (connector='kafka', topic='my_topic') FORMAT PLAIN ENCODE PROTOBUF (A=1, B=2) KEY ENCODE PROTOBUF (A=3, B=4)"
        )
        self.validate_identity(
            "WITH t1 AS MATERIALIZED (SELECT 1), t2 AS NOT MATERIALIZED (SELECT 2) SELECT * FROM t1, t2"
        )

    def test_datatypes(self):
        # A bare MAP type is paired with MAP(VARCHAR, INT) in the expected SQL,
        # both in a cast and in a column definition.
        self.validate_identity("SELECT CAST(NULL AS MAP(VARCHAR, INT)) AS map_column")
        self.validate_identity(
            "SELECT NULL::MAP AS map_column",
            "SELECT CAST(NULL AS MAP(VARCHAR, INT)) AS map_column",
        )
        self.validate_identity("CREATE TABLE t (map_col MAP(VARCHAR, INT))")
        self.validate_identity(
            "CREATE TABLE t (map_col MAP)",
            "CREATE TABLE t (map_col MAP(VARCHAR, INT))",
        )


================================================
FILE: tests/dialects/test_singlestore.py
================================================
from sqlglot import parse_one, exp
from sqlglot.optimizer.qualify import qualify
from tests.dialects.test_dialect import Validator


class TestSingleStore(Validator):
    dialect = "singlestore"

    def test_singlestore(self):
        # Qualify a query whose alias (my_id) has the same name as a real column.
        # Per the expected SQL, WHERE and GROUP BY resolve my_id to the column
        # `data`.`my_id`, while HAVING resolves it to the aliased expression `data`.`id`.
        ast = parse_one(
            "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1",
            dialect=self.dialect,
        )
        ast = qualify(ast, dialect=self.dialect, schema={"data": {"id": "INT", "my_id": "INT"}})
        self.assertEqual(
            "SELECT `data`.`id` AS `my_id` FROM `data` AS `data` WHERE `data`.`my_id` = 1 GROUP BY `data`.`my_id` HAVING `data`.`id` = 1",
            ast.sql(dialect=self.dialect),
        )
        self.validate_identity("SELECT 1")
        self.validate_identity("SELECT * FROM `users` ORDER BY ALL")
        self.validate_identity("SELECT ELT(2, 'foo', 'bar', 'baz')")
        self.validate_identity("SELECT CHARSET(CHAR(100 USING utf8))")
        self.validate_identity("SELECT TO_JSON(ROW(1, 2) :> RECORD(a INT, b INT))")
        self.validate_identity("JSON_KEYS(json_doc, 'a', 'b', 'c', 2)")
        self.validate_identity("SELECT VERSION()")
        # CURTIME() is paired with CURRENT_TIME() in the expected SQL.
        self.validate_identity("SELECT CURTIME()", "SELECT CURRENT_TIME()")

    def test_byte_strings(self):
        # An upper-case E prefix is paired with the lower-case e prefix.
        self.validate_identity("SELECT e'text'")
        self.validate_identity("SELECT E'text'", "SELECT e'text'")

    def test_national_strings(self):
        # N'text' from the default dialect is paired with the plain literal.
        self.validate_all(
            "SELECT 'text'", read={"": "SELECT N'text'", "singlestore": "SELECT 'text'"}
        )

    def test_restricted_keywords(self):
        # Each of these table names is paired with its back-quoted form.
        self.validate_identity("SELECT * FROM abs", "SELECT * FROM `abs`")
        self.validate_identity("SELECT * FROM ABS", "SELECT * FROM `ABS`")
        self.validate_identity(
            "SELECT * FROM security_lists_intersect", "SELECT * FROM `security_lists_intersect`"
        )
        self.validate_identity("SELECT * FROM vacuum", "SELECT * FROM `vacuum`")

    def test_time_formatting(self):
        self.validate_identity("SELECT STR_TO_DATE('March 3rd, 2015', '%M %D, %Y')")
        self.validate_identity("SELECT DATE_FORMAT(NOW(), '%Y-%m-%d %h:%i:%s')")
        self.validate_identity(
            "SELECT TO_DATE('03/01/2019', 'MM/DD/YYYY') AS `result`",
        )
        self.validate_identity(
            "SELECT TO_TIMESTAMP('The date and time are 01/01/2018 2:30:15.123456', 'The date and time are MM/DD/YYYY HH12:MI:SS.FF6') AS `result`",
        )
        self.validate_identity(
            "SELECT TO_CHAR('2018-03-01', 'MM/DD')",
        )
        # TIME_FORMAT is paired with DATE_FORMAT over a `:> TIME(6)` cast.
        self.validate_identity(
            "SELECT TIME_FORMAT('12:05:47', '%s, %i, %h')",
            "SELECT DATE_FORMAT('12:05:47' :> TIME(6), '%s, %i, %h')",
        )
        self.validate_identity("SELECT DATE('2019-01-01 05:06')")
        self.validate_all(
            "SELECT DATE('2019-01-01 05:06')",
            read={
                "": "SELECT TS_OR_DS_TO_DATE('2019-01-01 05:06')",
                "singlestore": "SELECT DATE('2019-01-01 05:06')",
            },
        )

    def test_cast(self):
        # `:>` is paired with CAST and `!:>` with TRY_CAST, in both directions.
        self.validate_all(
            "SELECT 1 :> INT",
            read={
                "": "SELECT CAST(1 AS INT)",
            },
            write={
                "singlestore": "SELECT 1 :> INT",
                "": "SELECT CAST(1 AS INT)",
            },
        )
        self.validate_all(
            "SELECT 1 !:> INT",
            read={
                "": "SELECT TRY_CAST(1 AS INT)",
            },
            write={
                "singlestore": "SELECT 1 !:> INT",
                "": "SELECT TRY_CAST(1 AS INT)",
            },
        )
        self.validate_identity("SELECT '{\"a\" : 1}' :> JSON")
        self.validate_identity("SELECT NOW() !:> TIMESTAMP(6)")
        self.validate_identity("SELECT x :> GEOGRAPHYPOINT")
        # The function-call form CAST(age, 'TEXT') is also paired with `:> TEXT`.
        self.validate_all(
            "SELECT age :> TEXT FROM `users`",
            read={
                "": "SELECT CAST(age, 'TEXT') FROM users",
                "singlestore": "SELECT age :> TEXT FROM `users`",
            },
        )

    def test_unix_functions(self):
        self.validate_identity("SELECT FROM_UNIXTIME(1234567890)")
        self.validate_identity("SELECT FROM_UNIXTIME(1234567890, '%M %D, %Y')")
        self.validate_identity("SELECT UNIX_TIMESTAMP()")
        self.validate_identity("SELECT UNIX_TIMESTAMP('2009-02-13 23:31:30') AS funday")
        # Several source spellings are each paired with UNIX_TIMESTAMP(...).
        self.validate_all(
            "SELECT UNIX_TIMESTAMP('2009-02-13 23:31:30')",
            read={"duckdb": "SELECT EPOCH('2009-02-13 23:31:30')"},
        )
        self.validate_all(
            "SELECT UNIX_TIMESTAMP('2009-02-13 23:31:30')",
            read={"duckdb": "SELECT TIME_STR_TO_UNIX('2009-02-13 23:31:30')"},
        )
        self.validate_all(
            "SELECT UNIX_TIMESTAMP('2009-02-13 23:31:30')",
            read={"": "SELECT TIME_STR_TO_UNIX('2009-02-13 23:31:30')"},
        )
        self.validate_all(
            "SELECT UNIX_TIMESTAMP('2009-02-13 23:31:30')",
            read={"": "SELECT UNIX_SECONDS('2009-02-13 23:31:30')"},
        )
        # Hive's one-argument FROM_UNIXTIME is paired with an explicit format argument.
        self.validate_all(
            "SELECT FROM_UNIXTIME(1234567890, '%Y-%m-%d %T')",
            read={"hive": "SELECT FROM_UNIXTIME(1234567890)"},
        )
        self.validate_all(
            "SELECT FROM_UNIXTIME(1234567890) :> TEXT",
            read={"": "SELECT UNIX_TO_TIME_STR(1234567890)"},
        )

    def test_json_extract(self):
        # The `::`, `::$` and `::%` operators are paired with JSON_EXTRACT_JSON,
        # JSON_EXTRACT_STRING and JSON_EXTRACT_DOUBLE respectively.
        self.validate_identity("SELECT a::b FROM t", "SELECT JSON_EXTRACT_JSON(a, 'b') FROM t")
        # NOTE(review): exact duplicate of the previous call.
        self.validate_identity("SELECT a::b FROM t", "SELECT JSON_EXTRACT_JSON(a, 'b') FROM t")
        self.validate_identity("SELECT a::$b FROM t", "SELECT JSON_EXTRACT_STRING(a, 'b') FROM t")
        self.validate_identity("SELECT a::%b FROM t", "SELECT JSON_EXTRACT_DOUBLE(a, 'b') FROM t")
        # A chained `::` is paired with nested JSON_EXTRACT_JSON calls.
        self.validate_identity(
            "SELECT a::`b`::`2` FROM t",
            "SELECT JSON_EXTRACT_JSON(JSON_EXTRACT_JSON(a, 'b'), '2') FROM t",
        )
        self.validate_identity("SELECT a::2 FROM t", "SELECT JSON_EXTRACT_JSON(a, '2') FROM t")
self.validate_all( "SELECT JSON_EXTRACT_JSON(a, 'b') FROM t", read={ "mysql": "SELECT JSON_EXTRACT(a, '$.b') FROM t", "singlestore": "SELECT JSON_EXTRACT_JSON(a, 'b') FROM t", }, write={"mysql": "SELECT JSON_EXTRACT(a, '$.b') FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_STRING(a, 'b') FROM t", write={"": "SELECT JSON_EXTRACT_SCALAR(a, '$.b', STRING) FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_DOUBLE(a, 'b') FROM t", write={"": "SELECT JSON_EXTRACT_SCALAR(a, '$.b', DOUBLE) FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_BIGINT(a, 'b') FROM t", write={"": "SELECT JSON_EXTRACT_SCALAR(a, '$.b', BIGINT) FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_BIGINT(a, 'b') FROM t", write={"": "SELECT JSON_EXTRACT_SCALAR(a, '$.b', BIGINT) FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_JSON(a, 'b', '2') FROM t", read={ "mysql": "SELECT JSON_EXTRACT(a, '$.b[2]') FROM t", "singlestore": "SELECT JSON_EXTRACT_JSON(a, 'b', '2') FROM t", }, write={"mysql": "SELECT JSON_EXTRACT(a, '$.b[2]') FROM t"}, ) self.validate_all( "SELECT JSON_EXTRACT_STRING(a, 'b', 2) FROM t", write={"": "SELECT JSON_EXTRACT_SCALAR(a, '$.b[2]', STRING) FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_BSON(a, 'b') FROM t", read={ "mysql": "SELECT JSONB_EXTRACT(a, 'b') FROM t", "singlestore": "SELECT BSON_EXTRACT_BSON(a, 'b') FROM t", }, write={"mysql": "SELECT JSONB_EXTRACT(a, '$.b') FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_STRING(a, 'b') FROM t", write={"": "SELECT JSONB_EXTRACT_SCALAR(a, '$.b', STRING) FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_DOUBLE(a, 'b') FROM t", write={"": "SELECT JSONB_EXTRACT_SCALAR(a, '$.b', DOUBLE) FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_BIGINT(a, 'b') FROM t", write={"": "SELECT JSONB_EXTRACT_SCALAR(a, '$.b', BIGINT) FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_BIGINT(a, 'b') FROM t", write={"": "SELECT JSONB_EXTRACT_SCALAR(a, '$.b', BIGINT) FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_BSON(a, 'b', 2) FROM 
t", write={"": "SELECT JSONB_EXTRACT(a, '$.b[2]') FROM t"}, ) self.validate_all( "SELECT BSON_EXTRACT_STRING(a, 'b', 2) FROM t", write={"": "SELECT JSONB_EXTRACT_SCALAR(a, '$.b[2]', STRING) FROM t"}, ) self.validate_all( 'SELECT JSON_EXTRACT_STRING(\'{"item": "shoes", "price": "49.95"}\', \'price\') :> DECIMAL(4, 2)', read={ "mysql": 'SELECT JSON_VALUE(\'{"item": "shoes", "price": "49.95"}\', \'$.price\' RETURNING DECIMAL(4, 2))' }, ) def test_json(self): self.validate_identity("SELECT JSON_ARRAY_CONTAINS_STRING('[\"a\", \"b\"]', 'b')") self.validate_identity("SELECT JSON_ARRAY_CONTAINS_DOUBLE('[1, 2]', 1)") self.validate_identity('SELECT JSON_ARRAY_CONTAINS_JSON(\'["{"a": 1}"]\', \'{"a": 1}\')') self.validate_all( "SELECT JSON_ARRAY_CONTAINS_JSON('[\"a\"]', TO_JSON('a'))", read={ "mysql": "SELECT 'a' MEMBER OF ('[\"a\"]')", "singlestore": "SELECT JSON_ARRAY_CONTAINS_JSON('[\"a\"]', TO_JSON('a'))", }, ) self.validate_all( 'SELECT JSON_PRETTY(\'["G","alpha","20",10]\')', read={ "singlestore": 'SELECT JSON_PRETTY(\'["G","alpha","20",10]\')', "": 'SELECT JSON_FORMAT(\'["G","alpha","20",10]\')', }, ) self.validate_all( "SELECT JSON_AGG(name ORDER BY id ASC NULLS LAST, name DESC NULLS FIRST) FROM t", read={ "singlestore": "SELECT JSON_AGG(name ORDER BY id ASC NULLS LAST, name DESC NULLS FIRST) FROM t", "oracle": "SELECT JSON_ARRAYAGG(name ORDER BY id ASC, name DESC) FROM t", }, ) self.validate_identity("SELECT JSON_AGG(name) FROM t") self.validate_identity("SELECT JSON_AGG(t.*) FROM t") self.validate_all( "SELECT JSON_BUILD_ARRAY(id, name) FROM t", read={ "singlestore": "SELECT JSON_BUILD_ARRAY(id, name) FROM t", "oracle": "SELECT JSON_ARRAY(id, name) FROM t", }, ) self.validate_identity("JSON_BUILD_ARRAY(id, name)").assert_is(exp.JSONArray) self.validate_all( "SELECT BSON_MATCH_ANY_EXISTS('{\"x\":true}', 'x')", read={ "singlestore": "SELECT BSON_MATCH_ANY_EXISTS('{\"x\":true}', 'x')", "": "SELECT JSONB_EXISTS('{\"x\":true}', 'x')", }, ) self.validate_all( "SELECT 
JSON_MATCH_ANY_EXISTS('{\"a\":1}', 'a')", read={ "singlestore": "SELECT JSON_MATCH_ANY_EXISTS('{\"a\":1}', 'a')", "oracle": "SELECT JSON_EXISTS('{\"a\":1}', '$.a')", }, ) self.validate_all( "SELECT JSON_BUILD_OBJECT('name', name) FROM t", read={ "singlestore": "SELECT JSON_BUILD_OBJECT('name', name) FROM t", "": "SELECT JSON_OBJECT('name', name) FROM t", }, ) self.validate_identity("JSON_BUILD_OBJECT('name', name)").assert_is(exp.JSONObject) def test_date_parts_functions(self): self.validate_identity( "SELECT DAYNAME('2014-04-18')", "SELECT DATE_FORMAT('2014-04-18', '%W')" ) self.validate_identity( "SELECT HOUR('2009-02-13 23:31:30')", "SELECT DATE_FORMAT('2009-02-13 23:31:30' :> TIME(6), '%k') :> INT", ) self.validate_identity( "SELECT MICROSECOND('2009-02-13 23:31:30.123456')", "SELECT DATE_FORMAT('2009-02-13 23:31:30.123456' :> TIME(6), '%f') :> INT", ) self.validate_identity( "SELECT SECOND('2009-02-13 23:31:30.123456')", "SELECT DATE_FORMAT('2009-02-13 23:31:30.123456' :> TIME(6), '%s') :> INT", ) self.validate_identity( "SELECT MONTHNAME('2014-04-18')", "SELECT DATE_FORMAT('2014-04-18', '%M')" ) self.validate_identity( "SELECT WEEKDAY('2014-04-18')", "SELECT (DAYOFWEEK('2014-04-18') + 5) % 7" ) self.validate_identity( "SELECT MINUTE('2009-02-13 23:31:30.123456')", "SELECT DATE_FORMAT('2009-02-13 23:31:30.123456' :> TIME(6), '%i') :> INT", ) self.validate_all( "SELECT ((DAYOFWEEK('2014-04-18') % 7) + 1)", read={ "singlestore": "SELECT ((DAYOFWEEK('2014-04-18') % 7) + 1)", "": "SELECT DAYOFWEEK_ISO('2014-04-18')", }, ) self.validate_all( "SELECT DAY('2014-04-18')", read={ "singlestore": "SELECT DAY('2014-04-18')", "": "SELECT DAY_OF_MONTH('2014-04-18')", }, ) def test_math_functions(self): self.validate_all( "SELECT APPROX_COUNT_DISTINCT(asset_id) AS approx_distinct_asset_id FROM acd_assets", read={ "singlestore": "SELECT APPROX_COUNT_DISTINCT(asset_id) AS approx_distinct_asset_id FROM acd_assets", "": "SELECT HLL(asset_id) AS approx_distinct_asset_id FROM 
acd_assets", }, ) self.validate_identity( "SELECT APPROX_COUNT_DISTINCT(asset_id1, asset_id2) AS approx_distinct_asset_id FROM acd_assets" ) self.validate_all( "SELECT APPROX_COUNT_DISTINCT(asset_id) AS approx_distinct_asset_id FROM acd_assets", read={ "singlestore": "SELECT APPROX_COUNT_DISTINCT(asset_id) AS approx_distinct_asset_id FROM acd_assets", "": "SELECT APPROX_DISTINCT(asset_id) AS approx_distinct_asset_id FROM acd_assets", }, ) self.validate_all( "SELECT SUM(CASE WHEN age > 18 THEN 1 ELSE 0 END) FROM `users`", read={ "singlestore": "SELECT SUM(CASE WHEN age > 18 THEN 1 ELSE 0 END) FROM `users`", "": "SELECT COUNT_IF(age > 18) FROM users", }, ) self.validate_all( "SELECT MAX(ABS(age > 18)) FROM `users`", read={ "singlestore": "SELECT MAX(ABS(age > 18)) FROM `users`", "": "SELECT LOGICAL_OR(age > 18) FROM users", }, ) self.validate_all( "SELECT MIN(ABS(age > 18)) FROM `users`", read={ "singlestore": "SELECT MIN(ABS(age > 18)) FROM `users`", "": "SELECT LOGICAL_AND(age > 18) FROM users", }, ) self.validate_identity( "SELECT `class`, student_id, test1, APPROX_PERCENTILE(test1, 0.3) OVER (PARTITION BY `class`) AS percentile FROM test_scores" ) self.validate_identity( "SELECT `class`, student_id, test1, APPROX_PERCENTILE(test1, 0.3, 0.4) OVER (PARTITION BY `class`) AS percentile FROM test_scores" ) self.validate_all( "SELECT APPROX_PERCENTILE(test1, 0.3) FROM test_scores", read={ "singlestore": "SELECT APPROX_PERCENTILE(test1, 0.3) FROM test_scores", # accuracy parameter is not supported in SingleStore, so it is ignored "": "SELECT APPROX_QUANTILE(test1, 0.3, 0.4) FROM test_scores", }, ) self.validate_all( "SELECT VAR_SAMP(yearly_total) FROM player_scores", read={ "singlestore": "SELECT VAR_SAMP(yearly_total) FROM player_scores", "": "SELECT VARIANCE(yearly_total) FROM player_scores", }, write={ "": "SELECT VARIANCE(yearly_total) FROM player_scores", }, ) self.validate_all( "SELECT VAR_POP(yearly_total) FROM player_scores", read={ "singlestore": "SELECT 
VARIANCE(yearly_total) FROM player_scores", "": "SELECT VARIANCE_POP(yearly_total) FROM player_scores", }, write={ "": "SELECT VARIANCE_POP(yearly_total) FROM player_scores", }, ) self.validate_all( "SELECT POWER(id, 1 / 3) FROM orders", read={ "": "SELECT CBRT(id) FROM orders", "singlestore": "SELECT POWER(id, 1 / 3) FROM orders", }, ) def test_logical(self): self.validate_all( "SELECT (TRUE AND (NOT FALSE)) OR ((NOT TRUE) AND FALSE)", read={ "mysql": "SELECT TRUE XOR FALSE", "singlestore": "SELECT (TRUE AND (NOT FALSE)) OR ((NOT TRUE) AND FALSE)", }, ) def test_string_functions(self): self.validate_all( "SELECT 'a' RLIKE 'b'", read={ "bigquery": "SELECT REGEXP_CONTAINS('a', 'b')", "singlestore": "SELECT 'a' RLIKE 'b'", }, ) self.validate_identity("SELECT 'a' REGEXP 'b'", "SELECT 'a' RLIKE 'b'") self.validate_all( "SELECT LPAD('', LENGTH('a') * 3, 'a')", read={ "": "SELECT REPEAT('a', 3)", "singlestore": "SELECT LPAD('', LENGTH('a') * 3, 'a')", }, ) self.validate_all( "SELECT REGEXP_SUBSTR('adog', 'O', 1, 1, 'c')", read={ # group parameter is not supported in SingleStore, so it is ignored "": "SELECT REGEXP_EXTRACT('adog', 'O', 1, 1, 'c', 'gr1')", "singlestore": "SELECT REGEXP_SUBSTR('adog', 'O', 1, 1, 'c')", }, ) self.validate_all( "SELECT ('a' RLIKE '^[\x00-\x7f]*$')", read={"singlestore": "SELECT ('a' RLIKE '^[\x00-\x7f]*$')", "": "SELECT IS_ASCII('a')"}, ) self.validate_all( "SELECT UNHEX(MD5('data'))", read={ "singlestore": "SELECT UNHEX(MD5('data'))", "": "SELECT MD5_DIGEST('data')", }, ) self.validate_all( "SELECT CHAR(101)", read={"": "SELECT CHR(101)", "singlestore": "SELECT CHAR(101)"} ) self.validate_all( "SELECT INSTR('ohai', 'i')", read={ "": "SELECT CONTAINS('ohai', 'i')", "singlestore": "SELECT INSTR('ohai', 'i')", }, ) self.validate_all( "SELECT REGEXP_MATCH('adog', 'O', 'c')", read={ # group, position, occurrence parameters are not supported in SingleStore, so they are ignored "": "SELECT REGEXP_EXTRACT_ALL('adog', 'O', 1, 'c', 1, 'gr1')", 
"singlestore": "SELECT REGEXP_MATCH('adog', 'O', 'c')", }, ) self.validate_all( "SELECT REGEXP_SUBSTR('adog', 'O', 1, 1, 'c')", read={ # group parameter is not supported in SingleStore, so it is ignored "": "SELECT REGEXP_EXTRACT('adog', 'O', 1, 1, 'c', 'gr1')", "singlestore": "SELECT REGEXP_SUBSTR('adog', 'O', 1, 1, 'c')", }, ) self.validate_all( "SELECT REGEXP_INSTR('abcd', CONCAT('^', 'ab'))", read={ "": "SELECT STARTS_WITH('abcd', 'ab')", "singlestore": "SELECT REGEXP_INSTR('abcd', CONCAT('^', 'ab'))", }, ) self.validate_all( "SELECT CONV('f', 16, 10)", read={ "redshift": "SELECT STRTOL('f',16)", "singlestore": "SELECT CONV('f', 16, 10)", }, ) self.validate_all( "SELECT LOWER('ABC') RLIKE LOWER('a.*')", read={ "postgres": "SELECT 'ABC' ~* 'a.*'", "singlestore": "SELECT LOWER('ABC') RLIKE LOWER('a.*')", }, ) self.validate_all( "SELECT CONCAT(SUBSTRING('abcdef', 1, 2 - 1), 'xyz', SUBSTRING('abcdef', 2 + 3))", read={ "singlestore": "SELECT CONCAT(SUBSTRING('abcdef', 1, 2 - 1), 'xyz', SUBSTRING('abcdef', 2 + 3))", "": "SELECT STUFF('abcdef', 2, 3, 'xyz')", }, ) self.validate_all( "SELECT SHA(email) FROM t", read={ "singlestore": "SELECT SHA(email) FROM t", "": "SELECT STANDARD_HASH(email) FROM t", }, ) self.validate_all( "SELECT SHA(email) FROM t", read={ "singlestore": "SELECT SHA(email) FROM t", "": "SELECT STANDARD_HASH(email, 'sha') FROM t", }, ) self.validate_all( "SELECT MD5(email) FROM t", read={ "singlestore": "SELECT MD5(email) FROM t", "": "SELECT STANDARD_HASH(email, 'MD5') FROM t", }, ) def test_reduce_functions(self): self.validate_all( "SELECT REDUCE(0, JSON_TO_ARRAY('[1,2,3,4]'), REDUCE_ACC() + REDUCE_VALUE()) AS `Result`", read={ # finish argument is not supported in SingleStore, so it is ignored "": "SELECT REDUCE(JSON_TO_ARRAY('[1,2,3,4]'), 0, REDUCE_ACC() + REDUCE_VALUE(), REDUCE_ACC() + REDUCE_VALUE()) AS Result", "singlestore": "SELECT REDUCE(0, JSON_TO_ARRAY('[1,2,3,4]'), REDUCE_ACC() + REDUCE_VALUE()) AS `Result`", }, ) def 
test_time_functions(self): self.validate_all( "SELECT TIME_BUCKET('1d', '2019-03-14 06:04:12', '2019-03-13 03:00:00')", read={ # unit and zone parameters are not supported in SingleStore, so they are ignored "": "SELECT DATE_BIN('1d', '2019-03-14 06:04:12', DAY, 'UTC', '2019-03-13 03:00:00')", "singlestore": "SELECT TIME_BUCKET('1d', '2019-03-14 06:04:12', '2019-03-13 03:00:00')", }, ) self.validate_all( "SELECT '2019-03-14 06:04:12' :> DATE", read={ "": "SELECT TIME_STR_TO_DATE('2019-03-14 06:04:12')", "singlestore": "SELECT '2019-03-14 06:04:12' :> DATE", }, ) self.validate_all( "SELECT CONVERT_TZ(NOW() :> TIMESTAMP, 'GMT', 'UTC')", read={ "spark2": "SELECT TO_UTC_TIMESTAMP(NOW(), 'GMT')", "singlestore": "SELECT CONVERT_TZ(NOW() :> TIMESTAMP, 'GMT', 'UTC')", }, ) self.validate_all( "SELECT STR_TO_DATE(20190314, '%Y%m%d')", read={ "": "SELECT DI_TO_DATE(20190314)", "singlestore": "SELECT STR_TO_DATE(20190314, '%Y%m%d')", }, ) self.validate_all( "SELECT (DATE_FORMAT('2019-03-14 06:04:12', '%Y%m%d') :> INT)", read={ "singlestore": "SELECT (DATE_FORMAT('2019-03-14 06:04:12', '%Y%m%d') :> INT)", "": "SELECT DATE_TO_DI('2019-03-14 06:04:12')", }, ) self.validate_all( "SELECT (DATE_FORMAT('2019-03-14 06:04:12', '%Y%m%d') :> INT)", read={ "singlestore": "SELECT (DATE_FORMAT('2019-03-14 06:04:12', '%Y%m%d') :> INT)", "": "SELECT TS_OR_DI_TO_DI('2019-03-14 06:04:12')", }, ) self.validate_all( "SELECT '2019-03-14 06:04:12' :> TIME", read={ # zone parameter is not supported in SingleStore, so it is ignored "bigquery": "SELECT TIME('2019-03-14 06:04:12', 'GMT')", "singlestore": "SELECT '2019-03-14 06:04:12' :> TIME", }, ) self.validate_all( "SELECT DATE_ADD(NOW(), INTERVAL '1' MONTH)", read={ "bigquery": "SELECT DATETIME_ADD(NOW(), INTERVAL 1 MONTH)", "singlestore": "SELECT DATE_ADD(NOW(), INTERVAL '1' MONTH)", }, ) self.validate_all( "SELECT DATE_TRUNC('MINUTE', '2016-08-08 12:05:31')", read={ "bigquery": "SELECT DATETIME_TRUNC('2016-08-08 12:05:31', MINUTE)", "singlestore": 
"SELECT DATE_TRUNC('MINUTE', '2016-08-08 12:05:31')", }, ) self.validate_all( "SELECT DATE_SUB('2010-04-02', INTERVAL '1' WEEK)", read={ "bigquery": "SELECT DATETIME_SUB('2010-04-02', INTERVAL '1' WEEK)", "singlestore": "SELECT DATE_SUB('2010-04-02', INTERVAL '1' WEEK)", }, ) self.validate_all( "SELECT TIMESTAMPDIFF(QUARTER, '2009-02-13', '2013-09-01')", read={ "singlestore": "SELECT TIMESTAMPDIFF(QUARTER, '2009-02-13', '2013-09-01')", "": "SELECT DATETIME_DIFF('2013-09-01', '2009-02-13', QUARTER)", }, ) self.validate_all( "SELECT TIMESTAMPDIFF(QUARTER, '2009-02-13', '2013-09-01')", read={ "singlestore": "SELECT TIMESTAMPDIFF(QUARTER, '2009-02-13', '2013-09-01')", "bigquery": "SELECT DATE_DIFF('2013-09-01', '2009-02-13', QUARTER)", "duckdb": "SELECT DATE_DIFF('QUARTER', '2009-02-13', '2013-09-01')", }, ) self.validate_all( "SELECT DATEDIFF(DATE('2013-09-01'), DATE('2009-02-13'))", read={ "hive": "SELECT DATEDIFF('2013-09-01', '2009-02-13')", "singlestore": "SELECT DATEDIFF(DATE('2013-09-01'), DATE('2009-02-13'))", }, ) self.validate_all( "SELECT DATE_TRUNC('MINUTE', '2016-08-08 12:05:31')", read={ "": "SELECT TIMESTAMP_TRUNC('2016-08-08 12:05:31', MINUTE)", "singlestore": "SELECT DATE_TRUNC('MINUTE', '2016-08-08 12:05:31')", }, ) self.validate_all( "SELECT TIMESTAMPDIFF(WEEK, '2009-01-01', '2009-12-31') AS numweeks", read={ "redshift": "SELECT datediff(week,'2009-01-01','2009-12-31') AS numweeks", "singlestore": "SELECT TIMESTAMPDIFF(WEEK, '2009-01-01', '2009-12-31') AS numweeks", }, ) self.validate_all( "SELECT DATEDIFF('2009-12-31', '2009-01-01') AS numweeks", read={ "": "SELECT TS_OR_DS_DIFF('2009-12-31', '2009-01-01') AS numweeks", "singlestore": "SELECT DATEDIFF('2009-12-31', '2009-01-01') AS numweeks", }, ) self.validate_all( "SELECT CURRENT_DATE()", read={ "": "SELECT CURRENT_DATE()", "singlestore": "SELECT CURRENT_DATE", }, ) self.validate_all( "SELECT UTC_DATE()", read={ "": "SELECT CURRENT_DATE('UTC')", "singlestore": "SELECT UTC_DATE", }, write={"": 
def test_types(self):
    """Run the data-type cases for ``CREATE TABLE testTypes``.

    Each table row ``(expected, dialect, source)`` results in one call
    ``self.validate_all(expected, read={"singlestore": expected, dialect: source})``,
    issued in the listed order. A single ``validate_identity`` call sits
    between the two tables.

    NOTE(review): ``validate_all`` / ``validate_identity`` are inherited helpers
    that are not visible in this part of the file; presumably each ``read``
    entry must transpile to ``expected`` -- confirm against the base class.
    """
    leading_cases = (
        (
            "CREATE TABLE testTypes (a DECIMAL(10, 20))",
            "bigquery",
            "CREATE TABLE testTypes (a BIGDECIMAL(10, 20))",
        ),
        ("CREATE TABLE testTypes (a BOOLEAN)", "tsql", "CREATE TABLE testTypes (a BIT)"),
        ("CREATE TABLE testTypes (a DATE)", "clickhouse", "CREATE TABLE testTypes (a DATE32)"),
        (
            "CREATE TABLE testTypes (a DATETIME)",
            "clickhouse",
            "CREATE TABLE testTypes (a DATETIME64)",
        ),
        (
            "CREATE TABLE testTypes (a DECIMAL(9, 3))",
            "clickhouse",
            "CREATE TABLE testTypes (a DECIMAL32(3))",
        ),
        (
            "CREATE TABLE testTypes (a DECIMAL(18, 3))",
            "clickhouse",
            "CREATE TABLE testTypes (a DECIMAL64(3))",
        ),
        (
            "CREATE TABLE testTypes (a DECIMAL(38, 3))",
            "clickhouse",
            "CREATE TABLE testTypes (a DECIMAL128(3))",
        ),
        (
            "CREATE TABLE testTypes (a DECIMAL(65, 3))",
            "clickhouse",
            "CREATE TABLE testTypes (a DECIMAL256(3))",
        ),
        # ENUM8 and ENUM16 are two separate inputs with the same expected SQL.
        (
            "CREATE TABLE testTypes (a ENUM('a'))",
            "clickhouse",
            "CREATE TABLE testTypes (a ENUM8('a'))",
        ),
        (
            "CREATE TABLE testTypes (a ENUM('a'))",
            "clickhouse",
            "CREATE TABLE testTypes (a ENUM16('a'))",
        ),
        (
            "CREATE TABLE testTypes (a TEXT(2))",
            "clickhouse",
            "CREATE TABLE testTypes (a FIXEDSTRING(2))",
        ),
        (
            "CREATE TABLE testTypes (a GEOGRAPHY)",
            "snowflake",
            "CREATE TABLE testTypes (a GEOMETRY)",
        ),
        (
            "CREATE TABLE testTypes (a GEOGRAPHYPOINT)",
            "clickhouse",
            "CREATE TABLE testTypes (a POINT)",
        ),
        # The remaining ClickHouse geo inputs all share the GEOGRAPHY expectation.
        (
            "CREATE TABLE testTypes (a GEOGRAPHY)",
            "clickhouse",
            "CREATE TABLE testTypes (a RING)",
        ),
        (
            "CREATE TABLE testTypes (a GEOGRAPHY)",
            "clickhouse",
            "CREATE TABLE testTypes (a LINESTRING)",
        ),
        (
            "CREATE TABLE testTypes (a GEOGRAPHY)",
            "clickhouse",
            "CREATE TABLE testTypes (a POLYGON)",
        ),
        (
            "CREATE TABLE testTypes (a GEOGRAPHY)",
            "clickhouse",
            "CREATE TABLE testTypes (a MULTIPOLYGON)",
        ),
        ("CREATE TABLE testTypes (a BSON)", "postgres", "CREATE TABLE testTypes (a JSONB)"),
    )
    trailing_cases = (
        (
            "CREATE TABLE testTypes (a TIMESTAMP)",
            "duckdb",
            "CREATE TABLE testTypes (a TIMESTAMP_S)",
        ),
        (
            "CREATE TABLE testTypes (a TIMESTAMP(6))",
            "duckdb",
            "CREATE TABLE testTypes (a TIMESTAMP_MS)",
        ),
        ("CREATE TABLE testTypes (a BLOB)", "", "CREATE TABLE testTypes (a VARBINARY)"),
    )

    def run_cases(cases):
        # The "singlestore" key is inserted first so the mapping keeps the
        # same key order as the hand-written dictionaries it replaces.
        for expected, dialect, source in cases:
            self.validate_all(expected, read={"singlestore": expected, dialect: source})

    run_cases(leading_cases)
    self.validate_identity("CREATE TABLE testTypes (a TIMESTAMP(6))")
    run_cases(trailing_cases)


def test_column_with_tablename(self):
    """Pass a backtick-quoted, table-qualified column query to ``validate_identity``."""
    query = "SELECT `t0`.`name` FROM `t0`"
    self.validate_identity(query)


def test_unicodestring_sql(self):
    """Validate ``SELECT 'data'`` against a Presto ``U&'...'`` literal and itself."""
    expected = "SELECT 'data'"
    sources = {
        "presto": "SELECT U&'d\\0061t\\0061'",
        "singlestore": expected,
    }
    self.validate_all(expected, read=sources)


def test_collate_sql(self):
    """Exercise ``COLLATE`` combined with the ``:> LONGTEXT`` cast.

    The first call reads a plain ``COLLATE`` query from the default (``""``)
    dialect. The second call hands ``validate_identity`` the cast form plus a
    second SQL string in which the cast appears twice.

    NOTE(review): the second positional argument is presumably the SQL expected
    after regeneration -- confirm against the helper's signature.
    """
    cast_once = "SELECT name :> LONGTEXT COLLATE 'utf8mb4_bin' FROM `users`"
    cast_twice = "SELECT name :> LONGTEXT :> LONGTEXT COLLATE 'utf8mb4_bin' FROM `users`"

    self.validate_all(
        cast_once,
        read={"": "SELECT name COLLATE 'utf8mb4_bin' FROM users"},
    )
    self.validate_identity(cast_once, cast_twice)


def test_match_against(self):
    """Check that the first projection of each query is an ``exp.MatchAgainst`` node."""
    queries = (
        "SELECT MATCH(name) AGAINST('search term') FROM products",
        "SELECT MATCH(name, name) AGAINST('book') FROM products",
        "SELECT MATCH(TABLE products2) AGAINST('search term') FROM products2",
    )
    for query in queries:
        parsed = self.validate_identity(query)
        parsed.expressions[0].assert_is(exp.MatchAgainst)


def test_show(self):
    """Feed a list of ``SHOW`` statements to ``validate_identity`` in order.

    Each entry is the tuple of positional arguments for one call. One-element
    tuples pass only the statement; the two-element tuples (``SHOW INDEXES`` and
    ``SHOW KEYS``) also pass ``"SHOW INDEX FROM mytbl"`` as the second argument.
    """
    statements = (
        ("SHOW AGGREGATES FROM db1",),
        ("SHOW AGGREGATES LIKE 'multiply%'",),
        ("SHOW CDC EXTRACTOR POOL",),
        ("SHOW CREATE AGGREGATE avg_udaf",),
        ("SHOW CREATE PIPELINE mypipeline",),
        ("SHOW CREATE PROJECTION lineitem_sort_shipdate FOR TABLE lineitem",),
        ("SHOW DATABASE STATUS",),
        ("SHOW DISTRIBUTED_PLANCACHE STATUS",),
        ("SHOW FULLTEXT SERVICE STATUS",),
        ("SHOW FULLTEXT SERVICE METRICS LOCAL",),
        ("SHOW FULLTEXT SERVICE METRICS FOR NODE 1",),
        ("SHOW FUNCTIONS FROM db LIKE 'a'",),
        ("SHOW GROUPS",),
        ("SHOW GROUPS FOR ROLE 'role_name_0'",),
        ("SHOW GROUPS FOR USER 'root'",),
        ("SHOW INDEXES FROM mytbl", "SHOW INDEX FROM mytbl"),
        ("SHOW KEYS FROM mytbl", "SHOW INDEX FROM mytbl"),
        ("SHOW LINKS ON Orderdb",),
        ("SHOW LOAD ERRORS",),
        ("SHOW LOAD WARNINGS",),
        ("SHOW PARTITIONS ON memsql_demo",),
        ("SHOW PIPELINES",),
        ("SHOW PLAN JSON 25",),
        ("SHOW PLAN 25",),
        ("SHOW PLANCACHE",),
        ("SHOW PROCEDURES FROM dbExample",),
        ("SHOW PROCEDURES LIKE '%sp%'",),
        ("SHOW PROJECTIONS ON TABLE t",),
        ("SHOW PROJECTIONS",),
        ("SHOW REPLICATION STATUS",),
        ("SHOW REPRODUCTION",),
        ("SHOW REPRODUCTION INTO OUTFILE 'a'",),
        ("SHOW RESOURCE POOLS",),
        ("SHOW ROLES LIKE 'xyz'",),
        ("SHOW ROLES FOR GROUP 'group_0'",),
        ("SHOW ROLES FOR USER 'root'",),
        ("SHOW STATUS",),
        ("SHOW USERS",),
        ("SHOW USERS FOR GROUP 'group_name'",),
        ("SHOW USERS FOR ROLE 'role_name'",),
    )
    for call_args in statements:
        self.validate_identity(*call_args)


def test_truncate(self):
    """Validate a multi-table ``TRUNCATE TABLE`` against two ``TRUNCATE`` statements."""
    expected = "TRUNCATE t1; TRUNCATE t2"
    sources = {"": "TRUNCATE TABLE t1, t2"}
    self.validate_all(expected, read=sources)


def test_vector(self):
    """Validate ``VECTOR`` column types in both directions with Snowflake.

    For every ``(singlestore_sql, snowflake_sql)`` pair the Snowflake text is
    used as a ``read`` source and as the ``write`` expectation, and the
    SingleStore text is also listed as a ``read`` source.
    """
    pairs = (
        ("CREATE TABLE t (a VECTOR(10, I32))", "CREATE TABLE t (a VECTOR(INT, 10))"),
        ("CREATE TABLE t (a VECTOR(10))", "CREATE TABLE t (a VECTOR(10))"),
    )
    for singlestore_sql, snowflake_sql in pairs:
        self.validate_all(
            singlestore_sql,
            read={"snowflake": snowflake_sql, "singlestore": singlestore_sql},
            write={"snowflake": snowflake_sql},
        )


def test_alter(self):
    """Pass two ``ALTER TABLE`` statements (``CHANGE`` and ``MODIFY COLUMN``) to ``validate_identity``."""
    for statement in (
        "ALTER TABLE t CHANGE middle_initial middle_name",
        "ALTER TABLE t MODIFY COLUMN name TEXT COLLATE 'binary'",
    ):
        self.validate_identity(statement)


def test_constraints(self):
    """Validate computed-column definitions using ``PERSISTED`` and ``AUTO``."""
    persisted_auto = (
        "CREATE TABLE ComputedColumnConstraint "
        "(points INT, score AS (points * 2) PERSISTED AUTO NOT NULL)"
    )
    sources = {
        "": "CREATE TABLE ComputedColumnConstraint (points INT, score AS (points * 2) PERSISTED NOT NULL)",
        "singlestore": "CREATE TABLE ComputedColumnConstraint (points INT, score AS (points * 2) AUTO NOT NULL)",
    }
    self.validate_all(persisted_auto, read=sources)

    persisted_typed = (
        "CREATE TABLE ComputedColumnConstraint "
        "(points INT, score AS (points * 2) PERSISTED BIGINT NOT NULL)"
    )
    self.validate_identity(persisted_typed)


def test_dcolonqmark(self):
    """Pass a query using the ``::?`` operator to ``validate_identity``."""
    query = "SELECT * FROM employee WHERE JSON_MATCH_ANY(payroll::?names)"
    self.validate_identity(query)
def test_snowflake(self): self.validate_identity( """WITH t AS (SELECT PARSE_JSON('{"level1": {"level2": {"level3": "value"}}}') AS data) SELECT data: level1 : level2 : level3::VARIANT FROM t""", """WITH t AS (SELECT PARSE_JSON('{"level1": {"level2": {"level3": "value"}}}') AS data) SELECT CAST(GET_PATH(data, 'level1.level2.level3') AS VARIANT) FROM t""", ) self.validate_identity( "SELECT * FROM x ASOF JOIN y OFFSET MATCH_CONDITION (x.a > y.a)", "SELECT * FROM x ASOF JOIN y AS OFFSET MATCH_CONDITION (x.a > y.a)", ) self.validate_identity( "SELECT * FROM x ASOF JOIN y LIMIT MATCH_CONDITION (x.a > y.a)", "SELECT * FROM x ASOF JOIN y AS LIMIT MATCH_CONDITION (x.a > y.a)", ) self.validate_identity("SELECT session") self.validate_identity("x::nvarchar()", "CAST(x AS VARCHAR)") ast = self.parse_one("DATEADD(DAY, n, d)") ast.set("unit", exp.Literal.string("MONTH")) self.assertEqual(ast.sql("snowflake"), "DATEADD(MONTH, n, d)") self.validate_identity("SELECT DATE_PART(EPOCH_MILLISECOND, CURRENT_TIMESTAMP()) AS a") self.validate_identity("SELECT GET(a, b)") self.validate_identity("SELECT HASH_AGG(a, b, c, d)") self.validate_identity("SELECT GREATEST(1, 2, 3, NULL)") self.validate_identity("SELECT GREATEST_IGNORE_NULLS(1, 2, 3, NULL)") self.validate_identity("SELECT LEAST(5, NULL, 7, 3)") self.validate_identity("SELECT LEAST_IGNORE_NULLS(5, NULL, 7, 3)") self.validate_identity("SELECT MAX(x)") self.validate_identity("SELECT COUNT(x)") self.validate_identity("SELECT MIN(amount)") self.validate_identity("SELECT MODE(x)") self.validate_identity("SELECT MODE(status) OVER (PARTITION BY region) FROM orders") self.validate_identity("SELECT TAN(x)") self.validate_identity("SELECT COS(x)") self.validate_identity("SELECT SINH(1.5)") self.validate_identity("SELECT MOD(x, y)", "SELECT x % y") self.validate_identity("SELECT ROUND(x)") self.validate_identity("SELECT ROUND(123.456, -1)") self.validate_identity("SELECT ROUND(123.456, 2, 'HALF_AWAY_FROM_ZERO')") 
self.validate_identity("SELECT FLOOR(x)") self.validate_identity("SELECT FLOOR(135.135, 1)") self.validate_identity("SELECT FLOOR(x, -1)") self.validate_identity( "SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY salary) FROM employees" ) self.assertEqual( # Ensures we don't fail when generating ParseJSON with the `safe` arg set to `True` self.validate_identity("""SELECT TRY_PARSE_JSON('{"x: 1}')""").sql(), """SELECT PARSE_JSON('{"x: 1}')""", ) self.validate_identity( "SELECT APPROX_TOP_K(col) FROM t", "SELECT APPROX_TOP_K(col, 1) FROM t", ) self.validate_identity("SELECT APPROX_TOP_K(category, 3) FROM t") self.validate_identity("APPROX_TOP_K(C4, 3, 5)").assert_is(exp.AggFunc) self.validate_identity("SELECT MINHASH(5, col)") self.validate_identity("SELECT MINHASH(5, col1, col2)") self.validate_identity("SELECT MINHASH(5, *)") self.validate_identity("SELECT MINHASH_COMBINE(minhash_col)") self.validate_identity("SELECT APPROXIMATE_SIMILARITY(minhash_col)") self.validate_identity( "SELECT APPROXIMATE_JACCARD_INDEX(minhash_col)", "SELECT APPROXIMATE_SIMILARITY(minhash_col)", ) self.validate_identity("SELECT APPROX_PERCENTILE_ACCUMULATE(col)") self.validate_identity("SELECT APPROX_PERCENTILE_ESTIMATE(state, 0.5)") self.validate_identity("SELECT APPROX_TOP_K_ACCUMULATE(col, 10)") self.validate_identity("SELECT APPROX_TOP_K_COMBINE(state, 2)") self.validate_identity("SELECT APPROX_TOP_K_COMBINE(state)") self.validate_identity("SELECT APPROX_TOP_K_ESTIMATE(state_column, 4)") self.validate_identity("SELECT APPROX_TOP_K_ESTIMATE(state_column)") self.validate_identity("SELECT APPROX_PERCENTILE_COMBINE(state_column)") self.validate_identity("SELECT EQUAL_NULL(1, 2)") self.validate_identity("SELECT EXP(1)") self.validate_identity("SELECT FACTORIAL(5)") self.validate_identity("SELECT BIT_LENGTH('abc')") self.validate_identity("SELECT BIT_LENGTH(x'A1B2')") self.validate_all( "SELECT BITMAP_BIT_POSITION(10)", write={ "duckdb": "SELECT (CASE WHEN 10 > 0 THEN 10 - 1 ELSE ABS(10) 
END) % 32768", "snowflake": "SELECT BITMAP_BIT_POSITION(10)", }, ) self.validate_identity("SELECT BITMAP_BUCKET_NUMBER(32769)") self.validate_identity("SELECT BITMAP_CONSTRUCT_AGG(value)") self.validate_all( "SELECT BITMAP_CONSTRUCT_AGG(v) FROM t", write={ "snowflake": "SELECT BITMAP_CONSTRUCT_AGG(v) FROM t", "duckdb": "SELECT (SELECT CASE WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL WHEN LENGTH(l) <> LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2)) ELSE UNHEX('08000000000000000000' || h) END FROM (SELECT l, COALESCE(LIST_REDUCE(LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)), (__a, __b) -> __a || __b, ''), '') AS h FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(v) FILTER(WHERE NOT v IS NULL))) AS l))) FROM t", }, ) self.validate_identity( "SELECT BITMAP_COUNT(BITMAP_CONSTRUCT_AGG(value)) FROM TABLE(FLATTEN(INPUT => ARRAY_CONSTRUCT(1, 2, 3, 5)))", "SELECT BITMAP_COUNT(BITMAP_CONSTRUCT_AGG(value)) FROM TABLE(FLATTEN(INPUT => [1, 2, 3, 5]))", ) self.validate_all( "SELECT ARRAY_MAX([1, 2, 3])", write={ "duckdb": "SELECT LIST_MAX([1, 2, 3])", "snowflake": "SELECT ARRAY_MAX([1, 2, 3])", }, ) self.validate_all( "SELECT ARRAY_MIN([1, 2, 3])", write={ "duckdb": "SELECT LIST_MIN([1, 2, 3])", "snowflake": "SELECT ARRAY_MIN([1, 2, 3])", }, ) self.validate_identity("SELECT BOOLAND(1, -2)") self.validate_identity("SELECT BOOLXOR(2, 0)") self.validate_identity("SELECT BOOLOR(1, 0)") self.validate_identity("SELECT TO_BOOLEAN('true')") self.validate_identity("SELECT TO_BOOLEAN(1)") self.validate_identity("SELECT TO_VARIANT(123)") self.validate_identity("SELECT IS_NULL_VALUE(GET_PATH(payload, 'field'))") self.validate_identity("SELECT RTRIMMED_LENGTH(' ABCD ')") self.validate_identity("SELECT HEX_DECODE_STRING('48656C6C6F')") self.validate_identity("SELECT HEX_ENCODE('Hello World')") self.validate_identity("SELECT 
HEX_ENCODE('Hello World', 1)") self.validate_identity("SELECT HEX_ENCODE('Hello World', 0)") self.validate_identity("SELECT IFNULL(col1, col2)", "SELECT COALESCE(col1, col2)") self.validate_identity("SELECT NEXT_DAY('2025-10-15', 'FRIDAY')") self.validate_identity("SELECT NVL2(col1, col2, col3)") self.validate_identity("SELECT NVL(col1, col2)", "SELECT COALESCE(col1, col2)") self.validate_identity("SELECT CHR(8364)") self.validate_identity('SELECT CHECK_JSON(\'{"key": "value"}\')') self.validate_identity( "SELECT CHECK_XML('value')" ) self.validate_identity( "SELECT CHECK_XML('value', TRUE)" ) self.validate_identity("SELECT COMPRESS('Hello World', 'ZLIB')") self.validate_identity("SELECT DECOMPRESS_BINARY('compressed_data', 'SNAPPY')") self.validate_identity("SELECT DECOMPRESS_STRING('compressed_data', 'ZSTD')") self.validate_identity("SELECT LPAD('Hello', 10, '*')") self.validate_identity("SELECT LPAD(tbl.bin_col, 10)") self.validate_identity("SELECT RPAD('Hello', 10, '*')") self.validate_identity("SELECT RPAD(tbl.bin_col, 10)") self.validate_all( "SELECT RPAD('test', 10, 'ab')", write={ "snowflake": "SELECT RPAD('test', 10, 'ab')", "duckdb": "SELECT RPAD('test', 10, 'ab')", }, ) self.validate_all( "SELECT RPAD('data', 8)", write={ "snowflake": "SELECT RPAD('data', 8)", "duckdb": "SELECT RPAD('data', 8, ' ')", "postgres": "SELECT RPAD('data', 8)", }, ) self.validate_all( "SELECT RPAD('exact', 5, '*')", write={ "snowflake": "SELECT RPAD('exact', 5, '*')", "duckdb": "SELECT RPAD('exact', 5, '*')", }, ) ast = self.validate_identity( "SELECT RPAD(TO_BINARY('Hi', 'UTF8'), 10, TO_BINARY('_', 'UTF8'))" ) annotated = annotate_types(ast, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "SELECT ENCODE('Hi') || REPEAT(ENCODE('_'), GREATEST(0, 10 - OCTET_LENGTH(ENCODE('Hi'))))", ) self.validate_identity("SELECT SOUNDEX(column_name)") self.validate_identity("SELECT SOUNDEX_P123(column_name)") self.validate_identity("SELECT ABS(x)") self.validate_identity("SELECT 
ASIN(0.5)") self.validate_identity("SELECT ASINH(0.5)") self.validate_identity("SELECT ATAN(0.5)") self.validate_identity("SELECT ATAN2(0.5, 0.3)") self.validate_identity("SELECT ATANH(0.5)") self.validate_identity("SELECT CBRT(27.0)") self.validate_identity("SELECT POW(2, 3)", "SELECT POWER(2, 3)") self.validate_identity("SELECT POW(2.5, 3.0)", "SELECT POWER(2.5, 3.0)") self.validate_identity("SELECT SQUARE(2.5)", "SELECT POWER(2.5, 2)") self.validate_identity("SELECT SIGN(x)") self.validate_identity("SELECT COSH(1.5)") self.validate_identity("SELECT TANH(0.5)") self.validate_all( "JAROWINKLER_SIMILARITY('hello', 'world')", write={ "snowflake": "JAROWINKLER_SIMILARITY('hello', 'world')", "duckdb": "JARO_WINKLER_SIMILARITY(UPPER('hello'), UPPER('world'))", "clickhouse": "jaroWinklerSimilarity(UPPER('hello'), UPPER('world'))", }, ) self.validate_identity("SELECT TRANSLATE(column_name, 'abc', '123')") self.validate_identity("SELECT UNICODE(column_name)") self.validate_identity("SELECT WIDTH_BUCKET(col, 0, 100, 10)") self.validate_all( "SELECT SPLIT_PART('11.22.33', '.', 2)", write={ "snowflake": "SELECT SPLIT_PART('11.22.33', '.', 2)", "duckdb": "SELECT CASE WHEN '.' = '' THEN (CASE WHEN (CASE WHEN 2 = 0 THEN 1 ELSE 2 END) = 1 OR (CASE WHEN 2 = 0 THEN 1 ELSE 2 END) = -1 THEN '11.22.33' ELSE '' END) ELSE SPLIT_PART('11.22.33', '.', (CASE WHEN 2 = 0 THEN 1 ELSE 2 END)) END", }, ) self.validate_all( "SELECT SPLIT('127.0.0.1', '.')", write={ "snowflake": "SELECT SPLIT('127.0.0.1', '.')", "duckdb": "SELECT CASE WHEN '.' IS NULL THEN NULL WHEN '.' 
= '' THEN ['127.0.0.1'] ELSE STR_SPLIT('127.0.0.1', '.') END", }, ) self.validate_identity("SELECT PI()") self.validate_identity("SELECT DEGREES(PI() / 3)") self.validate_identity("SELECT DEGREES(1)") self.validate_identity("SELECT RADIANS(180)") self.validate_all( "SELECT REGR_VALX(y, x)", write={ "snowflake": "SELECT REGR_VALX(y, x)", "duckdb": "SELECT CASE WHEN y IS NULL THEN CAST(NULL AS DOUBLE) ELSE x END", }, ) self.validate_all( "SELECT REGR_VALY(y, x)", write={ "snowflake": "SELECT REGR_VALY(y, x)", "duckdb": "SELECT CASE WHEN x IS NULL THEN CAST(NULL AS DOUBLE) ELSE y END", }, ) self.validate_identity("SELECT REGR_AVGX(y, x)") self.validate_identity("SELECT REGR_AVGY(y, x)") self.validate_identity("SELECT REGR_COUNT(y, x)") self.validate_identity("SELECT REGR_INTERCEPT(y, x)") self.validate_identity("SELECT REGR_R2(y, x)") self.validate_identity("SELECT REGR_SXX(y, x)") self.validate_identity("SELECT REGR_SXY(y, x)") self.validate_identity("SELECT REGR_SYY(y, x)") self.validate_identity("SELECT REGR_SLOPE(y, x)") self.validate_all( "SELECT IS_ARRAY(PARSE_JSON('[1,2,3]'))", write={ "snowflake": "SELECT IS_ARRAY(PARSE_JSON('[1,2,3]'))", "duckdb": "SELECT JSON_TYPE(JSON('[1,2,3]')) = 'ARRAY'", }, ) self.validate_all( "SELECT IFF(x > 5, 10, 20)", write={ "snowflake": "SELECT IFF(x > 5, 10, 20)", "duckdb": "SELECT CASE WHEN x > 5 THEN 10 ELSE 20 END", }, ) self.validate_all( "SELECT IFF(col IS NULL, 0, col)", write={ "snowflake": "SELECT IFF(col IS NULL, 0, col)", "duckdb": "SELECT CASE WHEN col IS NULL THEN 0 ELSE col END", }, ) self.validate_all( "SELECT VAR_SAMP(x)", write={ "snowflake": "SELECT VARIANCE(x)", "duckdb": "SELECT VARIANCE(x)", "postgres": "SELECT VAR_SAMP(x)", }, ) self.validate_all( "SELECT GREATEST(1, 2)", write={ "snowflake": "SELECT GREATEST(1, 2)", "duckdb": "SELECT CASE WHEN 1 IS NULL OR 2 IS NULL THEN NULL ELSE GREATEST(1, 2) END", }, ) self.validate_all( "SELECT GREATEST_IGNORE_NULLS(1, 2)", write={ "snowflake": "SELECT 
GREATEST_IGNORE_NULLS(1, 2)", "duckdb": "SELECT GREATEST(1, 2)", }, ) self.validate_all( "SELECT LEAST(1, 2)", write={ "snowflake": "SELECT LEAST(1, 2)", "duckdb": "SELECT CASE WHEN 1 IS NULL OR 2 IS NULL THEN NULL ELSE LEAST(1, 2) END", }, ) self.validate_all( "SELECT LEAST_IGNORE_NULLS(1, 2)", write={ "snowflake": "SELECT LEAST_IGNORE_NULLS(1, 2)", "duckdb": "SELECT LEAST(1, 2)", }, ) self.validate_all( "SELECT VAR_POP(x)", write={ "snowflake": "SELECT VARIANCE_POP(x)", "duckdb": "SELECT VAR_POP(x)", "postgres": "SELECT VAR_POP(x)", }, ) self.validate_all( "SELECT SKEW(a)", write={ "snowflake": "SELECT SKEW(a)", "duckdb": "SELECT SKEWNESS(a)", "spark": "SELECT SKEWNESS(a)", "trino": "SELECT SKEWNESS(a)", }, read={ "duckdb": "SELECT SKEWNESS(a)", "spark": "SELECT SKEWNESS(a)", "trino": "SELECT SKEWNESS(a)", }, ) self.validate_all( "SELECT RANDOM()", write={ "snowflake": "SELECT RANDOM()", "duckdb": "SELECT CAST(-9.223372036854776E+18 + RANDOM() * (9.223372036854776e+18 - -9.223372036854776E+18) AS BIGINT)", }, ) self.validate_all( "SELECT RANDOM(123)", write={ "snowflake": "SELECT RANDOM(123)", "duckdb": "SELECT CAST(-9.223372036854776E+18 + RANDOM() * (9.223372036854776e+18 - -9.223372036854776E+18) AS BIGINT)", }, ) self.validate_identity("SELECT RANDSTR(123, 456)") self.validate_identity("SELECT RANDSTR(123, RANDOM())") self.validate_identity("SELECT NORMAL(0, 1, RANDOM())") self.validate_all( "IS_NULL_VALUE(x)", write={ "duckdb": "JSON_TYPE(x) = 'NULL'", "snowflake": "IS_NULL_VALUE(x)", }, ) # Test RANDSTR transpilation to DuckDB self.validate_all( "SELECT RANDSTR(10, 123)", write={ "snowflake": "SELECT RANDSTR(10, 123)", "duckdb": "SELECT (SELECT LISTAGG(SUBSTRING('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 1 + CAST(FLOOR(random_value * 62) AS INT), 1), '') FROM (SELECT (ABS(HASH(i + 123)) % 1000) / 1000.0 AS random_value FROM RANGE(10) AS t(i)))", }, ) self.validate_all( "SELECT RANDSTR(10, RANDOM(123))", write={ "snowflake": "SELECT 
RANDSTR(10, RANDOM(123))", "duckdb": "SELECT (SELECT LISTAGG(SUBSTRING('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 1 + CAST(FLOOR(random_value * 62) AS INT), 1), '') FROM (SELECT (ABS(HASH(i + 123)) % 1000) / 1000.0 AS random_value FROM RANGE(10) AS t(i)))", }, ) self.validate_all( "SELECT RANDSTR(10, RANDOM())", write={ "snowflake": "SELECT RANDSTR(10, RANDOM())", "duckdb": "SELECT (SELECT LISTAGG(SUBSTRING('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 1 + CAST(FLOOR(random_value * 62) AS INT), 1), '') FROM (SELECT (ABS(HASH(i + CAST(-9.223372036854776E+18 + RANDOM() * (9.223372036854776e+18 - -9.223372036854776E+18) AS BIGINT))) % 1000) / 1000.0 AS random_value FROM RANGE(10) AS t(i)))", }, ) self.validate_all( "SELECT BOOLNOT(0)", write={ "snowflake": "SELECT BOOLNOT(0)", "duckdb": "SELECT NOT (ROUND(0, 0))", }, ) expr = self.validate_identity("RIGHT('GAJGSKD', 2)") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "RIGHT('GAJGSKD', 2)") expr = self.validate_identity("RIGHT(TO_BINARY('SNOWIKOPN', 'utf-8'), ABS(-3))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "UNHEX(RIGHT(HEX(ENCODE('SNOWIKOPN')), ABS(-3) * 2))" ) self.validate_all( "SELECT ZIPF(1, 10, 1234)", write={ "duckdb": "SELECT (WITH rand AS (SELECT (ABS(HASH(1234)) % 1000000) / 1000000.0 AS r), weights AS (SELECT i, 1.0 / POWER(i, 1) AS w FROM RANGE(1, 10 + 1) AS t(i)), cdf AS (SELECT i, SUM(w) OVER (ORDER BY i NULLS FIRST) / SUM(w) OVER () AS p FROM weights) SELECT MIN(i) FROM cdf WHERE p >= (SELECT r FROM rand))", "snowflake": "SELECT ZIPF(1, 10, 1234)", }, ) self.validate_all( "SELECT ZIPF(2, 100, RANDOM())", write={ "duckdb": "SELECT (WITH rand AS (SELECT RANDOM() AS r), weights AS (SELECT i, 1.0 / POWER(i, 2) AS w FROM RANGE(1, 100 + 1) AS t(i)), cdf AS (SELECT i, SUM(w) OVER (ORDER BY i NULLS FIRST) / SUM(w) OVER () AS p FROM weights) SELECT MIN(i) FROM cdf 
WHERE p >= (SELECT r FROM rand))", "snowflake": "SELECT ZIPF(2, 100, RANDOM())", }, ) self.validate_identity("SELECT GROUPING_ID(a, b) AS g_id FROM x GROUP BY ROLLUP (a, b)") self.validate_identity("PARSE_URL('https://example.com/path')") self.validate_identity("PARSE_URL('https://example.com/path', 1)") self.validate_identity("SELECT XMLGET(object_col, 'level2')") self.validate_identity("SELECT XMLGET(object_col, 'level3', 1)") self.validate_identity("SELECT {*} FROM my_table") self.validate_identity("SELECT {my_table.*} FROM my_table") self.validate_identity("SELECT {* ILIKE 'col1%'} FROM my_table") self.validate_identity("SELECT {* EXCLUDE (col1)} FROM my_table") self.validate_identity("SELECT {* EXCLUDE (col1, col2)} FROM my_table") self.validate_identity("SELECT a, b, COUNT(*) FROM x GROUP BY ALL LIMIT 100") self.validate_identity("STRTOK_TO_ARRAY('a b c')") self.validate_identity("STRTOK_TO_ARRAY('a.b.c', '.')") self.validate_identity("GET(a, b)") self.validate_identity("INSERT INTO test VALUES (x'48FAF43B0AFCEF9B63EE3A93EE2AC2')") self.validate_identity("SELECT STAR(tbl, exclude := [foo])") self.validate_identity("SELECT CAST([1, 2, 3] AS VECTOR(FLOAT, 3))") self.validate_identity("SELECT VECTOR_COSINE_SIMILARITY(a, b)") self.validate_identity("SELECT VECTOR_INNER_PRODUCT(a, b)") self.validate_identity("SELECT VECTOR_L1_DISTANCE(a, b)") self.validate_identity("SELECT VECTOR_L2_DISTANCE(a, b)") self.validate_identity("SELECT CONNECT_BY_ROOT test AS test_column_alias") self.validate_identity("SELECT number").selects[0].assert_is(exp.Column) self.validate_identity("INTERVAL '4 years, 5 months, 3 hours'") self.validate_identity("ALTER TABLE table1 CLUSTER BY (name DESC)") self.validate_identity("SELECT rename, replace") self.validate_identity("SELECT TIMEADD(HOUR, 2, CAST('09:05:03' AS TIME))") self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT))") self.validate_identity( "SELECT MAP_CAT(CAST(col AS MAP(VARCHAR, VARCHAR)), CAST(col 
AS MAP(VARCHAR, VARCHAR)))" ) self.validate_all( "SELECT MAP_CAT(CAST(m1 AS MAP(VARCHAR, INT)), CAST(m2 AS MAP(VARCHAR, INT)))", write={ "duckdb": "SELECT CASE WHEN CAST(m1 AS MAP(TEXT, INT)) IS NULL OR CAST(m2 AS MAP(TEXT, INT)) IS NULL THEN NULL ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(CAST(m1 AS MAP(TEXT, INT))), MAP_KEYS(CAST(m2 AS MAP(TEXT, INT))))), __k -> STRUCT_PACK(key := __k, value := COALESCE(CAST(m2 AS MAP(TEXT, INT))[__k], CAST(m1 AS MAP(TEXT, INT))[__k]))), __x -> NOT __x.value IS NULL)) END", "snowflake": "SELECT MAP_CAT(CAST(m1 AS MAP(VARCHAR, INT)), CAST(m2 AS MAP(VARCHAR, INT)))", }, ) self.validate_all( "SELECT MAP_CAT(CAST(OBJECT_CONSTRUCT() AS MAP(VARCHAR, INT)), CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT)))", write={ "duckdb": "SELECT CASE WHEN CAST(MAP() AS MAP(TEXT, INT)) IS NULL OR CAST({'a': 1} AS MAP(TEXT, INT)) IS NULL THEN NULL ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(CAST(MAP() AS MAP(TEXT, INT))), MAP_KEYS(CAST({'a': 1} AS MAP(TEXT, INT))))), __k -> STRUCT_PACK(key := __k, value := COALESCE(CAST({'a': 1} AS MAP(TEXT, INT))[__k], CAST(MAP() AS MAP(TEXT, INT))[__k]))), __x -> NOT __x.value IS NULL)) END", "snowflake": "SELECT MAP_CAT(CAST(OBJECT_CONSTRUCT() AS MAP(VARCHAR, INT)), CAST(OBJECT_CONSTRUCT('a', 1) AS MAP(VARCHAR, INT)))", }, ) self.validate_identity("SELECT MAP_CONTAINS_KEY('k1', CAST(col AS MAP(VARCHAR, VARCHAR)))") self.validate_identity("SELECT MAP_DELETE(CAST(col AS MAP(VARCHAR, VARCHAR)), 'k1')") self.validate_identity("SELECT MAP_INSERT(CAST(col AS MAP(VARCHAR, VARCHAR)), 'b', '2')") self.validate_identity("SELECT MAP_KEYS(CAST(col AS MAP(VARCHAR, VARCHAR)))") self.validate_identity("SELECT MAP_PICK(CAST(col AS MAP(VARCHAR, VARCHAR)), 'a', 'c')") self.validate_identity("SELECT MAP_SIZE(CAST(col AS MAP(VARCHAR, VARCHAR)))") self.validate_identity("SELECT CAST(OBJECT_CONSTRUCT('a', 1) AS OBJECT(a CHAR NOT NULL))") 
self.validate_identity("SELECT CAST([1, 2, 3] AS ARRAY(INT))") self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR) RENAME FIELDS)") self.validate_identity("SELECT CAST(obj AS OBJECT(x CHAR, y VARCHAR) ADD FIELDS)") self.validate_identity("SELECT TO_TIMESTAMP(123.4)").selects[0].assert_is(exp.Anonymous) self.validate_identity("SELECT TO_TIMESTAMP(x) FROM t") self.validate_identity("SELECT TO_TIMESTAMP_NTZ(x) FROM t") self.validate_identity("SELECT TO_TIMESTAMP_LTZ(x) FROM t") self.validate_identity("SELECT TO_TIMESTAMP_TZ(x) FROM t") self.validate_identity("TO_DECIMAL(expr)", "TO_NUMBER(expr)") self.validate_identity("TO_DECIMAL(expr, fmt)", "TO_NUMBER(expr, fmt)") self.validate_identity( "TO_DECIMAL(expr, fmt, precision, scale)", "TO_NUMBER(expr, fmt, precision, scale)" ) self.validate_identity("TO_NUMBER(expr)") self.validate_identity("TO_NUMBER(expr, fmt)") self.validate_identity("TO_NUMBER(expr, fmt, precision, scale)") self.validate_identity("TO_DECFLOAT('123.456')") self.validate_identity("TO_DECFLOAT('1,234.56', '999,999.99')") self.validate_identity("TRY_TO_DECFLOAT('123.456')") self.validate_identity("TRY_TO_DECFLOAT('1,234.56', '999,999.99')") self.validate_all( "TRY_TO_BOOLEAN('true')", write={ "snowflake": "TRY_TO_BOOLEAN('true')", "duckdb": "CASE WHEN UPPER(CAST('true' AS TEXT)) = 'ON' THEN TRUE WHEN UPPER(CAST('true' AS TEXT)) = 'OFF' THEN FALSE ELSE TRY_CAST('true' AS BOOLEAN) END", }, ) self.validate_identity("TRY_TO_DECIMAL('123.45')", "TRY_TO_NUMBER('123.45')") self.validate_identity( "TRY_TO_DECIMAL('123.45', '999.99')", "TRY_TO_NUMBER('123.45', '999.99')" ) self.validate_identity( "TRY_TO_DECIMAL('123.45', '999.99', 10, 2)", "TRY_TO_NUMBER('123.45', '999.99', 10, 2)" ) self.validate_all( "TRY_TO_DOUBLE('123.456')", write={ "snowflake": "TRY_TO_DOUBLE('123.456')", "duckdb": "TRY_CAST('123.456' AS DOUBLE)", }, ) self.validate_identity("TRY_TO_DOUBLE('123.456', '999.99')") self.validate_all( "TRY_TO_DOUBLE('-4.56E-03', 'S9.99EEEE')", write={ 
"snowflake": "TRY_TO_DOUBLE('-4.56E-03', 'S9.99EEEE')", "duckdb": UnsupportedError, }, ) self.validate_identity("TO_FILE(object_col)") self.validate_identity("TO_FILE('file.csv')") self.validate_identity("TO_FILE('file.csv', 'relativepath/')") self.validate_identity("TRY_TO_FILE(object_col)") self.validate_identity("TRY_TO_FILE('file.csv')") self.validate_identity("TRY_TO_FILE('file.csv', 'relativepath/')") self.validate_identity("TRY_TO_NUMBER('123.45')") self.validate_identity("TRY_TO_NUMBER('123.45', '999.99')") self.validate_identity("TRY_TO_NUMBER('123.45', '999.99', 10, 2)") self.validate_identity("TO_NUMERIC('123.45')", "TO_NUMBER('123.45')") self.validate_identity("TO_NUMERIC('123.45', '999.99')", "TO_NUMBER('123.45', '999.99')") self.validate_identity( "TO_NUMERIC('123.45', '999.99', 10, 2)", "TO_NUMBER('123.45', '999.99', 10, 2)" ) self.validate_identity("TRY_TO_NUMERIC('123.45')", "TRY_TO_NUMBER('123.45')") self.validate_identity( "TRY_TO_NUMERIC('123.45', '999.99')", "TRY_TO_NUMBER('123.45', '999.99')" ) self.validate_identity( "TRY_TO_NUMERIC('123.45', '999.99', 10, 2)", "TRY_TO_NUMBER('123.45', '999.99', 10, 2)" ) self.validate_all( "TRY_TO_TIME('12:30:00')", write={ "snowflake": "TRY_CAST('12:30:00' AS TIME)", "duckdb": "TRY_CAST('12:30:00' AS TIME)", }, ) self.validate_identity("TRY_TO_TIME('12:30:00', 'AUTO')") self.validate_all( "TRY_TO_TIMESTAMP('2024-01-15 12:30:00')", write={ "snowflake": "TRY_CAST('2024-01-15 12:30:00' AS TIMESTAMP)", "duckdb": "TRY_CAST('2024-01-15 12:30:00' AS TIMESTAMP)", }, ) self.validate_identity("TRY_TO_TIMESTAMP('2024-01-15 12:30:00', 'AUTO')") self.validate_identity("ALTER TABLE authors ADD CONSTRAINT c1 UNIQUE (id, email)") self.validate_identity("RM @parquet_stage", check_command_warning=True) self.validate_identity("REMOVE @parquet_stage", check_command_warning=True) self.validate_identity("SELECT TIMESTAMP_FROM_PARTS(2024, 5, 9, 14, 30, 45)") self.validate_identity("SELECT TIMESTAMP_FROM_PARTS(2024, 5, 9, 14, 30, 
45, 123)") self.validate_identity("SELECT TIMESTAMP_LTZ_FROM_PARTS(2013, 4, 5, 12, 00, 00)") self.validate_identity("SELECT TIMESTAMP_TZ_FROM_PARTS(2013, 4, 5, 12, 00, 00)") self.validate_identity( "SELECT TIMESTAMP_TZ_FROM_PARTS(2013, 4, 5, 12, 00, 00, 0, 'America/Los_Angeles')" ) self.validate_identity( "SELECT TIMESTAMP_FROM_PARTS(CAST('2024-05-09' AS DATE), CAST('14:30:45' AS TIME))" ) self.validate_identity( "SELECT TIMESTAMP_NTZ_FROM_PARTS(TO_DATE('2013-04-05'), TO_TIME('12:00:00'))", "SELECT TIMESTAMP_FROM_PARTS(CAST('2013-04-05' AS DATE), CAST('12:00:00' AS TIME))", ) self.validate_identity( "SELECT TIMESTAMP_NTZ_FROM_PARTS(2013, 4, 5, 12, 00, 00, 987654321)", "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 987654321)", ) self.validate_identity("SELECT DATE_FROM_PARTS(1977, 8, 7)") self.validate_identity("SELECT GET_PATH(v, 'attr[0].name') FROM vartab") self.validate_identity("SELECT TO_ARRAY(CAST(x AS ARRAY))") self.validate_identity("SELECT TO_ARRAY(CAST(['test'] AS VARIANT))") self.validate_identity("SELECT ARRAY_UNIQUE_AGG(x)") self.validate_identity("SELECT ARRAY_APPEND([1, 2, 3], 4)") self.validate_identity("SELECT ARRAY_CAT([1, 2], [3, 4])") self.validate_identity("SELECT ARRAY_PREPEND([2, 3, 4], 1)") self.validate_identity("SELECT ARRAY_REMOVE([1, 2, 3], 2)") self.validate_identity("SELECT ARRAYS_ZIP([1, 2, 3])") self.validate_identity("SELECT ARRAYS_ZIP([1, 2, 3], ['a', 'b', 'c'], [10, 20, 30])") self.validate_identity("SELECT AI_AGG(review, 'Summarize the reviews')") self.validate_identity("SELECT AI_SUMMARIZE_AGG(review)") self.validate_identity("SELECT AI_CLASSIFY('text', ['travel', 'cooking'])") self.validate_identity("SELECT OBJECT_CONSTRUCT()") self.validate_identity("SELECT CURRENT_ACCOUNT()") self.validate_identity("SELECT CURRENT_ACCOUNT_NAME()") self.validate_identity("SELECT CURRENT_AVAILABLE_ROLES()") self.validate_identity("SELECT CURRENT_CLIENT()") self.validate_identity("SELECT CURRENT_IP_ADDRESS()") 
self.validate_identity("SELECT CURRENT_DATABASE()") self.validate_identity("SELECT CURRENT_SCHEMAS()") self.validate_identity("SELECT CURRENT_SECONDARY_ROLES()") self.validate_identity("SELECT CURRENT_SESSION()") self.validate_identity("SELECT CURRENT_STATEMENT()") self.validate_identity("SELECT CURRENT_VERSION()") self.validate_identity("SELECT CURRENT_TRANSACTION()") self.validate_identity("SELECT CURRENT_WAREHOUSE()") self.validate_identity("SELECT CURRENT_ORGANIZATION_USER()") self.validate_identity("SELECT CURRENT_REGION()") self.validate_identity("SELECT CURRENT_ROLE()") self.validate_identity("SELECT CURRENT_ROLE_TYPE()") self.validate_identity("SELECT DAY(CURRENT_TIMESTAMP())") self.validate_identity("SELECT DAYOFMONTH(CURRENT_TIMESTAMP())") self.validate_identity("SELECT DAYOFYEAR(CURRENT_TIMESTAMP())") self.validate_identity("SELECT MONTH(CURRENT_TIMESTAMP())") self.validate_identity("SELECT QUARTER(CURRENT_TIMESTAMP())") self.validate_identity("SELECT WEEK(CURRENT_TIMESTAMP())") self.validate_identity("SELECT WEEKISO(CURRENT_TIMESTAMP())") self.validate_identity("WEEKOFYEAR(tstamp)", "WEEK(tstamp)") self.validate_identity("SELECT YEAR(CURRENT_TIMESTAMP())") self.validate_identity("SELECT YEAROFWEEK(CURRENT_TIMESTAMP())") self.validate_identity("SELECT YEAROFWEEKISO(CURRENT_TIMESTAMP())") self.validate_all( "SELECT DAYOFWEEKISO('2024-01-15'::DATE)", write={ "snowflake": "SELECT DAYOFWEEKISO(CAST('2024-01-15' AS DATE))", "duckdb": "SELECT ISODOW(CAST('2024-01-15' AS DATE))", }, ) self.validate_all( "SELECT YEAROFWEEK('2024-12-31'::DATE)", write={ "snowflake": "SELECT YEAROFWEEK(CAST('2024-12-31' AS DATE))", "duckdb": "SELECT EXTRACT(ISOYEAR FROM CAST('2024-12-31' AS DATE))", }, ) self.validate_all( "SELECT YEAROFWEEKISO('2024-12-31'::DATE)", write={ "snowflake": "SELECT YEAROFWEEKISO(CAST('2024-12-31' AS DATE))", "duckdb": "SELECT EXTRACT(ISOYEAR FROM CAST('2024-12-31' AS DATE))", }, ) self.validate_all( "SELECT WEEKISO('2024-01-15'::DATE)", write={ 
"snowflake": "SELECT WEEKISO(CAST('2024-01-15' AS DATE))", "duckdb": "SELECT WEEKOFYEAR(CAST('2024-01-15' AS DATE))", }, ) self.validate_identity("SELECT SUM(amount) FROM mytable GROUP BY ALL") self.validate_identity("SELECT STDDEV(x)") self.validate_identity("SELECT STDDEV(x) OVER (PARTITION BY 1)") self.validate_identity("SELECT STDDEV_POP(x)") self.validate_identity("SELECT STDDEV_POP(x) OVER (PARTITION BY 1)") self.validate_identity("SELECT STDDEV_SAMP(x)", "SELECT STDDEV(x)") self.validate_identity( "SELECT STDDEV_SAMP(x) OVER (PARTITION BY 1)", "SELECT STDDEV(x) OVER (PARTITION BY 1)" ) self.validate_identity("SELECT KURTOSIS(x)") self.validate_identity("SELECT KURTOSIS(x) OVER (PARTITION BY 1)") self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT foo FROM IDENTIFIER('x')") self.validate_identity("WITH x AS (SELECT 1 AS foo) SELECT IDENTIFIER('foo') FROM x") self.validate_identity("INITCAP('iqamqinterestedqinqthisqtopic', 'q')") self.validate_identity("OBJECT_CONSTRUCT(*)") self.validate_identity("SELECT CAST('2021-01-01' AS DATE) + INTERVAL '1 DAY'") self.validate_identity("SELECT HLL(*)") self.validate_identity("SELECT HLL(a)") self.validate_identity("SELECT HLL(DISTINCT t.a)") self.validate_identity("SELECT HLL(a, b, c)") self.validate_identity("SELECT HLL(DISTINCT a, b, c)") self.validate_identity("$x") # parameter self.validate_identity("a$b") # valid snowflake identifier self.validate_identity("SELECT REGEXP_LIKE(a, b, c)") self.validate_identity("CREATE TABLE foo (bar DOUBLE AUTOINCREMENT START 0 INCREMENT 1)") self.validate_identity("COMMENT IF EXISTS ON TABLE foo IS 'bar'") self.validate_identity("SELECT CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', col)") self.validate_identity("SELECT CURRENT_ORGANIZATION_NAME()") self.validate_identity("ALTER TABLE a SWAP WITH b") self.validate_identity("SELECT MATCH_CONDITION") self.validate_identity("SELECT OBJECT_AGG(key, value) FROM tbl") self.validate_identity("1 /* /* */", "1 /* / * */") 
self.validate_identity("TO_TIMESTAMP(col, fmt)") self.validate_identity("SELECT TO_CHAR(CAST('12:05:05' AS TIME))") self.validate_identity("SELECT TRIM(COALESCE(TO_CHAR(CAST(c AS TIME)), '')) FROM t") self.validate_identity("SELECT GET_PATH(PARSE_JSON(foo), 'bar')") self.validate_identity("SELECT PARSE_IP('192.168.1.1', 'INET')") self.validate_identity("SELECT PARSE_IP('192.168.1.1', 'INET', 0)") self.validate_identity("SELECT GET_PATH(foo, 'bar')") self.validate_identity("SELECT a, exclude, b FROM xxx") self.validate_identity("SELECT ARRAY_SORT(x, TRUE, FALSE)") self.validate_all( "SELECT ARRAY_SORT(x)", read={"snowflake": "SELECT ARRAY_SORT(x)"}, write={ "duckdb": "SELECT LIST_SORT(x)", "snowflake": "SELECT ARRAY_SORT(x)", }, ) self.validate_all( "SELECT ARRAY_SORT(x, FALSE)", read={"snowflake": "SELECT ARRAY_SORT(x, FALSE)"}, write={ "duckdb": "SELECT LIST_SORT(x, 'DESC', 'NULLS FIRST')", "snowflake": "SELECT ARRAY_SORT(x, FALSE)", }, ) self.validate_all( "SELECT ARRAY_SORT(x, foo, TRUE)", read={"snowflake": "SELECT ARRAY_SORT(x, foo, TRUE)"}, write={ "duckdb": "SELECT LIST_SORT(x, foo, 'NULLS FIRST')", "snowflake": "SELECT ARRAY_SORT(x, foo, TRUE)", }, ) self.validate_identity("SELECT BOOLXOR_AGG(col) FROM tbl") self.validate_identity( "SELECT PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY col) OVER (PARTITION BY category)" ) self.validate_identity( "SELECT DATEADD(DAY, -7, DATEADD(t.m, 1, CAST('2023-01-03' AS DATE))) FROM (SELECT 'month' AS m) AS t" ).selects[0].this.unit.assert_is(exp.Column) self.validate_all( "SELECT STRTOK('a$b$c', SUBSTRING('.$^', 1, 2), 2)", write={ "snowflake": "SELECT STRTOK('a$b$c', SUBSTRING('.$^', 1, 2), 2)", "duckdb": r"""SELECT CASE WHEN SUBSTRING('.$^', 1, 2) = '' AND 'a$b$c' = '' THEN NULL WHEN SUBSTRING('.$^', 1, 2) = '' AND 2 = 1 THEN 'a$b$c' WHEN SUBSTRING('.$^', 1, 2) = '' THEN NULL WHEN 2 < 0 THEN NULL WHEN 'a$b$c' IS NULL OR SUBSTRING('.$^', 1, 2) IS NULL OR 2 IS NULL THEN NULL ELSE LIST_FILTER(REGEXP_SPLIT_TO_ARRAY('a$b$c', 
CASE WHEN SUBSTRING('.$^', 1, 2) = '' THEN '' ELSE '[' || REGEXP_REPLACE(SUBSTRING('.$^', 1, 2), '([\[\]^.\-*+?(){}|$\\])', '\\\1', 'g') || ']' END), x -> NOT x = '')[2] END""", }, ) self.validate_all( "SELECT STRTOK('a$b/cg', '$/.')", write={ "snowflake": "SELECT STRTOK('a$b/cg', '$/.', 1)", "duckdb": r"""SELECT CASE WHEN '$/.' = '' AND 'a$b/cg' = '' THEN NULL WHEN '$/.' = '' AND 1 = 1 THEN 'a$b/cg' WHEN '$/.' = '' THEN NULL WHEN 1 < 0 THEN NULL WHEN 'a$b/cg' IS NULL OR '$/.' IS NULL OR 1 IS NULL THEN NULL ELSE LIST_FILTER(REGEXP_SPLIT_TO_ARRAY('a$b/cg', CASE WHEN '$/.' = '' THEN '' ELSE '[' || REGEXP_REPLACE('$/.', '([\[\]^.\-*+?(){}|$\\])', '\\\1', 'g') || ']' END), x -> NOT x = '')[1] END""", }, ) self.validate_all( "SELECT STRTOK('ab')", write={ "snowflake": "SELECT STRTOK('ab', ' ', 1)", "duckdb": r"""SELECT CASE WHEN ' ' = '' AND 'ab' = '' THEN NULL WHEN ' ' = '' AND 1 = 1 THEN 'ab' WHEN ' ' = '' THEN NULL WHEN 1 < 0 THEN NULL WHEN 'ab' IS NULL OR ' ' IS NULL OR 1 IS NULL THEN NULL ELSE LIST_FILTER(REGEXP_SPLIT_TO_ARRAY('ab', CASE WHEN ' ' = '' THEN '' ELSE '[' || REGEXP_REPLACE(' ', '([\[\]^.\-*+?(){}|$\\])', '\\\1', 'g') || ']' END), x -> NOT x = '')[1] END""", }, ) self.validate_identity("SELECT FILE_URL FROM DIRECTORY(@mystage) WHERE SIZE > 100000").args[ "from_" ].this.this.assert_is(exp.DirectoryStage).this.assert_is(exp.Var) self.validate_identity( "SELECT AI_CLASSIFY('text', ['travel', 'cooking'], OBJECT_CONSTRUCT('output_mode', 'multi'))" ) self.validate_identity( "SELECT * FROM table AT (TIMESTAMP => '2024-07-24') UNPIVOT(a FOR b IN (c)) AS pivot_table" ) self.validate_identity( "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ('2023_Q1', '2023_Q2', '2023_Q3', '2023_Q4', '2024_Q1') DEFAULT ON NULL (0)) ORDER BY empid" ) self.validate_identity( "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (SELECT DISTINCT quarter FROM ad_campaign_types_by_quarter WHERE television = TRUE ORDER BY quarter)) ORDER BY empid" ) 
self.validate_identity( "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid" ) self.validate_identity( "SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY)) ORDER BY empid" ) self.validate_identity( "MERGE INTO my_db AS ids USING (SELECT new_id FROM my_model WHERE NOT col IS NULL) AS new_ids ON ids.type = new_ids.type AND ids.source = new_ids.source WHEN NOT MATCHED THEN INSERT VALUES (new_ids.new_id)" ) self.validate_identity( "INSERT OVERWRITE TABLE t SELECT 1", "INSERT OVERWRITE INTO t SELECT 1" ) self.validate_identity( 'DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage' ) self.validate_identity( "SELECT * FROM DATA AS DATA_L ASOF JOIN DATA AS DATA_R MATCH_CONDITION (DATA_L.VAL > DATA_R.VAL) ON DATA_L.ID = DATA_R.ID" ) self.validate_identity( """SELECT TO_TIMESTAMP('2025-01-16T14:45:30.123+0500', 'yyyy-mm-DDThh24:mi:ss.ff9tzhtzm')""" ) self.validate_identity( "SELECT * REPLACE (CAST(col AS TEXT) AS scol) FROM t", "SELECT * REPLACE (CAST(col AS VARCHAR) AS scol) FROM t", ) self.validate_identity( "GET(value, 'foo')::VARCHAR", "CAST(GET(value, 'foo') AS VARCHAR)", ) self.validate_identity( "SELECT 1 put", "SELECT 1 AS put", ) self.validate_identity( "SELECT 1 get", "SELECT 1 AS get", ) self.validate_identity( "WITH t (SELECT 1 AS c) SELECT c FROM t", "WITH t AS (SELECT 1 AS c) SELECT c FROM t", ) self.validate_identity( "GET_PATH(json_data, '$id')", """GET_PATH(json_data, '["$id"]')""", ) self.validate_identity( "CAST(x AS GEOGRAPHY)", "TO_GEOGRAPHY(x)", ) self.validate_identity( "CAST(x AS GEOMETRY)", "TO_GEOMETRY(x)", ) self.validate_identity("TO_GEOGRAPHY(x)") self.validate_identity("TO_GEOMETRY(x)") self.validate_identity("TO_GEOGRAPHY(x, y)") self.validate_identity("TO_GEOMETRY(x, y)") self.validate_identity( "transform(x, a int -> a + a + 1)", "TRANSFORM(x, a -> CAST(a AS INT) + CAST(a AS INT) + 1)", ) self.validate_identity( "SELECT * FROM s WHERE c NOT IN (1, 
2, 3)", "SELECT * FROM s WHERE NOT c IN (1, 2, 3)", ) self.validate_identity( "SELECT * FROM s WHERE c NOT IN (SELECT * FROM t)", "SELECT * FROM s WHERE c <> ALL (SELECT * FROM t)", ) self.validate_identity( "SELECT * FROM t1 INNER JOIN t2 USING (t1.col)", "SELECT * FROM t1 INNER JOIN t2 USING (col)", ) self.validate_identity( "CURRENT_TIMESTAMP - INTERVAL '1 w' AND (1 = 1)", "CURRENT_TIMESTAMP() - INTERVAL '1 WEEK' AND (1 = 1)", ) self.validate_identity( "REGEXP_REPLACE('target', 'pattern', '\n')", "REGEXP_REPLACE('target', 'pattern', '\\n')", ) self.validate_identity( "SELECT a:from::STRING, a:from || ' test' ", "SELECT CAST(GET_PATH(a, 'from') AS VARCHAR), GET_PATH(a, 'from') || ' test'", ) self.validate_identity( "SELECT a:select", "SELECT GET_PATH(a, 'select')", ) self.validate_identity("x:from", "GET_PATH(x, 'from')") self.validate_identity( "value:values::string::int", "CAST(CAST(GET_PATH(value, 'values') AS VARCHAR) AS INT)", ) self.validate_identity( """SELECT GET_PATH(PARSE_JSON('{"y": [{"z": 1}]}'), 'y[0]:z')""", """SELECT GET_PATH(PARSE_JSON('{"y": [{"z": 1}]}'), 'y[0].z')""", ) self.validate_identity( "SELECT p FROM t WHERE p:val NOT IN ('2')", "SELECT p FROM t WHERE NOT GET_PATH(p, 'val') IN ('2')", ) self.validate_identity( """SELECT PARSE_JSON('{"x": "hello"}'):x LIKE 'hello'""", """SELECT GET_PATH(PARSE_JSON('{"x": "hello"}'), 'x') LIKE 'hello'""", ) self.validate_identity( """SELECT data:x LIKE 'hello' FROM some_table""", """SELECT GET_PATH(data, 'x') LIKE 'hello' FROM some_table""", ) self.validate_identity( "SELECT SUM({ fn CONVERT(123, SQL_DOUBLE) })", "SELECT SUM(CAST(123 AS DOUBLE))", ) self.validate_identity( "SELECT SUM({ fn CONVERT(123, SQL_VARCHAR) })", "SELECT SUM(CAST(123 AS VARCHAR))", ) self.validate_identity( "SELECT TIMESTAMPFROMPARTS(d, t)", "SELECT TIMESTAMP_FROM_PARTS(d, t)", ) self.validate_identity( "SELECT v:attr[0].name FROM vartab", "SELECT GET_PATH(v, 'attr[0].name') FROM vartab", ) self.validate_identity( 'SELECT v:"fruit" 
FROM vartab', """SELECT GET_PATH(v, 'fruit') FROM vartab""", ) self.validate_identity( "v:attr[0]:name", "GET_PATH(v, 'attr[0].name')", ) self.validate_identity( "a.x:from.b:c.d::int", "CAST(GET_PATH(a.x, 'from.b.c.d') AS INT)", ) self.validate_identity( """SELECT PARSE_JSON('{"food":{"fruit":"banana"}}'):food.fruit::VARCHAR""", """SELECT CAST(GET_PATH(PARSE_JSON('{"food":{"fruit":"banana"}}'), 'food.fruit') AS VARCHAR)""", ) self.validate_identity( "SELECT * FROM t, UNNEST(x) WITH ORDINALITY", "SELECT * FROM t, TABLE(FLATTEN(INPUT => x)) AS _t0(seq, key, path, index, value, this)", ) self.validate_identity( "CREATE TABLE foo (ID INT COMMENT $$some comment$$)", "CREATE TABLE foo (ID INT COMMENT 'some comment')", ) self.validate_identity( "SELECT state, city, SUM(retail_price * quantity) AS gross_revenue FROM sales GROUP BY ALL" ) self.validate_identity( "SELECT * FROM foo window", "SELECT * FROM foo AS window", ) self.validate_identity( r"SELECT RLIKE(a, $$regular expression with \ characters: \d{2}-\d{3}-\d{4}$$, 'i') FROM log_source", r"SELECT REGEXP_LIKE(a, 'regular expression with \\ characters: \\d{2}-\\d{3}-\\d{4}', 'i') FROM log_source", ) self.validate_identity( r"SELECT $$a ' \ \t \x21 z $ $$", r"SELECT 'a \' \\ \\t \\x21 z $ '", ) self.validate_identity( "SELECT {'test': 'best'}::VARIANT", "SELECT CAST(OBJECT_CONSTRUCT('test', 'best') AS VARIANT)", ) self.validate_identity( "SELECT {fn DAYNAME('2022-5-13')}", "SELECT DAYNAME('2022-5-13')", ) self.validate_identity( "SELECT {fn LOG(5)}", "SELECT LN(5)", ) self.validate_identity( "SELECT {fn CEILING(5.3)}", "SELECT CEIL(5.3)", ) self.validate_identity( "SELECT CEIL(3.14)", ) self.validate_identity( "SELECT CEIL(3.14, 1)", ) self.validate_identity( "CAST(x AS BYTEINT)", "CAST(x AS INT)", ) self.validate_identity( "CAST(x AS CHAR VARYING)", "CAST(x AS VARCHAR)", ) self.validate_identity( "CAST(x AS CHARACTER VARYING)", "CAST(x AS VARCHAR)", ) self.validate_identity( "CAST(x AS NCHAR VARYING)", "CAST(x AS 
VARCHAR)", ) self.validate_identity( "CREATE OR REPLACE TEMPORARY TABLE x (y NUMBER IDENTITY(0, 1))", "CREATE OR REPLACE TEMPORARY TABLE x (y DECIMAL(38, 0) AUTOINCREMENT START 0 INCREMENT 1)", ) self.validate_identity( "CREATE TEMPORARY TABLE x (y NUMBER AUTOINCREMENT(0, 1))", "CREATE TEMPORARY TABLE x (y DECIMAL(38, 0) AUTOINCREMENT START 0 INCREMENT 1)", ) self.validate_identity( "CREATE OR REPLACE TABLE x (y NUMBER(38, 0) NOT NULL AUTOINCREMENT START 1 INCREMENT 1 ORDER)", "CREATE OR REPLACE TABLE x (y DECIMAL(38, 0) NOT NULL AUTOINCREMENT START 1 INCREMENT 1 ORDER)", ) self.validate_identity( "CREATE OR REPLACE TABLE x (y NUMBER(38, 0) NOT NULL AUTOINCREMENT START 1 INCREMENT 1 NOORDER)", "CREATE OR REPLACE TABLE x (y DECIMAL(38, 0) NOT NULL AUTOINCREMENT START 1 INCREMENT 1 NOORDER)", ) self.validate_identity( "CREATE TABLE x (y NUMBER IDENTITY START 0 INCREMENT 1)", "CREATE TABLE x (y DECIMAL(38, 0) AUTOINCREMENT START 0 INCREMENT 1)", ) self.validate_identity( "ALTER TABLE foo ADD COLUMN id INT identity(1, 1)", "ALTER TABLE foo ADD id INT AUTOINCREMENT START 1 INCREMENT 1", ) self.validate_identity( "SELECT DAYOFWEEK('2016-01-02T23:39:20.123-07:00'::TIMESTAMP)", "SELECT DAYOFWEEK(CAST('2016-01-02T23:39:20.123-07:00' AS TIMESTAMP))", ) self.validate_identity( "SELECT * FROM xxx WHERE col ilike '%Don''t%'", "SELECT * FROM xxx WHERE col ILIKE '%Don\\'t%'", ) self.validate_identity( "SELECT * EXCLUDE a, b FROM xxx", "SELECT * EXCLUDE (a), b FROM xxx", ) self.validate_identity( "SELECT * RENAME a AS b, c AS d FROM xxx", "SELECT * RENAME (a AS b), c AS d FROM xxx", ) # Support for optional trailing commas after tables in from clause self.validate_identity( "SELECT * FROM xxx, yyy, zzz,", "SELECT * FROM xxx, yyy, zzz", ) self.validate_identity( "SELECT * FROM xxx, yyy, zzz, WHERE foo = bar", "SELECT * FROM xxx, yyy, zzz WHERE foo = bar", ) self.validate_identity( "SELECT * FROM xxx, yyy, zzz", "SELECT * FROM xxx, yyy, zzz", ) self.validate_all( "SELECT 
LTRIM(RTRIM(col)) FROM t1", write={ "duckdb": "SELECT LTRIM(RTRIM(col)) FROM t1", "snowflake": "SELECT LTRIM(RTRIM(col)) FROM t1", }, ) self.validate_all( "SELECT value['x'] AS x FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'x')])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT x FROM UNNEST([STRUCT('x' AS x)])", "snowflake": "SELECT value['x'] AS x FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'x')])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT value['x'] AS x, value['y'] AS y, value['z'] AS z FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1, 'y', 2, 'z', 3)])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT x, y, z FROM UNNEST([STRUCT(1 AS x, 2 AS y, 3 AS z)])", "snowflake": "SELECT value['x'] AS x, value['y'] AS y, value['z'] AS z FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1, 'y', 2, 'z', 3)])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT u1['x'] AS x, u2['y'] AS y FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1)])) AS _t0(seq, key, path, index, u1, this) CROSS JOIN TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('y', 2)])) AS _t1(seq, key, path, index, u2, this)", read={ "bigquery": "SELECT u1.x, u2.y FROM UNNEST([STRUCT(1 AS x)]) AS u1, UNNEST([STRUCT(2 AS y)]) AS u2", "snowflake": "SELECT u1['x'] AS x, u2['y'] AS y FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1)])) AS _t0(seq, key, path, index, u1, this) CROSS JOIN TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('y', 2)])) AS _t1(seq, key, path, index, u2, this)", }, ) self.validate_all( "SELECT t.id, value['name'] AS name, value['age'] AS age FROM t CROSS JOIN TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('name', 'John', 'age', 30)])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT t.id, name, age FROM t, UNNEST([STRUCT('John' AS name, 30 AS age)])", "snowflake": "SELECT t.id, value['name'] AS name, value['age'] AS age FROM t CROSS JOIN TABLE(FLATTEN(INPUT 
=> [OBJECT_CONSTRUCT('name', 'John', 'age', 30)])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT value FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1)])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT value FROM UNNEST([STRUCT(1 AS x)]) AS value", "snowflake": "SELECT value FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 1)])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT t.col1, value['field1'] AS field1, other_col, value['field2'] AS field2 FROM t CROSS JOIN TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('field1', 'a', 'field2', 'b')])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT t.col1, field1, other_col, field2 FROM t, UNNEST([STRUCT('a' AS field1, 'b' AS field2)])", "snowflake": "SELECT t.col1, value['field1'] AS field1, other_col, value['field2'] AS field2 FROM t CROSS JOIN TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('field1', 'a', 'field2', 'b')])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT * FROM (SELECT value['x'] AS x FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'value')])) AS _t0(seq, key, path, index, value, this))", read={ "bigquery": "SELECT * FROM (SELECT x FROM UNNEST([STRUCT('value' AS x)]))", "snowflake": "SELECT * FROM (SELECT value['x'] AS x FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'value')])) AS _t0(seq, key, path, index, value, this))", }, ) self.validate_all( "SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2, 3])) AS _t0(seq, key, path, index, value, this)", read={ "bigquery": "SELECT value FROM UNNEST([1, 2, 3]) AS value", "snowflake": "SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2, 3])) AS _t0(seq, key, path, index, value, this)", }, ) self.validate_all( "SELECT * FROM t1 AS t1 CROSS JOIN t2 AS t2 LEFT JOIN t3 AS t3 ON t1.a = t3.i", read={ "bigquery": "SELECT * FROM t1 AS t1, t2 AS t2 LEFT JOIN t3 AS t3 ON t1.a = t3.i", "snowflake": "SELECT * FROM t1 AS t1 CROSS JOIN t2 AS t2 
LEFT JOIN t3 AS t3 ON t1.a = t3.i", }, ) self.validate_all( "SELECT value['x'] AS x, yval, zval FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'x', 'y', ['y1', 'y2', 'y3'], 'z', ['z1', 'z2', 'z3'])])) AS _t0(seq, key, path, index, value, this) CROSS JOIN TABLE(FLATTEN(INPUT => value['y'])) AS _t1(seq, key, path, index, yval, this) CROSS JOIN TABLE(FLATTEN(INPUT => value['z'])) AS _t2(seq, key, path, index, zval, this)", read={ "bigquery": "SELECT x, yval, zval FROM UNNEST([STRUCT('x' AS x, ['y1', 'y2', 'y3'] AS y, ['z1', 'z2', 'z3'] AS z)]), UNNEST(y) AS yval, UNNEST(z) AS zval", "snowflake": "SELECT value['x'] AS x, yval, zval FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('x', 'x', 'y', ['y1', 'y2', 'y3'], 'z', ['z1', 'z2', 'z3'])])) AS _t0(seq, key, path, index, value, this) CROSS JOIN TABLE(FLATTEN(INPUT => value['y'])) AS _t1(seq, key, path, index, yval, this) CROSS JOIN TABLE(FLATTEN(INPUT => value['z'])) AS _t2(seq, key, path, index, zval, this)", }, ) self.validate_all( "SELECT _u['foo'] AS foo, bar, baz FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('foo', 'x', 'bars', ['y', 'z'], 'bazs', ['w'])])) AS _t0(seq, key, path, index, _u, this) CROSS JOIN TABLE(FLATTEN(INPUT => _u['bars'])) AS _t1(seq, key, path, index, bar, this) CROSS JOIN TABLE(FLATTEN(INPUT => _u['bazs'])) AS _t2(seq, key, path, index, baz, this)", read={ "bigquery": "SELECT _u.foo, bar, baz FROM UNNEST([struct('x' AS foo, ['y', 'z'] AS bars, ['w'] AS bazs)]) AS _u, UNNEST(_u.bars) AS bar, UNNEST(_u.bazs) AS baz", }, ) self.validate_all( "SELECT _u, _u['foo'] AS foo, _u['bar'] AS bar FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('foo', 'x', 'bar', 'y')])) AS _t0(seq, key, path, index, _u, this)", read={ "bigquery": "select _u, _u.foo, _u.bar from unnest([struct('x' as foo, 'y' AS bar)]) as _u", }, ) self.validate_all( "SELECT _u['foo'][0].bar FROM TABLE(FLATTEN(INPUT => [OBJECT_CONSTRUCT('foo', [OBJECT_CONSTRUCT('bar', 1)])])) AS _t0(seq, key, path, index, _u, this)", read={ "bigquery": 
"select _u.foo[0].bar from unnest([struct([struct(1 as bar)] as foo)]) as _u", }, ) self.validate_all( "SELECT ARRAYS_OVERLAP(col1, col2)", read={ "snowflake": "SELECT ARRAYS_OVERLAP(col1, col2)", }, write={ "snowflake": "SELECT ARRAYS_OVERLAP(col1, col2)", "duckdb": "SELECT (col1 && col2) OR (ARRAY_LENGTH(col1) <> LIST_COUNT(col1) AND ARRAY_LENGTH(col2) <> LIST_COUNT(col2))", }, ) self.validate_all( "SELECT ARRAY_INTERSECTION([1, 2], [2, 3])", write={ "snowflake": "SELECT ARRAY_INTERSECTION([1, 2], [2, 3])", "starrocks": "SELECT ARRAY_INTERSECT([1, 2], [2, 3])", "duckdb": "SELECT CASE WHEN [1, 2] IS NULL OR [2, 3] IS NULL THEN NULL ELSE LIST_TRANSFORM(LIST_FILTER(LIST_ZIP([1, 2], GENERATE_SERIES(1, LENGTH([1, 2]))), pair -> (LENGTH(LIST_FILTER([1, 2][1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])) <= LENGTH(LIST_FILTER([2, 3], e -> e IS NOT DISTINCT FROM pair[1])))), pair -> pair[1]) END", }, ) self.validate_all( "CREATE TABLE test_table (id NUMERIC NOT NULL AUTOINCREMENT)", write={ "duckdb": "CREATE TABLE test_table (id DECIMAL(38, 0) NOT NULL)", "snowflake": "CREATE TABLE test_table (id DECIMAL(38, 0) NOT NULL AUTOINCREMENT)", }, ) self.validate_all( "SELECT TO_TIMESTAMP('2025-01-16 14:45:30.123', 'yyyy-mm-DD hh24:mi:ss.ff6')", write={ "": "SELECT STR_TO_TIME('2025-01-16 14:45:30.123', '%Y-%m-%d %H:%M:%S.%f')", "snowflake": "SELECT TO_TIMESTAMP('2025-01-16 14:45:30.123', 'yyyy-mm-DD hh24:mi:ss.ff6')", }, ) self.validate_all( "ARRAY_CONSTRUCT_COMPACT(1, null, 2)", write={ "spark": "ARRAY_COMPACT(ARRAY(1, NULL, 2))", "snowflake": "ARRAY_CONSTRUCT_COMPACT(1, NULL, 2)", }, ) self.validate_all( "ARRAY_COMPACT(arr)", read={ "spark": "ARRAY_COMPACT(arr)", "databricks": "ARRAY_COMPACT(arr)", "snowflake": "ARRAY_COMPACT(arr)", }, write={ "spark": "ARRAY_COMPACT(arr)", "databricks": "ARRAY_COMPACT(arr)", }, ) self.validate_all( "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", read={ "bigquery": "JSON_OBJECT(['key_1', 'key_2'], ['one', NULL])", "duckdb": 
"JSON_OBJECT('key_1', 'one', 'key_2', NULL)", }, write={ "bigquery": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", "duckdb": "JSON_OBJECT('key_1', 'one', 'key_2', NULL)", "snowflake": "OBJECT_CONSTRUCT_KEEP_NULL('key_1', 'one', 'key_2', NULL)", }, ) # Test simple case - uses MAKE_TIME (values within normal ranges) self.validate_all( "SELECT TIME_FROM_PARTS(12, 34, 56)", write={ "duckdb": "SELECT MAKE_TIME(12, 34, 56)", "snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56)", }, ) # Test with nanoseconds - uses INTERVAL arithmetic self.validate_all( "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", write={ "duckdb": "SELECT CAST('00:00:00' AS TIME) + INTERVAL ((12 * 3600) + (34 * 60) + 56 + (987654321 / 1000000000.0)) SECOND", "snowflake": "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)", }, ) # Test overflow normalization - documented Snowflake feature with INTERVAL arithmetic self.validate_all( "SELECT TIME_FROM_PARTS(0, 100, 0)", write={ "duckdb": "SELECT CAST('00:00:00' AS TIME) + INTERVAL ((0 * 3600) + (100 * 60) + 0) SECOND", "snowflake": "SELECT TIME_FROM_PARTS(0, 100, 0)", }, ) self.validate_identity( "SELECT TIMESTAMPNTZFROMPARTS(2013, 4, 5, 12, 00, 00)", "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", ) self.validate_all( "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", read={ "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", "snowflake": "SELECT TIMESTAMP_NTZ_FROM_PARTS(2013, 4, 5, 12, 00, 00)", }, write={ "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00)", "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00)", }, ) self.validate_all( "SELECT TIMESTAMP_FROM_PARTS(TO_DATE('2023-06-15'), TO_TIME('14:30:45'))", write={ "duckdb": "SELECT CAST('2023-06-15' AS DATE) + CAST('14:30:45' AS TIME)", "snowflake": "SELECT TIMESTAMP_FROM_PARTS(CAST('2023-06-15' AS DATE), CAST('14:30:45' AS TIME))", }, ) self.validate_all( "SELECT TIMESTAMP_NTZ_FROM_PARTS(TO_DATE('2023-06-15'), TO_TIME('14:30:45'))", write={ "duckdb": "SELECT 
CAST('2023-06-15' AS DATE) + CAST('14:30:45' AS TIME)", "snowflake": "SELECT TIMESTAMP_FROM_PARTS(CAST('2023-06-15' AS DATE), CAST('14:30:45' AS TIME))", }, ) self.validate_all( "SELECT TIMESTAMP_LTZ_FROM_PARTS(2023, 6, 15, 14, 30, 45)", write={ "duckdb": "SELECT CAST(MAKE_TIMESTAMP(2023, 6, 15, 14, 30, 45) AS TIMESTAMPTZ)", "snowflake": "SELECT TIMESTAMP_LTZ_FROM_PARTS(2023, 6, 15, 14, 30, 45)", }, ) self.validate_all( "SELECT TIMESTAMP_TZ_FROM_PARTS(2023, 6, 15, 14, 30, 45, 0, 'America/Los_Angeles')", write={ "duckdb": "SELECT MAKE_TIMESTAMP(2023, 6, 15, 14, 30, 45) AT TIME ZONE 'America/Los_Angeles'", "snowflake": "SELECT TIMESTAMP_TZ_FROM_PARTS(2023, 6, 15, 14, 30, 45, 0, 'America/Los_Angeles')", }, ) self.validate_all( """WITH vartab(v) AS (select parse_json('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", write={ "bigquery": """WITH vartab AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]') AS v) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", "duckdb": """WITH vartab(v) AS (SELECT JSON('[{"attr": [{"name": "banana"}]}]')) SELECT v -> '$[0].attr[0].name' FROM vartab""", "mysql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('[{"attr": [{"name": "banana"}]}]')) SELECT JSON_EXTRACT(v, '$[0].attr[0].name') FROM vartab""", "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('[{"attr": [{"name": "banana"}]}]')) SELECT GET_PATH(v, '[0].attr[0].name') FROM vartab""", "tsql": """WITH vartab(v) AS (SELECT '[{"attr": [{"name": "banana"}]}]') SELECT ISNULL(JSON_QUERY(v, '$[0].attr[0].name'), JSON_VALUE(v, '$[0].attr[0].name')) FROM vartab""", }, ) self.validate_all( """WITH vartab(v) AS (select parse_json('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", write={ "bigquery": """WITH vartab AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}') AS 
v) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", "duckdb": """WITH vartab(v) AS (SELECT JSON('{"attr": [{"name": "banana"}]}')) SELECT v -> '$.attr[0].name' FROM vartab""", "mysql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", "presto": """WITH vartab(v) AS (SELECT JSON_PARSE('{"attr": [{"name": "banana"}]}')) SELECT JSON_EXTRACT(v, '$.attr[0].name') FROM vartab""", "snowflake": """WITH vartab(v) AS (SELECT PARSE_JSON('{"attr": [{"name": "banana"}]}')) SELECT GET_PATH(v, 'attr[0].name') FROM vartab""", "tsql": """WITH vartab(v) AS (SELECT '{"attr": [{"name": "banana"}]}') SELECT ISNULL(JSON_QUERY(v, '$.attr[0].name'), JSON_VALUE(v, '$.attr[0].name')) FROM vartab""", }, ) self.validate_all( """SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""", write={ "bigquery": """SELECT JSON_EXTRACT(PARSE_JSON('{"fruit":"banana"}'), '$.fruit')""", "databricks": """SELECT PARSE_JSON('{"fruit":"banana"}'):fruit""", "duckdb": """SELECT JSON('{"fruit":"banana"}') -> '$.fruit'""", "mysql": """SELECT JSON_EXTRACT('{"fruit":"banana"}', '$.fruit')""", "presto": """SELECT JSON_EXTRACT(JSON_PARSE('{"fruit":"banana"}'), '$.fruit')""", "snowflake": """SELECT GET_PATH(PARSE_JSON('{"fruit":"banana"}'), 'fruit')""", "spark": """SELECT GET_JSON_OBJECT('{"fruit":"banana"}', '$.fruit')""", "tsql": """SELECT ISNULL(JSON_QUERY('{"fruit":"banana"}', '$.fruit'), JSON_VALUE('{"fruit":"banana"}', '$.fruit'))""", }, ) self.validate_all( "SELECT TO_ARRAY(['test'])", write={ "snowflake": "SELECT TO_ARRAY(['test'])", "spark": "SELECT ARRAY('test')", }, ) self.validate_all( "SELECT TO_ARRAY(['test'])", write={ "snowflake": "SELECT TO_ARRAY(['test'])", "spark": "SELECT ARRAY('test')", }, ) self.validate_all( # We need to qualify the columns in this query because "value" would be ambiguous 'WITH t(x, "value") AS (SELECT [1, 2, 3], 1) SELECT IFF(_u.pos = _u_2.pos_2, _u_2."value", NULL) AS "value" FROM t CROSS JOIN 
TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(t.x)) - 1) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => t.x)) AS _u_2(seq, key, path, pos_2, "value", this) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > (ARRAY_SIZE(t.x) - 1) AND _u_2.pos_2 = (ARRAY_SIZE(t.x) - 1))', read={ "duckdb": 'WITH t(x, "value") AS (SELECT [1,2,3], 1) SELECT UNNEST(t.x) AS "value" FROM t', }, ) self.validate_all( "SELECT { 'Manitoba': 'Winnipeg', 'foo': 'bar' } AS province_capital", write={ "duckdb": "SELECT {'Manitoba': 'Winnipeg', 'foo': 'bar'} AS province_capital", "snowflake": "SELECT OBJECT_CONSTRUCT('Manitoba', 'Winnipeg', 'foo', 'bar') AS province_capital", "spark": "SELECT STRUCT('Winnipeg' AS Manitoba, 'bar' AS foo) AS province_capital", }, ) self.validate_all( "SELECT COLLATE('B', 'und:ci')", write={ "bigquery": "SELECT COLLATE('B', 'und:ci')", "snowflake": "SELECT COLLATE('B', 'und:ci')", }, ) self.validate_all( "SELECT To_BOOLEAN('T')", write={ "duckdb": "SELECT CASE WHEN UPPER(CAST('T' AS TEXT)) = 'ON' THEN TRUE WHEN UPPER(CAST('T' AS TEXT)) = 'OFF' THEN FALSE WHEN ISNAN(TRY_CAST('T' AS REAL)) OR ISINF(TRY_CAST('T' AS REAL)) THEN ERROR('TO_BOOLEAN: Non-numeric values NaN and INF are not supported') ELSE CAST('T' AS BOOLEAN) END", }, ) self.validate_all( "SELECT * FROM x START WITH a = b CONNECT BY c = PRIOR d", read={ "oracle": "SELECT * FROM x START WITH a = b CONNECT BY c = PRIOR d", }, write={ "oracle": "SELECT * FROM x START WITH a = b CONNECT BY c = PRIOR d", "snowflake": "SELECT * FROM x START WITH a = b CONNECT BY c = PRIOR d", }, ) self.validate_all( "SELECT INSERT(a, 0, 0, 'b')", read={ "mysql": "SELECT INSERT(a, 0, 0, 'b')", "snowflake": "SELECT INSERT(a, 0, 0, 'b')", "tsql": "SELECT STUFF(a, 0, 0, 'b')", }, write={ "mysql": "SELECT INSERT(a, 0, 0, 'b')", "snowflake": "SELECT INSERT(a, 0, 0, 'b')", "tsql": "SELECT STUFF(a, 0, 0, 'b')", }, ) self.validate_all( "ARRAY_GENERATE_RANGE(0, 3)", write={ "bigquery": 
"GENERATE_ARRAY(0, 3 - 1)", "duckdb": "RANGE(0, 3)", "postgres": "GENERATE_SERIES(0, 3 - 1)", "presto": "SEQUENCE(0, 2)", "snowflake": "ARRAY_GENERATE_RANGE(0, 3)", }, ) self.validate_all( "ARRAY_GENERATE_RANGE(0, 3 + 1)", read={ "bigquery": "GENERATE_ARRAY(0, 3)", "postgres": "GENERATE_SERIES(0, 3)", "presto": "SEQUENCE(0, 3)", }, ) self.validate_all( "SELECT ARRAY_GENERATE_RANGE(-5, -25, -10)", write={ "duckdb": "SELECT RANGE(-5, -25, -10)", "snowflake": "SELECT ARRAY_GENERATE_RANGE(-5, -25, -10)", }, ) self.validate_all( "SELECT ARRAY_GENERATE_RANGE(5, 1, -1)", write={ "duckdb": "SELECT RANGE(5, 1, -1)", "snowflake": "SELECT ARRAY_GENERATE_RANGE(5, 1, -1)", }, ) self.validate_all( "SELECT DATE_PART('year', TIMESTAMP '2020-01-01')", write={ "hive": "SELECT EXTRACT(year FROM CAST('2020-01-01' AS TIMESTAMP))", "snowflake": "SELECT DATE_PART('year', CAST('2020-01-01' AS TIMESTAMP))", "spark": "SELECT EXTRACT(year FROM CAST('2020-01-01' AS TIMESTAMP))", }, ) self.validate_all( "SELECT * FROM (VALUES (0) foo(bar))", write={"snowflake": "SELECT * FROM (VALUES (0)) AS foo(bar)"}, ) self.validate_all( "OBJECT_CONSTRUCT('a', b, 'c', d)", read={ "": "STRUCT(b as a, d as c)", }, write={ "duckdb": "{'a': b, 'c': d}", "snowflake": "OBJECT_CONSTRUCT('a', b, 'c', d)", "": "STRUCT(b AS a, d AS c)", }, ) self.validate_identity("OBJECT_CONSTRUCT(a, b, c, d)") self.validate_all( "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1", write={ "": "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o NULLS LAST) = 1", "databricks": "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o NULLS LAST) = 1", "hive": "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o NULLS LAST) AS _w FROM qt) AS _t WHERE _w = 1", "presto": "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS _w FROM qt) AS _t WHERE _w = 1", "snowflake": "SELECT i, p, o FROM qt QUALIFY 
ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1", "spark": "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o NULLS LAST) AS _w FROM qt) AS _t WHERE _w = 1", "sqlite": "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o NULLS LAST) AS _w FROM qt) AS _t WHERE _w = 1", "trino": "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS _w FROM qt) AS _t WHERE _w = 1", }, ) # NTH_VALUE FROM FIRST not supported in DuckDB self.validate_all( "SELECT NTH_VALUE(is_deleted, 2) FROM FIRST IGNORE NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": "SELECT NTH_VALUE(is_deleted, 2) FROM FIRST IGNORE NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": "SELECT NTH_VALUE(is_deleted, 2 IGNORE NULLS) OVER (PARTITION BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS nth_is_deleted FROM my_table", }, ) # NTH_VALUE FROM LAST not supported in DuckDB self.validate_all( "SELECT NTH_VALUE(is_deleted, 2) FROM LAST RESPECT NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": "SELECT NTH_VALUE(is_deleted, 2) FROM LAST RESPECT NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": "SELECT NTH_VALUE(is_deleted, 2 RESPECT NULLS) OVER (PARTITION BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS nth_is_deleted FROM my_table", }, ) self.validate_all( "SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": "SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": "SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS nth_is_deleted FROM my_table", }, ) self.validate_all( "SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", write={ "snowflake": 
"SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", "duckdb": "SELECT NTH_VALUE(is_deleted, 2) OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", }, ) for func in ( "FIRST_VALUE", "LAST_VALUE", ): for options in ( " IGNORE NULLS", " RESPECT NULLS", "", ): self.validate_all( f"SELECT {func}(is_deleted){options} OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": f"SELECT {func}(is_deleted){options} OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": f"SELECT {func}(is_deleted{options}) OVER (PARTITION BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS nth_is_deleted FROM my_table", }, ) self.validate_all( f"SELECT {func}(is_deleted){options} OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", write={ "snowflake": f"SELECT {func}(is_deleted){options} OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", "duckdb": f"SELECT {func}(is_deleted{options}) OVER (PARTITION BY id ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS nth_is_deleted FROM my_table", }, ) self.validate_all( "SELECT LEAD(is_deleted, 2, -10) RESPECT NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": "SELECT LEAD(is_deleted, 2, -10) RESPECT NULLS OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": "SELECT LEAD(is_deleted, 2, -10 RESPECT NULLS) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", }, ) self.validate_all( "SELECT LEAD(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", write={ "snowflake": "SELECT LEAD(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", "duckdb": "SELECT LEAD(is_deleted, 2) OVER (PARTITION BY id) AS nth_is_deleted FROM my_table", }, ) self.validate_all( "SELECT LAG(amount) OVER (ORDER BY seq) AS basic_lag", write={ 
"snowflake": "SELECT LAG(amount) OVER (ORDER BY seq) AS basic_lag", "duckdb": "SELECT LAG(amount) OVER (ORDER BY seq) AS basic_lag", }, ) self.validate_all( "SELECT LAG(amount, 2) IGNORE NULLS OVER (PARTITION BY category ORDER BY seq) AS lag_offset_ignore_nulls", write={ "snowflake": "SELECT LAG(amount, 2) IGNORE NULLS OVER (PARTITION BY category ORDER BY seq) AS lag_offset_ignore_nulls", "duckdb": "SELECT LAG(amount, 2 IGNORE NULLS) OVER (PARTITION BY category ORDER BY seq) AS lag_offset_ignore_nulls", }, ) self.validate_all( "SELECT LAG(amount, 2, -777) RESPECT NULLS OVER (PARTITION BY category ORDER BY seq ASC) AS lag_full_ignore_nulls", write={ "snowflake": "SELECT LAG(amount, 2, -777) RESPECT NULLS OVER (PARTITION BY category ORDER BY seq ASC) AS lag_full_ignore_nulls", "duckdb": "SELECT LAG(amount, 2, -777 RESPECT NULLS) OVER (PARTITION BY category ORDER BY seq ASC) AS lag_full_ignore_nulls", }, ) self.validate_all( "SELECT BOOLOR_AGG(c1), BOOLOR_AGG(c2) FROM test", write={ "": "SELECT LOGICAL_OR(c1), LOGICAL_OR(c2) FROM test", "duckdb": "SELECT BOOL_OR(CAST(c1 AS BOOLEAN)), BOOL_OR(CAST(c2 AS BOOLEAN)) FROM test", "oracle": "SELECT MAX(c1), MAX(c2) FROM test", "postgres": "SELECT BOOL_OR(c1), BOOL_OR(c2) FROM test", "snowflake": "SELECT BOOLOR_AGG(c1), BOOLOR_AGG(c2) FROM test", "spark": "SELECT BOOL_OR(c1), BOOL_OR(c2) FROM test", "sqlite": "SELECT MAX(c1), MAX(c2) FROM test", }, ) self.validate_all( "SELECT BOOLAND_AGG(c1), BOOLAND_AGG(c2) FROM test", write={ "": "SELECT LOGICAL_AND(c1), LOGICAL_AND(c2) FROM test", "duckdb": "SELECT BOOL_AND(CAST(c1 AS BOOLEAN)), BOOL_AND(CAST(c2 AS BOOLEAN)) FROM test", "oracle": "SELECT MIN(c1), MIN(c2) FROM test", "postgres": "SELECT BOOL_AND(c1), BOOL_AND(c2) FROM test", "snowflake": "SELECT BOOLAND_AGG(c1), BOOLAND_AGG(c2) FROM test", "spark": "SELECT BOOL_AND(c1), BOOL_AND(c2) FROM test", "sqlite": "SELECT MIN(c1), MIN(c2) FROM test", "mysql": "SELECT MIN(c1), MIN(c2) FROM test", }, ) self.validate_all( "SELECT 
BOOLXOR_AGG(c1) FROM test", write={ "duckdb": "SELECT COUNT_IF(CAST(c1 AS BOOLEAN)) = 1 FROM test", "snowflake": "SELECT BOOLXOR_AGG(c1) FROM test", }, ) for suffix in ( "", " OVER ()", ): self.validate_all( f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", read={ "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", }, write={ "": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x NULLS LAST){suffix}", "duckdb": f"SELECT QUANTILE_CONT(x, 0.5 ORDER BY x){suffix}", "postgres": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", "snowflake": f"SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x){suffix}", }, ) for func in ( "COVAR_POP", "COVAR_SAMP", ): self.validate_all( f"SELECT {func}(y, x){suffix}", write={ "": f"SELECT {func}(y, x){suffix}", "duckdb": f"SELECT {func}(y, x){suffix}", "postgres": f"SELECT {func}(y, x){suffix}", "snowflake": f"SELECT {func}(y, x){suffix}", }, ) self.validate_all( "TO_CHAR(x, y)", read={ "": "TO_CHAR(x, y)", "snowflake": "TO_VARCHAR(x, y)", }, write={ "": "CAST(x AS TEXT)", "databricks": "TO_CHAR(x, y)", "drill": "TO_CHAR(x, y)", "oracle": "TO_CHAR(x, y)", "postgres": "TO_CHAR(x, y)", "snowflake": "TO_CHAR(x, y)", "teradata": "TO_CHAR(x, y)", }, ) for to_func in ("TO_CHAR", "TO_VARCHAR"): with self.subTest(f"Testing transpilation of {to_func}"): self.validate_identity( f"{to_func}(foo::DATE, 'yyyy')", "TO_CHAR(CAST(foo AS DATE), 'yyyy')", ) self.validate_all( f"{to_func}(foo::TIMESTAMP, 'YYYY-MM')", write={ "snowflake": "TO_CHAR(CAST(foo AS TIMESTAMP), 'yyyy-mm')", "duckdb": "STRFTIME(CAST(foo AS TIMESTAMP), '%Y-%m')", }, ) self.validate_all( "SQUARE(x)", write={ "bigquery": "POWER(x, 2)", "clickhouse": "POWER(x, 2)", "databricks": "POWER(x, 2)", "drill": "POW(x, 2)", "duckdb": "POWER(x, 2)", "hive": "POWER(x, 2)", "mysql": "POWER(x, 2)", "oracle": "POWER(x, 2)", "postgres": "POWER(x, 2)", "presto": "POWER(x, 2)", "redshift": "POWER(x, 2)", "snowflake": "POWER(x, 2)", 
"spark": "POWER(x, 2)", "sqlite": "POWER(x, 2)", "starrocks": "POWER(x, 2)", "teradata": "x ** 2", "trino": "POWER(x, 2)", "tsql": "POWER(x, 2)", }, ) self.validate_all( "POWER(x, 2)", read={ "oracle": "SQUARE(x)", "snowflake": "SQUARE(x)", "tsql": "SQUARE(x)", }, ) self.validate_all( "DIV0(foo, bar)", write={ "snowflake": "IFF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", "sqlite": "IIF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS REAL) / bar)", "presto": "IF(bar = 0 AND NOT foo IS NULL, 0, CAST(foo AS DOUBLE) / bar)", "spark": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", "hive": "IF(bar = 0 AND NOT foo IS NULL, 0, foo / bar)", "duckdb": "CASE WHEN bar = 0 AND NOT foo IS NULL THEN 0 ELSE foo / bar END", }, ) self.validate_all( "DIV0(a - b, c - d)", write={ "snowflake": "IFF((c - d) = 0 AND NOT (a - b) IS NULL, 0, (a - b) / (c - d))", "sqlite": "IIF((c - d) = 0 AND NOT (a - b) IS NULL, 0, CAST((a - b) AS REAL) / (c - d))", "presto": "IF((c - d) = 0 AND NOT (a - b) IS NULL, 0, CAST((a - b) AS DOUBLE) / (c - d))", "spark": "IF((c - d) = 0 AND NOT (a - b) IS NULL, 0, (a - b) / (c - d))", "hive": "IF((c - d) = 0 AND NOT (a - b) IS NULL, 0, (a - b) / (c - d))", "duckdb": "CASE WHEN (c - d) = 0 AND NOT (a - b) IS NULL THEN 0 ELSE (a - b) / (c - d) END", }, ) self.validate_all( "DIV0NULL(foo, bar)", write={ "snowflake": "IFF(bar = 0 OR bar IS NULL, 0, foo / bar)", "sqlite": "IIF(bar = 0 OR bar IS NULL, 0, CAST(foo AS REAL) / bar)", "presto": "IF(bar = 0 OR bar IS NULL, 0, CAST(foo AS DOUBLE) / bar)", "spark": "IF(bar = 0 OR bar IS NULL, 0, foo / bar)", "hive": "IF(bar = 0 OR bar IS NULL, 0, foo / bar)", "duckdb": "CASE WHEN bar = 0 OR bar IS NULL THEN 0 ELSE foo / bar END", }, ) self.validate_all( "DIV0NULL(a - b, c - d)", write={ "snowflake": "IFF((c - d) = 0 OR (c - d) IS NULL, 0, (a - b) / (c - d))", "sqlite": "IIF((c - d) = 0 OR (c - d) IS NULL, 0, CAST((a - b) AS REAL) / (c - d))", "presto": "IF((c - d) = 0 OR (c - d) IS NULL, 0, CAST((a - b) AS DOUBLE) / (c - 
d))", "spark": "IF((c - d) = 0 OR (c - d) IS NULL, 0, (a - b) / (c - d))", "hive": "IF((c - d) = 0 OR (c - d) IS NULL, 0, (a - b) / (c - d))", "duckdb": "CASE WHEN (c - d) = 0 OR (c - d) IS NULL THEN 0 ELSE (a - b) / (c - d) END", }, ) self.validate_all( "ZEROIFNULL(foo)", write={ "snowflake": "IFF(foo IS NULL, 0, foo)", "sqlite": "IIF(foo IS NULL, 0, foo)", "presto": "IF(foo IS NULL, 0, foo)", "spark": "IF(foo IS NULL, 0, foo)", "hive": "IF(foo IS NULL, 0, foo)", "duckdb": "CASE WHEN foo IS NULL THEN 0 ELSE foo END", }, ) self.validate_all( "NULLIFZERO(foo)", write={ "snowflake": "IFF(foo = 0, NULL, foo)", "sqlite": "IIF(foo = 0, NULL, foo)", "presto": "IF(foo = 0, NULL, foo)", "spark": "IF(foo = 0, NULL, foo)", "hive": "IF(foo = 0, NULL, foo)", "duckdb": "CASE WHEN foo = 0 THEN NULL ELSE foo END", }, ) self.validate_all( "SELECT * EXCLUDE (a, b) REPLACE (c AS d, E AS F) FROM xxx", read={ "duckdb": "SELECT * EXCLUDE (a, b) REPLACE (c AS d, E AS F) FROM xxx", }, write={ "snowflake": "SELECT * EXCLUDE (a, b) REPLACE (c AS d, E AS F) FROM xxx", "duckdb": "SELECT * EXCLUDE (a, b) REPLACE (c AS d, E AS F) FROM xxx", }, ) self.validate_all( '''SELECT PARSE_JSON('{"a": {"b c": "foo"}}'):a:"b c"''', write={ "duckdb": """SELECT JSON('{"a": {"b c": "foo"}}') -> '$.a."b c"'""", "mysql": """SELECT JSON_EXTRACT('{"a": {"b c": "foo"}}', '$.a."b c"')""", "snowflake": """SELECT GET_PATH(PARSE_JSON('{"a": {"b c": "foo"}}'), 'a["b c"]')""", }, ) self.validate_all( "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", write={ "bigquery": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a NULLS LAST LIMIT 10", "snowflake": "SELECT a FROM test WHERE a = 1 GROUP BY a HAVING a = 2 QUALIFY z ORDER BY a LIMIT 10", }, ) self.validate_all( "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z) = 1", write={ "bigquery": "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z NULLS LAST) 
= 1", "snowflake": "SELECT a FROM test AS t QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY Z) = 1", }, ) self.validate_all( "SELECT TO_TIMESTAMP(col, 'DD-MM-YYYY HH12:MI:SS') FROM t", write={ "bigquery": "SELECT PARSE_TIMESTAMP('%d-%m-%Y %I:%M:%S', col) FROM t", "duckdb": "SELECT STRPTIME(col, '%d-%m-%Y %I:%M:%S') FROM t", "snowflake": "SELECT TO_TIMESTAMP(col, 'DD-mm-yyyy hh12:mi:ss') FROM t", "spark": "SELECT TO_TIMESTAMP(col, 'dd-MM-yyyy hh:mm:ss') FROM t", }, ) self.validate_all( "SELECT TO_TIMESTAMP(1659981729)", write={ "bigquery": "SELECT TIMESTAMP_SECONDS(1659981729)", "snowflake": "SELECT TO_TIMESTAMP(1659981729)", "spark": "SELECT CAST(FROM_UNIXTIME(1659981729) AS TIMESTAMP)", "redshift": "SELECT (TIMESTAMP 'epoch' + 1659981729 * INTERVAL '1 SECOND')", }, ) self.validate_all( "SELECT TO_TIMESTAMP(1659981729000, 3)", write={ "bigquery": "SELECT TIMESTAMP_MILLIS(1659981729000)", "snowflake": "SELECT TO_TIMESTAMP(1659981729000, 3)", "spark": "SELECT TIMESTAMP_MILLIS(1659981729000)", "redshift": "SELECT (TIMESTAMP 'epoch' + (1659981729000 / POWER(10, 3)) * INTERVAL '1 SECOND')", }, ) self.validate_all( "SELECT TO_TIMESTAMP(16599817290000, 4)", write={ "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(16599817290000 / POWER(10, 4) AS INT64))", "snowflake": "SELECT TO_TIMESTAMP(16599817290000, 4)", "spark": "SELECT TIMESTAMP_SECONDS(16599817290000 / POWER(10, 4))", "redshift": "SELECT (TIMESTAMP 'epoch' + (16599817290000 / POWER(10, 4)) * INTERVAL '1 SECOND')", }, ) self.validate_all( "SELECT TO_TIMESTAMP('1659981729')", write={ "snowflake": "SELECT TO_TIMESTAMP('1659981729')", "spark": "SELECT CAST(FROM_UNIXTIME('1659981729') AS TIMESTAMP)", }, ) self.validate_all( "SELECT TO_TIMESTAMP(1659981729000000000, 9)", write={ "bigquery": "SELECT TIMESTAMP_SECONDS(CAST(1659981729000000000 / POWER(10, 9) AS INT64))", "duckdb": "SELECT TO_TIMESTAMP(1659981729000000000 / POWER(10, 9)) AT TIME ZONE 'UTC'", "presto": "SELECT FROM_UNIXTIME(CAST(1659981729000000000 AS DOUBLE) / 
POW(10, 9))", "snowflake": "SELECT TO_TIMESTAMP(1659981729000000000, 9)", "spark": "SELECT TIMESTAMP_SECONDS(1659981729000000000 / POWER(10, 9))", "redshift": "SELECT (TIMESTAMP 'epoch' + (1659981729000000000 / POWER(10, 9)) * INTERVAL '1 SECOND')", }, ) self.validate_all( "SELECT TO_TIMESTAMP('2013-04-05 01:02:03')", write={ "bigquery": "SELECT CAST('2013-04-05 01:02:03' AS DATETIME)", "snowflake": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)", "spark": "SELECT CAST('2013-04-05 01:02:03' AS TIMESTAMP)", }, ) self.validate_all( "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/DD/yyyy hh24:mi:ss')", read={ "bigquery": "SELECT PARSE_TIMESTAMP('%m/%d/%Y %H:%M:%S', '04/05/2013 01:02:03')", "duckdb": "SELECT STRPTIME('04/05/2013 01:02:03', '%m/%d/%Y %H:%M:%S')", }, write={ "bigquery": "SELECT PARSE_TIMESTAMP('%m/%d/%Y %T', '04/05/2013 01:02:03')", "snowflake": "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/DD/yyyy hh24:mi:ss')", "spark": "SELECT TO_TIMESTAMP('04/05/2013 01:02:03', 'MM/dd/yyyy HH:mm:ss')", }, ) self.validate_all( "TO_TIMESTAMP('2024-01-15 3:00 AM', 'YYYY-MM-DD HH12:MI PM')", write={ "duckdb": "STRPTIME('2024-01-15 3:00 AM', '%Y-%m-%d %I:%M %p')", "snowflake": "TO_TIMESTAMP('2024-01-15 3:00 AM', 'yyyy-mm-DD hh12:mi pm')", }, ) self.validate_all( "TO_TIMESTAMP('2024-01-15 3:00 PM', 'YYYY-MM-DD HH12:MI AM')", write={ "duckdb": "STRPTIME('2024-01-15 3:00 PM', '%Y-%m-%d %I:%M %p')", "snowflake": "TO_TIMESTAMP('2024-01-15 3:00 PM', 'yyyy-mm-DD hh12:mi pm')", }, ) self.validate_all( "TO_TIMESTAMP('2024-01-15 3:00 PM', 'YYYY-MM-DD HH12:MI PM')", write={ "duckdb": "STRPTIME('2024-01-15 3:00 PM', '%Y-%m-%d %I:%M %p')", "snowflake": "TO_TIMESTAMP('2024-01-15 3:00 PM', 'yyyy-mm-DD hh12:mi pm')", }, ) self.validate_all( "TO_TIMESTAMP('2024-01-15 3:00 AM', 'YYYY-MM-DD HH12:MI AM')", write={ "duckdb": "STRPTIME('2024-01-15 3:00 AM', '%Y-%m-%d %I:%M %p')", "snowflake": "TO_TIMESTAMP('2024-01-15 3:00 AM', 'yyyy-mm-DD hh12:mi pm')", }, ) self.validate_all( "SELECT 
IFF(TRUE, 'true', 'false')", write={ "snowflake": "SELECT IFF(TRUE, 'true', 'false')", "spark": "SELECT IF(TRUE, 'true', 'false')", }, ) self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname", "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname ASC, lname", "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname", "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST", "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname NULLS LAST", "snowflake": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname ASC, lname", }, ) self.validate_all( "SELECT ARRAY_AGG(DISTINCT a)", write={ "spark": "SELECT COLLECT_LIST(DISTINCT a)", "snowflake": "SELECT ARRAY_AGG(DISTINCT a)", "duckdb": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)", "presto": "SELECT ARRAY_AGG(DISTINCT a) FILTER(WHERE a IS NOT NULL)", }, ) self.validate_all( "SELECT ARRAY_AGG(col) WITHIN GROUP (ORDER BY sort_col)", write={ "snowflake": "SELECT ARRAY_AGG(col) WITHIN GROUP (ORDER BY sort_col)", "duckdb": "SELECT ARRAY_AGG(col ORDER BY sort_col) FILTER(WHERE col IS NOT NULL)", }, ) self.validate_all( "SELECT ARRAY_AGG(DISTINCT col) WITHIN GROUP (ORDER BY col DESC)", write={ "snowflake": "SELECT ARRAY_AGG(DISTINCT col) WITHIN GROUP (ORDER BY col DESC)", "duckdb": "SELECT ARRAY_AGG(DISTINCT col ORDER BY col DESC NULLS FIRST) FILTER(WHERE col IS NOT NULL)", }, ) self.validate_all( "SELECT ARRAY_TO_STRING(x, '')", write={ "spark": "SELECT ARRAY_JOIN(x, '')", "snowflake": "SELECT ARRAY_TO_STRING(x, '')", "duckdb": "SELECT CASE WHEN '' IS NULL THEN NULL ELSE ARRAY_TO_STRING(LIST_TRANSFORM(x, x -> COALESCE(CAST(x AS TEXT), '')), '') END", }, ) self.validate_all( 
"SELECT ARRAY_TO_STRING(x, NULL)", write={ "snowflake": "SELECT ARRAY_TO_STRING(x, NULL)", "duckdb": "SELECT CASE WHEN NULL IS NULL THEN NULL ELSE ARRAY_TO_STRING(LIST_TRANSFORM(x, x -> COALESCE(CAST(x AS TEXT), '')), NULL) END", }, ) self.validate_all( "SELECT ARRAY_TO_STRING([], ',')", write={ "snowflake": "SELECT ARRAY_TO_STRING([], ',')", "duckdb": "SELECT CASE WHEN ',' IS NULL THEN NULL ELSE ARRAY_TO_STRING(LIST_TRANSFORM([], x -> COALESCE(CAST(x AS TEXT), '')), ',') END", }, ) self.validate_all( "TO_ARRAY(x)", write={ "spark": "IF(x IS NULL, NULL, ARRAY(x))", "snowflake": "TO_ARRAY(x)", }, ) self.validate_all( "SELECT * FROM a INTERSECT ALL SELECT * FROM b", write={ "snowflake": UnsupportedError, }, ) self.validate_all( "SELECT * FROM a EXCEPT ALL SELECT * FROM b", write={ "snowflake": UnsupportedError, }, ) self.validate_all( "SELECT ARRAY_UNION_AGG(a)", write={ "snowflake": "SELECT ARRAY_UNION_AGG(a)", }, ) self.validate_all( "SELECT $$a$$", write={ "snowflake": "SELECT 'a'", }, ) self.validate_all( "SELECT RLIKE(a, b)", write={ "duckdb": "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$')", "hive": "SELECT a RLIKE b", "snowflake": "SELECT REGEXP_LIKE(a, b)", "spark": "SELECT a RLIKE b", }, ) self.validate_all( "SELECT RLIKE(a, b, 'i')", write={ "duckdb": "SELECT REGEXP_MATCHES(a, '^(' || (b) || ')$', 'i')", "snowflake": "SELECT REGEXP_LIKE(a, b, 'i')", }, ) self.validate_all( "'foo' REGEXP 'bar'", write={ "snowflake": "REGEXP_LIKE('foo', 'bar')", "postgres": "'foo' ~ 'bar'", "mysql": "REGEXP_LIKE('foo', 'bar')", "bigquery": "REGEXP_CONTAINS('foo', 'bar')", }, ) self.validate_all( "'foo' NOT REGEXP 'bar'", write={ "snowflake": "NOT REGEXP_LIKE('foo', 'bar')", "postgres": "NOT 'foo' ~ 'bar'", "mysql": "NOT REGEXP_LIKE('foo', 'bar')", "bigquery": "NOT REGEXP_CONTAINS('foo', 'bar')", }, ) self.validate_all( "SELECT a FROM test pivot", write={ "snowflake": "SELECT a FROM test AS pivot", }, ) self.validate_all( "SELECT a FROM test unpivot", write={ "snowflake": 
"SELECT a FROM test AS unpivot", }, ) self.validate_all( "trim(date_column, 'UTC')", write={ "bigquery": "TRIM(date_column, 'UTC')", "snowflake": "TRIM(date_column, 'UTC')", "postgres": "TRIM('UTC' FROM date_column)", }, ) self.validate_all( "trim(date_column)", write={ "snowflake": "TRIM(date_column)", "bigquery": "TRIM(date_column)", }, ) self.validate_all( "DECODE(x, a, b, c, d, e)", write={ "duckdb": "CASE WHEN x = a OR (x IS NULL AND a IS NULL) THEN b WHEN x = c OR (x IS NULL AND c IS NULL) THEN d ELSE e END", "snowflake": "DECODE(x, a, b, c, d, e)", }, ) self.validate_all( "DECODE(TRUE, a.b = 'value', 'value')", write={ "duckdb": "CASE WHEN TRUE = (a.b = 'value') OR (TRUE IS NULL AND (a.b = 'value') IS NULL) THEN 'value' END", "snowflake": "DECODE(TRUE, a.b = 'value', 'value')", }, ) self.validate_all( "SELECT BOOLAND(1, -2)", read={ "snowflake": "SELECT BOOLAND(1, -2)", }, write={ "snowflake": "SELECT BOOLAND(1, -2)", "duckdb": "SELECT ((ROUND(1, 0)) AND (ROUND(-2, 0)))", }, ) self.validate_all( "SELECT BOOLOR(1, 0)", write={ "snowflake": "SELECT BOOLOR(1, 0)", "duckdb": "SELECT ((ROUND(1, 0)) OR (ROUND(0, 0)))", }, ) self.validate_all( "SELECT BOOLXOR(2, 0.3)", read={ "snowflake": "SELECT BOOLXOR(2, 0.3)", }, write={ "snowflake": "SELECT BOOLXOR(2, 0.3)", "duckdb": "SELECT (ROUND(2, 0) AND (NOT ROUND(0.3, 0))) OR ((NOT ROUND(2, 0)) AND ROUND(0.3, 0))", }, ) self.validate_all( "SELECT APPROX_PERCENTILE(a, 0.5) FROM t", read={ "trino": "SELECT APPROX_PERCENTILE(a, 1, 0.5, 0.001) FROM t", "presto": "SELECT APPROX_PERCENTILE(a, 1, 0.5, 0.001) FROM t", }, write={ "trino": "SELECT APPROX_PERCENTILE(a, 0.5) FROM t", "presto": "SELECT APPROX_PERCENTILE(a, 0.5) FROM t", "snowflake": "SELECT APPROX_PERCENTILE(a, 0.5) FROM t", }, ) self.validate_all( "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')", write={ "snowflake": "SELECT 
OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT('key5', 'value5'), 'key1', 5), 'key2', 2.2), 'key3', 'value3')", "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_INSERT({'key5': 'value5'}, key1 := 5), key2 := 2.2), key3 := 'value3')", }, ) self.validate_all( "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')", write={ "snowflake": "SELECT OBJECT_INSERT(OBJECT_INSERT(OBJECT_INSERT(OBJECT_CONSTRUCT(), 'key1', 5), 'key2', 2.2), 'key3', 'value3')", "duckdb": "SELECT STRUCT_INSERT(STRUCT_INSERT(STRUCT_PACK(key1 := 5), key2 := 2.2), key3 := 'value3')", }, ) self.validate_identity( """SELECT ARRAY_CONSTRUCT('foo')::VARIANT[0]""", """SELECT CAST(['foo'] AS VARIANT)[0]""", ) self.validate_all( "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", write={ "snowflake": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", "spark": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", "databricks": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", "redshift": "SELECT CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000')", }, ) self.validate_all( "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", write={ "snowflake": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", "spark": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", "databricks": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", "redshift": "SELECT CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000')", "mysql": "SELECT CONVERT_TZ('2024-08-06 09:10:00.000', 'America/Los_Angeles', 'America/New_York')", "duckdb": "SELECT CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'America/Los_Angeles' AT TIME ZONE 'America/New_York'", }, ) 
self.validate_identity( "SELECT UUID_STRING(), UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')" ) self.validate_all( "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')", read={ "snowflake": "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')", }, write={ "hive": "UUID()", "spark2": "UUID()", "spark": "UUID()", "databricks": "UUID()", "duckdb": "UUID()", "presto": "UUID()", "trino": "UUID()", "postgres": "GEN_RANDOM_UUID()", "bigquery": "GENERATE_UUID()", }, ) self.validate_identity("TRY_TO_TIMESTAMP(foo)").assert_is(exp.Anonymous) self.validate_identity("TRY_TO_TIMESTAMP('12345')").assert_is(exp.Anonymous) self.validate_all( "SELECT TRY_TO_TIMESTAMP('2024-01-15 12:30:00.000')", write={ "snowflake": "SELECT TRY_CAST('2024-01-15 12:30:00.000' AS TIMESTAMP)", "duckdb": "SELECT TRY_CAST('2024-01-15 12:30:00.000' AS TIMESTAMP)", }, ) self.validate_all( "SELECT TRY_TO_TIMESTAMP('invalid')", write={ "snowflake": "SELECT TRY_CAST('invalid' AS TIMESTAMP)", "duckdb": "SELECT TRY_CAST('invalid' AS TIMESTAMP)", }, ) self.validate_all( "SELECT TRY_TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/DD/yyyy hh24:mi:ss')", write={ "snowflake": "SELECT TRY_TO_TIMESTAMP('04/05/2013 01:02:03', 'mm/DD/yyyy hh24:mi:ss')", "duckdb": "SELECT CAST(TRY_STRPTIME('04/05/2013 01:02:03', '%m/%d/%Y %H:%M:%S') AS TIMESTAMP)", }, ) self.validate_all( "EDITDISTANCE(col1, col2)", write={ "duckdb": "LEVENSHTEIN(col1, col2)", "snowflake": "EDITDISTANCE(col1, col2)", }, ) self.validate_all( "EDITDISTANCE(col1, col2, 3)", write={ "bigquery": "EDIT_DISTANCE(col1, col2, max_distance => 3)", "duckdb": "CASE WHEN LEVENSHTEIN(col1, col2) IS NULL OR 3 IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(col1, col2), 3) END", "postgres": "LEVENSHTEIN_LESS_EQUAL(col1, col2, 3)", "snowflake": "EDITDISTANCE(col1, col2, 3)", }, ) self.validate_identity("MINHASH(100, col1)") self.validate_identity("MINHASH(100, col1, col2)") self.validate_all( "MINHASH(4, col1)", write={ "duckdb": "(SELECT 
JSON_OBJECT('state', LIST(min_h ORDER BY seed NULLS FIRST), 'type', 'minhash', 'version', 1) FROM (SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS TEXT) || CAST(seed AS TEXT)))) AS min_h FROM (SELECT LIST(col1) AS vals), RANGE(0, 4) AS t(seed)))", "snowflake": "MINHASH(4, col1)", }, ) self.validate_identity("MINHASH_COMBINE(sig_col)") self.validate_all( "MINHASH_COMBINE(sig_col)", write={ "duckdb": "(SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx NULLS FIRST), 'type', 'minhash', 'version', 1) FROM (SELECT pos AS idx, MIN(val) AS min_h FROM UNNEST(LIST(sig_col)) AS _(sig) JOIN UNNEST(CAST(sig -> '$.state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos) ON TRUE GROUP BY pos))", "snowflake": "MINHASH_COMBINE(sig_col)", }, ) self.validate_identity("APPROXIMATE_SIMILARITY(sig_col)") self.validate_all( "APPROXIMATE_SIMILARITY(sig_col)", write={ "duckdb": "(SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*) FROM (SELECT pos, COUNT(DISTINCT h) AS num_distinct FROM (SELECT h, pos FROM UNNEST(LIST(sig_col)) AS _(sig) JOIN UNNEST(CAST(sig -> '$.state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos) ON TRUE) GROUP BY pos))", "snowflake": "APPROXIMATE_SIMILARITY(sig_col)", }, ) self.validate_identity("SELECT BITNOT(a)") self.validate_identity("SELECT BIT_NOT(a)", "SELECT BITNOT(a)") self.validate_all( "SELECT BITNOT(-1)", write={ "duckdb": "SELECT ~(-1)", "snowflake": "SELECT BITNOT(-1)", }, ) self.validate_identity("SELECT BITAND(a, b)") self.validate_identity("SELECT BITAND(a, b, 'LEFT')") self.validate_identity("SELECT BIT_AND(a, b)", "SELECT BITAND(a, b)") self.validate_identity("SELECT BIT_AND(a, b, 'LEFT')", "SELECT BITAND(a, b, 'LEFT')") self.validate_identity("SELECT BITOR(a, b)") self.validate_identity("SELECT BITOR(a, b, 'LEFT')") self.validate_identity("SELECT BIT_OR(a, b)", "SELECT BITOR(a, b)") self.validate_identity("SELECT BIT_OR(a, b, 'RIGHT')", "SELECT BITOR(a, b, 'RIGHT')") self.validate_identity("SELECT 
BITXOR(a, b)") self.validate_identity("SELECT BITXOR(a, b, 'LEFT')") self.validate_identity("SELECT BIT_XOR(a, b)", "SELECT BITXOR(a, b)") self.validate_identity("SELECT BIT_XOR(a, b, 'LEFT')", "SELECT BITXOR(a, b, 'LEFT')") # duckdb has an order of operations precedence issue with bitshift and bitwise operators self.validate_all( "SELECT BITOR(BITSHIFTLEFT(5, 16), BITSHIFTLEFT(3, 8))", write={"duckdb": "SELECT (CAST(5 AS INT128) << 16) | (CAST(3 AS INT128) << 8)"}, ) self.validate_all( "SELECT BITAND(BITSHIFTLEFT(255, 4), BITSHIFTLEFT(15, 2))", write={ "snowflake": "SELECT BITAND(BITSHIFTLEFT(255, 4), BITSHIFTLEFT(15, 2))", "duckdb": "SELECT (CAST(255 AS INT128) << 4) & (CAST(15 AS INT128) << 2)", }, ) self.validate_all( "SELECT BITSHIFTLEFT(255, 4)", write={ "snowflake": "SELECT BITSHIFTLEFT(255, 4)", "duckdb": "SELECT CAST(255 AS INT128) << 4", }, ) self.validate_all( "SELECT BITSHIFTRIGHT(255, 4)", write={ "snowflake": "SELECT BITSHIFTRIGHT(255, 4)", "duckdb": "SELECT CAST(255 AS INT128) >> 4", }, ) self.validate_all( "SELECT BITSHIFTLEFT(X'002A'::BINARY, 1)", write={ "snowflake": "SELECT BITSHIFTLEFT(CAST(x'002A' AS BINARY), 1)", "duckdb": "SELECT CAST(CAST(CAST(UNHEX('002A') AS BLOB) AS BIT) << 1 AS BLOB)", }, ) self.validate_all( "SELECT BITSHIFTRIGHT(X'002A'::BINARY, 1)", write={ "snowflake": "SELECT BITSHIFTRIGHT(CAST(x'002A' AS BINARY), 1)", "duckdb": "SELECT CAST(CAST(CAST(UNHEX('002A') AS BLOB) AS BIT) >> 1 AS BLOB)", }, ) self.validate_all( "OCTET_LENGTH('A')", read={ "bigquery": "BYTE_LENGTH('A')", "snowflake": "OCTET_LENGTH('A')", }, ) self.validate_identity("CREATE TABLE t (id INT PRIMARY KEY AUTOINCREMENT)") self.validate_all( "SELECT HEX_DECODE_BINARY('65')", write={ "bigquery": "SELECT FROM_HEX('65')", "duckdb": "SELECT UNHEX('65')", "snowflake": "SELECT HEX_DECODE_BINARY('65')", }, ) self.validate_all( "DAYOFWEEKISO(foo)", read={ "snowflake": "DAYOFWEEKISO(foo)", "presto": "DAY_OF_WEEK(foo)", "trino": "DAY_OF_WEEK(foo)", }, write={ "duckdb": 
"ISODOW(foo)", }, ) self.validate_all( "DAYOFWEEKISO(foo)", read={ "presto": "DOW(foo)", "trino": "DOW(foo)", }, ) self.validate_all( "DAYOFYEAR(foo)", read={ "presto": "DOY(foo)", "trino": "DOY(foo)", }, write={ "snowflake": "DAYOFYEAR(foo)", }, ) self.validate_identity("TO_JSON(OBJECT_CONSTRUCT('name', 'Alice'))") with self.assertRaises(ParseError): parse_one( "SELECT id, PRIOR name AS parent_name, name FROM tree CONNECT BY NOCYCLE PRIOR id = parent_id", dialect="snowflake", ) self.validate_all( "SELECT CAST(1 AS DOUBLE), CAST(1 AS DOUBLE)", read={ "bigquery": "SELECT CAST(1 AS BIGDECIMAL), CAST(1 AS BIGNUMERIC)", }, write={ "snowflake": "SELECT CAST(1 AS DOUBLE), CAST(1 AS DOUBLE)", }, ) self.validate_all( "SELECT DATE_PART(WEEKISO, CAST('2013-12-25' AS DATE))", read={ "bigquery": "SELECT EXTRACT(ISOWEEK FROM CAST('2013-12-25' AS DATE))", "snowflake": "SELECT DATE_PART(WEEKISO, CAST('2013-12-25' AS DATE))", }, write={ "duckdb": "SELECT CAST(STRFTIME(CAST('2013-12-25' AS DATE), '%V') AS INT)", }, ) # DATE_PART/EXTRACT with specifiers not supported in DuckDB self.validate_all( "SELECT DATE_PART(YEAROFWEEK, CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEK, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06' AS DATE), '%G') AS INT)", }, ) self.validate_all( "SELECT DATE_PART(YEAROFWEEKISO, CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEKISO, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06' AS DATE), '%G') AS INT)", }, ) self.validate_all( "SELECT DATE_PART(NANOSECOND, CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMPNTZ))", write={ "snowflake": "SELECT DATE_PART(NANOSECOND, CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(STRFTIME(CAST(CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NS), '%n') AS BIGINT)", }, ) # TIMESTAMP_NTZ tests - using NTZ for consistent behavior across timezones 
self.validate_all( "SELECT EXTRACT(YEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(YEAR, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(YEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(QUARTER FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(QUARTER, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(QUARTER FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(MONTH FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(MONTH, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(MONTH FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(WEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(WEEK, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(WEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(WEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(WEEKISO, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06 11:45:00' AS TIMESTAMP), '%V') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(DAY FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(DAY, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(DAY FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFMONTH FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(DAY, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(DAY FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFWEEK FROM CAST('2026-01-06 11:45:00' AS 
TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(DAYOFWEEK, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(DAYOFWEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFWEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(DAYOFWEEKISO, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(ISODOW FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFYEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(DAYOFYEAR, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(DAYOFYEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(YEAROFWEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEK, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06 11:45:00' AS TIMESTAMP), '%G') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(YEAROFWEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEKISO, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06 11:45:00' AS TIMESTAMP), '%G') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(HOUR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(HOUR, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(HOUR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(MINUTE FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(MINUTE, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(MINUTE FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(SECOND FROM CAST('2026-01-06 11:45:00' AS 
TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(SECOND, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EXTRACT(SECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(NANOSECOND FROM CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(NANOSECOND, CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(STRFTIME(CAST(CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMP) AS TIMESTAMP_NS), '%n') AS BIGINT)", }, ) self.validate_all( "SELECT EXTRACT(EPOCH_SECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(EPOCH_SECOND, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT CAST(EPOCH(CAST('2026-01-06 11:45:00' AS TIMESTAMP)) AS BIGINT)", }, ) self.validate_all( "SELECT EXTRACT(EPOCH_MILLISECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(EPOCH_MILLISECOND, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EPOCH_MS(CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(EPOCH_MICROSECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(EPOCH_MICROSECOND, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EPOCH_US(CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) self.validate_all( "SELECT EXTRACT(EPOCH_NANOSECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ))", write={ "snowflake": "SELECT DATE_PART(EPOCH_NANOSECOND, CAST('2026-01-06 11:45:00' AS TIMESTAMPNTZ))", "duckdb": "SELECT EPOCH_NS(CAST('2026-01-06 11:45:00' AS TIMESTAMP))", }, ) # EXTRACT from DATE - exhaustive tests self.validate_all( "SELECT EXTRACT(YEAR FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(YEAR, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(YEAR FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(QUARTER 
FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(QUARTER, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(QUARTER FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(MONTH FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(MONTH, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(MONTH FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(WEEK FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(WEEK, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(WEEK FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(WEEKISO FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(WEEKISO, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06' AS DATE), '%V') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(DAY FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(DAY, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(DAY FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFMONTH FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(DAY, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(DAY FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFWEEK FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(DAYOFWEEK, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(DAYOFWEEK FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFWEEKISO FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(DAYOFWEEKISO, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(ISODOW FROM CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(DAYOFYEAR FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(DAYOFYEAR, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT EXTRACT(DAYOFYEAR FROM 
CAST('2026-01-06' AS DATE))", }, ) self.validate_all( "SELECT EXTRACT(YEAROFWEEK FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEK, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06' AS DATE), '%G') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(YEAROFWEEKISO FROM CAST('2026-01-06' AS DATE))", write={ "snowflake": "SELECT DATE_PART(YEAROFWEEKISO, CAST('2026-01-06' AS DATE))", "duckdb": "SELECT CAST(STRFTIME(CAST('2026-01-06' AS DATE), '%G') AS INT)", }, ) self.validate_all( "SELECT EXTRACT(HOUR FROM CAST('11:45:00.123456789' AS TIME))", write={ "snowflake": "SELECT DATE_PART(HOUR, CAST('11:45:00.123456789' AS TIME))", "duckdb": "SELECT EXTRACT(HOUR FROM CAST('11:45:00.123456789' AS TIME))", }, ) self.validate_all( "SELECT EXTRACT(MINUTE FROM CAST('11:45:00.123456789' AS TIME))", write={ "snowflake": "SELECT DATE_PART(MINUTE, CAST('11:45:00.123456789' AS TIME))", "duckdb": "SELECT EXTRACT(MINUTE FROM CAST('11:45:00.123456789' AS TIME))", }, ) self.validate_all( "SELECT EXTRACT(SECOND FROM CAST('11:45:00.123456789' AS TIME))", write={ "snowflake": "SELECT DATE_PART(SECOND, CAST('11:45:00.123456789' AS TIME))", "duckdb": "SELECT EXTRACT(SECOND FROM CAST('11:45:00.123456789' AS TIME))", }, ) self.validate_all( "SELECT ST_MAKEPOINT(10, 20)", write={ "snowflake": "SELECT ST_MAKEPOINT(10, 20)", "starrocks": "SELECT ST_POINT(10, 20)", }, ) self.validate_all( "LAST_DAY(CAST('2023-04-15' AS DATE))", write={ "snowflake": "LAST_DAY(CAST('2023-04-15' AS DATE))", "duckdb": "LAST_DAY(CAST('2023-04-15' AS DATE))", }, ) self.validate_all( "LAST_DAY(CAST('2023-04-15' AS DATE), MONTH)", write={ "snowflake": "LAST_DAY(CAST('2023-04-15' AS DATE), MONTH)", "duckdb": "LAST_DAY(CAST('2023-04-15' AS DATE))", }, ) self.validate_all( "LAST_DAY(CAST('2024-06-15' AS DATE), YEAR)", write={ "snowflake": "LAST_DAY(CAST('2024-06-15' AS DATE), YEAR)", "duckdb": "MAKE_DATE(EXTRACT(YEAR FROM CAST('2024-06-15' AS DATE)), 12, 31)", }, ) 
self.validate_all( "LAST_DAY(CAST('2024-01-15' AS DATE), QUARTER)", write={ "snowflake": "LAST_DAY(CAST('2024-01-15' AS DATE), QUARTER)", "duckdb": "LAST_DAY(MAKE_DATE(EXTRACT(YEAR FROM CAST('2024-01-15' AS DATE)), EXTRACT(QUARTER FROM CAST('2024-01-15' AS DATE)) * 3, 1))", }, ) self.validate_all( "LAST_DAY(CAST('2025-12-15' AS DATE), WEEK)", write={ "snowflake": "LAST_DAY(CAST('2025-12-15' AS DATE), WEEK)", "duckdb": "CAST(CAST('2025-12-15' AS DATE) + INTERVAL ((7 - EXTRACT(DAYOFWEEK FROM CAST('2025-12-15' AS DATE))) % 7) DAY AS DATE)", }, ) self.validate_all( "SELECT ST_DISTANCE(a, b)", write={ "snowflake": "SELECT ST_DISTANCE(a, b)", "starrocks": "SELECT ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))", }, ) self.validate_all( "SELECT DATE_PART(DAYOFWEEKISO, foo)", read={ "snowflake": "SELECT DATE_PART(WEEKDAY_ISO, foo)", }, write={ "snowflake": "SELECT DATE_PART(DAYOFWEEKISO, foo)", "duckdb": "SELECT EXTRACT(ISODOW FROM foo)", }, ) self.validate_all( "SELECT DATE_PART(DAYOFWEEK_ISO, foo)", write={ "snowflake": "SELECT DATE_PART(DAYOFWEEKISO, foo)", "duckdb": "SELECT EXTRACT(ISODOW FROM foo)", }, ) self.validate_identity("ALTER TABLE foo ADD col1 VARCHAR(512), col2 VARCHAR(512)") self.validate_identity( "ALTER TABLE foo ADD col1 VARCHAR NOT NULL TAG (key1='value_1'), col2 VARCHAR NOT NULL TAG (key2='value_2')" ) self.validate_identity("ALTER TABLE foo ADD IF NOT EXISTS col1 INT, col2 INT") self.validate_identity("ALTER TABLE foo ADD IF NOT EXISTS col1 INT, IF NOT EXISTS col2 INT") self.validate_identity("ALTER TABLE foo ADD col1 INT, IF NOT EXISTS col2 INT") self.validate_identity("ALTER TABLE IF EXISTS foo ADD IF NOT EXISTS col1 INT") # ADD_MONTHS - Basic integer months with type preservation self.validate_all( "SELECT ADD_MONTHS('2023-01-31', 1)", write={ "duckdb": "SELECT CASE WHEN LAST_DAY(CAST('2023-01-31' AS TIMESTAMP)) = CAST('2023-01-31' AS TIMESTAMP) THEN LAST_DAY(CAST('2023-01-31' AS TIMESTAMP) + INTERVAL 1 MONTH) ELSE CAST('2023-01-31' AS 
TIMESTAMP) + INTERVAL 1 MONTH END", "snowflake": "SELECT ADD_MONTHS('2023-01-31', 1)", }, ) self.validate_all( "SELECT ADD_MONTHS('2023-01-31'::date, 1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2023-01-31' AS DATE)) = CAST('2023-01-31' AS DATE) THEN LAST_DAY(CAST('2023-01-31' AS DATE) + INTERVAL 1 MONTH) ELSE CAST('2023-01-31' AS DATE) + INTERVAL 1 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2023-01-31' AS DATE), 1)", }, ) self.validate_all( "SELECT ADD_MONTHS('2023-01-31'::timestamptz, 1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2023-01-31' AS TIMESTAMPTZ)) = CAST('2023-01-31' AS TIMESTAMPTZ) THEN LAST_DAY(CAST('2023-01-31' AS TIMESTAMPTZ) + INTERVAL 1 MONTH) ELSE CAST('2023-01-31' AS TIMESTAMPTZ) + INTERVAL 1 MONTH END AS TIMESTAMPTZ)", "snowflake": "SELECT ADD_MONTHS(CAST('2023-01-31' AS TIMESTAMPTZ), 1)", }, ) # ADD_MONTHS - Float month values (rounded to integer) self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, 2.7)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN LAST_DAY(CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(2.7) AS INT))) ELSE CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(2.7) AS INT)) END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), 2.7)", }, ) self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, -2.3)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN LAST_DAY(CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(-2.3) AS INT))) ELSE CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(-2.3) AS INT)) END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), -2.3)", }, ) # ADD_MONTHS - Decimal month values (rounded to integer) self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, 3.2::DECIMAL(10,2))", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN 
LAST_DAY(CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(CAST(3.2 AS DECIMAL(10, 2))) AS INT))) ELSE CAST('2016-05-15' AS DATE) + TO_MONTHS(CAST(ROUND(CAST(3.2 AS DECIMAL(10, 2))) AS INT)) END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), CAST(3.2 AS DECIMAL(10, 2)))", }, ) # ADD_MONTHS - End-of-month preservation (Snowflake semantic) self.validate_all( "SELECT ADD_MONTHS('2016-02-29'::DATE, 1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-02-29' AS DATE)) = CAST('2016-02-29' AS DATE) THEN LAST_DAY(CAST('2016-02-29' AS DATE) + INTERVAL 1 MONTH) ELSE CAST('2016-02-29' AS DATE) + INTERVAL 1 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-02-29' AS DATE), 1)", }, ) self.validate_all( "SELECT ADD_MONTHS('2016-05-31'::DATE, 1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-31' AS DATE)) = CAST('2016-05-31' AS DATE) THEN LAST_DAY(CAST('2016-05-31' AS DATE) + INTERVAL 1 MONTH) ELSE CAST('2016-05-31' AS DATE) + INTERVAL 1 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-31' AS DATE), 1)", }, ) self.validate_all( "SELECT ADD_MONTHS('2016-05-31'::DATE, -1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-31' AS DATE)) = CAST('2016-05-31' AS DATE) THEN LAST_DAY(CAST('2016-05-31' AS DATE) + INTERVAL (-1) MONTH) ELSE CAST('2016-05-31' AS DATE) + INTERVAL (-1) MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-31' AS DATE), -1)", }, ) # ADD_MONTHS - Mid-month dates (end-of-month logic should not trigger) self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, 1)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN LAST_DAY(CAST('2016-05-15' AS DATE) + INTERVAL 1 MONTH) ELSE CAST('2016-05-15' AS DATE) + INTERVAL 1 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), 1)", }, ) # ADD_MONTHS - NULL handling self.validate_all( "SELECT ADD_MONTHS(NULL::DATE, 2)", 
write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST(NULL AS DATE)) = CAST(NULL AS DATE) THEN LAST_DAY(CAST(NULL AS DATE) + INTERVAL 2 MONTH) ELSE CAST(NULL AS DATE) + INTERVAL 2 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST(NULL AS DATE), 2)", }, ) self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, NULL)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN LAST_DAY(CAST('2016-05-15' AS DATE) + INTERVAL (NULL) MONTH) ELSE CAST('2016-05-15' AS DATE) + INTERVAL (NULL) MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), NULL)", }, ) # ADD_MONTHS - Zero months self.validate_all( "SELECT ADD_MONTHS('2016-05-15'::DATE, 0)", write={ "duckdb": "SELECT CAST(CASE WHEN LAST_DAY(CAST('2016-05-15' AS DATE)) = CAST('2016-05-15' AS DATE) THEN LAST_DAY(CAST('2016-05-15' AS DATE) + INTERVAL 0 MONTH) ELSE CAST('2016-05-15' AS DATE) + INTERVAL 0 MONTH END AS DATE)", "snowflake": "SELECT ADD_MONTHS(CAST('2016-05-15' AS DATE), 0)", }, ) self.validate_identity("SELECT HOUR(CAST('08:50:57' AS TIME))") self.validate_identity("SELECT MINUTE(CAST('08:50:57' AS TIME))") self.validate_identity("SELECT SECOND(CAST('08:50:57' AS TIME))") self.validate_identity("SELECT HOUR(CAST('2024-05-09 08:50:57' AS TIMESTAMP))") self.validate_identity("SELECT MONTHNAME(CAST('2024-05-09' AS DATE))") self.validate_all( "SELECT DAYNAME(TO_DATE('2025-01-15'))", write={ "duckdb": "SELECT STRFTIME(CAST('2025-01-15' AS DATE), '%a')", "snowflake": "SELECT DAYNAME(CAST('2025-01-15' AS DATE))", }, ) self.validate_all( "SELECT DAYNAME(TO_TIMESTAMP('2025-02-28 10:30:45'))", write={ "duckdb": "SELECT STRFTIME(CAST('2025-02-28 10:30:45' AS TIMESTAMP), '%a')", "snowflake": "SELECT DAYNAME(CAST('2025-02-28 10:30:45' AS TIMESTAMP))", }, ) self.validate_all( "SELECT MONTHNAME(TO_DATE('2025-01-15'))", write={ "duckdb": "SELECT STRFTIME(CAST('2025-01-15' AS DATE), '%b')", "snowflake": "SELECT 
MONTHNAME(CAST('2025-01-15' AS DATE))", }, ) self.validate_all( "SELECT MONTHNAME(TO_TIMESTAMP('2025-02-28 10:30:45'))", write={ "duckdb": "SELECT STRFTIME(CAST('2025-02-28 10:30:45' AS TIMESTAMP), '%b')", "snowflake": "SELECT MONTHNAME(CAST('2025-02-28 10:30:45' AS TIMESTAMP))", }, ) self.validate_identity("SELECT PREVIOUS_DAY(CAST('2024-05-09' AS DATE), 'MONDAY')") self.validate_identity("SELECT TIME_FROM_PARTS(14, 30, 45)") self.validate_identity("SELECT TIME_FROM_PARTS(14, 30, 45, 123)") self.validate_identity( "SELECT MONTHS_BETWEEN(CAST('2019-03-15' AS DATE), CAST('2019-02-15' AS DATE))" ) self.validate_identity( "SELECT MONTHS_BETWEEN(CAST('2019-03-01 02:00:00' AS TIMESTAMP), CAST('2019-02-15 01:00:00' AS TIMESTAMP))" ) self.validate_identity( "SELECT TIME_SLICE(CAST('2024-05-09 08:50:57.891' AS TIMESTAMP), 15, 'MINUTE')" ) self.validate_identity("SELECT TIME_SLICE(CAST('2024-05-09' AS DATE), 1, 'DAY')") self.validate_identity( "SELECT TIME_SLICE(CAST('2024-05-09 08:50:57.891' AS TIMESTAMP), 1, 'HOUR', 'start')" ) # TIME_SLICE transpilation to DuckDB self.validate_all( "SELECT TIME_SLICE(TIMESTAMP '2024-03-15 14:37:42', 1, 'HOUR')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15 14:37:42' AS TIMESTAMP), 1, 'HOUR')", "duckdb": "SELECT TIME_BUCKET(INTERVAL 1 HOUR, CAST('2024-03-15 14:37:42' AS TIMESTAMP))", }, ) self.validate_all( "SELECT TIME_SLICE(TIMESTAMP '2024-03-15 14:37:42', 1, 'HOUR', 'END')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15 14:37:42' AS TIMESTAMP), 1, 'HOUR', 'END')", "duckdb": "SELECT TIME_BUCKET(INTERVAL 1 HOUR, CAST('2024-03-15 14:37:42' AS TIMESTAMP)) + INTERVAL 1 HOUR", }, ) self.validate_all( "SELECT TIME_SLICE(DATE '2024-03-15', 1, 'DAY')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15' AS DATE), 1, 'DAY')", "duckdb": "SELECT TIME_BUCKET(INTERVAL 1 DAY, CAST('2024-03-15' AS DATE))", }, ) self.validate_all( "SELECT TIME_SLICE(DATE '2024-03-15', 1, 'DAY', 'END')", write={ "snowflake": "SELECT 
TIME_SLICE(CAST('2024-03-15' AS DATE), 1, 'DAY', 'END')", "duckdb": "SELECT CAST(TIME_BUCKET(INTERVAL 1 DAY, CAST('2024-03-15' AS DATE)) + INTERVAL 1 DAY AS DATE)", }, ) self.validate_all( "SELECT TIME_SLICE(TIMESTAMP '2024-03-15 14:37:42', 15, 'MINUTE')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15 14:37:42' AS TIMESTAMP), 15, 'MINUTE')", "duckdb": "SELECT TIME_BUCKET(INTERVAL 15 MINUTE, CAST('2024-03-15 14:37:42' AS TIMESTAMP))", }, ) self.validate_all( "SELECT TIME_SLICE(TIMESTAMP '2024-03-15 14:37:42', 1, 'QUARTER')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15 14:37:42' AS TIMESTAMP), 1, 'QUARTER')", "duckdb": "SELECT TIME_BUCKET(INTERVAL 1 QUARTER, CAST('2024-03-15 14:37:42' AS TIMESTAMP))", }, ) self.validate_all( "SELECT TIME_SLICE(DATE '2024-03-15', 1, 'WEEK', 'END')", write={ "snowflake": "SELECT TIME_SLICE(CAST('2024-03-15' AS DATE), 1, 'WEEK', 'END')", "duckdb": "SELECT CAST(TIME_BUCKET(INTERVAL 1 WEEK, CAST('2024-03-15' AS DATE)) + INTERVAL 1 WEEK AS DATE)", }, ) for join in ("FULL OUTER", "LEFT", "RIGHT", "LEFT OUTER", "RIGHT OUTER", "INNER"): with self.subTest(f"Testing transpilation of {join} from Snowflake to DuckDB"): self.validate_all( f"SELECT * FROM t1 {join} JOIN t2", read={ "snowflake": f"SELECT * FROM t1 {join} JOIN t2", }, write={ "duckdb": "SELECT * FROM t1, t2", }, ) self.validate_identity( "SELECT * EXCLUDE foo RENAME bar AS baz FROM tbl", "SELECT * EXCLUDE (foo) RENAME (bar AS baz) FROM tbl", ) self.validate_all( "WITH foo AS (SELECT [1] AS arr_1) SELECT (SELECT unnested_arr FROM TABLE(FLATTEN(INPUT => arr_1)) AS _t0(seq, key, path, index, unnested_arr, this)) AS f FROM foo", read={ "bigquery": "WITH foo AS (SELECT [1] AS arr_1) SELECT (SELECT unnested_arr FROM UNNEST(arr_1) AS unnested_arr) AS f FROM foo", }, ) self.validate_identity("SELECT LIKE(col, 'pattern')", "SELECT col LIKE 'pattern'") self.validate_identity("SELECT ILIKE(col, 'pattern')", "SELECT col ILIKE 'pattern'") self.validate_identity( "SELECT 
LIKE(col, 'pattern', '\\\\')", "SELECT col LIKE 'pattern' ESCAPE '\\\\'" ) self.validate_identity( "SELECT ILIKE(col, 'pattern', '\\\\')", "SELECT col ILIKE 'pattern' ESCAPE '\\\\'" ) self.validate_identity( "SELECT LIKE(col, 'pattern', '!')", "SELECT col LIKE 'pattern' ESCAPE '!'" ) self.validate_identity( "SELECT ILIKE(col, 'pattern', '!')", "SELECT col ILIKE 'pattern' ESCAPE '!'" ) expr = self.validate_identity("SELECT BASE64_ENCODE('Hello World')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "SELECT TO_BASE64(ENCODE('Hello World'))") self.validate_all( "SELECT BASE64_ENCODE(x)", write={ "duckdb": "SELECT TO_BASE64(x)", "snowflake": "SELECT BASE64_ENCODE(x)", }, ) self.validate_all( "SELECT BASE64_ENCODE(x, 76)", write={ "duckdb": "SELECT RTRIM(REGEXP_REPLACE(TO_BASE64(x), '(.{76})', '\\1' || CHR(10), 'g'), CHR(10))", "snowflake": "SELECT BASE64_ENCODE(x, 76)", }, ) self.validate_all( "SELECT BASE64_ENCODE(x, 76, '+/=')", write={ "duckdb": "SELECT RTRIM(REGEXP_REPLACE(TO_BASE64(x), '(.{76})', '\\1' || CHR(10), 'g'), CHR(10))", "snowflake": "SELECT BASE64_ENCODE(x, 76, '+/=')", }, ) self.validate_all( "SELECT BASE64_DECODE_STRING('U25vd2ZsYWtl')", write={ "snowflake": "SELECT BASE64_DECODE_STRING('U25vd2ZsYWtl')", "duckdb": "SELECT DECODE(FROM_BASE64('U25vd2ZsYWtl'))", }, ) self.validate_all( "SELECT BASE64_DECODE_STRING('U25vd2ZsYWtl', '-_+')", write={ "snowflake": "SELECT BASE64_DECODE_STRING('U25vd2ZsYWtl', '-_+')", "duckdb": "SELECT DECODE(FROM_BASE64(REPLACE(REPLACE(REPLACE('U25vd2ZsYWtl', '-', '+'), '_', '/'), '+', '=')))", }, ) self.validate_all( "SELECT BASE64_DECODE_BINARY(x)", write={ "snowflake": "SELECT BASE64_DECODE_BINARY(x)", "duckdb": "SELECT FROM_BASE64(x)", }, ) self.validate_all( "SELECT BASE64_DECODE_BINARY(x, '-_+')", write={ "snowflake": "SELECT BASE64_DECODE_BINARY(x, '-_+')", "duckdb": "SELECT FROM_BASE64(REPLACE(REPLACE(REPLACE(x, '-', '+'), '_', '/'), '+', '='))", }, ) 
self.validate_identity("SELECT TRY_HEX_DECODE_BINARY('48656C6C6F')") self.validate_identity("SELECT TRY_HEX_DECODE_STRING('48656C6C6F')") self.validate_all( "SELECT ARRAY_CONTAINS(CAST('1' AS VARIANT), ['1'])", read={ "presto": "SELECT CONTAINS(ARRAY['1'], '1')", "snowflake": "SELECT ARRAY_CONTAINS(CAST('1' AS VARIANT), ['1'])", }, ) self.validate_all( "SELECT ARRAY_CONTAINS(CAST(CAST('2020-10-10' AS DATE) AS VARIANT), [CAST('2020-10-10' AS DATE)])", read={ "presto": "SELECT CONTAINS(ARRAY[DATE '2020-10-10'], DATE '2020-10-10')", "snowflake": "SELECT ARRAY_CONTAINS(CAST(CAST('2020-10-10' AS DATE) AS VARIANT), [CAST('2020-10-10' AS DATE)])", }, ) self.validate_identity("SELECT ARRAY_CONTAINS(1, [1])") self.validate_all( "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", write={ "snowflake": "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])", "duckdb": "SELECT CASE WHEN x IS NULL THEN NULLIF(ARRAY_LENGTH([1, NULL, 3]) <> LIST_COUNT([1, NULL, 3]), FALSE) ELSE ARRAY_CONTAINS([1, NULL, 3], x) END", }, ) self.validate_identity("SELECT ARRAY_DISTINCT(['A', 'B', 'A'])") self.validate_all( "SELECT ARRAY_DISTINCT(['A', NULL, 'B', NULL])", write={ "snowflake": "SELECT ARRAY_DISTINCT(['A', NULL, 'B', NULL])", "duckdb": "SELECT CASE WHEN ARRAY_LENGTH(['A', NULL, 'B', NULL]) <> LIST_COUNT(['A', NULL, 'B', NULL]) THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(['A', NULL, 'B', NULL], _u -> NOT _u IS NULL)), NULL) ELSE LIST_DISTINCT(['A', NULL, 'B', NULL]) END", }, ) self.validate_all( "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])", write={ "snowflake": "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])", "duckdb": "SELECT CASE WHEN ARRAY_LENGTH([1, 2, 2, 3, 1]) <> LIST_COUNT([1, 2, 2, 3, 1]) THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER([1, 2, 2, 3, 1], _u -> NOT _u IS NULL)), NULL) ELSE LIST_DISTINCT([1, 2, 2, 3, 1]) END", }, ) self.validate_all( "SELECT x'ABCD'", write={ "snowflake": "SELECT x'ABCD'", "duckdb": "SELECT UNHEX('ABCD')", }, ) self.validate_all( "SET a = 1", write={ "snowflake": "SET a = 1", "bigquery": "SET 
a = 1", "duckdb": "SET VARIABLE a = 1", }, ) self.validate_all( "CAST(6.43 AS FLOAT)", write={ "snowflake": "CAST(6.43 AS DOUBLE)", "duckdb": "CAST(6.43 AS DOUBLE)", }, ) self.validate_all( "UNIFORM(1, 10, RANDOM(5))", write={ "snowflake": "UNIFORM(1, 10, RANDOM(5))", "databricks": "UNIFORM(1, 10, 5)", "duckdb": "CAST(FLOOR(1 + RANDOM() * (10 - 1 + 1)) AS BIGINT)", }, ) self.validate_all( "UNIFORM(1, 10, RANDOM())", write={ "snowflake": "UNIFORM(1, 10, RANDOM())", "databricks": "UNIFORM(1, 10)", "duckdb": "CAST(FLOOR(1 + RANDOM() * (10 - 1 + 1)) AS BIGINT)", }, ) self.validate_all( "UNIFORM(1, 10, 5)", write={ "snowflake": "UNIFORM(1, 10, 5)", "databricks": "UNIFORM(1, 10, 5)", "duckdb": "CAST(FLOOR(1 + (ABS(HASH(5)) % 1000000) / 1000000.0 * (10 - 1 + 1)) AS BIGINT)", }, ) self.validate_all( "NORMAL(0, 1, 42)", write={ "snowflake": "NORMAL(0, 1, 42)", "duckdb": "0 + (1 * SQRT(-2 * LN(GREATEST((ABS(HASH(42)) % 1000000) / 1000000.0, 1e-10))) * COS(2 * PI() * (ABS(HASH(42 + 1)) % 1000000) / 1000000.0))", }, ) self.validate_all( "NORMAL(10.5, 2.5, RANDOM())", write={ "snowflake": "NORMAL(10.5, 2.5, RANDOM())", "duckdb": "10.5 + (2.5 * SQRT(-2 * LN(GREATEST(RANDOM(), 1e-10))) * COS(2 * PI() * RANDOM()))", }, ) self.validate_all( "NORMAL(10.5, 2.5, RANDOM(5))", write={ "snowflake": "NORMAL(10.5, 2.5, RANDOM(5))", "duckdb": "10.5 + (2.5 * SQRT(-2 * LN(GREATEST((ABS(HASH(5)) % 1000000) / 1000000.0, 1e-10))) * COS(2 * PI() * (ABS(HASH(5 + 1)) % 1000000) / 1000000.0))", }, ) self.validate_all( "SYSDATE()", write={ "snowflake": "SYSDATE()", "duckdb": "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'", }, ) self.validate_identity("SYSTIMESTAMP()", "CURRENT_TIMESTAMP()") self.validate_identity("GETDATE()", "CURRENT_TIMESTAMP()") self.validate_identity("LOCALTIMESTAMP", "CURRENT_TIMESTAMP") self.validate_identity("LOCALTIMESTAMP()", "CURRENT_TIMESTAMP()") self.validate_identity("LOCALTIMESTAMP(3)", "CURRENT_TIMESTAMP(3)") self.validate_all( "SELECT CURRENT_TIME(4)", write={ "snowflake": 
"SELECT CURRENT_TIME(4)", "duckdb": "SELECT LOCALTIME", }, ) self.validate_all( "SELECT CURRENT_TIME", write={ "snowflake": "SELECT CURRENT_TIME", "duckdb": "SELECT LOCALTIME", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2026, 1, 100)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2026, 1, 100)", "duckdb": "SELECT CAST(MAKE_DATE(2026, 1, 1) + INTERVAL (1 - 1) MONTH + INTERVAL (100 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2026, 14, 32)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2026, 14, 32)", "duckdb": "SELECT CAST(MAKE_DATE(2026, 1, 1) + INTERVAL (14 - 1) MONTH + INTERVAL (32 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2026, 0, 0)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2026, 0, 0)", "duckdb": "SELECT CAST(MAKE_DATE(2026, 1, 1) + INTERVAL (0 - 1) MONTH + INTERVAL (0 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2026, -14, -32)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2026, -14, -32)", "duckdb": "SELECT CAST(MAKE_DATE(2026, 1, 1) + INTERVAL (-14 - 1) MONTH + INTERVAL (-32 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2024, 1, 60)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2024, 1, 60)", "duckdb": "SELECT CAST(MAKE_DATE(2024, 1, 1) + INTERVAL (1 - 1) MONTH + INTERVAL (60 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2026, NULL, 100)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2026, NULL, 100)", "duckdb": "SELECT CAST(MAKE_DATE(2026, 1, 1) + INTERVAL (NULL - 1) MONTH + INTERVAL (100 - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(2024 + 2, 1 + 2, 2 + 3)", write={ "snowflake": "SELECT DATE_FROM_PARTS(2024 + 2, 1 + 2, 2 + 3)", "duckdb": "SELECT CAST(MAKE_DATE(2024 + 2, 1, 1) + INTERVAL ((1 + 2) - 1) MONTH + INTERVAL ((2 + 3) - 1) DAY AS DATE)", }, ) self.validate_all( "SELECT DATE_FROM_PARTS(year, month, date)", write={ "snowflake": "SELECT DATE_FROM_PARTS(year, month, date)", "duckdb": "SELECT 
CAST(MAKE_DATE(year, 1, 1) + INTERVAL (month - 1) MONTH + INTERVAL (date - 1) DAY AS DATE)", }, ) self.validate_all( "EQUAL_NULL(a, b)", write={ "snowflake": "EQUAL_NULL(a, b)", "duckdb": "a IS NOT DISTINCT FROM b", }, ) self.validate_all( "SELECT CURRENT_VERSION()", write={ "snowflake": "SELECT CURRENT_VERSION()", "databricks": "SELECT CURRENT_VERSION()", "spark": "SELECT VERSION()", "mysql": "SELECT VERSION()", "singlestore": "SELECT VERSION()", "starrocks": "SELECT CURRENT_VERSION()", "postgres": "SELECT VERSION()", "redshift": "SELECT VERSION()", "clickhouse": "SELECT VERSION()", "trino": "SELECT VERSION()", "duckdb": "SELECT VERSION()", }, ) self.validate_identity("SELECT CURRENT_DATABASE()") self.validate_identity("SELECT CURRENT_SCHEMA()") self.validate_all( "SELECT 1 WHERE 'abc' ILIKE ANY('%a%')", write={ "snowflake": "SELECT 1 WHERE 'abc' ILIKE ANY('%a%')", "duckdb": "SELECT 1 WHERE 'abc' ILIKE '%a%'", }, ) self.validate_all( "SELECT 1 WHERE 'abc' LIKE ALL ('%a%')", write={ "snowflake": "SELECT 1 WHERE 'abc' LIKE ALL ('%a%')", "duckdb": "SELECT 1 WHERE 'abc' LIKE '%a%'", }, ) def test_null_treatment(self): self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", write={ "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2) AS MY_ALIAS FROM TABLE1" }, ) self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1 RESPECT NULLS) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", write={ "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) RESPECT NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2) AS MY_ALIAS FROM TABLE1" }, ) self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1) RESPECT NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS 
MY_ALIAS FROM TABLE1", write={ "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) RESPECT NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2) AS MY_ALIAS FROM TABLE1" }, ) self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1 IGNORE NULLS) OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", write={ "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) IGNORE NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2) AS MY_ALIAS FROM TABLE1" }, ) self.validate_all( r"SELECT FIRST_VALUE(TABLE1.COLUMN1) IGNORE NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS MY_ALIAS FROM TABLE1", write={ "snowflake": r"SELECT FIRST_VALUE(TABLE1.COLUMN1) IGNORE NULLS OVER (PARTITION BY RANDOM_COLUMN1, RANDOM_COLUMN2) AS MY_ALIAS FROM TABLE1" }, ) self.validate_all( "SELECT * FROM foo WHERE 'str' IN (SELECT value FROM TABLE(FLATTEN(INPUT => vals)) AS _u(seq, key, path, index, value, this))", read={ "bigquery": "SELECT * FROM foo WHERE 'str' IN UNNEST(vals)", }, write={ "snowflake": "SELECT * FROM foo WHERE 'str' IN (SELECT value FROM TABLE(FLATTEN(INPUT => vals)) AS _u(seq, key, path, index, value, this))", }, ) def test_staged_files(self): # Ensure we don't treat staged file paths as identifiers (i.e. 
they're not normalized) staged_file = parse_one("SELECT * FROM @foo", read="snowflake") self.assertEqual( normalize_identifiers(staged_file, dialect="snowflake").sql(dialect="snowflake"), staged_file.sql(dialect="snowflake"), ) self.validate_identity('SELECT * FROM @"mystage"') self.validate_identity('SELECT * FROM @"myschema"."mystage"/file.gz') self.validate_identity('SELECT * FROM @"my_DB"."schEMA1".mystage/file.gz') self.validate_identity("SELECT metadata$filename FROM @s1/") self.validate_identity("SELECT * FROM @~") self.validate_identity("SELECT * FROM @~/some/path/to/file.csv") self.validate_identity("SELECT * FROM @mystage") self.validate_identity("SELECT * FROM '@mystage'") self.validate_identity("SELECT * FROM @namespace.mystage/path/to/file.json.gz") self.validate_identity("SELECT * FROM @namespace.%table_name/path/to/file.json.gz") self.validate_identity("SELECT * FROM '@external/location' (FILE_FORMAT => 'path.to.csv')") self.validate_identity("PUT file:///dir/tmp.csv @%table", check_command_warning=True) self.validate_identity("SELECT * FROM (SELECT a FROM @foo)") self.validate_identity( "SELECT * FROM (SELECT * FROM '@external/location' (FILE_FORMAT => 'path.to.csv'))" ) self.validate_identity( "SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla" ) self.validate_identity( "SELECT t.$1, t.$2 FROM @mystage1 (FILE_FORMAT => 'myformat', PATTERN => '.*data.*[.]csv.gz') AS t" ) self.validate_identity( "SELECT parse_json($1):a.b FROM @mystage2/data1.json.gz", "SELECT GET_PATH(PARSE_JSON($1), 'a.b') FROM @mystage2/data1.json.gz", ) self.validate_identity( "SELECT * FROM @mystage t (c1)", "SELECT * FROM @mystage AS t(c1)", ) self.validate_identity( "SELECT * FROM @foo/bar (PATTERN => 'test', FILE_FORMAT => ds_sandbox.test.my_csv_format) AS bla", "SELECT * FROM @foo/bar (FILE_FORMAT => ds_sandbox.test.my_csv_format, PATTERN => 'test') AS bla", ) self.validate_identity( "SELECT * FROM 
@test.public.thing/location/somefile.csv( FILE_FORMAT => 'fmt' )", "SELECT * FROM @test.public.thing/location/somefile.csv (FILE_FORMAT => 'fmt')", ) def test_sample(self): self.validate_identity("SELECT * FROM testtable TABLESAMPLE BERNOULLI (20.3)") self.validate_identity("SELECT * FROM testtable TABLESAMPLE SYSTEM (3) SEED (82)") self.validate_identity( "SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) AS x TABLESAMPLE BERNOULLI (0.1)" ) self.validate_identity( "SELECT i, j FROM table1 AS t1 INNER JOIN table2 AS t2 TABLESAMPLE BERNOULLI (50) WHERE t2.j = t1.i" ) self.validate_identity( "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE BERNOULLI (1)" ) self.validate_identity( "SELECT * FROM testtable TABLESAMPLE (10 ROWS)", "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10 ROWS)", ) self.validate_identity( "SELECT * FROM testtable TABLESAMPLE (100)", "SELECT * FROM testtable TABLESAMPLE BERNOULLI (100)", ) self.validate_identity( "SELECT * FROM testtable SAMPLE (10)", "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)", ) self.validate_identity( "SELECT * FROM testtable SAMPLE ROW (0)", "SELECT * FROM testtable TABLESAMPLE ROW (0)", ) self.validate_identity( "SELECT a FROM test SAMPLE BLOCK (0.5) SEED (42)", "SELECT a FROM test TABLESAMPLE BLOCK (0.5) SEED (42)", ) self.validate_identity( "SELECT user_id, value FROM table_name SAMPLE BERNOULLI ($s) SEED (0)", "SELECT user_id, value FROM table_name TABLESAMPLE BERNOULLI ($s) SEED (0)", ) self.validate_all( "SELECT * FROM example TABLESAMPLE BERNOULLI (3) SEED (82)", read={ "duckdb": "SELECT * FROM example TABLESAMPLE BERNOULLI (3 PERCENT) REPEATABLE (82)", }, write={ "databricks": "SELECT * FROM example TABLESAMPLE (3 PERCENT) REPEATABLE (82)", "duckdb": "SELECT * FROM example TABLESAMPLE BERNOULLI (3 PERCENT) REPEATABLE (82)", "snowflake": "SELECT * FROM example TABLESAMPLE BERNOULLI (3) SEED (82)", }, ) self.validate_all( "SELECT * FROM test AS _tmp TABLESAMPLE (5)", write={ "postgres": 
"SELECT * FROM test AS _tmp TABLESAMPLE BERNOULLI (5)", "snowflake": "SELECT * FROM test AS _tmp TABLESAMPLE BERNOULLI (5)", }, ) self.validate_all( """ SELECT i, j FROM table1 AS t1 SAMPLE (25) -- 25% of rows in table1 INNER JOIN table2 AS t2 SAMPLE (50) -- 50% of rows in table2 WHERE t2.j = t1.i""", write={ "snowflake": "SELECT i, j FROM table1 AS t1 TABLESAMPLE BERNOULLI (25) /* 25% of rows in table1 */ INNER JOIN table2 AS t2 TABLESAMPLE BERNOULLI (50) /* 50% of rows in table2 */ WHERE t2.j = t1.i", }, ) self.validate_all( "SELECT * FROM testtable SAMPLE BLOCK (0.012) REPEATABLE (99992)", write={ "snowflake": "SELECT * FROM testtable TABLESAMPLE BLOCK (0.012) SEED (99992)", }, ) self.validate_all( "SELECT * FROM (SELECT * FROM t1 join t2 on t1.a = t2.c) SAMPLE (1)", write={ "snowflake": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE BERNOULLI (1)", "spark": "SELECT * FROM (SELECT * FROM t1 JOIN t2 ON t1.a = t2.c) TABLESAMPLE (1 PERCENT)", }, ) self.validate_all( "TO_DOUBLE(expr)", write={ "snowflake": "TO_DOUBLE(expr)", "duckdb": "CAST(expr AS DOUBLE)", }, ) self.validate_all( "TO_DOUBLE(expr, fmt)", write={ "snowflake": "TO_DOUBLE(expr, fmt)", "duckdb": UnsupportedError, }, ) def test_timestamps(self): self.validate_identity("SELECT CAST('12:00:00' AS TIME)") self.validate_identity("SELECT DATE_PART(month, a)") self.validate_identity( "SELECT DATE_PART(year FROM CAST('2024-04-08' AS DATE))", "SELECT DATE_PART(year, CAST('2024-04-08' AS DATE))", ).expressions[0].assert_is(exp.Extract) self.validate_identity( "SELECT DATE_PART('month' FROM CAST('2024-04-08' AS DATE))", "SELECT DATE_PART('month', CAST('2024-04-08' AS DATE))", ).expressions[0].assert_is(exp.Extract) self.validate_identity( "SELECT DATE_PART(day FROM a)", "SELECT DATE_PART(day, a)" ).expressions[0].assert_is(exp.Extract) for data_type in ( "TIMESTAMP", "TIMESTAMPLTZ", "TIMESTAMPNTZ", ): self.validate_identity(f"CAST(a AS {data_type})") self.validate_identity("CAST(a AS 
TIMESTAMP_NTZ)", "CAST(a AS TIMESTAMPNTZ)") self.validate_identity("CAST(a AS TIMESTAMP_LTZ)", "CAST(a AS TIMESTAMPLTZ)") self.validate_all( "SELECT a::TIMESTAMP_LTZ(9)", write={ "snowflake": "SELECT CAST(a AS TIMESTAMPLTZ(9))", }, ) self.validate_all( "SELECT a::TIMESTAMPLTZ", write={ "snowflake": "SELECT CAST(a AS TIMESTAMPLTZ)", }, ) self.validate_all( "SELECT a::TIMESTAMP WITH LOCAL TIME ZONE", write={ "snowflake": "SELECT CAST(a AS TIMESTAMPLTZ)", }, ) self.validate_all( "SELECT EXTRACT('month', a)", write={ "snowflake": "SELECT DATE_PART('month', a)", }, ) self.validate_all( "SELECT DATE_PART('month', a)", write={ "snowflake": "SELECT DATE_PART('month', a)", }, ) self.validate_all( "SELECT DATE_PART(month, a::DATETIME)", write={ "snowflake": "SELECT DATE_PART(month, CAST(a AS DATETIME))", }, ) self.validate_all( "SELECT DATE_PART(epoch_second, foo) as ddate from table_name", write={ "snowflake": "SELECT DATE_PART(EPOCH_SECOND, foo) AS ddate FROM table_name", "duckdb": "SELECT CAST(EPOCH(foo) AS BIGINT) AS ddate FROM table_name", "presto": "SELECT TO_UNIXTIME(CAST(foo AS TIMESTAMP)) AS ddate FROM table_name", }, ) self.validate_all( "SELECT DATE_PART(epoch_milliseconds, foo) as ddate from table_name", write={ "snowflake": "SELECT DATE_PART(EPOCH_MILLISECOND, foo) AS ddate FROM table_name", "duckdb": "SELECT EPOCH_MS(foo) AS ddate FROM table_name", "presto": "SELECT TO_UNIXTIME(CAST(foo AS TIMESTAMP)) * 1000 AS ddate FROM table_name", }, ) self.validate_all( "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", read={ "snowflake": "TIMESTAMPADD(DAY, 5, CAST('2008-12-25' AS DATE))", }, write={ "bigquery": "DATE_ADD(CAST('2008-12-25' AS DATE), INTERVAL 5 DAY)", "snowflake": "DATEADD(DAY, 5, CAST('2008-12-25' AS DATE))", }, ) self.validate_identity( "DATEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))" ) self.validate_identity( "TIMEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))", "DATEDIFF(DAY, CAST('2007-12-25' AS DATE), 
CAST('2008-12-25' AS DATE))", ) self.validate_identity( "TIMESTAMPDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))", "DATEDIFF(DAY, CAST('2007-12-25' AS DATE), CAST('2008-12-25' AS DATE))", ) # Test DATEDIFF with WEEK unit - week boundary crossing self.validate_all( "DATEDIFF(WEEK, '2024-12-13', '2024-12-17')", write={ "duckdb": "DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-12-13' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-12-17' AS DATE)))", "snowflake": "DATEDIFF(WEEK, '2024-12-13', '2024-12-17')", }, ) self.validate_all( "DATEDIFF(WEEK, '2024-12-15', '2024-12-16')", write={ "duckdb": "DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST('2024-12-15' AS DATE)), DATE_TRUNC('WEEK', CAST('2024-12-16' AS DATE)))", "snowflake": "DATEDIFF(WEEK, '2024-12-15', '2024-12-16')", }, ) # Test DATEDIFF with other date parts - should not use DATE_TRUNC self.validate_all( "DATEDIFF(YEAR, '2020-01-15', '2023-06-20')", write={ "duckdb": "DATE_DIFF('YEAR', CAST('2020-01-15' AS DATE), CAST('2023-06-20' AS DATE))", "snowflake": "DATEDIFF(YEAR, '2020-01-15', '2023-06-20')", }, ) self.validate_all( "DATEDIFF(MONTH, '2020-01-15', '2023-06-20')", write={ "duckdb": "DATE_DIFF('MONTH', CAST('2020-01-15' AS DATE), CAST('2023-06-20' AS DATE))", "snowflake": "DATEDIFF(MONTH, '2020-01-15', '2023-06-20')", }, ) self.validate_all( "DATEDIFF(QUARTER, '2020-01-15', '2023-06-20')", write={ "duckdb": "DATE_DIFF('QUARTER', CAST('2020-01-15' AS DATE), CAST('2023-06-20' AS DATE))", "snowflake": "DATEDIFF(QUARTER, '2020-01-15', '2023-06-20')", }, ) # Test DATEDIFF with NANOSECOND - DuckDB uses EPOCH_NS since DATE_DIFF doesn't support NANOSECOND self.validate_all( "DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')", write={ "duckdb": "EPOCH_NS(CAST('2023-01-01 10:00:00.123456789' AS TIMESTAMP_NS)) - EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS))", "snowflake": "DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 
10:00:00.123456789')", }, ) # Test DATEDIFF with NANOSECOND on columns self.validate_all( "DATEDIFF(NANOSECOND, start_time, end_time)", write={ "duckdb": "EPOCH_NS(CAST(end_time AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start_time AS TIMESTAMP_NS))", "snowflake": "DATEDIFF(NANOSECOND, start_time, end_time)", }, ) # Test DATEADD with NANOSECOND - DuckDB uses MAKE_TIMESTAMP_NS since INTERVAL doesn't support NANOSECOND self.validate_all( "DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')", write={ "duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS)) + 123456789)", "snowflake": "DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')", }, ) # Test DATEADD with NANOSECOND on columns self.validate_all( "DATEADD(NANOSECOND, nano_offset, timestamp_col)", write={ "duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(timestamp_col AS TIMESTAMP_NS)) + nano_offset)", "snowflake": "DATEADD(NANOSECOND, nano_offset, timestamp_col)", }, ) # Test negative NANOSECOND values (subtraction) self.validate_all( "DATEADD(NANOSECOND, -123456789, '2023-01-01 10:00:00.500000000')", write={ "duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 10:00:00.500000000' AS TIMESTAMP_NS)) + -123456789)", "snowflake": "DATEADD(NANOSECOND, -123456789, '2023-01-01 10:00:00.500000000')", }, ) # Test TIMESTAMPDIFF with NANOSECOND - Snowflake parser converts to DATEDIFF self.validate_all( "TIMESTAMPDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')", write={ "duckdb": "EPOCH_NS(CAST('2023-01-01 10:00:00.123456789' AS TIMESTAMP_NS)) - EPOCH_NS(CAST('2023-01-01 10:00:00.000000000' AS TIMESTAMP_NS))", "snowflake": "DATEDIFF(NANOSECOND, '2023-01-01 10:00:00.000000000', '2023-01-01 10:00:00.123456789')", }, ) # Test TIMESTAMPADD with NANOSECOND - Snowflake parser converts to DATEADD self.validate_all( "TIMESTAMPADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')", write={ "duckdb": "MAKE_TIMESTAMP_NS(EPOCH_NS(CAST('2023-01-01 
10:00:00.000000000' AS TIMESTAMP_NS)) + 123456789)", "snowflake": "DATEADD(NANOSECOND, 123456789, '2023-01-01 10:00:00.000000000')", }, ) self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") self.validate_identity("DATEADD(y, 5, x)", "DATEADD(YEAR, 5, x)") self.validate_identity("DATE_PART(yyy, x)", "DATE_PART(YEAR, x)") self.validate_identity("DATE_TRUNC(yr, x)", "DATE_TRUNC('YEAR', x)") self.validate_all( "DATE_TRUNC('YEAR', CAST('2024-06-15' AS DATE))", write={ "snowflake": "DATE_TRUNC('YEAR', CAST('2024-06-15' AS DATE))", "duckdb": "DATE_TRUNC('YEAR', CAST('2024-06-15' AS DATE))", }, ) self.validate_all( "DATE_TRUNC('HOUR', CAST('2026-01-01 00:00:00' AS TIMESTAMP))", write={ "snowflake": "DATE_TRUNC('HOUR', CAST('2026-01-01 00:00:00' AS TIMESTAMP))", "duckdb": "DATE_TRUNC('HOUR', CAST('2026-01-01 00:00:00' AS TIMESTAMP))", }, ) # Snowflake's DATE_TRUNC return type matches type of the expresison # DuckDB's DATE_TRUNC return type matches type of granularity part. # In Snowflake --> DuckDB, DATE_TRUNC(date_part, timestamp) should be cast to timestamp to preserve Snowflake behavior. self.validate_all( "DATE_TRUNC(YEAR, TIMESTAMP '2026-01-01 00:00:00')", write={ "snowflake": "DATE_TRUNC('YEAR', CAST('2026-01-01 00:00:00' AS TIMESTAMP))", "duckdb": "CAST(DATE_TRUNC('YEAR', CAST('2026-01-01 00:00:00' AS TIMESTAMP)) AS TIMESTAMP)", }, ) self.validate_all( "DATE_TRUNC(MONTH, CAST('2024-06-15 14:23:45' AS TIMESTAMPTZ))", write={ "snowflake": "DATE_TRUNC('MONTH', CAST('2024-06-15 14:23:45' AS TIMESTAMPTZ))", "duckdb": "CAST(DATE_TRUNC('MONTH', CAST('2024-06-15 14:23:45' AS TIMESTAMPTZ)) AS TIMESTAMPTZ)", }, ) self.validate_all( "DATE_TRUNC('WEEK', CURRENT_DATE)", write={ "snowflake": "DATE_TRUNC('WEEK', CURRENT_DATE)", "duckdb": "DATE_TRUNC('WEEK', CURRENT_DATE)", }, ) # In Snowflake --> DuckDB, DATE_TRUNC(time_part, date) should be cast to date to preserve Snowflake behavior. 
self.validate_all( "DATE_TRUNC('HOUR', CAST('2026-01-01' AS DATE))", write={ "snowflake": "DATE_TRUNC('HOUR', CAST('2026-01-01' AS DATE))", "duckdb": "CAST(DATE_TRUNC('HOUR', CAST('2026-01-01' AS DATE)) AS DATE)", }, ) # DuckDB does not support DATE_TRUNC(time_part, time), so we add a dummy date to generate DATE_TRUNC(time_part, date) --> DATE in DuckDB # Then it is casted to a time (HH:MM:SS) to match Snowflake. self.validate_all( "DATE_TRUNC('HOUR', CAST('14:23:45.123456' AS TIME))", write={ "snowflake": "DATE_TRUNC('HOUR', CAST('14:23:45.123456' AS TIME))", "duckdb": "CAST(DATE_TRUNC('HOUR', CAST('1970-01-01' AS DATE) + CAST('14:23:45.123456' AS TIME)) AS TIME)", }, ) self.validate_all( "DATE(x)", write={ "duckdb": "CAST(x AS DATE)", "snowflake": "TO_DATE(x)", }, ) self.validate_all( "DATE('01-01-2000', 'MM-DD-YYYY')", write={ "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", }, ) self.validate_identity("SELECT TO_TIME(x) FROM t") self.validate_all( "SELECT TO_TIME('12:05:00')", write={ "bigquery": "SELECT CAST('12:05:00' AS TIME)", "snowflake": "SELECT CAST('12:05:00' AS TIME)", "duckdb": "SELECT CAST('12:05:00' AS TIME)", }, ) self.validate_all( "SELECT TO_TIME('2024-01-15 14:30:00'::TIMESTAMP)", write={ "bigquery": "SELECT TIME(CAST('2024-01-15 14:30:00' AS DATETIME))", "snowflake": "SELECT TO_TIME(CAST('2024-01-15 14:30:00' AS TIMESTAMP))", "duckdb": "SELECT CAST(CAST('2024-01-15 14:30:00' AS TIMESTAMP) AS TIME)", }, ) self.validate_all( "SELECT TO_TIME(CONVERT_TIMEZONE('UTC', 'US/Pacific', '2024-08-06 09:10:00.000')) AS pst_time", write={ "snowflake": "SELECT TO_TIME(CONVERT_TIMEZONE('UTC', 'US/Pacific', '2024-08-06 09:10:00.000')) AS pst_time", "duckdb": "SELECT CAST(CAST('2024-08-06 09:10:00.000' AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE 'US/Pacific' AS TIME) AS pst_time", }, ) self.validate_all( "SELECT TO_TIME('11.15.00', 'hh24.mi.ss')", write={ "snowflake": "SELECT TO_TIME('11.15.00', 
'hh24.mi.ss')", "duckdb": "SELECT CAST(STRPTIME('11.15.00', '%H.%M.%S') AS TIME)", }, ) self.validate_all( "SELECT TO_TIME('093000', 'HH24MISS')", write={ "duckdb": "SELECT CAST(STRPTIME('093000', '%H%M%S') AS TIME)", "snowflake": "SELECT TO_TIME('093000', 'hh24miss')", }, ) self.validate_all( "SELECT TRY_TO_TIME('093000', 'HH24MISS')", write={ "snowflake": "SELECT TRY_TO_TIME('093000', 'hh24miss')", "duckdb": "SELECT TRY_CAST(TRY_STRPTIME('093000', '%H%M%S') AS TIME)", }, ) self.validate_all( "SELECT TRY_TO_TIME('11.15.00')", write={ "snowflake": "SELECT TRY_CAST('11.15.00' AS TIME)", "duckdb": "SELECT TRY_CAST('11.15.00' AS TIME)", }, ) self.validate_all( "SELECT TRY_TO_TIME('11.15.00', 'hh24.mi.ss')", write={ "snowflake": "SELECT TRY_TO_TIME('11.15.00', 'hh24.mi.ss')", "duckdb": "SELECT TRY_CAST(TRY_STRPTIME('11.15.00', '%H.%M.%S') AS TIME)", }, ) def test_to_date(self): self.validate_identity("TO_DATE('12345')").assert_is(exp.Anonymous) self.validate_identity("TO_DATE(x)").assert_is(exp.TsOrDsToDate) self.validate_all( "TO_DATE('01-01-2000', 'MM-DD-YYYY')", write={ "snowflake": "TO_DATE('01-01-2000', 'mm-DD-yyyy')", "duckdb": "CAST(STRPTIME('01-01-2000', '%m-%d-%Y') AS DATE)", }, ) self.validate_all( "TO_DATE(x, 'MM-DD-YYYY')", write={ "snowflake": "TO_DATE(x, 'mm-DD-yyyy')", "duckdb": "CAST(STRPTIME(x, '%m-%d-%Y') AS DATE)", }, ) self.validate_identity( "SELECT TO_DATE('2019-02-28') + INTERVAL '1 day, 1 year'", "SELECT CAST('2019-02-28' AS DATE) + INTERVAL '1 day, 1 year'", ) self.validate_identity("TRY_TO_DATE(x)").assert_is(exp.TsOrDsToDate) self.validate_all( "TRY_TO_DATE('2024-01-31')", write={ "snowflake": "TRY_CAST('2024-01-31' AS DATE)", "duckdb": "TRY_CAST('2024-01-31' AS DATE)", }, ) self.validate_identity("TRY_TO_DATE('2024-01-31', 'AUTO')") self.validate_all( "TRY_TO_DATE('01-01-2000', 'MM-DD-YYYY')", write={ "snowflake": "TRY_TO_DATE('01-01-2000', 'mm-DD-yyyy')", "duckdb": "CAST(CAST(TRY_STRPTIME('01-01-2000', '%m-%d-%Y') AS TIMESTAMP) AS DATE)", 
}, ) for i in range(1, 10): fractional_format = "ff" + str(i) duck_db_format = "%n" if i == 3: duck_db_format = "%g" elif i == 6: duck_db_format = "%f" with self.subTest(f"Testing snowflake {fractional_format} format"): self.validate_all( f"TRY_TO_DATE('2013-04-28T20:57:01', 'yyyy-mm-DDThh24:mi:ss.{fractional_format}')", write={ "snowflake": f"TRY_TO_DATE('2013-04-28T20:57:01', 'yyyy-mm-DDThh24:mi:ss.{fractional_format}')", "duckdb": f"CAST(CAST(TRY_STRPTIME('2013-04-28T20:57:01', '%Y-%m-%dT%H:%M:%S.{duck_db_format}') AS TIMESTAMP) AS DATE)", }, ) self.validate_all( "TRY_TO_DATE('2013-04-28T20:57:01.888', 'yyyy-mm-DDThh24:mi:ss.ff')", write={ "snowflake": "TRY_TO_DATE('2013-04-28T20:57:01.888', 'yyyy-mm-DDThh24:mi:ss.ff9')", "duckdb": "CAST(CAST(TRY_STRPTIME('2013-04-28T20:57:01.888', '%Y-%m-%dT%H:%M:%S.%n') AS TIMESTAMP) AS DATE)", }, ) tz_to_format = { "tzh:tzm": "+07:00", "tzhtzm": "+0700", "tzh": "+07", } for tz_format, tz in tz_to_format.items(): with self.subTest(f"Testing snowflake {tz_format} timezone format"): self.validate_all( f"TRY_TO_DATE('2013-04-28 20:57 {tz}', 'YYYY-MM-DD HH24:MI {tz_format}')", write={ "snowflake": f"TRY_TO_DATE('2013-04-28 20:57 {tz}', 'yyyy-mm-DD hh24:mi {tz_format}')", "duckdb": f"CAST(CAST(TRY_STRPTIME('2013-04-28 20:57 {tz}', '%Y-%m-%d %H:%M %z') AS TIMESTAMP) AS DATE)", }, ) self.validate_all( """TRY_TO_DATE('2013-04-28T20:57', 'YYYY-MM-DD"T"HH24:MI:SS')""", write={ "snowflake": "TRY_TO_DATE('2013-04-28T20:57', 'yyyy-mm-DDThh24:mi:ss')", "duckdb": "CAST(CAST(TRY_STRPTIME('2013-04-28T20:57', '%Y-%m-%dT%H:%M:%S') AS TIMESTAMP) AS DATE)", }, ) def test_trunc(self): # Numeric truncation identity self.validate_identity("TRUNC(3.14159, 2)").assert_is(exp.Trunc) self.validate_identity("TRUNC(price, 0)").assert_is(exp.Trunc) self.validate_identity("TRUNC(3.14159)").assert_is(exp.Trunc) # Single-arg TRUNC is always numeric in Snowflake (date trunc requires unit) self.validate_identity("TRUNC(col)").assert_is(exp.Trunc) # Date 
truncation with typed column and unit # (parse_one because DateTrunc generates as DATE_TRUNC, not TRUNC) self.parse_one("TRUNC(CAST(x AS DATE), 'MONTH')").assert_is(exp.DateTrunc) self.parse_one("TRUNC(CAST(x AS TIMESTAMP), 'MONTH')").assert_is(exp.DateTrunc) self.parse_one("TRUNC(CAST(x AS DATETIME), 'MONTH')").assert_is(exp.DateTrunc) # Fallback to Anonymous when type cannot be determined self.validate_identity("TRUNC(foo, bar)").assert_is(exp.Anonymous) # Cross-dialect numeric truncation transpilation self.validate_all( "TRUNC(3.14159, 2)", write={ "snowflake": "TRUNC(3.14159, 2)", "oracle": "TRUNC(3.14159, 2)", "postgres": "TRUNC(3.14159, 2)", "mysql": "TRUNCATE(3.14159, 2)", "tsql": "ROUND(3.14159, 2, 1)", "bigquery": "TRUNC(3.14159, 2)", "duckdb": "TRUNC(3.14159)", "presto": "TRUNCATE(3.14159, 2)", "clickhouse": "trunc(3.14159, 2)", "spark": "CAST(3.14159 AS BIGINT)", }, ) # Single-argument numeric TRUNC transpilation self.validate_all( "TRUNC(3.14159)", write={ "snowflake": "TRUNC(3.14159)", "oracle": "TRUNC(3.14159)", "postgres": "TRUNC(3.14159)", "mysql": "TRUNCATE(3.14159)", "tsql": "ROUND(3.14159, 0, 1)", }, ) # Read numeric TRUNC from other dialects self.validate_all( "TRUNC(price, 2)", read={ "mysql": "TRUNCATE(price, 2)", "oracle": "TRUNC(price, 2)", "postgres": "TRUNC(price, 2)", }, write={ "snowflake": "TRUNC(price, 2)", }, ) def test_semi_structured_types(self): self.validate_identity("SELECT CAST(a AS VARIANT)") self.validate_identity("SELECT CAST(a AS ARRAY)") self.validate_all( "SELECT a::VARIANT", write={ "snowflake": "SELECT CAST(a AS VARIANT)", "tsql": "SELECT CAST(a AS SQL_VARIANT)", }, ) self.validate_all( "ARRAY_CONSTRUCT(0, 1, 2)", write={ "snowflake": "[0, 1, 2]", "bigquery": "[0, 1, 2]", "duckdb": "[0, 1, 2]", "presto": "ARRAY[0, 1, 2]", "spark": "ARRAY(0, 1, 2)", }, ) self.validate_all( "ARRAYS_ZIP([1, 2], [3, 4], [4, 5])", write={ "snowflake": "ARRAYS_ZIP([1, 2], [3, 4], [4, 5])", "duckdb": "CASE WHEN [1, 2] IS NULL OR [3, 4] IS NULL 
OR [4, 5] IS NULL THEN NULL WHEN LENGTH([1, 2]) = 0 AND LENGTH([3, 4]) = 0 AND LENGTH([4, 5]) = 0 THEN [{'$1': NULL, '$2': NULL, '$3': NULL}] ELSE LIST_TRANSFORM(RANGE(0, CASE WHEN LENGTH([1, 2]) IS NULL OR LENGTH([3, 4]) IS NULL OR LENGTH([4, 5]) IS NULL THEN NULL ELSE GREATEST(LENGTH([1, 2]), LENGTH([3, 4]), LENGTH([4, 5])) END), __i -> {'$1': COALESCE([1, 2], [])[__i + 1], '$2': COALESCE([3, 4], [])[__i + 1], '$3': COALESCE([4, 5], [])[__i + 1]}) END", }, ) self.validate_all( "ARRAYS_ZIP([1, 2, 3])", write={ "snowflake": "ARRAYS_ZIP([1, 2, 3])", "duckdb": "CASE WHEN [1, 2, 3] IS NULL THEN NULL WHEN LENGTH([1, 2, 3]) = 0 THEN [{'$1': NULL}] ELSE LIST_TRANSFORM(RANGE(0, LENGTH([1, 2, 3])), __i -> {'$1': COALESCE([1, 2, 3], [])[__i + 1]}) END", }, ) self.validate_all( "SELECT a::OBJECT", write={ "snowflake": "SELECT CAST(a AS OBJECT)", }, ) def test_next_day(self): self.validate_all( "SELECT NEXT_DAY(CAST('2024-01-01' AS DATE), 'Monday')", write={ "snowflake": "SELECT NEXT_DAY(CAST('2024-01-01' AS DATE), 'Monday')", "duckdb": "SELECT CAST(CAST('2024-01-01' AS DATE) + INTERVAL ((((1 - ISODOW(CAST('2024-01-01' AS DATE))) + 6) % 7) + 1) DAY AS DATE)", }, ) self.validate_all( "SELECT NEXT_DAY(CAST('2024-01-05' AS DATE), 'Friday')", write={ "snowflake": "SELECT NEXT_DAY(CAST('2024-01-05' AS DATE), 'Friday')", "duckdb": "SELECT CAST(CAST('2024-01-05' AS DATE) + INTERVAL ((((5 - ISODOW(CAST('2024-01-05' AS DATE))) + 6) % 7) + 1) DAY AS DATE)", }, ) self.validate_all( "SELECT NEXT_DAY(CAST('2024-01-05' AS DATE), 'WE')", write={ "snowflake": "SELECT NEXT_DAY(CAST('2024-01-05' AS DATE), 'WE')", "duckdb": "SELECT CAST(CAST('2024-01-05' AS DATE) + INTERVAL ((((3 - ISODOW(CAST('2024-01-05' AS DATE))) + 6) % 7) + 1) DAY AS DATE)", }, ) self.validate_all( "SELECT NEXT_DAY(CAST('2024-01-01 10:30:45' AS TIMESTAMP), 'Friday')", write={ "snowflake": "SELECT NEXT_DAY(CAST('2024-01-01 10:30:45' AS TIMESTAMP), 'Friday')", "duckdb": "SELECT CAST(CAST('2024-01-01 10:30:45' AS TIMESTAMP) + 
INTERVAL ((((5 - ISODOW(CAST('2024-01-01 10:30:45' AS TIMESTAMP))) + 6) % 7) + 1) DAY AS DATE)", }, ) self.validate_all( "SELECT NEXT_DAY(CAST('2024-01-01' AS DATE), day_column)", write={ "snowflake": "SELECT NEXT_DAY(CAST('2024-01-01' AS DATE), day_column)", "duckdb": "SELECT CAST(CAST('2024-01-01' AS DATE) + INTERVAL ((((CASE WHEN STARTS_WITH(UPPER(day_column), 'MO') THEN 1 WHEN STARTS_WITH(UPPER(day_column), 'TU') THEN 2 WHEN STARTS_WITH(UPPER(day_column), 'WE') THEN 3 WHEN STARTS_WITH(UPPER(day_column), 'TH') THEN 4 WHEN STARTS_WITH(UPPER(day_column), 'FR') THEN 5 WHEN STARTS_WITH(UPPER(day_column), 'SA') THEN 6 WHEN STARTS_WITH(UPPER(day_column), 'SU') THEN 7 END - ISODOW(CAST('2024-01-01' AS DATE))) + 6) % 7) + 1) DAY AS DATE)", }, ) def test_previous_day(self): self.validate_all( "SELECT PREVIOUS_DAY(DATE '2024-01-15', 'Monday')", write={ "duckdb": "SELECT CAST(CAST('2024-01-15' AS DATE) - INTERVAL ((((ISODOW(CAST('2024-01-15' AS DATE)) - 1) + 6) % 7) + 1) DAY AS DATE)", "snowflake": "SELECT PREVIOUS_DAY(CAST('2024-01-15' AS DATE), 'Monday')", }, ) self.validate_all( "SELECT PREVIOUS_DAY(DATE '2024-01-15', 'Fr')", write={ "duckdb": "SELECT CAST(CAST('2024-01-15' AS DATE) - INTERVAL ((((ISODOW(CAST('2024-01-15' AS DATE)) - 5) + 6) % 7) + 1) DAY AS DATE)", "snowflake": "SELECT PREVIOUS_DAY(CAST('2024-01-15' AS DATE), 'Fr')", }, ) self.validate_all( "SELECT PREVIOUS_DAY(TIMESTAMP '2024-01-15 10:30:45', 'Monday')", write={ "duckdb": "SELECT CAST(CAST('2024-01-15 10:30:45' AS TIMESTAMP) - INTERVAL ((((ISODOW(CAST('2024-01-15 10:30:45' AS TIMESTAMP)) - 1) + 6) % 7) + 1) DAY AS DATE)", "snowflake": "SELECT PREVIOUS_DAY(CAST('2024-01-15 10:30:45' AS TIMESTAMP), 'Monday')", }, ) self.validate_all( "SELECT PREVIOUS_DAY(DATE '2024-01-15', day_column)", write={ "duckdb": "SELECT CAST(CAST('2024-01-15' AS DATE) - INTERVAL ((((ISODOW(CAST('2024-01-15' AS DATE)) - CASE WHEN STARTS_WITH(UPPER(day_column), 'MO') THEN 1 WHEN STARTS_WITH(UPPER(day_column), 'TU') THEN 2 WHEN 
STARTS_WITH(UPPER(day_column), 'WE') THEN 3 WHEN STARTS_WITH(UPPER(day_column), 'TH') THEN 4 WHEN STARTS_WITH(UPPER(day_column), 'FR') THEN 5 WHEN STARTS_WITH(UPPER(day_column), 'SA') THEN 6 WHEN STARTS_WITH(UPPER(day_column), 'SU') THEN 7 END) + 6) % 7) + 1) DAY AS DATE)", "snowflake": "SELECT PREVIOUS_DAY(CAST('2024-01-15' AS DATE), day_column)", }, ) def test_historical_data(self): self.validate_identity("SELECT * FROM my_table AT (STATEMENT => $query_id_var)") self.validate_identity("SELECT * FROM my_table AT (OFFSET => -60 * 5)") self.validate_identity("SELECT * FROM my_table BEFORE (STATEMENT => $query_id_var)") self.validate_identity("SELECT * FROM my_table BEFORE (OFFSET => -60 * 5)") self.validate_identity("CREATE SCHEMA restored_schema CLONE my_schema AT (OFFSET => -3600)") self.validate_identity( "CREATE TABLE restored_table CLONE my_table AT (TIMESTAMP => CAST('Sat, 09 May 2015 01:01:00 +0300' AS TIMESTAMPTZ))", ) self.validate_identity( "CREATE DATABASE restored_db CLONE my_db BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" ) self.validate_identity( "SELECT * FROM my_table AT (TIMESTAMP => TO_TIMESTAMP(1432669154242, 3))" ) self.validate_identity( "SELECT * FROM my_table AT (OFFSET => -60 * 5) AS T WHERE T.flag = 'valid'" ) self.validate_identity( "SELECT * FROM my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" ) self.validate_identity( "SELECT * FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" ) self.validate_identity( "SELECT * FROM my_table AT (TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp)", "SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMP))", ) self.validate_identity( "SELECT * FROM my_table AT(TIMESTAMP => 'Fri, 01 May 2015 16:20:00 -0700'::timestamp_tz)", "SELECT * FROM my_table AT (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))", ) self.validate_identity( "SELECT * FROM my_table BEFORE (TIMESTAMP => 'Fri, 01 
May 2015 16:20:00 -0700'::timestamp_tz);", "SELECT * FROM my_table BEFORE (TIMESTAMP => CAST('Fri, 01 May 2015 16:20:00 -0700' AS TIMESTAMPTZ))", ) self.validate_identity( """ SELECT oldt.* , newt.* FROM my_table BEFORE(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL; """, "SELECT oldt.*, newt.* FROM my_table BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS oldt FULL OUTER JOIN my_table AT (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726') AS newt ON oldt.id = newt.id WHERE oldt.id IS NULL OR newt.id IS NULL", ) # Make sure that the historical data keywords can still be used as aliases for historical_data_prefix in ("AT", "BEFORE", "END", "CHANGES"): for schema_suffix in ("", "(col)"): with self.subTest( f"Testing historical data prefix alias: {historical_data_prefix}{schema_suffix}" ): self.validate_identity( f"SELECT * FROM foo {historical_data_prefix}{schema_suffix}", f"SELECT * FROM foo AS {historical_data_prefix}{schema_suffix}", ) def test_ddl(self): for constraint_prefix in ("WITH ", ""): with self.subTest(f"Constraint prefix: {constraint_prefix}"): self.validate_identity( f"CREATE TABLE t (id INT {constraint_prefix}MASKING POLICY p.q.r)", "CREATE TABLE t (id INT MASKING POLICY p.q.r)", ) self.validate_identity( f"CREATE TABLE t (id INT {constraint_prefix}MASKING POLICY p USING (c1, c2, c3))", "CREATE TABLE t (id INT MASKING POLICY p USING (c1, c2, c3))", ) self.validate_identity( f"CREATE TABLE t (id INT {constraint_prefix}PROJECTION POLICY p.q.r)", "CREATE TABLE t (id INT PROJECTION POLICY p.q.r)", ) self.validate_identity( f"CREATE TABLE t (id INT {constraint_prefix}TAG (key1='value_1', key2='value_2'))", "CREATE TABLE t (id INT TAG (key1='value_1', key2='value_2'))", ) self.validate_identity("CREATE OR REPLACE TABLE foo COPY GRANTS USING TEMPLATE (SELECT 1)") 
self.validate_identity("USE SECONDARY ROLES ALL") self.validate_identity("USE SECONDARY ROLES NONE") self.validate_identity("USE SECONDARY ROLES a, b, c") self.validate_identity("CREATE SECURE VIEW table1 AS (SELECT a FROM table2)") self.validate_identity("CREATE OR REPLACE VIEW foo (uid) COPY GRANTS AS (SELECT 1)") self.validate_identity("CREATE TABLE geospatial_table (id INT, g GEOGRAPHY)") self.validate_identity("CREATE MATERIALIZED VIEW a COMMENT='...' AS SELECT 1 FROM x") self.validate_identity("CREATE DATABASE mytestdb_clone CLONE mytestdb") self.validate_identity("CREATE SCHEMA mytestschema_clone CLONE testschema") self.validate_identity("CREATE TABLE IDENTIFIER('foo') (COLUMN1 VARCHAR, COLUMN2 VARCHAR)") self.validate_identity("CREATE TABLE IDENTIFIER($foo) (col1 VARCHAR, col2 VARCHAR)") self.validate_identity("CREATE TAG cost_center ALLOWED_VALUES 'a', 'b'") self.validate_identity("CREATE WAREHOUSE x").this.assert_is(exp.Identifier) self.validate_identity("CREATE STREAMLIT x").this.assert_is(exp.Identifier) self.validate_identity( "CREATE TEMPORARY STAGE stage1 FILE_FORMAT=(TYPE=PARQUET)" ).this.assert_is(exp.Table) self.validate_identity( "CREATE STAGE stage1 FILE_FORMAT='format1'", "CREATE STAGE stage1 FILE_FORMAT=(FORMAT_NAME='format1')", ) self.validate_identity("CREATE STAGE stage1 FILE_FORMAT=(FORMAT_NAME=stage1.format1)") self.validate_identity("CREATE STAGE stage1 FILE_FORMAT=(FORMAT_NAME='stage1.format1')") self.validate_identity( "CREATE STAGE stage1 FILE_FORMAT=schema1.format1", "CREATE STAGE stage1 FILE_FORMAT=(FORMAT_NAME=schema1.format1)", ) with self.assertRaises(ParseError): self.parse_one("CREATE STAGE stage1 FILE_FORMAT=123", dialect="snowflake") self.validate_identity( "CREATE STAGE s1 URL='s3://bucket-123' FILE_FORMAT=(TYPE='JSON') CREDENTIALS=(aws_key_id='test' aws_secret_key='test')" ) self.validate_identity( "CREATE OR REPLACE TAG IF NOT EXISTS cost_center COMMENT='cost_center tag'" ).this.assert_is(exp.Identifier) 
self.validate_identity( "CREATE TEMPORARY FILE FORMAT fileformat1 TYPE=PARQUET COMPRESSION=auto" ).this.assert_is(exp.Table) self.validate_identity( "CREATE DYNAMIC TABLE product (pre_tax_profit, taxes, after_tax_profit) TARGET_LAG='20 minutes' WAREHOUSE=mywh AS SELECT revenue - cost, (revenue - cost) * tax_rate, (revenue - cost) * (1.0 - tax_rate) FROM staging_table" ) self.validate_identity( "ALTER TABLE db_name.schmaName.tblName ADD COLUMN_1 VARCHAR NOT NULL TAG (key1='value_1')" ) self.validate_identity( "DROP FUNCTION my_udf (OBJECT(city VARCHAR, zipcode DECIMAL(38, 0), val ARRAY(BOOLEAN)))" ) self.validate_identity( "CREATE TABLE orders_clone_restore CLONE orders AT (TIMESTAMP => TO_TIMESTAMP_TZ('04/05/2013 01:02:03', 'mm/dd/yyyy hh24:mi:ss'))" ) self.validate_identity( "CREATE TABLE orders_clone_restore CLONE orders BEFORE (STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')" ) self.validate_identity( "CREATE SCHEMA mytestschema_clone_restore CLONE testschema BEFORE (TIMESTAMP => TO_TIMESTAMP(40 * 365 * 86400))" ) self.validate_identity( "CREATE OR REPLACE TABLE EXAMPLE_DB.DEMO.USERS (ID DECIMAL(38, 0) NOT NULL, PRIMARY KEY (ID), FOREIGN KEY (CITY_CODE) REFERENCES EXAMPLE_DB.DEMO.CITIES (CITY_CODE))" ) self.validate_identity( "CREATE ICEBERG TABLE my_iceberg_table (amount ARRAY(INT)) CATALOG='SNOWFLAKE' EXTERNAL_VOLUME='my_external_volume' BASE_LOCATION='my/relative/path/from/extvol'" ) self.validate_identity( """CREATE OR REPLACE FUNCTION ibis_udfs.public.object_values("obj" OBJECT) RETURNS ARRAY LANGUAGE JAVASCRIPT RETURNS NULL ON NULL INPUT AS ' return Object.values(obj) '""" ) self.validate_identity( """CREATE OR REPLACE FUNCTION ibis_udfs.public.object_values("obj" OBJECT) RETURNS ARRAY LANGUAGE JAVASCRIPT STRICT AS ' return Object.values(obj) '""" ) self.validate_identity( "CREATE OR REPLACE TABLE TEST (SOME_REF DECIMAL(38, 0) NOT NULL FOREIGN KEY REFERENCES SOME_OTHER_TABLE (ID))" ) self.validate_identity( "CREATE OR REPLACE FUNCTION my_udf(location 
OBJECT(city VARCHAR, zipcode DECIMAL(38, 0), val ARRAY(BOOLEAN))) RETURNS VARCHAR AS $$ SELECT 'foo' $$", "CREATE OR REPLACE FUNCTION my_udf(location OBJECT(city VARCHAR, zipcode DECIMAL(38, 0), val ARRAY(BOOLEAN))) RETURNS VARCHAR AS ' SELECT \\'foo\\' '", ) self.validate_identity( "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE(col1 ARRAY(INT)) AS $$ WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t $$", "CREATE OR REPLACE FUNCTION my_udtf(foo BOOLEAN) RETURNS TABLE (col1 ARRAY(INT)) AS ' WITH t AS (SELECT CAST([1, 2, 3] AS ARRAY(INT)) AS c) SELECT c FROM t '", ) self.validate_identity( "CREATE SEQUENCE seq1 WITH START=1, INCREMENT=1 ORDER", "CREATE SEQUENCE seq1 START WITH 1 INCREMENT BY 1 ORDER", ) self.validate_identity( "CREATE SEQUENCE seq1 WITH START=1 INCREMENT=1 ORDER", "CREATE SEQUENCE seq1 START WITH 1 INCREMENT BY 1 ORDER", ) self.validate_identity( """create external table et2( col1 date as (parse_json(metadata$external_table_partition):COL1::date), col2 varchar as (parse_json(metadata$external_table_partition):COL2::varchar), col3 number as (parse_json(metadata$external_table_partition):COL3::number)) partition by (col1,col2,col3) location=@s2/logs/ partition_type = user_specified file_format = (type = parquet compression = gzip binary_as_text = false)""", "CREATE EXTERNAL TABLE et2 (col1 DATE AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL1') AS DATE)), col2 VARCHAR AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL2') AS VARCHAR)), col3 DECIMAL(38, 0) AS (CAST(GET_PATH(PARSE_JSON(metadata$external_table_partition), 'COL3') AS DECIMAL(38, 0)))) PARTITION BY (col1, col2, col3) LOCATION=@s2/logs/ partition_type=user_specified FILE_FORMAT=(type=parquet compression=gzip binary_as_text=FALSE)", ) self.validate_all( "CREATE TABLE orders_clone CLONE orders", read={ "bigquery": "CREATE TABLE orders_clone CLONE orders", }, write={ "bigquery": "CREATE TABLE orders_clone CLONE orders", 
"snowflake": "CREATE TABLE orders_clone CLONE orders", }, ) self.validate_all( "CREATE OR REPLACE TRANSIENT TABLE a (id INT)", read={ "postgres": "CREATE OR REPLACE TRANSIENT TABLE a (id INT)", "snowflake": "CREATE OR REPLACE TRANSIENT TABLE a (id INT)", }, write={ "postgres": "CREATE OR REPLACE TABLE a (id INT)", "mysql": "CREATE OR REPLACE TABLE a (id INT)", "snowflake": "CREATE OR REPLACE TRANSIENT TABLE a (id INT)", }, ) self.validate_all( "CREATE TABLE a (b INT)", read={"teradata": "CREATE MULTISET TABLE a (b INT)"}, write={"snowflake": "CREATE TABLE a (b INT)"}, ) self.validate_identity("CREATE TABLE a TAG (key1='value_1', key2='value_2')") self.validate_all( "CREATE TABLE a TAG (key1='value_1')", read={ "snowflake": "CREATE TABLE a WITH TAG (key1='value_1')", }, ) for action in ("SET", "DROP"): with self.subTest(f"ALTER COLUMN {action} NOT NULL"): self.validate_all( f"ALTER TABLE a ALTER COLUMN my_column {action} NOT NULL", read={ "snowflake": f"ALTER TABLE a MODIFY COLUMN my_column {action} NOT NULL", }, write={ "snowflake": f"ALTER TABLE a ALTER COLUMN my_column {action} NOT NULL", "duckdb": f"ALTER TABLE a ALTER COLUMN my_column {action} NOT NULL", "postgres": f"ALTER TABLE a ALTER COLUMN my_column {action} NOT NULL", }, ) def test_user_defined_functions(self): self.validate_all( "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS $$ SELECT 1 $$", write={ "snowflake": "CREATE FUNCTION a(x DATE, y BIGINT) RETURNS ARRAY LANGUAGE JAVASCRIPT AS ' SELECT 1 '", }, ) self.validate_all( "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'", write={ "snowflake": "CREATE FUNCTION a() RETURNS TABLE (b INT) AS 'SELECT 1'", "bigquery": "CREATE TABLE FUNCTION a() RETURNS TABLE AS SELECT 1", }, ) self.validate_all( "CREATE FUNCTION a() RETURNS INT IMMUTABLE AS 'SELECT 1'", write={ "snowflake": "CREATE FUNCTION a() RETURNS INT IMMUTABLE AS 'SELECT 1'", }, ) def test_stored_procedures(self): self.validate_identity("CALL a.b.c(x, y)", 
check_command_warning=True) self.validate_identity( "CREATE PROCEDURE a.b.c(x INT, y VARIANT) RETURNS OBJECT EXECUTE AS CALLER AS 'BEGIN SELECT 1; END;'" ) def test_table_function(self): self.validate_identity("SELECT * FROM TABLE('MYTABLE')") self.validate_identity("SELECT * FROM TABLE($MYVAR)") self.validate_identity("SELECT * FROM TABLE(?)") self.validate_identity("SELECT * FROM TABLE(:BINDING)") self.validate_identity("SELECT * FROM TABLE($MYVAR) WHERE COL1 = 10") self.validate_identity("SELECT * FROM TABLE('t1') AS f") self.validate_identity("SELECT * FROM (TABLE('t1') CROSS JOIN TABLE('t2'))") self.validate_identity("SELECT * FROM TABLE('t1'), LATERAL (SELECT * FROM t2)") self.validate_identity("SELECT * FROM TABLE('t1') UNION ALL SELECT * FROM TABLE('t2')") self.validate_identity("SELECT * FROM TABLE('t1') TABLESAMPLE BERNOULLI (20.3)") self.validate_identity("""SELECT * FROM TABLE('MYDB."MYSCHEMA"."MYTABLE"')""") self.validate_identity( 'SELECT * FROM TABLE($$MYDB. "MYSCHEMA"."MYTABLE"$$)', """SELECT * FROM TABLE('MYDB. 
"MYSCHEMA"."MYTABLE"')""",
        )

    def test_flatten(self):
        """Check the Snowflake SQL produced for ``Explode`` and ``FLATTEN`` constructs.

        The first assertion builds ``exp.Explode`` over column ``x`` with a
        quoted alias ``y`` and compares its pretty-printed Snowflake SQL with a
        literal. The following statements hand ``TABLE(FLATTEN(...))`` and
        ``LATERAL FLATTEN(...)`` queries to ``validate_all`` together with the
        expected ``snowflake`` output.
        """
        self.assertEqual(
            exp.select(exp.Explode(this=exp.column("x")).as_("y", quoted=True)).sql(
                "snowflake", pretty=True
            ),
            """SELECT IFF(_u.pos = _u_2.pos_2, _u_2."y", NULL) AS "y" FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, ( GREATEST(ARRAY_SIZE(x)) - 1 ) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => x)) AS _u_2(seq, key, path, pos_2, "y", this) WHERE _u.pos = _u_2.pos_2 OR ( _u.pos > ( ARRAY_SIZE(x) - 1 ) AND _u_2.pos_2 = ( ARRAY_SIZE(x) - 1 ) )""",
        )
        self.validate_all(
            """ select dag_report.acct_id, dag_report.report_date, dag_report.report_uuid, dag_report.airflow_name, dag_report.dag_id, f.value::varchar as operator from cs.telescope.dag_report, table(flatten(input=>split(operators, ','))) f """,
            write={
                "snowflake": """SELECT dag_report.acct_id, dag_report.report_date, dag_report.report_uuid, dag_report.airflow_name, dag_report.dag_id, CAST(f.value AS VARCHAR) AS operator FROM cs.telescope.dag_report, TABLE(FLATTEN(input => SPLIT(operators, ','))) AS f"""
            },
            pretty=True,
        )
        self.validate_all(
            """ SELECT uc.user_id, uc.start_ts AS ts, CASE WHEN uc.start_ts::DATE >= '2023-01-01' AND uc.country_code IN ('US') AND uc.user_id NOT IN ( SELECT DISTINCT _id FROM users, LATERAL FLATTEN(INPUT => PARSE_JSON(flags)) datasource WHERE datasource.value:name = 'something' ) THEN 'Sample1' ELSE 'Sample2' END AS entity FROM user_countries AS uc LEFT JOIN ( SELECT user_id, MAX(IFF(service_entity IS NULL,1,0)) AS le_null FROM accepted_user_agreements GROUP BY 1 ) AS aua ON uc.user_id = aua.user_id """,
            write={
                "snowflake": """SELECT uc.user_id, uc.start_ts AS ts, CASE WHEN CAST(uc.start_ts AS DATE) >= '2023-01-01' AND uc.country_code IN ('US') AND uc.user_id <> ALL ( SELECT DISTINCT _id FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS) WHERE GET_PATH(datasource.value, 'name') = 'something' ) THEN 'Sample1' ELSE 'Sample2' END 
AS entity FROM user_countries AS uc LEFT JOIN ( SELECT user_id, MAX(IFF(service_entity IS NULL, 1, 0)) AS le_null FROM accepted_user_agreements GROUP BY 1 ) AS aua ON uc.user_id = aua.user_id CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, ( GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1 ) + 1))) AS _u(seq, key, path, index, pos, this) CROSS JOIN TABLE(FLATTEN(INPUT => PARSE_JSON(flags))) AS _u_2(seq, key, path, pos_2, entity, this) WHERE _u.pos = _u_2.pos_2 OR ( _u.pos > ( ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1 ) AND _u_2.pos_2 = ( ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1 ) )""", }, pretty=True, ) # All examples from https://docs.snowflake.com/en/sql-reference/functions/flatten.html#syntax self.validate_all( "SELECT * FROM TABLE(FLATTEN(input => parse_json('[1, ,77]'))) f", write={ "snowflake": "SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('[1, ,77]'))) AS f" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('{"a":1, "b":[77,88]}'), outer => true)) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('{"a":1, "b":[77,88]}'), outer => TRUE)) AS f""" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('{"a":1, "b":[77,88]}'), path => 'b')) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('{"a":1, "b":[77,88]}'), path => 'b')) AS f""" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('[]'))) f""", write={"snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('[]'))) AS f"""}, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('[]'), outer => true)) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('[]'), outer => TRUE)) AS f""" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('{"a":1, "b":[77,88], "c": {"d":"X"}}'))) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('{"a":1, "b":[77,88], "c": {"d":"X"}}'))) 
AS f""" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('{"a":1, "b":[77,88], "c": {"d":"X"}}'), recursive => true)) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('{"a":1, "b":[77,88], "c": {"d":"X"}}'), recursive => TRUE)) AS f""" }, ) self.validate_all( """SELECT * FROM TABLE(FLATTEN(input => parse_json('{"a":1, "b":[77,88], "c": {"d":"X"}}'), recursive => true, mode => 'object')) f""", write={ "snowflake": """SELECT * FROM TABLE(FLATTEN(input => PARSE_JSON('{"a":1, "b":[77,88], "c": {"d":"X"}}'), recursive => TRUE, mode => 'object')) AS f""" }, ) self.validate_all( """ SELECT id as "ID", f.value AS "Contact", f1.value:type AS "Type", f1.value:content AS "Details" FROM persons p, lateral flatten(input => p.c, path => 'contact') f, lateral flatten(input => f.value:business) f1 """, write={ "snowflake": """SELECT id AS "ID", f.value AS "Contact", GET_PATH(f1.value, 'type') AS "Type", GET_PATH(f1.value, 'content') AS "Details" FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS f(SEQ, KEY, PATH, INDEX, VALUE, THIS), LATERAL FLATTEN(input => GET_PATH(f.value, 'business')) AS f1(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", }, pretty=True, ) self.validate_all( """ SELECT id as "ID", value AS "Contact" FROM persons p, lateral flatten(input => p.c, path => 'contact') """, write={ "snowflake": """SELECT id AS "ID", value AS "Contact" FROM persons AS p, LATERAL FLATTEN(input => p.c, path => 'contact') AS _flattened(SEQ, KEY, PATH, INDEX, VALUE, THIS)""", }, pretty=True, ) def test_minus(self): self.validate_all( "SELECT 1 EXCEPT SELECT 1", read={ "oracle": "SELECT 1 MINUS SELECT 1", "snowflake": "SELECT 1 MINUS SELECT 1", }, ) def test_values(self): select = exp.select("*").from_("values (map(['a'], [1]))") self.assertEqual(select.sql("snowflake"), "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1))") self.validate_all( 'SELECT "c0", "c1" FROM (VALUES (1, 2), (3, 4)) AS "t0"("c0", "c1")', read={ "spark": 
"SELECT `c0`, `c1` FROM (VALUES (1, 2), (3, 4)) AS `t0`(`c0`, `c1`)",
            },
        )
        self.validate_all(
            """SELECT $1 AS "_1" FROM VALUES ('a'), ('b')""",
            write={
                "snowflake": """SELECT $1 AS "_1" FROM (VALUES ('a'), ('b'))""",
                "spark": """SELECT ${1} AS `_1` FROM VALUES ('a'), ('b')""",
            },
        )
        self.validate_all(
            "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x) AS t",
            read={
                "duckdb": "SELECT * FROM (VALUES ({'a': 1})) AS t(x)",
            },
        )
        self.validate_all(
            "SELECT * FROM (SELECT OBJECT_CONSTRUCT('a', 1) AS x UNION ALL SELECT OBJECT_CONSTRUCT('a', 2)) AS t",
            read={
                "duckdb": "SELECT * FROM (VALUES ({'a': 1}), ({'a': 2})) AS t(x)",
            },
        )

    def test_describe(self):
        """Exercise ``DESCRIBE`` / ``DESC`` statements for many Snowflake object kinds.

        Covers ``SEMANTIC VIEW``, ``TABLE`` and ``VIEW`` forms (with expected
        ``snowflake`` and ``spark`` output), then loops over
        ``(kind, object_name, prop_type)`` triples. Later statements check that
        the keyword tokens involved still work as identifiers and aliases, and
        finish with an ``ENDSWITH`` ``validate_all`` call.
        """
        self.validate_identity("DESCRIBE SEMANTIC VIEW TPCDS_SEMANTIC_VIEW_SM")
        self.validate_identity(
            "DESC SEMANTIC VIEW TPCDS_SEMANTIC_VIEW_SM",
            "DESCRIBE SEMANTIC VIEW TPCDS_SEMANTIC_VIEW_SM",
        )
        self.validate_all(
            "DESCRIBE TABLE db.table",
            write={
                "snowflake": "DESCRIBE TABLE db.table",
                "spark": "DESCRIBE db.table",
            },
        )
        self.validate_all(
            "DESCRIBE db.table",
            write={
                "snowflake": "DESCRIBE TABLE db.table",
                "spark": "DESCRIBE db.table",
            },
        )
        self.validate_all(
            "DESC TABLE db.table",
            write={
                "snowflake": "DESCRIBE TABLE db.table",
                "spark": "DESCRIBE db.table",
            },
        )
        self.validate_all(
            "DESC VIEW db.table",
            write={
                "snowflake": "DESCRIBE VIEW db.table",
                "spark": "DESCRIBE db.table",
            },
        )

        # Each triple is: object-kind keywords, a sample object name, and the
        # property expression class the loop body looks up in the parsed AST.
        for kind, object_name, prop_type in (
            ("DYNAMIC TABLE", "db.schema.t1", exp.DynamicProperty),
            ("MATERIALIZED VIEW", "my_view", exp.MaterializedProperty),
            ("EXTERNAL VOLUME", "vol1", exp.ExternalProperty),
            ("COMPUTE POOL", "pool1", exp.ComputeProperty),
            ("MASKING POLICY", "db.schema.pol1", exp.MaskingProperty),
            ("ROW ACCESS POLICY", "pol1", exp.RowAccessProperty),
            ("API INTEGRATION", "int1", exp.ApiProperty),
            ("APPLICATION PACKAGE", "pkg1", exp.ApplicationProperty),
            ("SECURITY INTEGRATION", "int1", exp.SecurityIntegrationProperty),
            ("NETWORK RULE", "rule1", exp.NetworkProperty),
            ("ICEBERG TABLE", "db.schema.t1", exp.IcebergProperty),
            ("HYBRID 
TABLE", "t1", exp.HybridProperty), ("CATALOG INTEGRATION", "int1", exp.CatalogProperty), ("DATABASE ROLE", "role1", exp.DatabaseProperty), ): with self.subTest(kind=kind): ast = self.validate_identity(f"DESCRIBE {kind} {object_name}") self.assertEqual(ast.args["kind"], kind.split()[-1]) self.assertIsInstance(ast.find(prop_type), prop_type) self.validate_identity( f"DESC {kind} {object_name}", f"DESCRIBE {kind} {object_name}" ) # Verify keyword tokens used by DESCRIBE work as identifiers and aliases from sqlglot import parser from sqlglot.parsers.snowflake import SnowflakeParser tokens = {t.name.lower() for t in SnowflakeParser.CREATABLES - parser.Parser.CREATABLES} tokens |= {k.lower() for k in SnowflakeParser.DESCRIBE_QUALIFIER_PARSERS} tokens -= {"row"} # ROW is not a valid identifier in Snowflake for token in sorted(tokens): with self.subTest(token=token): self.validate_identity(f"SELECT {token} FROM t") self.validate_identity(f"SELECT 1 AS {token}") cols = ", ".join(f"{t} VARCHAR" for t in sorted(tokens)) self.validate_identity(f"CREATE TABLE t ({cols})") self.validate_all( "ENDSWITH('abc', 'c')", read={ "bigquery": "ENDS_WITH('abc', 'c')", "clickhouse": "endsWith('abc', 'c')", "databricks": "ENDSWITH('abc', 'c')", "duckdb": "ENDS_WITH('abc', 'c')", "presto": "ENDS_WITH('abc', 'c')", "spark": "ENDSWITH('abc', 'c')", }, write={ "bigquery": "ENDS_WITH('abc', 'c')", "clickhouse": "endsWith('abc', 'c')", "databricks": "ENDSWITH('abc', 'c')", "duckdb": "ENDS_WITH('abc', 'c')", "presto": "ENDS_WITH('abc', 'c')", "snowflake": "ENDSWITH('abc', 'c')", "spark": "ENDSWITH('abc', 'c')", }, ) def test_parse_like_any(self): for keyword in ("LIKE", "ILIKE"): ast = self.validate_identity(f"a {keyword} ANY FUN('foo')") ast.sql() # check that this doesn't raise @mock.patch("sqlglot.generator.logger") def test_regexp_functions(self, logger): self.validate_all( "REGEXP_SUBSTR(subject, pattern, pos, occ, params, group)", write={ "bigquery": "REGEXP_EXTRACT(subject, pattern, pos, 
occ)", "hive": "REGEXP_EXTRACT(subject, pattern, group)", "presto": 'REGEXP_EXTRACT(subject, pattern, "group")', "snowflake": "REGEXP_SUBSTR(subject, pattern, pos, occ, params, group)", "spark": "REGEXP_EXTRACT(subject, pattern, group)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern)", read={ "bigquery": "REGEXP_EXTRACT(subject, pattern)", }, write={ "bigquery": "REGEXP_EXTRACT(subject, pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)", read={ "hive": "REGEXP_EXTRACT(subject, pattern)", "spark2": "REGEXP_EXTRACT(subject, pattern)", "spark": "REGEXP_EXTRACT(subject, pattern)", "databricks": "REGEXP_EXTRACT(subject, pattern)", }, write={ "hive": "REGEXP_EXTRACT(subject, pattern)", "spark2": "REGEXP_EXTRACT(subject, pattern)", "spark": "REGEXP_EXTRACT(subject, pattern)", "databricks": "REGEXP_EXTRACT(subject, pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 1)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', group)", read={ "duckdb": "REGEXP_EXTRACT(subject, pattern, group)", "hive": "REGEXP_EXTRACT(subject, pattern, group)", "presto": "REGEXP_EXTRACT(subject, pattern, group)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', group)", "spark": "REGEXP_EXTRACT(subject, pattern, group)", }, ) self.validate_identity("REGEXP_SUBSTR_ALL(subject, pattern, pos, occ, param, group)") # DuckDB transpilation tests for REGEXP_SUBSTR # DuckDB's default (no group) is semantically equivalent to group=0 self.validate_all( "REGEXP_SUBSTR(subject, pattern)", write={ "duckdb": "REGEXP_EXTRACT(subject, pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 3)", write={ "duckdb": "REGEXP_EXTRACT(NULLIF(SUBSTRING(subject, 3), ''), pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 3)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 1, 2)", write={ "duckdb": 
"ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(subject, pattern), 2)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 2)", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 1, 1, 'e')", write={ "duckdb": "REGEXP_EXTRACT(subject, pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'e')", }, ) self.validate_all( "REGEXP_SUBSTR(subject, pattern, 1, 1, 'e', 0)", write={ "duckdb": "REGEXP_EXTRACT(subject, pattern)", "snowflake": "REGEXP_SUBSTR(subject, pattern, 1, 1, 'e')", }, ) self.validate_all( "REGEXP_SUBSTR_ALL(subject, pattern)", write={ "duckdb": "REGEXP_EXTRACT_ALL(subject, pattern)", "snowflake": "REGEXP_SUBSTR_ALL(subject, pattern)", }, ) self.validate_all( "REGEXP_SUBSTR_ALL(subject, pattern, 3)", write={ "duckdb": "REGEXP_EXTRACT_ALL(SUBSTRING(subject, 3), pattern)", "snowflake": "REGEXP_SUBSTR_ALL(subject, pattern, 3)", }, ) self.validate_all( "REGEXP_SUBSTR_ALL(subject, pattern, 1, 2)", write={ "duckdb": "REGEXP_EXTRACT_ALL(subject, pattern)[2:]", "snowflake": "REGEXP_SUBSTR_ALL(subject, pattern, 1, 2)", }, ) self.validate_all( "REGEXP_SUBSTR_ALL(subject, pattern, 1, 1, 'e')", write={ "duckdb": "REGEXP_EXTRACT_ALL(subject, pattern)", "snowflake": "REGEXP_SUBSTR_ALL(subject, pattern, 1, 1, 'e')", }, ) self.validate_all( "REGEXP_SUBSTR_ALL(subject, pattern, 1, 1, 'e', 0)", write={ "duckdb": "REGEXP_EXTRACT_ALL(subject, pattern)", "snowflake": "REGEXP_SUBSTR_ALL(subject, pattern, 1, 1, 'e')", }, ) self.validate_identity("SELECT SEARCH((play, line), 'dream')") self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')") self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')") self.validate_identity( "SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'OR')" ) # AST validation tests - verify argument mapping ast = self.validate_identity("SELECT SEARCH(line, 'king')") search_ast = ast.find(exp.Search) self.assertEqual(list(search_ast.args), ["this", "expression"]) 
self.assertIsNone(search_ast.args.get("analyzer")) self.assertIsNone(search_ast.args.get("search_mode")) ast = self.validate_identity("SELECT SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER')") search_ast = ast.find(exp.Search) self.assertIsNotNone(search_ast.args.get("analyzer")) self.assertIsNone(search_ast.args.get("search_mode")) ast = self.validate_identity("SELECT SEARCH(character, 'king queen', SEARCH_MODE => 'AND')") search_ast = ast.find(exp.Search) self.assertIsNone(search_ast.args.get("analyzer")) self.assertIsNotNone(search_ast.args.get("search_mode")) # Test with arguments in different order (search_mode first, then analyzer) ast = self.validate_identity( "SELECT SEARCH(line, 'king', SEARCH_MODE => 'AND', ANALYZER => 'PATTERN_ANALYZER')", "SELECT SEARCH(line, 'king', ANALYZER => 'PATTERN_ANALYZER', SEARCH_MODE => 'AND')", ) search_ast = ast.find(exp.Search) self.assertEqual(list(search_ast.args), ["this", "expression", "search_mode", "analyzer"]) analyzer = search_ast.args.get("analyzer") self.assertIsNotNone(analyzer) search_mode = search_ast.args.get("search_mode") self.assertIsNotNone(search_mode) self.validate_identity("SELECT SEARCH_IP(col, '192.168.0.0')").selects[0].assert_is( exp.SearchIp ) self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l ')") self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1)") self.validate_identity("SELECT REGEXP_COUNT('hello world', 'l', 1, 'i')") self.validate_all( "SELECT REGEXP_COUNT('hello', 'l')", write={ "snowflake": "SELECT REGEXP_COUNT('hello', 'l')", "duckdb": "SELECT CASE WHEN 'l' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL('hello', 'l')) END", }, ) self.validate_all( "SELECT REGEXP_COUNT('hello world', 'l', 7)", write={ "snowflake": "SELECT REGEXP_COUNT('hello world', 'l', 7)", "duckdb": "SELECT CASE WHEN 'l' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('hello world', 7), 'l')) END", }, ) self.validate_all( "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')", write={ 
"snowflake": "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')", "duckdb": "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END", }, ) self.validate_all( "SELECT REGEXP_COUNT(subject, pattern)", write={ "snowflake": "SELECT REGEXP_COUNT(subject, pattern)", "duckdb": "SELECT CASE WHEN pattern = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(subject, pattern)) END", }, ) self.validate_identity("SELECT REGEXP_INSTR('abc', 'a')") self.validate_identity("SELECT REGEXP_INSTR('abc', 'a', 1, 1, 0, 'i')") # Basic transpilation self.validate_all( "SELECT REGEXP_INSTR(subject, pattern)", write={ "snowflake": "SELECT REGEXP_INSTR(subject, pattern)", "duckdb": "SELECT CASE WHEN subject IS NULL OR pattern IS NULL THEN NULL WHEN pattern = '' THEN 0 WHEN LENGTH(REGEXP_EXTRACT_ALL(subject, pattern)) < 1 THEN 0 ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(subject, pattern)[1:1], x -> LENGTH(x))), 0) + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(subject, pattern)[1:1 - 1], x -> LENGTH(x))), 0) + 0 END", }, ) # With position offset self.validate_all( "SELECT REGEXP_INSTR(subject, pattern, 5)", write={ "snowflake": "SELECT REGEXP_INSTR(subject, pattern, 5)", "duckdb": "SELECT CASE WHEN subject IS NULL OR pattern IS NULL OR 5 IS NULL THEN NULL WHEN pattern = '' THEN 0 WHEN LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(subject, 5), pattern)) < 1 THEN 0 ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(SUBSTRING(subject, 5), pattern)[1:1], x -> LENGTH(x))), 0) + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(SUBSTRING(subject, 5), pattern)[1:1 - 1], x -> LENGTH(x))), 0) + 5 - 1 END", }, ) # With occurrence self.validate_all( "SELECT REGEXP_INSTR(subject, pattern, 1, 2)", write={ "snowflake": "SELECT REGEXP_INSTR(subject, pattern, 1, 2)", "duckdb": "SELECT CASE WHEN subject IS NULL OR pattern IS NULL OR 1 IS NULL OR 2 IS NULL THEN NULL WHEN pattern = '' THEN 0 WHEN 
LENGTH(REGEXP_EXTRACT_ALL(subject, pattern)) < 2 THEN 0 ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(subject, pattern)[1:2], x -> LENGTH(x))), 0) + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(subject, pattern)[1:2 - 1], x -> LENGTH(x))), 0) + 0 END",
            },
        )

        # With flags
        self.validate_all(
            "SELECT REGEXP_INSTR(subject, pattern, 1, 1, 0, 'im')",
            write={
                "snowflake": "SELECT REGEXP_INSTR(subject, pattern, 1, 1, 0, 'im')",
                "duckdb": "SELECT CASE WHEN subject IS NULL OR pattern IS NULL OR 1 IS NULL OR 1 IS NULL OR 0 IS NULL OR 'im' IS NULL THEN NULL WHEN '(?im)' || pattern = '' THEN 0 WHEN LENGTH(REGEXP_EXTRACT_ALL(subject, '(?im)' || pattern)) < 1 THEN 0 ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(subject, '(?im)' || pattern)[1:1], x -> LENGTH(x))), 0) + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(subject, '(?im)' || pattern)[1:1 - 1], x -> LENGTH(x))), 0) + 0 END",
            },
        )

    @mock.patch("sqlglot.generator.logger")
    def test_regexp_replace(self, logger):
        """Exercise ``REGEXP_REPLACE`` with two to six arguments across dialects.

        ``sqlglot.generator.logger`` is patched for the duration of the test;
        the injected ``logger`` mock is not referenced in the body. Each
        ``validate_all`` call lists the SQL expected per ``write`` dialect,
        including the ``'g'`` flag variants for duckdb and postgres.
        """
        self.validate_all(
            "REGEXP_REPLACE(subject, pattern)",
            write={
                "bigquery": "REGEXP_REPLACE(subject, pattern, '')",
                "duckdb": "REGEXP_REPLACE(subject, pattern, '', 'g')",
                "hive": "REGEXP_REPLACE(subject, pattern, '')",
                "snowflake": "REGEXP_REPLACE(subject, pattern, '')",
                "spark": "REGEXP_REPLACE(subject, pattern, '')",
            },
        )
        self.validate_all(
            "REGEXP_REPLACE(subject, pattern, replacement)",
            read={
                "bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
                "duckdb": "REGEXP_REPLACE(subject, pattern, replacement)",
                "hive": "REGEXP_REPLACE(subject, pattern, replacement)",
                "spark": "REGEXP_REPLACE(subject, pattern, replacement)",
            },
            write={
                "bigquery": "REGEXP_REPLACE(subject, pattern, replacement)",
                "duckdb": "REGEXP_REPLACE(subject, pattern, replacement, 'g')",
                "postgres": "REGEXP_REPLACE(subject, pattern, replacement, 'g')",
                "hive": "REGEXP_REPLACE(subject, pattern, replacement)",
                "snowflake": "REGEXP_REPLACE(subject, pattern, replacement)",
                "spark": "REGEXP_REPLACE(subject, 
pattern, replacement)", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, position)", read={ "spark": "REGEXP_REPLACE(subject, pattern, replacement, position)", }, write={ "bigquery": "REGEXP_REPLACE(subject, pattern, replacement)", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement, 'g')", "postgres": "REGEXP_REPLACE(subject, pattern, replacement, position, 'g')", "hive": "REGEXP_REPLACE(subject, pattern, replacement)", "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position)", "spark": "REGEXP_REPLACE(subject, pattern, replacement, position)", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence, 'c')", write={ "bigquery": "REGEXP_REPLACE(subject, pattern, replacement)", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement, 'c')", "postgres": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence, 'c')", "hive": "REGEXP_REPLACE(subject, pattern, replacement)", "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, position, occurrence, 'c')", "spark": "REGEXP_REPLACE(subject, pattern, replacement, position)", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, 1, 0, 'c')", write={ "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, 1, 0, 'c')", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement, 'cg')", "postgres": "REGEXP_REPLACE(subject, pattern, replacement, 1, 0, 'cg')", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, 1, 1)", write={ "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, 1, 1)", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement)", "postgres": "REGEXP_REPLACE(subject, pattern, replacement, 1, 1)", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, 3, 0)", write={ "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, 3, 0)", "duckdb": "SUBSTRING(subject, 1, 2) || REGEXP_REPLACE(SUBSTRING(subject, 3), pattern, replacement, 'g')", }, ) self.validate_all( 
"REGEXP_REPLACE(subject, pattern, replacement, 3, 1)", write={ "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, 3, 1)", "duckdb": "SUBSTRING(subject, 1, 2) || REGEXP_REPLACE(SUBSTRING(subject, 3), pattern, replacement)", }, ) self.validate_all( "REGEXP_REPLACE(subject, pattern, replacement, 1, 0, 'i')", write={ "snowflake": "REGEXP_REPLACE(subject, pattern, replacement, 1, 0, 'i')", "duckdb": "REGEXP_REPLACE(subject, pattern, replacement, 'ig')", }, ) def test_replace(self): self.validate_all( "REPLACE(subject, pattern)", write={ "bigquery": "REPLACE(subject, pattern, '')", "duckdb": "REPLACE(subject, pattern, '')", "hive": "REPLACE(subject, pattern, '')", "snowflake": "REPLACE(subject, pattern, '')", "spark": "REPLACE(subject, pattern, '')", }, ) self.validate_all( "REPLACE(subject, pattern, replacement)", read={ "bigquery": "REPLACE(subject, pattern, replacement)", "duckdb": "REPLACE(subject, pattern, replacement)", "hive": "REPLACE(subject, pattern, replacement)", "spark": "REPLACE(subject, pattern, replacement)", }, write={ "bigquery": "REPLACE(subject, pattern, replacement)", "duckdb": "REPLACE(subject, pattern, replacement)", "hive": "REPLACE(subject, pattern, replacement)", "snowflake": "REPLACE(subject, pattern, replacement)", "spark": "REPLACE(subject, pattern, replacement)", }, ) def test_match_recognize(self): for window_frame in ("", "FINAL ", "RUNNING "): for row in ( "ONE ROW PER MATCH", "ALL ROWS PER MATCH", "ALL ROWS PER MATCH SHOW EMPTY MATCHES", "ALL ROWS PER MATCH OMIT EMPTY MATCHES", "ALL ROWS PER MATCH WITH UNMATCHED ROWS", ): for after in ( "AFTER MATCH SKIP", "AFTER MATCH SKIP PAST LAST ROW", "AFTER MATCH SKIP TO NEXT ROW", "AFTER MATCH SKIP TO FIRST x", "AFTER MATCH SKIP TO LAST x", ): with self.subTest( f"MATCH_RECOGNIZE with window frame {window_frame}, rows {row}, after {after}: " ): self.validate_identity( f"""SELECT * FROM x MATCH_RECOGNIZE ( PARTITION BY a, b ORDER BY x DESC MEASURES {window_frame}y AS b {row} {after} 
PATTERN (^ S1 S2*? ( {{- S3 -}} S4 )+ | PERMUTE(S1, S2){{1,2}} $) DEFINE x AS y )""", pretty=True, ) def test_show_users(self): self.validate_identity("SHOW USERS") self.validate_identity("SHOW TERSE USERS") self.validate_identity("SHOW USERS LIKE '_foo%' STARTS WITH 'bar' LIMIT 5 FROM 'baz'") def test_show_databases(self): self.validate_identity("SHOW TERSE DATABASES") self.validate_identity( "SHOW TERSE DATABASES HISTORY LIKE 'foo' STARTS WITH 'bla' LIMIT 5 FROM 'bob' WITH PRIVILEGES USAGE, MODIFY" ) ast = parse_one("SHOW DATABASES IN ACCOUNT", read="snowflake") self.assertEqual(ast.this, "DATABASES") self.assertEqual(ast.args.get("scope_kind"), "ACCOUNT") def test_show_file_formats(self): self.validate_identity("SHOW FILE FORMATS") self.validate_identity("SHOW FILE FORMATS LIKE 'foo' IN DATABASE db1") self.validate_identity("SHOW FILE FORMATS LIKE 'foo' IN SCHEMA db1.schema1") ast = parse_one("SHOW FILE FORMATS IN ACCOUNT", read="snowflake") self.assertEqual(ast.this, "FILE FORMATS") self.assertEqual(ast.args.get("scope_kind"), "ACCOUNT") def test_show_functions(self): self.validate_identity("SHOW FUNCTIONS") self.validate_identity("SHOW FUNCTIONS LIKE 'foo' IN CLASS bla") ast = parse_one("SHOW FUNCTIONS IN ACCOUNT", read="snowflake") self.assertEqual(ast.this, "FUNCTIONS") self.assertEqual(ast.args.get("scope_kind"), "ACCOUNT") def test_show_procedures(self): self.validate_identity("SHOW PROCEDURES") self.validate_identity("SHOW PROCEDURES LIKE 'foo' IN APPLICATION app") self.validate_identity("SHOW PROCEDURES LIKE 'foo' IN APPLICATION PACKAGE pkg") ast = parse_one("SHOW PROCEDURES IN ACCOUNT", read="snowflake") self.assertEqual(ast.this, "PROCEDURES") self.assertEqual(ast.args.get("scope_kind"), "ACCOUNT") def test_show_stages(self): self.validate_identity("SHOW STAGES") self.validate_identity("SHOW STAGES LIKE 'foo' IN DATABASE db1") self.validate_identity("SHOW STAGES LIKE 'foo' IN SCHEMA db1.schema1") ast = parse_one("SHOW STAGES IN ACCOUNT", 
read="snowflake") self.assertEqual(ast.this, "STAGES") self.assertEqual(ast.args.get("scope_kind"), "ACCOUNT") def test_show_warehouses(self): self.validate_identity("SHOW WAREHOUSES") self.validate_identity("SHOW WAREHOUSES LIKE 'foo' WITH PRIVILEGES USAGE, MODIFY") ast = parse_one("SHOW WAREHOUSES", read="snowflake") self.assertEqual(ast.this, "WAREHOUSES") def test_show_schemas(self): self.validate_identity( "show terse schemas in database db1 starts with 'a' limit 10 from 'b'", "SHOW TERSE SCHEMAS IN DATABASE db1 STARTS WITH 'a' LIMIT 10 FROM 'b'", ) ast = parse_one("SHOW SCHEMAS IN DATABASE db1", read="snowflake") self.assertEqual(ast.args.get("scope_kind"), "DATABASE") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), "db1") def test_show_objects(self): self.validate_identity( "show terse objects in schema db1.schema1 starts with 'a' limit 10 from 'b'", "SHOW TERSE OBJECTS IN SCHEMA db1.schema1 STARTS WITH 'a' LIMIT 10 FROM 'b'", ) self.validate_identity( "show terse objects in db1.schema1 starts with 'a' limit 10 from 'b'", "SHOW TERSE OBJECTS IN SCHEMA db1.schema1 STARTS WITH 'a' LIMIT 10 FROM 'b'", ) ast = parse_one("SHOW OBJECTS IN db1.schema1", read="snowflake") self.assertEqual(ast.args.get("scope_kind"), "SCHEMA") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), "db1.schema1") def test_show_columns(self): self.validate_identity("SHOW COLUMNS") self.validate_identity("SHOW COLUMNS IN TABLE dt_test") self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN TABLE dt_test") self.validate_identity("SHOW COLUMNS IN VIEW") self.validate_identity("SHOW COLUMNS LIKE '_foo%' IN VIEW dt_test") ast = parse_one("SHOW COLUMNS LIKE '_testing%' IN dt_test", read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), "dt_test") self.assertEqual(ast.find(exp.Literal).sql(dialect="snowflake"), "'_testing%'") def test_show_tables(self): self.validate_identity( "SHOW TABLES LIKE 'line%' IN tpch.public", "SHOW TABLES LIKE 'line%' 
IN SCHEMA tpch.public", ) self.validate_identity( "SHOW TABLES HISTORY IN tpch.public", "SHOW TABLES HISTORY IN SCHEMA tpch.public", ) self.validate_identity( "show terse tables in schema db1.schema1 starts with 'a' limit 10 from 'b'", "SHOW TERSE TABLES IN SCHEMA db1.schema1 STARTS WITH 'a' LIMIT 10 FROM 'b'", ) self.validate_identity( "show terse tables in db1.schema1 starts with 'a' limit 10 from 'b'", "SHOW TERSE TABLES IN SCHEMA db1.schema1 STARTS WITH 'a' LIMIT 10 FROM 'b'", ) ast = parse_one("SHOW TABLES IN db1.schema1", read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), "db1.schema1") def test_show_primary_keys(self): self.validate_identity("SHOW PRIMARY KEYS") self.validate_identity("SHOW PRIMARY KEYS IN ACCOUNT") self.validate_identity("SHOW PRIMARY KEYS IN DATABASE") self.validate_identity("SHOW PRIMARY KEYS IN DATABASE foo") self.validate_identity("SHOW PRIMARY KEYS IN TABLE") self.validate_identity("SHOW PRIMARY KEYS IN TABLE foo") self.validate_identity( 'SHOW PRIMARY KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW PRIMARY KEYS IN TABLE "TEST"."PUBLIC"."foo"', ) self.validate_identity( 'SHOW TERSE PRIMARY KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW PRIMARY KEYS IN TABLE "TEST"."PUBLIC"."foo"', ) ast = parse_one('SHOW PRIMARY KEYS IN "TEST"."PUBLIC"."foo"', read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), '"TEST"."PUBLIC"."foo"') def test_show_views(self): self.validate_identity("SHOW TERSE VIEWS") self.validate_identity("SHOW VIEWS") self.validate_identity("SHOW VIEWS LIKE 'foo%'") self.validate_identity("SHOW VIEWS IN ACCOUNT") self.validate_identity("SHOW VIEWS IN DATABASE") self.validate_identity("SHOW VIEWS IN DATABASE foo") self.validate_identity("SHOW VIEWS IN SCHEMA foo") self.validate_identity( "SHOW VIEWS IN foo", "SHOW VIEWS IN SCHEMA foo", ) ast = parse_one("SHOW VIEWS IN db1.schema1", read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), "db1.schema1") def 
test_show_unique_keys(self): self.validate_identity("SHOW UNIQUE KEYS") self.validate_identity("SHOW UNIQUE KEYS IN ACCOUNT") self.validate_identity("SHOW UNIQUE KEYS IN DATABASE") self.validate_identity("SHOW UNIQUE KEYS IN DATABASE foo") self.validate_identity("SHOW UNIQUE KEYS IN TABLE") self.validate_identity("SHOW UNIQUE KEYS IN TABLE foo") self.validate_identity( 'SHOW UNIQUE KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW UNIQUE KEYS IN SCHEMA "TEST"."PUBLIC"."foo"', ) self.validate_identity( 'SHOW TERSE UNIQUE KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW UNIQUE KEYS IN SCHEMA "TEST"."PUBLIC"."foo"', ) ast = parse_one('SHOW UNIQUE KEYS IN "TEST"."PUBLIC"."foo"', read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), '"TEST"."PUBLIC"."foo"') def test_show_imported_keys(self): self.validate_identity("SHOW IMPORTED KEYS") self.validate_identity("SHOW IMPORTED KEYS IN ACCOUNT") self.validate_identity("SHOW IMPORTED KEYS IN DATABASE") self.validate_identity("SHOW IMPORTED KEYS IN DATABASE foo") self.validate_identity("SHOW IMPORTED KEYS IN TABLE") self.validate_identity("SHOW IMPORTED KEYS IN TABLE foo") self.validate_identity( 'SHOW IMPORTED KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW IMPORTED KEYS IN SCHEMA "TEST"."PUBLIC"."foo"', ) self.validate_identity( 'SHOW TERSE IMPORTED KEYS IN "TEST"."PUBLIC"."foo"', 'SHOW IMPORTED KEYS IN SCHEMA "TEST"."PUBLIC"."foo"', ) ast = parse_one('SHOW IMPORTED KEYS IN "TEST"."PUBLIC"."foo"', read="snowflake") self.assertEqual(ast.find(exp.Table).sql(dialect="snowflake"), '"TEST"."PUBLIC"."foo"') def test_show_sequences(self): self.validate_identity("SHOW TERSE SEQUENCES") self.validate_identity("SHOW SEQUENCES") self.validate_identity("SHOW SEQUENCES LIKE '_foo%' IN ACCOUNT") self.validate_identity("SHOW SEQUENCES LIKE '_foo%' IN DATABASE") self.validate_identity("SHOW SEQUENCES LIKE '_foo%' IN DATABASE foo") self.validate_identity("SHOW SEQUENCES LIKE '_foo%' IN SCHEMA") self.validate_identity("SHOW SEQUENCES LIKE '_foo%' 
IN SCHEMA foo") self.validate_identity( "SHOW SEQUENCES LIKE '_foo%' IN foo", "SHOW SEQUENCES LIKE '_foo%' IN SCHEMA foo", ) ast = parse_one("SHOW SEQUENCES IN dt_test", read="snowflake") self.assertEqual(ast.args.get("scope_kind"), "SCHEMA") def test_storage_integration(self): self.validate_identity( """CREATE STORAGE INTEGRATION s3_int TYPE=EXTERNAL_STAGE STORAGE_PROVIDER='S3' STORAGE_AWS_ROLE_ARN='arn:aws:iam::001234567890:role/myrole' ENABLED=TRUE STORAGE_ALLOWED_LOCATIONS=('s3://mybucket1/path1/', 's3://mybucket2/path2/')""", pretty=True, ).this.assert_is(exp.Identifier) def test_swap(self): ast = parse_one("ALTER TABLE a SWAP WITH b", read="snowflake") assert isinstance(ast, exp.Alter) assert isinstance(ast.args["actions"][0], exp.SwapTable) def test_try_cast(self): self.validate_all("TRY_CAST('foo' AS VARCHAR)", read={"hive": "CAST('foo' AS STRING)"}) self.validate_all("CAST(5 + 5 AS VARCHAR)", read={"hive": "CAST(5 + 5 AS STRING)"}) self.validate_all( "CAST(TRY_CAST('2020-01-01' AS DATE) AS VARCHAR)", read={ "hive": "CAST(CAST('2020-01-01' AS DATE) AS STRING)", "snowflake": "CAST(TRY_CAST('2020-01-01' AS DATE) AS VARCHAR)", }, ) self.validate_all( "TRY_CAST('val' AS VARCHAR)", read={ "hive": "CAST('val' AS STRING)", "snowflake": "TRY_CAST('val' AS VARCHAR)", }, ) self.validate_identity("SELECT TRY_CAST(x AS DOUBLE)") self.validate_identity( "SELECT TRY_CAST(FOO() AS TEXT)", "SELECT TRY_CAST(FOO() AS VARCHAR)" ) expression = parse_one("SELECT CAST(t.x AS STRING) FROM t", read="hive") for value_type in ("string", "int"): with self.subTest( f"Testing Hive -> Snowflake CAST/TRY_CAST conversion for {value_type}" ): func = "TRY_CAST" if value_type == "string" else "CAST" expression = annotate_types(expression, schema={"t": {"x": value_type}}) self.assertEqual( expression.sql(dialect="snowflake"), f"SELECT {func}(t.x AS VARCHAR) FROM t" ) def test_decfloat(self): self.validate_all( "SELECT CAST(1.5 AS DECFLOAT)", write={ "snowflake": "SELECT CAST(1.5 AS 
DECFLOAT)", "duckdb": "SELECT CAST(1.5 AS DECIMAL(38, 5))", }, ) self.validate_all( "CREATE TABLE t (x DECFLOAT)", write={ "snowflake": "CREATE TABLE t (x DECFLOAT)", "duckdb": "CREATE TABLE t (x DECIMAL(38, 5))", }, ) def test_copy(self): self.validate_identity("COPY INTO test (c1) FROM (SELECT $1.c1 FROM @mystage)") self.validate_identity( """COPY INTO temp FROM @random_stage/path/ FILE_FORMAT = (TYPE=CSV FIELD_DELIMITER='|' NULL_IF=('str1', 'str2') FIELD_OPTIONALLY_ENCLOSED_BY='"' TIMESTAMP_FORMAT='TZHTZM YYYY-MM-DD HH24:MI:SS.FF9' DATE_FORMAT='TZHTZM YYYY-MM-DD HH24:MI:SS.FF9' BINARY_FORMAT=BASE64) VALIDATION_MODE = 'RETURN_3_ROWS'""" ) self.validate_identity( """COPY INTO load1 FROM @%load1/data1/ CREDENTIALS = (AWS_KEY_ID='id' AWS_SECRET_KEY='key' AWS_TOKEN='token') FILES = ('test1.csv', 'test2.csv') FORCE = TRUE""" ) self.validate_identity( """COPY INTO mytable FROM 'azure://myaccount.blob.core.windows.net/mycontainer/data/files' CREDENTIALS = (AZURE_SAS_TOKEN='token') ENCRYPTION = (TYPE='AZURE_CSE' MASTER_KEY='kPx...') FILE_FORMAT = (FORMAT_NAME=my_csv_format)""" ) self.validate_identity( """COPY INTO mytable (col1, col2) FROM 's3://mybucket/data/files' STORAGE_INTEGRATION = "storage" ENCRYPTION = (TYPE='NONE' MASTER_KEY='key') FILES = ('file1', 'file2') PATTERN = 'pattern' FILE_FORMAT = (FORMAT_NAME=my_csv_format NULL_IF=('')) PARSE_HEADER = TRUE""" ) self.validate_identity( """COPY INTO @my_stage/result/data FROM (SELECT * FROM orderstiny) FILE_FORMAT = (TYPE='csv')""" ) self.validate_identity("COPY INTO mytable FILE_FORMAT = (TYPE='csv')") self.validate_identity( """COPY INTO MY_DATABASE.MY_SCHEMA.MY_TABLE FROM @MY_DATABASE.MY_SCHEMA.MY_STAGE/my_path FILE_FORMAT = (FORMAT_NAME=MY_DATABASE.MY_SCHEMA.MY_FILE_FORMAT)""" ) self.validate_all( """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE CREDENTIALS = () FILE_FORMAT = (TYPE = CSV COMPRESSION = NONE NULL_IF = ('') FIELD_OPTIONALLY_ENCLOSED_BY = '"') HEADER = TRUE OVERWRITE = TRUE SINGLE = TRUE 
""", write={ "": """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE CREDENTIALS = () WITH ( FILE_FORMAT = (TYPE=CSV COMPRESSION=NONE NULL_IF=( '' ) FIELD_OPTIONALLY_ENCLOSED_BY='"'), HEADER TRUE, OVERWRITE TRUE, SINGLE TRUE )""", "snowflake": """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE CREDENTIALS = () FILE_FORMAT = (TYPE=CSV COMPRESSION=NONE NULL_IF=( '' ) FIELD_OPTIONALLY_ENCLOSED_BY='"') HEADER = TRUE OVERWRITE = TRUE SINGLE = TRUE""", }, pretty=True, ) self.validate_all( """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE STORAGE_INTEGRATION = S3_INTEGRATION FILE_FORMAT = (TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"') HEADER = TRUE OVERWRITE = TRUE SINGLE = TRUE """, write={ "": """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE STORAGE_INTEGRATION = S3_INTEGRATION WITH (FILE_FORMAT = (TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"'), HEADER TRUE, OVERWRITE TRUE, SINGLE TRUE)""", "snowflake": """COPY INTO 's3://example/data.csv' FROM EXTRA.EXAMPLE.TABLE STORAGE_INTEGRATION = S3_INTEGRATION FILE_FORMAT = (TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"') HEADER = TRUE OVERWRITE = TRUE SINGLE = TRUE""", }, ) copy_ast = parse_one( """COPY INTO 's3://example/contacts.csv' FROM db.tbl STORAGE_INTEGRATION = PROD_S3_SIDETRADE_INTEGRATION FILE_FORMAT = (FORMAT_NAME=my_csv_format TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"') MATCH_BY_COLUMN_NAME = CASE_SENSITIVE OVERWRITE = TRUE SINGLE = TRUE INCLUDE_METADATA = (col1 = METADATA$START_SCAN_TIME)""", read="snowflake", ) self.assertEqual( quote_identifiers(copy_ast, dialect="snowflake").sql(dialect="snowflake"), """COPY INTO 's3://example/contacts.csv' FROM "db"."tbl" STORAGE_INTEGRATION = "PROD_S3_SIDETRADE_INTEGRATION" FILE_FORMAT = (FORMAT_NAME="my_csv_format" TYPE=CSV COMPRESSION=NONE NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"') MATCH_BY_COLUMN_NAME = CASE_SENSITIVE 
OVERWRITE = TRUE SINGLE = TRUE INCLUDE_METADATA = ("col1" = "METADATA$START_SCAN_TIME")""", ) def test_put_to_stage(self): self.validate_identity('PUT \'file:///dir/tmp.csv\' @"my_DB"."schEMA1"."MYstage"') # PUT with file path and stage ref containing spaces (wrapped in single quotes) ast = parse_one("PUT 'file://my file.txt' '@s1/my folder'", read="snowflake") self.assertIsInstance(ast, exp.Put) self.assertEqual(ast.this, exp.Literal(this="file://my file.txt", is_string=True)) self.assertEqual(ast.args["target"], exp.Var(this="'@s1/my folder'")) self.assertEqual(ast.sql("snowflake"), "PUT 'file://my file.txt' '@s1/my folder'") # expression with additional properties ast = parse_one( "PUT 'file:///tmp/my.txt' @stage1/folder PARALLEL = 1 AUTO_COMPRESS=false source_compression=gzip OVERWRITE=TRUE", read="snowflake", ) self.assertIsInstance(ast, exp.Put) self.assertEqual(ast.this, exp.Literal(this="file:///tmp/my.txt", is_string=True)) self.assertEqual(ast.args["target"], exp.Var(this="@stage1/folder")) properties = ast.args.get("properties") props_dict = {prop.this.this: prop.args["value"].this for prop in properties.expressions} self.assertEqual( props_dict, { "PARALLEL": "1", "AUTO_COMPRESS": False, "source_compression": "gzip", "OVERWRITE": True, }, ) # validate identity for different args and properties self.validate_identity("PUT 'file:///dir/tmp.csv' @s1/test") # the unquoted URI variant is not fully supported yet self.validate_identity("PUT file:///dir/tmp.csv @%table", check_command_warning=True) self.validate_identity( "PUT file:///dir/tmp.csv @s1/test PARALLEL=1 AUTO_COMPRESS=FALSE source_compression=gzip OVERWRITE=TRUE", check_command_warning=True, ) def test_get_from_stage(self): self.validate_identity('GET @"my_DB"."schEMA1"."MYstage" \'file:///dir/tmp.csv\'') self.validate_identity("GET @s1/test 'file:///dir/tmp.csv'").assert_is(exp.Get) # GET with file path and stage ref containing spaces (wrapped in single quotes) ast = parse_one("GET '@s1/my folder' 
'file://my file.txt'", read="snowflake") self.assertIsInstance(ast, exp.Get) self.assertEqual(ast.args["target"], exp.Var(this="'@s1/my folder'")) self.assertEqual(ast.this, exp.Literal(this="file://my file.txt", is_string=True)) self.assertEqual(ast.sql("snowflake"), "GET '@s1/my folder' 'file://my file.txt'") # expression with additional properties ast = parse_one("GET @stage1/folder 'file:///tmp/my.txt' PARALLEL = 1", read="snowflake") self.assertIsInstance(ast, exp.Get) self.assertEqual(ast.args["target"], exp.Var(this="@stage1/folder")) self.assertEqual(ast.this, exp.Literal(this="file:///tmp/my.txt", is_string=True)) properties = ast.args.get("properties") props_dict = {prop.this.this: prop.args["value"].this for prop in properties.expressions} self.assertEqual(props_dict, {"PARALLEL": "1"}) # the unquoted URI variant is not fully supported yet self.validate_identity("GET @%table file:///dir/tmp.csv", check_command_warning=True) self.validate_identity( "GET @s1/test file:///dir/tmp.csv PARALLEL=1", check_command_warning=True, ) def test_querying_semi_structured_data(self): self.validate_identity("SELECT $1") self.validate_identity("SELECT $1.elem") self.validate_identity("SELECT $1:a.b", "SELECT GET_PATH($1, 'a.b')") self.validate_identity("SELECT t.$23:a.b", "SELECT GET_PATH(t.$23, 'a.b')") self.validate_identity("SELECT t.$17:a[0].b[0].c", "SELECT GET_PATH(t.$17, 'a[0].b[0].c')") self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"a": [1, 2]}') AS v), s AS (SELECT 1 AS x) SELECT t.v:a[s.x] FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"a": [1, 2]}') AS v), s AS (SELECT 1 AS x) SELECT GET_PATH(t.v, 'a')[s.x] FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"a": [1, 2]}') AS v), s AS (SELECT 1 AS x) SELECT (t.v -> '$.a')[s.x] FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT t.v:c[s.x]:r FROM t, s""", write={ "snowflake": """WITH t AS (SELECT 
PARSE_JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT GET_PATH(GET_PATH(t.v, 'c')[s.x], 'r') FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT (t.v -> '$.c')[s.x] -> '$.r' FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": 1}}]}') AS v), s AS (SELECT 0 AS x) SELECT t.v:c[s.x]:r:d::varchar FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": 1}}]}') AS v), s AS (SELECT 0 AS x) SELECT CAST(GET_PATH(GET_PATH(t.v, 'c')[s.x], 'r.d') AS VARCHAR) FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"c": [{"r": {"d": 1}}]}') AS v), s AS (SELECT 0 AS x) SELECT CAST((t.v -> '$.c')[s.x] -> '$.r.d' AS TEXT) FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [1, 2]}}') AS v), s AS (SELECT 1 AS x) SELECT t.v:a:b[s.x] FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [1, 2]}}') AS v), s AS (SELECT 1 AS x) SELECT GET_PATH(t.v, 'a.b')[s.x] FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"a": {"b": [1, 2]}}') AS v), s AS (SELECT 1 AS x) SELECT (t.v -> '$.a.b')[s.x] FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT t.v:c[s.x].r FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT GET_PATH(GET_PATH(t.v, 'c')[s.x], 'r') FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"c": [{"r": 1}]}') AS v), s AS (SELECT 0 AS x) SELECT (t.v -> '$.c')[s.x] -> '$.r' FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": 1}}]}') AS v), s AS (SELECT 0 AS x) SELECT t.v:c[s.x].r.d FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": 1}}]}') AS v), s AS (SELECT 0 AS x) SELECT GET_PATH(GET_PATH(t.v, 'c')[s.x], 'r.d') FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"c": [{"r": {"d": 
1}}]}') AS v), s AS (SELECT 0 AS x) SELECT (t.v -> '$.c')[s.x] -> '$.r.d' FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": {"e": 1}}}]}') AS v), s AS (SELECT 0 AS x) SELECT t.v:c[s.x].r.d.e FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"c": [{"r": {"d": {"e": 1}}}]}') AS v), s AS (SELECT 0 AS x) SELECT GET_PATH(GET_PATH(t.v, 'c')[s.x], 'r.d.e') FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"c": [{"r": {"d": {"e": 1}}}]}') AS v), s AS (SELECT 0 AS x) SELECT (t.v -> '$.c')[s.x] -> '$.r.d.e' FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [{"r": {"d": 1}}]}}') AS v), s AS (SELECT 0 AS x) SELECT t.v:a.b[s.x].r.d FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [{"r": {"d": 1}}]}}') AS v), s AS (SELECT 0 AS x) SELECT GET_PATH(GET_PATH(t.v, 'a.b')[s.x], 'r.d') FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"a": {"b": [{"r": {"d": 1}}]}}') AS v), s AS (SELECT 0 AS x) SELECT (t.v -> '$.a.b')[s.x] -> '$.r.d' FROM t, s""", }, ) self.validate_all( """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [{"r": {"d": [10, 20, 30]}}]}}') AS v), s AS (SELECT 0 AS x, 2 AS y) SELECT t.v:a.b[s.x].r.d[s.y] FROM t, s""", write={ "snowflake": """WITH t AS (SELECT PARSE_JSON('{"a": {"b": [{"r": {"d": [10, 20, 30]}}]}}') AS v), s AS (SELECT 0 AS x, 2 AS y) SELECT GET_PATH(GET_PATH(t.v, 'a.b')[s.x], 'r.d')[s.y] FROM t, s""", "duckdb": """WITH t AS (SELECT JSON('{"a": {"b": [{"r": {"d": [10, 20, 30]}}]}}') AS v), s AS (SELECT 0 AS x, 2 AS y) SELECT ((t.v -> '$.a.b')[s.x] -> '$.r.d')[s.y] FROM t, s""", }, ) self.validate_all( """ SELECT col:"customer's department" """, write={ "snowflake": """SELECT GET_PATH(col, '["customer\\'s department"]')""", "postgres": "SELECT JSON_EXTRACT_PATH(col, 'customer''s department')", }, ) def test_alter_set_unset(self): self.validate_identity("ALTER TABLE tbl SET DATA_RETENTION_TIME_IN_DAYS=1") self.validate_identity("ALTER TABLE 
tbl SET DEFAULT_DDL_COLLATION='test'") self.validate_identity("ALTER TABLE foo SET COMMENT='bar'") self.validate_identity("ALTER TABLE foo SET CHANGE_TRACKING=FALSE") self.validate_identity("ALTER TABLE table1 SET TAG foo.bar = 'baz'") self.validate_identity("ALTER TABLE IF EXISTS foo SET TAG a = 'a', b = 'b', c = 'c'") self.validate_identity( """ALTER TABLE tbl SET STAGE_FILE_FORMAT = (TYPE=CSV FIELD_DELIMITER='|' NULL_IF=('') FIELD_OPTIONALLY_ENCLOSED_BY='"' TIMESTAMP_FORMAT='TZHTZM YYYY-MM-DD HH24:MI:SS.FF9' DATE_FORMAT='TZHTZM YYYY-MM-DD HH24:MI:SS.FF9' BINARY_FORMAT=BASE64)""", ) self.validate_identity( """ALTER TABLE tbl SET STAGE_COPY_OPTIONS = (ON_ERROR=SKIP_FILE SIZE_LIMIT=5 PURGE=TRUE MATCH_BY_COLUMN_NAME=CASE_SENSITIVE)""" ) self.validate_identity("ALTER TABLE foo UNSET TAG a, b, c") self.validate_identity("ALTER TABLE foo UNSET DATA_RETENTION_TIME_IN_DAYS, CHANGE_TRACKING") def test_alter_session(self): expr = self.validate_identity( "ALTER SESSION SET autocommit = FALSE, QUERY_TAG = 'qtag', JSON_INDENT = 1" ) self.assertEqual( expr.find(exp.AlterSession), exp.AlterSession( expressions=[ exp.SetItem( this=exp.EQ( this=exp.Column(this=exp.Identifier(this="autocommit", quoted=False)), expression=exp.Boolean(this=False), ), ), exp.SetItem( this=exp.EQ( this=exp.Column(this=exp.Identifier(this="QUERY_TAG", quoted=False)), expression=exp.Literal(this="qtag", is_string=True), ), ), exp.SetItem( this=exp.EQ( this=exp.Column(this=exp.Identifier(this="JSON_INDENT", quoted=False)), expression=exp.Literal(this="1", is_string=False), ), ), ], unset=False, ), ) expr = self.validate_identity("ALTER SESSION UNSET autocommit, QUERY_TAG") self.assertEqual( expr.find(exp.AlterSession), exp.AlterSession( expressions=[ exp.SetItem(this=exp.Identifier(this="autocommit", quoted=False)), exp.SetItem(this=exp.Identifier(this="QUERY_TAG", quoted=False)), ], unset=True, ), ) def test_from_changes(self): self.validate_identity( """SELECT C1 FROM t1 CHANGES (INFORMATION => 
APPEND_ONLY) AT (STREAM => 's1') END (TIMESTAMP => $ts2)""" ) self.validate_identity( """SELECT C1 FROM t1 CHANGES (INFORMATION => APPEND_ONLY) BEFORE (STATEMENT => 'STMT_ID') END (TIMESTAMP => $ts2)""" ) self.validate_identity( """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 00:00:00+00:00')) END (TIMESTAMP => TO_TIMESTAMP_TZ('2024-07-01 14:28:59.999999+00:00'))""", """SELECT 1 FROM some_table CHANGES (INFORMATION => APPEND_ONLY) AT (TIMESTAMP => CAST('2024-07-01 00:00:00+00:00' AS TIMESTAMPTZ)) END (TIMESTAMP => CAST('2024-07-01 14:28:59.999999+00:00' AS TIMESTAMPTZ))""", ) def test_grant(self): grant_cmds = [ "GRANT SELECT ON FUTURE TABLES IN DATABASE d1 TO ROLE r1", "GRANT INSERT, DELETE ON FUTURE TABLES IN SCHEMA d1.s1 TO ROLE r2", "GRANT SELECT ON ALL TABLES IN SCHEMA mydb.myschema to ROLE analyst", "GRANT SELECT, INSERT ON FUTURE TABLES IN SCHEMA mydb.myschema TO ROLE role1", "GRANT CREATE MATERIALIZED VIEW ON SCHEMA mydb.myschema TO DATABASE ROLE mydb.dr1", ] for sql in grant_cmds: with self.subTest(f"Testing Snowflake's GRANT command statement: {sql}"): self.validate_identity(sql, check_command_warning=True) self.validate_identity( "GRANT ALL PRIVILEGES ON FUNCTION mydb.myschema.ADD5(number) TO ROLE analyst" ) def test_revoke(self): revoke_cmds = [ "REVOKE SELECT ON FUTURE TABLES IN DATABASE d1 FROM ROLE r1", "REVOKE INSERT, DELETE ON FUTURE TABLES IN SCHEMA d1.s1 FROM ROLE r2", "REVOKE SELECT ON ALL TABLES IN SCHEMA mydb.myschema FROM ROLE analyst", "REVOKE SELECT, INSERT ON FUTURE TABLES IN SCHEMA mydb.myschema FROM ROLE role1", "REVOKE CREATE MATERIALIZED VIEW ON SCHEMA mydb.myschema FROM DATABASE ROLE mydb.dr1", ] for sql in revoke_cmds: with self.subTest(f"Testing Snowflake's REVOKE command statement: {sql}"): self.validate_identity(sql, check_command_warning=True) self.validate_identity( "REVOKE ALL PRIVILEGES ON FUNCTION mydb.myschema.ADD5(number) FROM ROLE analyst" ) def 
test_window_function_arg(self): query = "SELECT * FROM TABLE(db.schema.FUNC(a) OVER ())" ast = self.parse_one(query) window = ast.find(exp.Window) self.assertEqual(ast.sql("snowflake"), query) self.assertEqual(len(list(ast.find_all(exp.Column))), 1) self.assertEqual(window.this.sql("snowflake"), "db.schema.FUNC(a)") def test_offset_without_limit(self): self.validate_all( "SELECT 1 ORDER BY 1 LIMIT NULL OFFSET 0", read={ "trino": "SELECT 1 ORDER BY 1 OFFSET 0", }, ) def test_listagg(self): self.validate_identity("LISTAGG(data['some_field'], ',')") for distinct in ("", "DISTINCT "): self.validate_all( f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t", read={ "trino": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t", "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t", }, write={ "snowflake": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t", "trino": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|') WITHIN GROUP (ORDER BY col2) FROM t", "duckdb": f"SELECT LISTAGG({distinct}col, '|SEPARATOR|' ORDER BY col2) FROM t", }, ) def test_rely_options(self): for option in ("NORELY", "RELY"): self.validate_identity( f"CREATE TABLE t (col1 INT PRIMARY KEY {option}, col2 INT UNIQUE {option}, col3 INT NOT NULL FOREIGN KEY REFERENCES other_t (id) {option})" ) self.validate_identity( f"CREATE TABLE t (col1 INT, col2 INT, col3 INT, PRIMARY KEY (col1) {option}, UNIQUE (col1, col2) {option}, FOREIGN KEY (col3) REFERENCES other_t (id) {option})" ) def test_parameter(self): expr = self.validate_identity("SELECT :1") self.assertEqual(expr.find(exp.Placeholder), exp.Placeholder(this="1")) self.validate_identity("SELECT :1, :2") self.validate_identity("SELECT :1 + :2") def test_max_by_min_by(self): max_by = self.validate_identity("MAX_BY(DISTINCT selected_col, filtered_col)") min_by = self.validate_identity("MIN_BY(DISTINCT selected_col, filtered_col)") for node in 
(max_by, min_by): self.assertEqual(len(node.this.expressions), 1) self.assertIsInstance(node.expression, exp.Column) # Test 3-argument case (returns array) max_by_3 = self.validate_identity("MAX_BY(selected_col, filtered_col, 5)") min_by_3 = self.validate_identity("MIN_BY(selected_col, filtered_col, 3)") for node in (max_by_3, min_by_3): with self.subTest(f"Checking count arg of {node.sql('snowflake')}"): self.assertIsNotNone(node.args.get("count")) self.validate_all( "SELECT MAX_BY(a, b) FROM t", write={ "snowflake": "SELECT MAX_BY(a, b) FROM t", "duckdb": "SELECT ARG_MAX(a, b) FROM t", }, ) self.validate_all( "SELECT MIN_BY(a, b) FROM t", write={ "snowflake": "SELECT MIN_BY(a, b) FROM t", "duckdb": "SELECT ARG_MIN(a, b) FROM t", }, ) def test_create_view_copy_grants(self): # for normal views, 'COPY GRANTS' goes *after* the column list. ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax self.validate_identity( "CREATE OR REPLACE VIEW FOO (A, B) COPY GRANTS AS SELECT A, B FROM TBL" ) # for materialized views, 'COPY GRANTS' must go *before* the column list or an error will be thrown. 
ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax self.validate_identity( "CREATE OR REPLACE MATERIALIZED VIEW FOO COPY GRANTS (A, B) AS SELECT A, B FROM TBL" ) # check that only 'COPY GRANTS' goes before the column list and other properties still go after self.validate_identity( "CREATE OR REPLACE MATERIALIZED VIEW FOO COPY GRANTS (A, B) COMMENT='foo' TAG (a='b') AS SELECT A, B FROM TBL" ) # no COPY GRANTS self.validate_identity("CREATE OR REPLACE VIEW FOO (A, B) AS SELECT A, B FROM TBL") self.validate_identity( "CREATE OR REPLACE MATERIALIZED VIEW FOO (A, B) AS SELECT A, B FROM TBL" ) def test_semantic_view(self): for dimensions, metrics, where, facts in [ (None, None, None, None), (None, None, None, "a.a"), ("DATE_PART('year', a.b)", None, None, None), (None, "a.b, a.c", None, None), (None, None, None, "a.d, a.e"), ("a.b, a.c", "a.b, a.c", None, None), ("a.b", "a.b, a.c", "a.c > 5", None), ("a.b", None, "a.c > 5", "a.d"), ]: with self.subTest( f"Testing Snowflake's SEMANTIC_VIEW command statement: {dimensions}, {metrics}, {facts} {where}" ): dimensions_str = f" DIMENSIONS {dimensions}" if dimensions else "" metrics_str = f" METRICS {metrics}" if metrics else "" fact_str = f" FACTS {facts}" if facts else "" where_str = f" WHERE {where}" if where else "" self.validate_identity( f"SELECT * FROM SEMANTIC_VIEW(tbl{metrics_str}{dimensions_str}{fact_str}{where_str}) ORDER BY foo" ) self.validate_identity( f"SELECT * FROM SEMANTIC_VIEW(tbl{dimensions_str}{fact_str}{metrics_str}{where_str})", f"SELECT * FROM SEMANTIC_VIEW(tbl{metrics_str}{dimensions_str}{fact_str}{where_str})", ) self.validate_identity( "SELECT * FROM SEMANTIC_VIEW(foo METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE a.b > '1995-01-01')", """SELECT * FROM SEMANTIC_VIEW( foo METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE a.b > '1995-01-01' )""", pretty=True, ) self.validate_identity( "SELECT col1, col2, metric1 FROM SEMANTIC_VIEW(mydb.myschema.my_semantic_view METRICS metric1 
DIMENSIONS col1, DATE_TRUNC('MONTH', timestamp_col) AS col2) ORDER BY col1, col2 DESC" ) def test_get_extract(self): self.validate_all( "SELECT GET([4, 5, 6], 1)", write={ "snowflake": "SELECT GET([4, 5, 6], 1)", "duckdb": "SELECT [4, 5, 6][2]", }, ) self.validate_all( "SELECT GET(col::MAP(INTEGER, VARCHAR), 1)", write={ "snowflake": "SELECT GET(CAST(col AS MAP(INT, VARCHAR)), 1)", "duckdb": "SELECT CAST(col AS MAP(INT, TEXT))[1]", }, ) self.validate_all( "SELECT GET(v, 'field')", write={ "snowflake": "SELECT GET(v, 'field')", "duckdb": "SELECT v -> '$.field'", }, ) self.validate_identity("GET(foo, bar)").assert_is(exp.GetExtract) def test_create_sequence(self): self.validate_identity( "CREATE SEQUENCE seq START=5 comment = 'foo' INCREMENT=10", "CREATE SEQUENCE seq COMMENT='foo' START WITH 5 INCREMENT BY 10", ) self.validate_all( "CREATE SEQUENCE seq WITH START=1 INCREMENT=1", write={ "snowflake": "CREATE SEQUENCE seq START WITH 1 INCREMENT BY 1", "duckdb": "CREATE SEQUENCE seq START WITH 1 INCREMENT BY 1", }, ) def test_bit_aggs(self): bit_and_funcs = ["BITANDAGG", "BITAND_AGG", "BIT_AND_AGG", "BIT_ANDAGG"] bit_or_funcs = ["BITORAGG", "BITOR_AGG", "BIT_OR_AGG", "BIT_ORAGG"] bit_xor_funcs = ["BITXORAGG", "BITXOR_AGG", "BIT_XOR_AGG", "BIT_XORAGG"] for bit_func in (bit_and_funcs, bit_or_funcs, bit_xor_funcs): for name in bit_func: with self.subTest(f"Testing Snowflakes {name}"): self.validate_identity(f"{name}(x)", f"{bit_func[0]}(x)") def test_bitmap_or_agg(self): self.validate_identity("BITMAP_OR_AGG(x)") def test_md5_functions(self): self.validate_identity("MD5_HEX(col)", "MD5(col)") self.validate_identity("MD5(col)") self.validate_identity("MD5_BINARY(col)") self.validate_identity("MD5_NUMBER_LOWER64(col)") self.validate_identity("MD5_NUMBER_UPPER64(col)") def test_sha1(self): self.validate_all( "SHA1(x)", write={ "snowflake": "SHA1(x)", "duckdb": "SHA1(x)", }, ) expr = self.validate_identity("SHA1('text')") annotated = annotate_types(expr, dialect="snowflake") 
self.assertEqual(annotated.sql("duckdb"), "SHA1('text')") self.assertEqual(annotated.sql("snowflake"), "SHA1('text')") self.validate_all( "SHA1(X'002A'::BINARY)", write={ "snowflake": "SHA1(CAST(x'002A' AS BINARY))", "duckdb": "SHA1(CAST(UNHEX('002A') AS BLOB))", }, ) expr = self.validate_identity("SHA1(123)") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("snowflake"), "SHA1(123)") self.assertEqual(annotated.sql("duckdb"), "SHA1(CAST(123 AS TEXT))") expr = self.validate_identity("SHA1(DATE '2024-01-15')", "SHA1(CAST('2024-01-15' AS DATE))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("snowflake"), "SHA1(CAST('2024-01-15' AS DATE))") self.assertEqual(annotated.sql("duckdb"), "SHA1(CAST(CAST('2024-01-15' AS DATE) AS TEXT))") def test_model_attribute(self): self.validate_identity("SELECT model!mladmin") self.validate_identity("SELECT model!PREDICT(1)") self.validate_identity("SELECT m!PREDICT(INPUT_DATA => {*}) AS p FROM tbl") self.validate_identity("SELECT m!PREDICT(INPUT_DATA => {tbl.*}) AS p FROM tbl") self.validate_identity("x.y.z!PREDICT(foo, bar, baz, bla)") self.validate_identity( "SELECT * FROM TABLE(model_trained_with_labeled_data!DETECT_ANOMALIES(INPUT_DATA => TABLE(view_with_data_to_analyze), TIMESTAMP_COLNAME => 'date', TARGET_COLNAME => 'sales', CONFIG_OBJECT => OBJECT_CONSTRUCT('prediction_interval', 0.99)))" ) def test_set_item_kind_attribute(self): expr = parse_one("ALTER SESSION SET autocommit = FALSE", read="snowflake") set_item = expr.find(exp.SetItem) self.assertIsNotNone(set_item) self.assertIsNone(set_item.args.get("kind")) expr = parse_one("SET a = 1", read="snowflake") set_item = expr.find(exp.SetItem) self.assertIsNotNone(set_item) self.assertEqual(set_item.args.get("kind"), "VARIABLE") def test_round(self): self.validate_all( "SELECT ROUND(2.25) AS value", write={ "snowflake": "SELECT ROUND(2.25) AS value", "duckdb": "SELECT ROUND(2.25) AS value", }, ) 
self.validate_all( "SELECT ROUND(2.25, 1) AS value", write={ "snowflake": "SELECT ROUND(2.25, 1) AS value", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(EXPR => 2.25, SCALE => 1) AS value", write={ "snowflake": "SELECT ROUND(2.25, 1) AS value", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(SCALE => 1, EXPR => 2.25) AS value", write={ "snowflake": "SELECT ROUND(2.25, 1) AS value", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(2.25, 1, 'HALF_AWAY_FROM_ZERO') AS value", write={ "snowflake": """SELECT ROUND(2.25, 1, 'HALF_AWAY_FROM_ZERO') AS value""", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(EXPR => 2.25, SCALE => 1, ROUNDING_MODE => 'HALF_AWAY_FROM_ZERO') AS value", write={ "snowflake": "SELECT ROUND(2.25, 1, 'HALF_AWAY_FROM_ZERO') AS value", "duckdb": "SELECT ROUND(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(2.25, 1, 'HALF_TO_EVEN') AS value", write={ "snowflake": "SELECT ROUND(2.25, 1, 'HALF_TO_EVEN') AS value", "duckdb": "SELECT ROUND_EVEN(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(ROUNDING_MODE => 'HALF_TO_EVEN', EXPR => 2.25, SCALE => 1) AS value", write={ "snowflake": "SELECT ROUND(2.25, 1, 'HALF_TO_EVEN') AS value", "duckdb": "SELECT ROUND_EVEN(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(SCALE => 1, EXPR => 2.25, , ROUNDING_MODE => 'HALF_TO_EVEN') AS value", write={ "snowflake": "SELECT ROUND(2.25, 1, 'HALF_TO_EVEN') AS value", "duckdb": "SELECT ROUND_EVEN(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(EXPR => 2.25, SCALE => 1, ROUNDING_MODE => 'HALF_TO_EVEN') AS value", write={ "snowflake": "SELECT ROUND(2.25, 1, 'HALF_TO_EVEN') AS value", "duckdb": "SELECT ROUND_EVEN(2.25, 1) AS value", }, ) self.validate_all( "SELECT ROUND(2.256, 1.8) AS value", write={ "snowflake": "SELECT ROUND(2.256, 1.8) AS value", "duckdb": "SELECT ROUND(2.256, CAST(1.8 AS INT)) AS 
value", }, ) self.validate_all( "SELECT ROUND(2.256, CAST(1.8 AS DECIMAL(38, 0))) AS value", write={ "snowflake": "SELECT ROUND(2.256, CAST(1.8 AS DECIMAL(38, 0))) AS value", "duckdb": "SELECT ROUND(2.256, CAST(CAST(1.8 AS DECIMAL(38, 0)) AS INT)) AS value", }, ) def test_get_bit(self): self.validate_all( "SELECT GETBIT(11, 1)", write={ "snowflake": "SELECT GETBIT(11, 1)", "databricks": "SELECT GETBIT(11, 1)", "redshift": "SELECT GETBIT(11, 1)", }, ) expr = self.validate_identity("GETBIT(11, 1)") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "(11 >> 1) & 1") self.assertEqual(annotated.sql("postgres"), "11 >> 1 & 1") def test_to_binary(self): expr = self.validate_identity("TO_BINARY('48454C50', 'HEX')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "UNHEX('48454C50')") expr = self.validate_identity("TO_BINARY('48454C50')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "UNHEX('48454C50')") expr = self.validate_identity("TO_BINARY('TEST', 'UTF-8')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "ENCODE('TEST')") expr = self.validate_identity("TO_BINARY('SEVMUA==', 'BASE64')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "FROM_BASE64('SEVMUA==')") expr = self.validate_identity("TRY_TO_BINARY('48454C50', 'HEX')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "TRY(UNHEX('48454C50'))") expr = self.validate_identity("TRY_TO_BINARY('48454C50')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "TRY(UNHEX('48454C50'))") expr = self.validate_identity("TRY_TO_BINARY('Hello', 'UTF-8')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "TRY(ENCODE('Hello'))") expr = 
self.validate_identity("TRY_TO_BINARY('SGVsbG8=', 'BASE64')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "TRY(FROM_BASE64('SGVsbG8='))") expr = self.validate_identity("TRY_TO_BINARY('Hello', 'UTF-16')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "NULL") def test_reverse(self): # Test REVERSE with TO_BINARY (BLOB type) - UTF-8 format expr = self.validate_identity("REVERSE(TO_BINARY('ABC', 'UTF-8'))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "CAST(REVERSE(CAST(ENCODE('ABC') AS TEXT)) AS BLOB)" ) # Test REVERSE with TO_BINARY - HEX format expr = self.validate_identity("REVERSE(TO_BINARY('414243', 'HEX'))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "CAST(REVERSE(CAST(UNHEX('414243') AS TEXT)) AS BLOB)", ) # Test REVERSE with HEX_DECODE_BINARY expr = self.validate_identity("REVERSE(HEX_DECODE_BINARY('414243'))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "CAST(REVERSE(CAST(UNHEX('414243') AS TEXT)) AS BLOB)", ) # Test REVERSE with VARCHAR (should not add casts) expr = self.validate_identity("REVERSE('ABC')") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "REVERSE('ABC')") def test_float_interval(self): # Test TIMEADD with float interval value - DuckDB INTERVAL requires integers expr = self.validate_identity("TIMEADD(HOUR, 2.5, CAST('10:30:00' AS TIME))") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "CAST('10:30:00' AS TIME) + INTERVAL (CAST(ROUND(2.5) AS INT)) HOUR", ) # Test DATEADD with decimal interval value expr = self.validate_identity( "DATEADD(HOUR, CAST(3.8 AS DECIMAL(10, 2)), CAST('2024-01-01 10:00:00' AS TIMESTAMP))" ) annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( 
annotated.sql("duckdb"), "CAST('2024-01-01 10:00:00' AS TIMESTAMP) + INTERVAL (CAST(ROUND(CAST(3.8 AS DECIMAL(10, 2))) AS INT)) HOUR", ) # Test TIMESTAMPADD with float interval value - Snowflake parser converts to DATEADD expr = self.parse_one( "TIMESTAMPADD(MINUTE, 30.9, CAST('2024-01-01 10:00:00' AS TIMESTAMP))", dialect="snowflake", ) self.assertEqual( expr.sql("snowflake"), "DATEADD(MINUTE, 30.9, CAST('2024-01-01 10:00:00' AS TIMESTAMP))" ) annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "CAST('2024-01-01 10:00:00' AS TIMESTAMP) + INTERVAL (CAST(ROUND(30.9) AS INT)) MINUTE", ) def test_transpile_bitwise_ops(self): # Binary bitwise operations expr = self.parse_one("SELECT BITOR(x'FF', x'0F')", dialect="snowflake") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "SELECT CAST(CAST(UNHEX('FF') AS BIT) | CAST(UNHEX('0F') AS BIT) AS BLOB)", ) self.assertEqual(annotated.sql("snowflake"), "SELECT BITOR(x'FF', x'0F')") expr = self.parse_one("SELECT BITAND(x'FF', x'0F')", dialect="snowflake") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "SELECT CAST(CAST(UNHEX('FF') AS BIT) & CAST(UNHEX('0F') AS BIT) AS BLOB)", ) self.assertEqual(annotated.sql("snowflake"), "SELECT BITAND(x'FF', x'0F')") expr = self.parse_one("SELECT BITXOR(x'FF', x'0F')", dialect="snowflake") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual( annotated.sql("duckdb"), "SELECT CAST(XOR(CAST(UNHEX('FF') AS BIT), CAST(UNHEX('0F') AS BIT)) AS BLOB)", ) self.assertEqual(annotated.sql("snowflake"), "SELECT BITXOR(x'FF', x'0F')") expr = self.parse_one("SELECT BITNOT(x'FF')", dialect="snowflake") annotated = annotate_types(expr, dialect="snowflake") self.assertEqual(annotated.sql("duckdb"), "SELECT CAST(~CAST(UNHEX('FF') AS BIT) AS BLOB)") self.assertEqual(annotated.sql("snowflake"), "SELECT BITNOT(x'FF')") def test_quoting(self): 
self.assertEqual( parse_one("select a, B from DUAL", dialect="snowflake").sql( dialect="snowflake", identify="safe" ), 'SELECT a, "B" FROM DUAL', ) def test_floor(self): self.validate_all( "SELECT FLOOR(1.753, 2)", write={"duckdb": "SELECT ROUND(FLOOR(1.753 * POWER(10, 2)) / POWER(10, 2), 2)"}, ) self.validate_all( "SELECT FLOOR(123.45, -1)", write={"duckdb": "SELECT ROUND(FLOOR(123.45 * POWER(10, -1)) / POWER(10, -1), -1)"}, ) self.validate_all( "SELECT FLOOR(a + b, 2)", write={"duckdb": "SELECT ROUND(FLOOR((a + b) * POWER(10, 2)) / POWER(10, 2), 2)"}, ) self.validate_all( "SELECT FLOOR(1.234, 1.5)", write={ "duckdb": "SELECT ROUND(FLOOR(1.234 * POWER(10, CAST(1.5 AS INT))) / POWER(10, CAST(1.5 AS INT)), CAST(1.5 AS INT))" }, ) def test_seq_functions(self): # SEQ1 - 1-byte sequences self.validate_all( "SELECT SEQ1() FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 256 FROM test", "snowflake": "SELECT SEQ1() FROM test", }, ) self.validate_all( "SELECT SEQ1(0) FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 256 FROM test", "snowflake": "SELECT SEQ1(0) FROM test", }, ) # 1 means it's signed parameter, which affects wrap-around behavior self.validate_all( "SELECT SEQ1(1) FROM test", write={ "duckdb": "SELECT (CASE WHEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 256 >= 128 THEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 256 - 256 ELSE (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 256 END) FROM test", "snowflake": "SELECT SEQ1(1) FROM test", }, ) # SEQ2 - 2-byte sequences self.validate_all( "SELECT SEQ2() FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 65536 FROM test", "snowflake": "SELECT SEQ2() FROM test", }, ) self.validate_all( "SELECT SEQ2(0) FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 65536 FROM test", "snowflake": "SELECT SEQ2(0) FROM test", }, ) self.validate_all( "SELECT 
SEQ2(1) FROM test", write={ "duckdb": "SELECT (CASE WHEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 65536 >= 32768 THEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 65536 - 65536 ELSE (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 65536 END) FROM test", "snowflake": "SELECT SEQ2(1) FROM test", }, ) # SEQ4 - 4-byte sequences self.validate_all( "SELECT SEQ4() FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 4294967296 FROM test", "snowflake": "SELECT SEQ4() FROM test", }, ) self.validate_all( "SELECT SEQ4(0) FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 4294967296 FROM test", "snowflake": "SELECT SEQ4(0) FROM test", }, ) self.validate_all( "SELECT SEQ4(1) FROM test", write={ "duckdb": "SELECT (CASE WHEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 4294967296 >= 2147483648 THEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 4294967296 - 4294967296 ELSE (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 4294967296 END) FROM test", "snowflake": "SELECT SEQ4(1) FROM test", }, ) # SEQ8 - 8-byte sequences self.validate_all( "SELECT SEQ8() FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 18446744073709551616 FROM test", "snowflake": "SELECT SEQ8() FROM test", }, ) self.validate_all( "SELECT SEQ8(0) FROM test", write={ "duckdb": "SELECT (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 18446744073709551616 FROM test", "snowflake": "SELECT SEQ8(0) FROM test", }, ) self.validate_all( "SELECT SEQ8(1) FROM test", write={ "duckdb": "SELECT (CASE WHEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 18446744073709551616 >= 9223372036854775808 THEN (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 18446744073709551616 - 18446744073709551616 ELSE (ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1) % 18446744073709551616 END) FROM test", "snowflake": "SELECT SEQ8(1) FROM test", }, ) def test_generator(self): 
self.validate_identity("SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 10))") self.validate_identity("SELECT 1 FROM TABLE(GENERATOR(TIMELIMIT => 5))") self.validate_identity("SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 10, TIMELIMIT => 5))") # Positional args are mapped to ROWCOUNT, TIMELIMIT in order self.validate_identity( "SELECT 1 FROM TABLE(GENERATOR(10))", "SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 10))", ) self.validate_identity( "SELECT 1 FROM TABLE(GENERATOR(10, 5))", "SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 10, TIMELIMIT => 5))", ) # Basic ROWCOUNT transpilation self.validate_all( "SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 5))", write={ "duckdb": "SELECT 1 FROM RANGE(5)", "snowflake": "SELECT 1 FROM TABLE(GENERATOR(ROWCOUNT => 5))", }, ) # GENERATOR with SEQ functions - the common use case # SEQ is replaced with `range` column reference to avoid nested window function issues self.validate_all( "SELECT SEQ8() FROM TABLE(GENERATOR(ROWCOUNT => 5))", write={ "duckdb": "SELECT range % 18446744073709551616 FROM RANGE(5)", "snowflake": "SELECT SEQ8() FROM TABLE(GENERATOR(ROWCOUNT => 5))", }, ) # GENERATOR with JOIN in parenthesized construct - preserves joins self.validate_all( "SELECT * FROM (TABLE(GENERATOR(ROWCOUNT => 5)) JOIN other ON 1 = 1)", write={ "duckdb": "SELECT * FROM (RANGE(5) JOIN other ON 1 = 1)", "snowflake": "SELECT * FROM (TABLE(GENERATOR(ROWCOUNT => 5)) JOIN other ON 1 = 1)", }, ) def test_ceil(self): self.validate_all( "SELECT CEIL(1.753, 2)", write={"duckdb": "SELECT ROUND(CEIL(1.753 * POWER(10, 2)) / POWER(10, 2), 2)"}, ) self.validate_all( "SELECT CEIL(123.45, -1)", write={"duckdb": "SELECT ROUND(CEIL(123.45 * POWER(10, -1)) / POWER(10, -1), -1)"}, ) self.validate_all( "SELECT CEIL(a + b, 2)", write={"duckdb": "SELECT ROUND(CEIL((a + b) * POWER(10, 2)) / POWER(10, 2), 2)"}, ) self.validate_all( "SELECT CEIL(1.234, 1.5)", write={ "duckdb": "SELECT ROUND(CEIL(1.234 * POWER(10, CAST(1.5 AS INT))) / POWER(10, CAST(1.5 AS INT)), CAST(1.5 AS INT))" 
}, ) def test_corr(self): self.validate_all( "SELECT CORR(a, b)", read={ "snowflake": "SELECT CORR(a, b)", "postgres": "SELECT CORR(a, b)", }, write={ "snowflake": "SELECT CORR(a, b)", "postgres": "SELECT CORR(a, b)", "duckdb": "SELECT CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END", }, ) self.validate_all( "SELECT CORR(a, b) OVER (PARTITION BY c)", read={ "snowflake": "SELECT CORR(a, b) OVER (PARTITION BY c)", "postgres": "SELECT CORR(a, b) OVER (PARTITION BY c)", }, write={ "snowflake": "SELECT CORR(a, b) OVER (PARTITION BY c)", "postgres": "SELECT CORR(a, b) OVER (PARTITION BY c)", "duckdb": "SELECT CASE WHEN ISNAN(CORR(a, b) OVER (PARTITION BY c)) THEN NULL ELSE CORR(a, b) OVER (PARTITION BY c) END", }, ) self.validate_all( "SELECT CORR(a, b) FILTER(WHERE c > 0)", write={ "duckdb": "SELECT CASE WHEN ISNAN(CORR(a, b) FILTER(WHERE c > 0)) THEN NULL ELSE CORR(a, b) FILTER(WHERE c > 0) END", }, ) self.validate_all( "SELECT CORR(a, b) FILTER(WHERE c > 0) OVER (PARTITION BY d)", write={ "duckdb": "SELECT CASE WHEN ISNAN(CORR(a, b) FILTER(WHERE c > 0) OVER (PARTITION BY d)) THEN NULL ELSE CORR(a, b) FILTER(WHERE c > 0) OVER (PARTITION BY d) END", }, ) def test_encryption_functions(self): # ENCRYPT self.validate_identity("ENCRYPT(value, 'passphrase')") self.validate_identity("ENCRYPT(value, 'passphrase', 'aad')") self.validate_identity("ENCRYPT(value, 'passphrase', 'aad', 'AES-GCM')") # ENCRYPT_RAW self.validate_identity("ENCRYPT_RAW(value, key, iv)") self.validate_identity("ENCRYPT_RAW(value, key, iv, aad)") self.validate_identity("ENCRYPT_RAW(value, key, iv, aad, 'AES-GCM')") # DECRYPT self.validate_identity("DECRYPT(encrypted, 'passphrase')") self.validate_identity("DECRYPT(encrypted, 'passphrase', 'aad')") self.validate_identity("DECRYPT(encrypted, 'passphrase', 'aad', 'AES-GCM')") # DECRYPT_RAW self.validate_identity("DECRYPT_RAW(encrypted, key, iv)") self.validate_identity("DECRYPT_RAW(encrypted, key, iv, aad)") 
self.validate_identity("DECRYPT_RAW(encrypted, key, iv, aad, 'AES-GCM')") self.validate_identity("DECRYPT_RAW(encrypted, key, iv, aad, 'AES-GCM', aead)") # TRY_DECRYPT (parses as Decrypt with safe=True) self.validate_identity("TRY_DECRYPT(encrypted, 'passphrase')") self.validate_identity("TRY_DECRYPT(encrypted, 'passphrase', 'aad')") self.validate_identity("TRY_DECRYPT(encrypted, 'passphrase', 'aad', 'AES-GCM')") # TRY_DECRYPT_RAW (parses as DecryptRaw with safe=True) self.validate_identity("TRY_DECRYPT_RAW(encrypted, key, iv)") self.validate_identity("TRY_DECRYPT_RAW(encrypted, key, iv, aad)") self.validate_identity("TRY_DECRYPT_RAW(encrypted, key, iv, aad, 'AES-GCM')") self.validate_identity("TRY_DECRYPT_RAW(encrypted, key, iv, aad, 'AES-GCM', aead)") def test_update_statement(self): self.validate_identity("UPDATE test SET t = 1 FROM t1") self.validate_identity("UPDATE test SET t = 1 FROM t2 JOIN t3 ON t2.id = t3.id") self.validate_identity( "UPDATE test SET t = 1 FROM (SELECT id FROM test2) AS t2 JOIN test3 AS t3 ON t2.id = t3.id" ) self.validate_identity( "UPDATE sometesttable u FROM (SELECT 5195 AS new_count, '01bee1e5-0000-d31e-0000-e80ef02b9f27' query_id ) b SET qry_hash_count = new_count WHERE u.sample_query_id = b.query_id", "UPDATE sometesttable AS u SET qry_hash_count = new_count FROM (SELECT 5195 AS new_count, '01bee1e5-0000-d31e-0000-e80ef02b9f27' AS query_id) AS b WHERE u.sample_query_id = b.query_id", ) def test_type_sensitive_bitshift_transpilation(self): ast = annotate_types(self.parse_one("SELECT BITSHIFTLEFT(X'FF', 4)"), dialect="snowflake") self.assertEqual(ast.sql("duckdb"), "SELECT CAST(CAST(UNHEX('FF') AS BIT) << 4 AS BLOB)") ast = annotate_types(self.parse_one("SELECT BITSHIFTRIGHT(X'FF', 4)"), dialect="snowflake") self.assertEqual(ast.sql("duckdb"), "SELECT CAST(CAST(UNHEX('FF') AS BIT) >> 4 AS BLOB)") def test_array_flatten(self): # String array flattening self.validate_all( "SELECT ARRAY_FLATTEN([['a', 'b'], ['c', 'd', 'e']])", write={ 
"snowflake": "SELECT ARRAY_FLATTEN([['a', 'b'], ['c', 'd', 'e']])", "duckdb": "SELECT FLATTEN([['a', 'b'], ['c', 'd', 'e']])", "starrocks": "SELECT ARRAY_FLATTEN([['a', 'b'], ['c', 'd', 'e']])", }, ) # Nested arrays (single level flattening) self.validate_all( "SELECT ARRAY_FLATTEN([[[1, 2], [3]], [[4], [5]]])", write={ "snowflake": "SELECT ARRAY_FLATTEN([[[1, 2], [3]], [[4], [5]]])", "duckdb": "SELECT FLATTEN([[[1, 2], [3]], [[4], [5]]])", }, ) # Array with NULL elements self.validate_all( "SELECT ARRAY_FLATTEN([[1, NULL, 3], [4]])", write={ "snowflake": "SELECT ARRAY_FLATTEN([[1, NULL, 3], [4]])", "duckdb": "SELECT FLATTEN([[1, NULL, 3], [4]])", }, ) # Empty arrays self.validate_all( "SELECT ARRAY_FLATTEN([[]])", write={ "snowflake": "SELECT ARRAY_FLATTEN([[]])", "duckdb": "SELECT FLATTEN([[]])", }, ) def test_array_except(self): self.validate_all( "SELECT ARRAY_EXCEPT([1, 2, 3], [2])", write={ "snowflake": "SELECT ARRAY_EXCEPT([1, 2, 3], [2])", "duckdb": "SELECT CASE WHEN [1, 2, 3] IS NULL OR [2] IS NULL THEN NULL ELSE LIST_TRANSFORM(LIST_FILTER(LIST_ZIP([1, 2, 3], GENERATE_SERIES(1, LENGTH([1, 2, 3]))), pair -> (LENGTH(LIST_FILTER([1, 2, 3][1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])) > LENGTH(LIST_FILTER([2], e -> e IS NOT DISTINCT FROM pair[1])))), pair -> pair[1]) END", }, ) def test_array_position(self): self.validate_all( "SELECT ARRAY_POSITION(2, ARRAY_CONSTRUCT(1, 2, 3))", write={ "snowflake": "SELECT ARRAY_POSITION(2, [1, 2, 3])", "duckdb": "SELECT ARRAY_POSITION([1, 2, 3], 2) - 1", }, ) def test_array_slice(self): self.validate_all( "ARRAY_SLICE(arr, s, e)", write={ "snowflake": "ARRAY_SLICE(arr, s, e)", "duckdb": "ARRAY_SLICE(arr, CASE WHEN s >= 0 THEN s + 1 ELSE s END, CASE WHEN e < 0 THEN e - 1 ELSE e END)", }, ) def test_space(self): # Integer literal self.validate_all( "SELECT SPACE(5)", write={ "snowflake": "SELECT REPEAT(' ', 5)", "duckdb": "SELECT REPEAT(' ', CAST(5 AS BIGINT))", }, ) # Float literal (tests rounding behavior) 
self.validate_all( "SELECT SPACE(3.7)", write={ "snowflake": "SELECT REPEAT(' ', 3.7)", "duckdb": "SELECT REPEAT(' ', CAST(3.7 AS BIGINT))", }, ) # NULL value self.validate_all( "SELECT SPACE(NULL)", write={ "snowflake": "SELECT REPEAT(' ', NULL)", "duckdb": "SELECT REPEAT(' ', CAST(NULL AS BIGINT))", }, ) def test_charindex(self): self.validate_all( "SELECT CHARINDEX('sub', 'testsubstring', -1)", write={ "snowflake": "SELECT CHARINDEX('sub', 'testsubstring', -1)", "duckdb": "SELECT CASE WHEN STRPOS(SUBSTRING('testsubstring', CASE WHEN -1 <= 0 THEN 1 ELSE -1 END), 'sub') = 0 THEN 0 ELSE STRPOS(SUBSTRING('testsubstring', CASE WHEN -1 <= 0 THEN 1 ELSE -1 END), 'sub') + CASE WHEN -1 <= 0 THEN 1 ELSE -1 END - 1 END", }, ) self.validate_all( "SELECT CHARINDEX('sub', 'testsubstring', p)", write={ "snowflake": "SELECT CHARINDEX('sub', 'testsubstring', p)", "duckdb": "SELECT CASE WHEN STRPOS(SUBSTRING('testsubstring', CASE WHEN p <= 0 THEN 1 ELSE p END), 'sub') = 0 THEN 0 ELSE STRPOS(SUBSTRING('testsubstring', CASE WHEN p <= 0 THEN 1 ELSE p END), 'sub') + CASE WHEN p <= 0 THEN 1 ELSE p END - 1 END", }, ) def test_directed_joins(self): self.validate_identity("SELECT * FROM a CROSS DIRECTED JOIN b USING (id)") self.validate_identity("SELECT * FROM a INNER DIRECTED JOIN b USING (id)") self.validate_identity("SELECT * FROM a NATURAL INNER DIRECTED JOIN b USING (id)") for join_side in ("LEFT", "RIGHT", "FULL"): for outer in ("", " OUTER"): for natural in ("", "NATURAL "): prefix = natural + join_side + outer + " DIRECTED" with self.subTest(f"Testing {prefix} JOIN"): self.validate_identity(f"SELECT * FROM a {prefix} JOIN b USING (id)") ================================================ FILE: tests/dialects/test_solr.py ================================================ from sqlglot import exp from tests.dialects.test_dialect import Validator class TestSolr(Validator): dialect = "solr" def test_solr(self): self.validate_identity("SELECT `default`.column FROM t") 
self.failureException('SELECT "column" FROM t') self.validate_identity("SELECT column FROM t WHERE column = 'val'") self.validate_identity("a || b", "a OR b").assert_is(exp.Or) ================================================ FILE: tests/dialects/test_spark.py ================================================ from unittest import mock from sqlglot import exp, parse_one from sqlglot.dialects.dialect import Dialects from sqlglot.errors import UnsupportedError from tests.dialects.test_dialect import Validator class TestSpark(Validator): dialect = "spark" def test_ddl(self): self.validate_identity("DAYOFWEEK(TO_DATE(x))") self.validate_identity("DAYOFMONTH(TO_DATE(x))") self.validate_identity("DAYOFYEAR(TO_DATE(x))") self.validate_identity("WEEKOFYEAR(TO_DATE(x))") self.validate_identity("SELECT MODE(category)") self.validate_identity("SELECT MODE(price, TRUE) AS deterministic_mode FROM products") self.validate_identity("SELECT MODE() WITHIN GROUP (ORDER BY status) FROM orders") self.validate_identity("DROP NAMESPACE my_catalog.my_namespace") self.validate_identity("CREATE NAMESPACE my_catalog.my_namespace") self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2") self.validate_identity("CREATE TABLE foo AS WITH t AS (SELECT 1 AS col) SELECT col FROM t") self.validate_identity("CREATE TEMPORARY VIEW test AS SELECT 1") self.validate_identity("CREATE TABLE foo (col VARCHAR(50))") self.validate_identity("CREATE TABLE foo (col STRUCT)") self.validate_identity("CREATE TABLE foo (col STRING) CLUSTERED BY (col) INTO 10 BUCKETS") self.validate_identity( "CREATE TABLE foo (col STRING) CLUSTERED BY (col) SORTED BY (col) INTO 10 BUCKETS" ) self.validate_identity("TRUNCATE TABLE t1 PARTITION(age = 10, name = 'test', address)") self.validate_all( "CREATE TABLE db.example_table (col_a struct)", write={ "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b TEXT))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, 
struct_col_b VARCHAR))", "hive": "CREATE TABLE db.example_table (col_a STRUCT)", "spark": "CREATE TABLE db.example_table (col_a STRUCT)", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a struct>)", write={ "bigquery": "CREATE TABLE db.example_table (col_a STRUCT>)", "duckdb": "CREATE TABLE db.example_table (col_a STRUCT(struct_col_a INT, struct_col_b STRUCT(nested_col_a TEXT, nested_col_b TEXT)))", "presto": "CREATE TABLE db.example_table (col_a ROW(struct_col_a INTEGER, struct_col_b ROW(nested_col_a VARCHAR, nested_col_b VARCHAR)))", "hive": "CREATE TABLE db.example_table (col_a STRUCT>)", "spark": "CREATE TABLE db.example_table (col_a STRUCT>)", }, ) self.validate_all( "CREATE TABLE db.example_table (col_a array, col_b array>)", write={ "bigquery": "CREATE TABLE db.example_table (col_a ARRAY, col_b ARRAY>)", "duckdb": "CREATE TABLE db.example_table (col_a INT[], col_b INT[][])", "presto": "CREATE TABLE db.example_table (col_a ARRAY(INTEGER), col_b ARRAY(ARRAY(INTEGER)))", "hive": "CREATE TABLE db.example_table (col_a ARRAY, col_b ARRAY>)", "spark": "CREATE TABLE db.example_table (col_a ARRAY, col_b ARRAY>)", "snowflake": "CREATE TABLE db.example_table (col_a ARRAY, col_b ARRAY)", }, ) self.validate_all( "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", write={ "duckdb": "CREATE TABLE x", "presto": "CREATE TABLE x WITH (format='ICEBERG', PARTITIONED_BY=ARRAY['MONTHS(y)'])", "hive": "CREATE TABLE x STORED AS ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", "spark": "CREATE TABLE x USING ICEBERG PARTITIONED BY (MONTHS(y)) LOCATION 's3://z'", }, ) self.validate_all( "CREATE TABLE test STORED AS PARQUET AS SELECT 1", write={ "duckdb": "CREATE TABLE test AS SELECT 1", "presto": "CREATE TABLE test WITH (format='PARQUET') AS SELECT 1", "trino": "CREATE TABLE test WITH (format='PARQUET') AS SELECT 1", "athena": "CREATE TABLE test WITH (format='PARQUET') AS SELECT 1", # note: lowercase format property is important for Athena 
"hive": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", "spark": "CREATE TABLE test STORED AS PARQUET AS SELECT 1", }, ) self.validate_all( """CREATE TABLE blah (col_a INT) COMMENT "Test comment: blah" PARTITIONED BY (date STRING) USING ICEBERG TBLPROPERTIES('x' = '1')""", write={ "duckdb": """CREATE TABLE blah ( col_a INT )""", # Partition columns should exist in table "presto": """CREATE TABLE blah ( col_a INTEGER, date VARCHAR ) COMMENT 'Test comment: blah' WITH ( PARTITIONED_BY=ARRAY['date'], format='ICEBERG', x='1' )""", "hive": """CREATE TABLE blah ( col_a INT ) COMMENT 'Test comment: blah' PARTITIONED BY ( date STRING ) STORED AS ICEBERG TBLPROPERTIES ( 'x'='1' )""", "spark": """CREATE TABLE blah ( col_a INT, date STRING ) COMMENT 'Test comment: blah' PARTITIONED BY ( date ) USING ICEBERG TBLPROPERTIES ( 'x'='1' )""", }, pretty=True, ) self.validate_all( "CACHE TABLE testCache OPTIONS ('storageLevel' 'DISK_ONLY') SELECT * FROM testData", write={ "spark": "CACHE TABLE testCache OPTIONS('storageLevel' = 'DISK_ONLY') AS SELECT * FROM testData" }, ) self.validate_all( "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)", write={ "spark": "ALTER TABLE StudentInfo ADD COLUMNS (LastName STRING, DOB TIMESTAMP)", }, ) self.validate_all( "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", write={ "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", }, ) self.validate_all( "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", write={ "spark": "ALTER TABLE db.example ALTER COLUMN col_a TYPE BIGINT", "hive": "ALTER TABLE db.example CHANGE COLUMN col_a col_a BIGINT", }, ) self.validate_all( "ALTER TABLE db.example RENAME COLUMN col_a TO col_b", write={ "spark": "ALTER TABLE db.example RENAME COLUMN col_a TO col_b", "hive": UnsupportedError, }, ) self.validate_all( "ALTER TABLE StudentInfo DROP COLUMNS (LastName, DOB)", write={ "spark": "ALTER TABLE StudentInfo 
DROP COLUMNS (LastName, DOB)", }, ) self.validate_identity("ALTER VIEW StudentInfoView AS SELECT * FROM StudentInfo") self.validate_identity("ALTER VIEW StudentInfoView AS SELECT LastName FROM StudentInfo") self.validate_identity("ALTER VIEW StudentInfoView RENAME TO StudentInfoViewRenamed") self.validate_identity( "ALTER VIEW StudentInfoView SET TBLPROPERTIES ('key1'='val1', 'key2'='val2')" ) self.validate_identity( "ALTER VIEW StudentInfoView UNSET TBLPROPERTIES ('key1', 'key2')", check_command_warning=True, ) def test_to_date(self): self.validate_all( "TO_DATE(x, 'yyyy-MM-dd')", write={ "duckdb": "TRY_CAST(x AS DATE)", "hive": "TO_DATE(x)", "presto": "CAST(CAST(x AS TIMESTAMP) AS DATE)", "spark": "TO_DATE(x)", "snowflake": "TRY_TO_DATE(x, 'yyyy-mm-DD')", "databricks": "TO_DATE(x)", }, ) self.validate_all( "TO_DATE(x, 'yyyy')", write={ "duckdb": "CAST(CAST(TRY_STRPTIME(x, '%Y') AS TIMESTAMP) AS DATE)", "hive": "TO_DATE(x, 'yyyy')", "presto": "CAST(DATE_PARSE(x, '%Y') AS DATE)", "spark": "TO_DATE(x, 'yyyy')", "snowflake": "TRY_TO_DATE(x, 'yyyy')", "databricks": "TO_DATE(x, 'yyyy')", }, ) @mock.patch("sqlglot.generator.logger") def test_hint(self, logger): self.validate_all( "SELECT /*+ COALESCE(3) */ * FROM x", write={ "spark": "SELECT /*+ COALESCE(3) */ * FROM x", "bigquery": "SELECT * FROM x", }, ) self.validate_all( "SELECT /*+ COALESCE(3), REPARTITION(1) */ * FROM x", write={ "spark": "SELECT /*+ COALESCE(3), REPARTITION(1) */ * FROM x", "bigquery": "SELECT * FROM x", }, ) self.validate_all( "SELECT /*+ BROADCAST(table) */ cola FROM table", write={ "spark": "SELECT /*+ BROADCAST(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ BROADCASTJOIN(table) */ cola FROM table", write={ "spark": "SELECT /*+ BROADCASTJOIN(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ MAPJOIN(table) */ cola FROM table", write={ "spark": "SELECT /*+ MAPJOIN(table) */ cola FROM 
table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ MERGE(table) */ cola FROM table", write={ "spark": "SELECT /*+ MERGE(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ SHUFFLEMERGE(table) */ cola FROM table", write={ "spark": "SELECT /*+ SHUFFLEMERGE(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ MERGEJOIN(table) */ cola FROM table", write={ "spark": "SELECT /*+ MERGEJOIN(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ SHUFFLE_HASH(table) */ cola FROM table", write={ "spark": "SELECT /*+ SHUFFLE_HASH(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) self.validate_all( "SELECT /*+ SHUFFLE_REPLICATE_NL(table) */ cola FROM table", write={ "spark": "SELECT /*+ SHUFFLE_REPLICATE_NL(table) */ cola FROM table", "bigquery": "SELECT cola FROM table", }, ) def test_spark(self): self.assertEqual( parse_one("REFRESH TABLE t", read="spark").assert_is(exp.Refresh).sql(dialect="spark"), "REFRESH TABLE t", ) # Spark TRUNC is date-only, should parse to DateTrunc (not numeric Trunc) self.validate_identity("TRUNC(date_col, 'MM')").assert_is(exp.DateTrunc) # Numeric TRUNC from other dialects - Spark has no native support, uses CAST to BIGINT self.validate_all( "CAST(3.14159 AS BIGINT)", read={"postgres": "TRUNC(3.14159, 2)"}, ) self.validate_identity("SELECT APPROX_TOP_K_ACCUMULATE(col, 10)") self.validate_identity("SELECT APPROX_TOP_K_ACCUMULATE(col)") self.validate_identity("SELECT BITMAP_BIT_POSITION(10)") self.validate_identity("SELECT BITMAP_CONSTRUCT_AGG(value)") self.validate_identity("ALTER TABLE foo ADD PARTITION(event = 'click')") self.validate_identity("ALTER TABLE foo ADD IF NOT EXISTS PARTITION(event = 'click')") self.validate_identity("IF(cond, foo AS bar, bla AS baz)") self.validate_identity("any_value(col, true)", "ANY_VALUE(col) IGNORE NULLS") 
self.validate_identity("first(col, true)", "FIRST(col) IGNORE NULLS") self.validate_identity("first_value(col, true)", "FIRST_VALUE(col) IGNORE NULLS") self.validate_identity("last(col, true)", "LAST(col) IGNORE NULLS") self.validate_identity("last_value(col, true)", "LAST_VALUE(col) IGNORE NULLS") self.validate_identity("DESCRIBE EXTENDED db.tbl") self.validate_identity("SELECT * FROM test TABLESAMPLE (50 PERCENT)") self.validate_identity("SELECT * FROM test TABLESAMPLE (5 ROWS)") self.validate_identity("SELECT * FROM test TABLESAMPLE (BUCKET 4 OUT OF 10)") self.validate_identity("REFRESH 'hdfs://path/to/table'") self.validate_identity("REFRESH TABLE tempDB.view1") self.validate_identity("SELECT CASE WHEN a = NULL THEN 1 ELSE 2 END") self.validate_identity("SELECT * FROM t1 SEMI JOIN t2 ON t1.x = t2.x") self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), x -> x + 1)") self.validate_identity("SELECT TRANSFORM(ARRAY(1, 2, 3), (x, i) -> x + i)") self.validate_identity("REFRESH TABLE a.b.c") self.validate_identity("INTERVAL '-86' DAYS") self.validate_identity("TRIM(' SparkSQL ')") self.validate_identity("TRIM(BOTH 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(LEADING 'SL' FROM 'SSparkSQLS')") self.validate_identity("TRIM(TRAILING 'SL' FROM 'SSparkSQLS')") self.validate_identity("SPLIT(str, pattern, lim)") self.validate_identity( "SELECT * FROM t1, t2", "SELECT * FROM t1 CROSS JOIN t2", ) self.validate_identity( "SELECT 1 limit", "SELECT 1 AS limit", ) self.validate_identity( "SELECT 1 offset", "SELECT 1 AS offset", ) self.validate_identity( "SELECT UNIX_TIMESTAMP()", "SELECT UNIX_TIMESTAMP(CURRENT_TIMESTAMP())", ) self.validate_identity( "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 23 HOUR + 59 MINUTE + 59 SECONDS", "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + INTERVAL '59' MINUTE + INTERVAL '59' SECONDS", ) self.validate_identity( "SELECT CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + '59' MINUTE + '59' SECONDS", "SELECT 
CAST('2023-01-01' AS TIMESTAMP) + INTERVAL '23' HOUR + INTERVAL '59' MINUTE + INTERVAL '59' SECONDS", ) self.validate_identity( "SELECT INTERVAL '5' HOURS '30' MINUTES '5' SECONDS '6' MILLISECONDS '7' MICROSECONDS", "SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS", ) self.validate_identity( "SELECT INTERVAL 5 HOURS 30 MINUTES 5 SECONDS 6 MILLISECONDS 7 MICROSECONDS", "SELECT INTERVAL '5' HOURS + INTERVAL '30' MINUTES + INTERVAL '5' SECONDS + INTERVAL '6' MILLISECONDS + INTERVAL '7' MICROSECONDS", ) self.validate_identity( "SELECT REGEXP_REPLACE('100-200', r'([^0-9])', '')", "SELECT REGEXP_REPLACE('100-200', '([^0-9])', '')", ) self.validate_identity( "SELECT REGEXP_REPLACE('100-200', R'([^0-9])', '')", "SELECT REGEXP_REPLACE('100-200', '([^0-9])', '')", ) self.validate_identity( "SELECT STR_TO_MAP('a:1,b:2,c:3')", "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", ) self.validate_all( "SELECT * FROM parquet.`name.parquet`", read={ "duckdb": "SELECT * FROM READ_PARQUET('name.parquet')", "spark": "SELECT * FROM parquet.`name.parquet`", }, ) self.validate_all( "SELECT TO_JSON(STRUCT('blah' AS x)) AS y", write={ "presto": "SELECT JSON_FORMAT(CAST(CAST(ROW('blah') AS ROW(x VARCHAR)) AS JSON)) AS y", "spark": "SELECT TO_JSON(STRUCT('blah' AS x)) AS y", "trino": "SELECT JSON_FORMAT(CAST(CAST(ROW('blah') AS ROW(x VARCHAR)) AS JSON)) AS y", }, ) self.validate_all( "SELECT TRY_ELEMENT_AT(ARRAY(1, 2, 3), 2)", read={ "databricks": "SELECT TRY_ELEMENT_AT(ARRAY(1, 2, 3), 2)", "presto": "SELECT ELEMENT_AT(ARRAY[1, 2, 3], 2)", }, write={ "databricks": "SELECT TRY_ELEMENT_AT(ARRAY(1, 2, 3), 2)", "spark": "SELECT TRY_ELEMENT_AT(ARRAY(1, 2, 3), 2)", "duckdb": "SELECT [1, 2, 3][2]", "duckdb, version=1.1.0": "SELECT ([1, 2, 3])[2]", "presto": "SELECT ELEMENT_AT(ARRAY[1, 2, 3], 2)", }, ) self.validate_all( "SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", read={ "spark2": "SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", "spark": 
"SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", "databricks": "SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", }, write={ "spark2": "SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", "databricks": "SELECT ELEMENT_AT(ARRAY(1, 2, 3), 1)", }, ) self.validate_all( "SELECT h.id, amount FROM hourlycostagg h LATERAL VIEW inline(h.costs) c", write={ "duckdb": "SELECT h.id, amount FROM hourlycostagg AS h CROSS JOIN LATERAL (SELECT UNNEST(h.costs, max_depth => 2)) AS c", }, ) self.validate_all( "SELECT h.id, amount FROM hourlycostagg h LATERAL VIEW inline(h.adjustments) as type, val, curr", write={ "duckdb": "SELECT h.id, amount FROM hourlycostagg AS h CROSS JOIN LATERAL (SELECT UNNEST(h.adjustments, max_depth => 2)) AS _u_0(type, val, curr)", }, ) self.validate_all( """ WITH hourlycostagg AS ( SELECT 101 AS id, ARRAY( STRUCT(10.0 AS amount, 'USD' AS currency), STRUCT(20.0 AS amount, 'EUR' AS currency) ) AS costs, ARRAY( STRUCT('tax' AS type, 0.15 AS val, 'EUR' AS currency), STRUCT('fee' AS type, 5.00 AS val, 'EUR' AS currency) ) AS adjustments, ARRAY( STRUCT( 12.0 AS length, STRUCT('A' AS tag, 98.5 AS score) AS details ), STRUCT( 23.0 AS length, STRUCT('B' AS tag, 99.5 AS score) AS details ) ) AS info ) SELECT h.id, amount, currency, type, val, leng FROM hourlycostagg h LATERAL VIEW inline(h.costs) c LATERAL VIEW inline(h.adjustments) as type, val, curr LATERAL VIEW inline(h.info) exploded as leng, det """, write={ "duckdb": "WITH hourlycostagg AS (SELECT 101 AS id, [{'amount': 10.0, 'currency': 'USD'}, {'amount': 20.0, 'currency': 'EUR'}] AS costs, [{'type': 'tax', 'val': 0.15, 'currency': 'EUR'}, {'type': 'fee', 'val': 5.00, 'currency': 'EUR'}] AS adjustments, [{'length': 12.0, 'details': {'tag': 'A', 'score': 98.5}}, {'length': 23.0, 'details': {'tag': 'B', 'score': 99.5}}] AS info) SELECT h.id, amount, currency, type, val, leng FROM hourlycostagg AS h CROSS JOIN LATERAL (SELECT UNNEST(h.costs, max_depth => 2)) AS c CROSS JOIN LATERAL (SELECT UNNEST(h.adjustments, max_depth => 2)) AS _u_1(type, 
val, curr) CROSS JOIN LATERAL (SELECT UNNEST(h.info, max_depth => 2)) AS exploded(leng, det)", }, ) self.validate_all( "SELECT id_column, name, age FROM test_table LATERAL VIEW INLINE(struc_column) explode_view AS name, age", write={ "presto": "SELECT id_column, name, age FROM test_table CROSS JOIN UNNEST(struc_column) AS explode_view(name, age)", "spark": "SELECT id_column, name, age FROM test_table LATERAL VIEW INLINE(struc_column) explode_view AS name, age", "duckdb": "SELECT id_column, name, age FROM test_table CROSS JOIN LATERAL (SELECT UNNEST(struc_column, max_depth => 2)) AS explode_view(name, age)", }, ) self.validate_all( "SELECT ARRAY_AGG(x) FILTER (WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)", write={ "duckdb": "SELECT ARRAY_AGG(x) FILTER(WHERE x = 5 AND NOT x IS NULL) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)", "spark": "SELECT COLLECT_LIST(x) FILTER(WHERE x = 5) FROM (SELECT 1 UNION ALL SELECT NULL) AS t(x)", }, ) self.validate_all( "SELECT ARRAY_AGG(1)", write={ "duckdb": "SELECT ARRAY_AGG(1)", "spark": "SELECT COLLECT_LIST(1)", }, ) self.validate_all( "SELECT ARRAY_AGG(DISTINCT STRUCT('a'))", write={ "duckdb": "SELECT ARRAY_AGG(DISTINCT {'col1': 'a'})", "spark": "SELECT COLLECT_LIST(DISTINCT STRUCT('a' AS col1))", }, ) self.validate_all( "SELECT DATE_FORMAT(DATE '2020-01-01', 'EEEE') AS weekday", write={ "presto": "SELECT DATE_FORMAT(CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP), '%W') AS weekday", "spark": "SELECT DATE_FORMAT(CAST('2020-01-01' AS DATE), 'EEEE') AS weekday", }, ) self.validate_all( "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", read={ "databricks": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", }, write={ "databricks": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", "duckdb": "SELECT MAP([1, 2], ['a', 'b'])[2]", "duckdb, version=1.1.0": "SELECT (MAP([1, 2], ['a', 'b'])[2])[1]", "spark": "SELECT TRY_ELEMENT_AT(MAP(1, 'a', 2, 'b'), 2)", }, ) self.validate_all( "SELECT SPLIT('123|789', '\\\\|')", read={ "duckdb": 
"SELECT STR_SPLIT_REGEX('123|789', '\\|')", "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", }, write={ "duckdb": "SELECT STR_SPLIT_REGEX('123|789', '\\|')", "presto": "SELECT REGEXP_SPLIT('123|789', '\\|')", "spark": "SELECT SPLIT('123|789', '\\\\|')", }, ) self.validate_all( "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", write={ "clickhouse": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", "databricks": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", "doris": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS `name` UNION ALL SELECT NULL AS id, 'jake' AS `name`) SELECT COUNT(DISTINCT id, `name`) AS cnt FROM tbl", "duckdb": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl", "hive": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", "mysql": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", "postgres": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl", "presto": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT CASE WHEN id IS NULL THEN NULL WHEN name IS NULL THEN NULL ELSE (id, name) END) AS cnt FROM tbl", "snowflake": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM 
tbl", "spark": "WITH tbl AS (SELECT 1 AS id, 'eggy' AS name UNION ALL SELECT NULL AS id, 'jake' AS name) SELECT COUNT(DISTINCT id, name) AS cnt FROM tbl", }, ) self.validate_all( "SELECT TO_UTC_TIMESTAMP('2016-08-31', 'Asia/Seoul')", write={ "bigquery": "SELECT DATETIME(TIMESTAMP(CAST('2016-08-31' AS DATETIME), 'Asia/Seoul'), 'UTC')", "duckdb": "SELECT CAST('2016-08-31' AS TIMESTAMP) AT TIME ZONE 'Asia/Seoul' AT TIME ZONE 'UTC'", "postgres": "SELECT CAST('2016-08-31' AS TIMESTAMP) AT TIME ZONE 'Asia/Seoul' AT TIME ZONE 'UTC'", "presto": "SELECT WITH_TIMEZONE(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul') AT TIME ZONE 'UTC'", "redshift": "SELECT CAST('2016-08-31' AS TIMESTAMP) AT TIME ZONE 'Asia/Seoul' AT TIME ZONE 'UTC'", "snowflake": "SELECT CONVERT_TIMEZONE('Asia/Seoul', 'UTC', CAST('2016-08-31' AS TIMESTAMP))", "spark": "SELECT TO_UTC_TIMESTAMP(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul')", }, ) self.validate_all( "SELECT FROM_UTC_TIMESTAMP('2016-08-31', 'Asia/Seoul')", write={ "presto": "SELECT AT_TIMEZONE(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul')", "spark": "SELECT FROM_UTC_TIMESTAMP(CAST('2016-08-31' AS TIMESTAMP), 'Asia/Seoul')", }, ) self.validate_all( "foo.bar", read={ "": "STRUCT_EXTRACT(foo, bar)", }, ) self.validate_all( "MAP(1, 2, 3, 4)", write={ "spark": "MAP(1, 2, 3, 4)", "trino": "MAP(ARRAY[1, 3], ARRAY[2, 4])", }, ) self.validate_all( "MAP()", read={ "spark": "MAP()", "trino": "MAP()", }, write={ "trino": "MAP(ARRAY[], ARRAY[])", }, ) self.validate_all( "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", read={ "presto": "SELECT SPLIT_TO_MAP('a:1,b:2,c:3', ',', ':')", "spark": "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", }, write={ "presto": "SELECT SPLIT_TO_MAP('a:1,b:2,c:3', ',', ':')", "spark": "SELECT STR_TO_MAP('a:1,b:2,c:3', ',', ':')", }, ) self.validate_all( "SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))", read={ "duckdb": "SELECT DATEDIFF('month', CAST('1996-10-30' AS TIMESTAMPTZ), 
CAST('1997-02-28 10:30:00' AS TIMESTAMPTZ))", }, write={ "spark": "SELECT DATEDIFF(MONTH, CAST('1996-10-30' AS TIMESTAMP), CAST('1997-02-28 10:30:00' AS TIMESTAMP))", "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('1997-02-28 10:30:00' AS TIMESTAMP), CAST('1996-10-30' AS TIMESTAMP)) AS INT)", }, ) self.validate_all( "SELECT DATEDIFF(week, '2020-01-01', '2020-12-31')", write={ "bigquery": "SELECT DATE_DIFF(CAST('2020-12-31' AS DATE), CAST('2020-01-01' AS DATE), WEEK)", "duckdb": "SELECT DATE_DIFF('WEEK', CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", "hive": "SELECT CAST(DATEDIFF('2020-12-31', '2020-01-01') / 7 AS INT)", "postgres": "SELECT CAST(EXTRACT(days FROM (CAST(CAST('2020-12-31' AS DATE) AS TIMESTAMP) - CAST(CAST('2020-01-01' AS DATE) AS TIMESTAMP))) / 7 AS BIGINT)", "redshift": "SELECT DATEDIFF(WEEK, CAST('2020-01-01' AS DATE), CAST('2020-12-31' AS DATE))", "snowflake": "SELECT DATEDIFF(WEEK, TO_DATE('2020-01-01'), TO_DATE('2020-12-31'))", "spark": "SELECT DATEDIFF(WEEK, '2020-01-01', '2020-12-31')", }, ) self.validate_all( "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')", write={ "duckdb": "SELECT DATE_DIFF('MONTH', CAST('1996-10-30' AS DATE), CAST('1997-02-28 10:30:00' AS DATE)) + CASE WHEN DAY(CAST('1997-02-28 10:30:00' AS DATE)) = DAY(LAST_DAY(CAST('1997-02-28 10:30:00' AS DATE))) AND DAY(CAST('1996-10-30' AS DATE)) = DAY(LAST_DAY(CAST('1996-10-30' AS DATE))) THEN 0 ELSE (DAY(CAST('1997-02-28 10:30:00' AS DATE)) - DAY(CAST('1996-10-30' AS DATE))) / 31.0 END", "hive": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')", "spark": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')", }, ) self.validate_all( "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30', FALSE)", write={ "duckdb": "SELECT DATE_DIFF('MONTH', CAST('1996-10-30' AS DATE), CAST('1997-02-28 10:30:00' AS DATE)) + CASE WHEN DAY(CAST('1997-02-28 10:30:00' AS DATE)) = DAY(LAST_DAY(CAST('1997-02-28 10:30:00' AS DATE))) AND DAY(CAST('1996-10-30' AS 
DATE)) = DAY(LAST_DAY(CAST('1996-10-30' AS DATE))) THEN 0 ELSE (DAY(CAST('1997-02-28 10:30:00' AS DATE)) - DAY(CAST('1996-10-30' AS DATE))) / 31.0 END", "hive": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30')", "spark": "SELECT MONTHS_BETWEEN('1997-02-28 10:30:00', '1996-10-30', FALSE)", }, ) self.validate_all( "SELECT TO_TIMESTAMP('2016-12-31 00:12:00')", write={ "": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)", "duckdb": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)", "spark": "SELECT CAST('2016-12-31 00:12:00' AS TIMESTAMP)", }, ) self.validate_all( "SELECT TO_TIMESTAMP(x, 'zZ')", write={ "": "SELECT STR_TO_TIME(x, '%Z%z')", "duckdb": "SELECT STRPTIME(x, '%Z%z')", }, ) self.validate_all( "SELECT TO_TIMESTAMP('2016-12-31', 'yyyy-MM-dd')", read={ "duckdb": "SELECT STRPTIME('2016-12-31', '%Y-%m-%d')", }, write={ "": "SELECT STR_TO_TIME('2016-12-31', '%Y-%m-%d')", "duckdb": "SELECT STRPTIME('2016-12-31', '%Y-%m-%d')", "spark": "SELECT TO_TIMESTAMP('2016-12-31', 'yyyy-MM-dd')", }, ) self.validate_all( "SELECT RLIKE('John Doe', 'John.*')", write={ "bigquery": "SELECT REGEXP_CONTAINS('John Doe', 'John.*')", "hive": "SELECT 'John Doe' RLIKE 'John.*'", "postgres": "SELECT 'John Doe' ~ 'John.*'", "snowflake": "SELECT REGEXP_LIKE('John Doe', 'John.*')", "spark": "SELECT 'John Doe' RLIKE 'John.*'", }, ) self.validate_all( "UNHEX(MD5(x))", write={ "bigquery": "FROM_HEX(TO_HEX(MD5(x)))", "spark": "UNHEX(MD5(x))", }, ) self.validate_all( "SELECT * FROM ((VALUES 1))", write={"spark": "SELECT * FROM (VALUES (1))"} ) self.validate_all( "SELECT CAST(STRUCT('fooo') AS STRUCT)", write={"spark": "SELECT CAST(STRUCT('fooo' AS col1) AS STRUCT)"}, ) self.validate_all( "SELECT CAST(123456 AS VARCHAR(3))", write={ "": "SELECT TRY_CAST(123456 AS TEXT)", "databricks": "SELECT TRY_CAST(123456 AS STRING)", "spark": "SELECT CAST(123456 AS STRING)", "spark2": "SELECT CAST(123456 AS STRING)", }, ) self.validate_all( "SELECT TRY_CAST('a' AS INT)", write={ "": "SELECT 
TRY_CAST('a' AS INT)", "databricks": "SELECT TRY_CAST('a' AS INT)", "spark": "SELECT TRY_CAST('a' AS INT)", "spark2": "SELECT CAST('a' AS INT)", }, ) self.validate_all( "SELECT piv.Q1 FROM (SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", read={ "snowflake": "SELECT piv.Q1 FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", }, ) self.validate_all( "SELECT piv.Q1 FROM (SELECT * FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", read={ "snowflake": "SELECT piv.Q1 FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", }, ) self.validate_all( "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR quarter IN ('Q1', 'Q2'))", read={ "snowflake": "SELECT * FROM produce PIVOT (SUM(produce.sales) FOR produce.quarter IN ('Q1', 'Q2'))", }, ) self.validate_all( "SELECT * FROM produce AS p PIVOT(SUM(p.sales) AS sales FOR quarter IN ('Q1' AS Q1, 'Q2' AS Q1))", read={ "bigquery": "SELECT * FROM produce AS p PIVOT(SUM(p.sales) AS sales FOR p.quarter IN ('Q1' AS Q1, 'Q2' AS Q1))", }, ) self.validate_all( "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')", write={ "databricks": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')", "hive": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)", "presto": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))", "spark": "SELECT DATEDIFF(MONTH, '2020-01-01', '2020-03-05')", "spark2": "SELECT CAST(MONTHS_BETWEEN('2020-03-05', '2020-01-01') AS INT)", "trino": "SELECT DATE_DIFF('MONTH', CAST(CAST('2020-01-01' AS TIMESTAMP) AS DATE), CAST(CAST('2020-03-05' AS TIMESTAMP) AS DATE))", }, ) self.validate_all( "SELECT * FROM quarterly_sales PIVOT(SUM(amount) AS amount, 'dummy' AS bar FOR quarter IN ('2023_Q1'))", read={ "spark": "SELECT * FROM quarterly_sales PIVOT(SUM(amount) amount, 'dummy' bar FOR quarter IN ('2023_Q1'))", "databricks": "SELECT * FROM quarterly_sales 
PIVOT(SUM(amount) amount, 'dummy' bar FOR quarter IN ('2023_Q1'))", }, write={ "databricks": "SELECT * FROM quarterly_sales PIVOT(SUM(amount) AS amount, 'dummy' AS bar FOR quarter IN ('2023_Q1'))", }, ) for data_type in ( "BOOLEAN", "DATE", "DOUBLE", "FLOAT", "INT", "TIMESTAMP", ): self.validate_all( f"{data_type}(x)", write={ "": f"CAST(x AS {data_type})", "spark": f"CAST(x AS {data_type})", }, ) for ts_suffix in ("NTZ", "LTZ"): self.validate_all( f"TIMESTAMP_{ts_suffix}(x)", write={ "": f"CAST(x AS TIMESTAMP{ts_suffix})", "spark": f"CAST(x AS TIMESTAMP_{ts_suffix})", }, ) self.validate_all( "STRING(x)", write={ "": "CAST(x AS TEXT)", "spark": "CAST(x AS STRING)", }, ) self.validate_all( "CAST(x AS TIMESTAMP)", read={ "trino": "CAST(x AS TIMESTAMP(6) WITH TIME ZONE)", }, ) self.validate_all( "SELECT DATE_ADD(my_date_column, 1)", write={ "spark": "SELECT DATE_ADD(my_date_column, 1)", "spark2": "SELECT DATE_ADD(my_date_column, 1)", "bigquery": "SELECT DATE_ADD(CAST(CAST(my_date_column AS DATETIME) AS DATE), INTERVAL 1 DAY)", }, ) self.validate_all( "AGGREGATE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", write={ "trino": "REDUCE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", "duckdb": "REDUCE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", "hive": "REDUCE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", "presto": "REDUCE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", "spark": "AGGREGATE(my_arr, 0, (acc, x) -> acc + x, s -> s * 2)", }, ) self.validate_all( "TRIM('SL', 'SSparkSQLS')", write={"spark": "TRIM('SL' FROM 'SSparkSQLS')"} ) self.validate_all( "ARRAY_SORT(x, (left, right) -> -1)", write={ "duckdb": "ARRAY_SORT(x)", "presto": 'ARRAY_SORT(x, ("left", "right") -> -1)', "hive": "SORT_ARRAY(x)", "spark": "ARRAY_SORT(x, (left, right) -> -1)", }, ) self.validate_all( "ARRAY(0, 1, 2)", write={ "bigquery": "[0, 1, 2]", "duckdb": "[0, 1, 2]", "presto": "ARRAY[0, 1, 2]", "hive": "ARRAY(0, 1, 2)", "spark": "ARRAY(0, 1, 2)", }, ) self.validate_all( "SELECT fname, lname, age FROM 
person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "clickhouse": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST", "duckdb": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST", "postgres": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname ASC, lname NULLS FIRST", "presto": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC, lname NULLS FIRST", "hive": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", "snowflake": "SELECT fname, lname, age FROM person ORDER BY age DESC, fname ASC, lname NULLS FIRST", }, ) self.validate_all( "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", write={ "duckdb": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "presto": "SELECT APPROX_DISTINCT(a) FROM foo", "hive": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", "spark": "SELECT APPROX_COUNT_DISTINCT(a) FROM foo", }, ) self.validate_all( "MONTH('2021-03-01')", write={ "duckdb": "MONTH(CAST('2021-03-01' AS DATE))", "presto": "MONTH(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))", "hive": "MONTH('2021-03-01')", "spark": "MONTH('2021-03-01')", }, ) self.validate_all( "YEAR('2021-03-01')", write={ "duckdb": "YEAR(CAST('2021-03-01' AS DATE))", "presto": "YEAR(CAST(CAST('2021-03-01' AS TIMESTAMP) AS DATE))", "hive": "YEAR('2021-03-01')", "spark": "YEAR('2021-03-01')", }, ) self.validate_all( "'\u6bdb'", write={ "duckdb": "'毛'", "presto": "'毛'", "hive": "'毛'", "spark": "'毛'", }, ) self.validate_all( "SELECT LEFT(x, 2), RIGHT(x, 2)", write={ "duckdb": "SELECT LEFT(x, 2), RIGHT(x, 2)", "presto": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - (2 - 1))", "hive": "SELECT SUBSTRING(x, 1, 2), SUBSTRING(x, LENGTH(x) - (2 - 1))", "spark": "SELECT LEFT(x, 2), RIGHT(x, 2)", }, ) 
self.validate_identity( "SELECT SUBSTR('Spark' FROM 5 FOR 1)", "SELECT SUBSTRING('Spark', 5, 1)" ) self.validate_identity("SELECT SUBSTR('Spark SQL', 5)", "SELECT SUBSTRING('Spark SQL', 5)") self.validate_identity( "SELECT SUBSTR(ENCODE('Spark SQL', 'utf-8'), 5)", "SELECT SUBSTRING(ENCODE('Spark SQL', 'utf-8'), 5)", ) self.validate_all( "MAP_FROM_ARRAYS(ARRAY(1), c)", write={ "duckdb": "MAP([1], c)", "presto": "MAP(ARRAY[1], c)", "hive": "MAP(ARRAY(1), c)", "spark": "MAP_FROM_ARRAYS(ARRAY(1), c)", "snowflake": "OBJECT_CONSTRUCT([1], c)", }, ) self.validate_all( "SELECT ARRAY_SORT(x)", write={ "duckdb": "SELECT ARRAY_SORT(x)", "presto": "SELECT ARRAY_SORT(x)", "hive": "SELECT SORT_ARRAY(x)", "spark": "SELECT ARRAY_SORT(x)", }, ) self.validate_all( "SELECT DATE_ADD(MONTH, 20, col)", read={ "spark": "SELECT TIMESTAMPADD(MONTH, 20, col)", }, write={ "spark": "SELECT DATE_ADD(MONTH, 20, col)", "databricks": "SELECT DATE_ADD(MONTH, 20, col)", "presto": "SELECT DATE_ADD('MONTH', 20, col)", "trino": "SELECT DATE_ADD('MONTH', 20, col)", }, ) self.validate_identity("DESCRIBE schema.test PARTITION(ds = '2024-01-01')") self.validate_all( "SELECT ANY_VALUE(col, true), FIRST(col, true), FIRST_VALUE(col, true) OVER ()", write={ "duckdb": "SELECT ANY_VALUE(col), ANY_VALUE(col), FIRST_VALUE(col IGNORE NULLS) OVER ()" }, ) self.validate_all( "SELECT STRUCT(1, 2)", write={ "spark": "SELECT STRUCT(1 AS col1, 2 AS col2)", "presto": "SELECT CAST(ROW(1, 2) AS ROW(col1 INTEGER, col2 INTEGER))", "duckdb": "SELECT {'col1': 1, 'col2': 2}", }, ) self.validate_all( "SELECT STRUCT(x, 1, y AS col3, STRUCT(5)) FROM t", write={ "spark": "SELECT STRUCT(x AS x, 1 AS col2, y AS col3, STRUCT(5 AS col1) AS col4) FROM t", "duckdb": "SELECT {'x': x, 'col2': 1, 'col3': y, 'col4': {'col1': 5}} FROM t", }, ) self.validate_all( "SELECT TIMESTAMPDIFF(MONTH, foo, bar)", read={ "databricks": "SELECT TIMESTAMPDIFF(MONTH, foo, bar)", }, write={ "spark": "SELECT TIMESTAMPDIFF(MONTH, foo, bar)", "databricks": 
"SELECT TIMESTAMPDIFF(MONTH, foo, bar)", }, ) self.validate_all( "SELECT CAST(col AS TIMESTAMP)", write={ "spark2": "SELECT CAST(col AS TIMESTAMP)", "spark": "SELECT CAST(col AS TIMESTAMP)", "databricks": "SELECT TRY_CAST(col AS TIMESTAMP)", "duckdb": "SELECT TRY_CAST(col AS TIMESTAMPTZ)", }, ) self.validate_all( "SELECT * FROM {df}", read={ "databricks": "SELECT * FROM {df}", }, write={ "spark": "SELECT * FROM {df}", "databricks": "SELECT * FROM {df}", }, ) self.validate_all( "SELECT * FROM {df} WHERE id > :foo", read={ "databricks": "SELECT * FROM {df} WHERE id > :foo", }, write={ "spark": "SELECT * FROM {df} WHERE id > :foo", "databricks": "SELECT * FROM {df} WHERE id > :foo", }, ) self.validate_all( "STRING_AGG(x, ', ')", write={ "spark, version=3.0.0": "ARRAY_JOIN(COLLECT_LIST(x), ', ')", "spark, version=4.0.0": "LISTAGG(x, ', ')", "spark": "LISTAGG(x, ', ')", }, ) self.validate_all( "LISTAGG(x, ', ')", write={ "spark, version=3.0.0": "ARRAY_JOIN(COLLECT_LIST(x), ', ')", "spark, version=4.0.0": "LISTAGG(x, ', ')", "spark": "LISTAGG(x, ', ')", }, ) self.validate_all( "LIKE(foo, 'pattern')", write={ "spark": "foo LIKE 'pattern'", "databricks": "foo LIKE 'pattern'", }, ) self.validate_all( "LIKE(foo, 'pattern', '!')", write={ "spark": "foo LIKE 'pattern' ESCAPE '!'", "databricks": "foo LIKE 'pattern' ESCAPE '!'", }, ) self.validate_all( "ILIKE(foo, 'pattern')", write={ "spark": "foo ILIKE 'pattern'", "databricks": "foo ILIKE 'pattern'", }, ) self.validate_all( "ILIKE(foo, 'pattern', '!')", write={ "spark": "foo ILIKE 'pattern' ESCAPE '!'", "databricks": "foo ILIKE 'pattern' ESCAPE '!'", }, ) self.validate_identity("BIT_GET(11, 0)", "GETBIT(11, 0)") self.validate_identity("BITMAP_OR_AGG(x)") self.validate_identity("SELECT ELT(2, 'foo', 'bar', 'baz') AS Result") self.validate_identity("SELECT MAKE_INTERVAL(100, 11, 12, 13, 14, 14, 15)") self.validate_identity("SELECT name, GROUPING_ID() FROM customer GROUP BY ROLLUP (name)") self.validate_identity("SELECT 
MAKE_TIMESTAMP(2014, 12, 28, 6, 30, 45.887)") self.validate_identity("SELECT CURDATE()", "SELECT CURRENT_DATE") self.validate_all( "SELECT BIT_COUNT(0)", write={ "spark": "SELECT BIT_COUNT(0)", "databricks": "SELECT BIT_COUNT(0)", "duckdb": "SELECT BIT_COUNT(0)", }, ) self.validate_all( "SELECT * FROM foo TIMESTAMP AS OF '2020-01-01 00:00:00' AS bar", read={ "spark": "SELECT * FROM foo TIMESTAMP AS OF '2020-01-01 00:00:00' AS bar", "databricks": "SELECT * FROM foo TIMESTAMP AS OF '2020-01-01 00:00:00' AS bar", }, write={ "databricks": "SELECT * FROM foo TIMESTAMP AS OF '2020-01-01 00:00:00' AS bar", }, ) self.validate_all( "WITH RECURSIVE t(n) AS (SELECT * FROM VALUES (1) AS _values) SELECT n FROM t", read={ "spark": "WITH RECURSIVE t(n) AS (SELECT * FROM VALUES (1) AS _values) SELECT n FROM t", "databricks": "WITH RECURSIVE t(n) AS (SELECT * FROM VALUES (1) AS _values) SELECT n FROM t", }, write={ "databricks": "WITH RECURSIVE t(n) AS (SELECT * FROM VALUES (1) AS _values) SELECT n FROM t", }, ) def test_bool_or(self): """Expect LOGICAL_OR(b) to be written as BOOL_OR(b) for the spark write target.""" self.validate_all( "SELECT a, LOGICAL_OR(b) FROM table GROUP BY a", write={"spark": "SELECT a, BOOL_OR(b) FROM table GROUP BY a"}, ) def test_current_user(self): """Expect both CURRENT_USER and CURRENT_USER() to be written as CURRENT_USER() for the spark write target.""" self.validate_all( "CURRENT_USER", write={"spark": "CURRENT_USER()"}, ) self.validate_all( "CURRENT_USER()", write={"spark": "CURRENT_USER()"}, ) def test_transform_query(self): """Run validate_identity (no alternate expected SQL) over SELECT TRANSFORM(...) USING queries: typed and untyped AS column lists, ROW FORMAT DELIMITED, ROW FORMAT SERDE, and no AS clause.""" self.validate_identity("SELECT TRANSFORM(x) USING 'x' AS (x INT) FROM t") self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a, b, c) FROM person WHERE zip_code > 94511" ) self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' AS (a STRING, b STRING, c STRING) FROM person WHERE zip_code > 94511" ) self.validate_identity( "SELECT TRANSFORM(name, age) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL' USING 'cat' AS (name_age STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\\n' NULL DEFINED AS 'NULL'
FROM person" ) self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') USING 'cat' AS (a STRING, b STRING, c STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ('field.delim'='\\t') FROM person WHERE zip_code > 94511" ) self.validate_identity( "SELECT TRANSFORM(zip_code, name, age) USING 'cat' FROM person WHERE zip_code > 94500" ) def test_insert_cte(self): """Expect INSERT OVERWRITE TABLE ... WITH cte AS (...) SELECT to be written with the WITH clause placed before INSERT for databricks, hive, spark and spark2.""" self.validate_all( "INSERT OVERWRITE TABLE table WITH cte AS (SELECT cola FROM other_table) SELECT cola FROM cte", write={ "databricks": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", "hive": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", "spark": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", "spark2": "WITH cte AS (SELECT cola FROM other_table) INSERT OVERWRITE TABLE table SELECT cola FROM cte", }, ) def test_explode_projection_to_unnest(self): """Pin the expected bigquery, presto, duckdb and spark output for EXPLODE and POSEXPLODE used as projections or in FROM, including the UNNEST-based rewrites and generated names such as _u, _u_2, pos_2 and col_2.""" self.validate_all( "SELECT EXPLODE(x) FROM tbl", write={ "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))", "spark": "SELECT EXPLODE(x) FROM tbl", }, ) self.validate_all( "SELECT EXPLODE(col) FROM _u", write={ "bigquery": "SELECT IF(pos = pos_2, col_2, NULL) AS col_2 FROM _u CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(col)) - 1)) AS pos CROSS JOIN
UNNEST(col) AS col_2 WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(col) - 1) AND pos_2 = (ARRAY_LENGTH(col) - 1))", "presto": "SELECT IF(_u_2.pos = _u_3.pos_2, _u_3.col_2) AS col_2 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u_2(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_3(col_2, pos_2) WHERE _u_2.pos = _u_3.pos_2 OR (_u_2.pos > CARDINALITY(col) AND _u_3.pos_2 = CARDINALITY(col))", "spark": "SELECT EXPLODE(col) FROM _u", }, ) self.validate_all( "SELECT EXPLODE(col) AS exploded FROM schema.tbl", write={ "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.exploded) AS exploded FROM schema.tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(col)))) AS _u(pos) CROSS JOIN UNNEST(col) WITH ORDINALITY AS _u_2(exploded, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(col) AND _u_2.pos_2 = CARDINALITY(col))", }, ) self.validate_all( "SELECT EXPLODE(ARRAY(1, 2))", write={ "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([1, 2])) - 1)) AS pos CROSS JOIN UNNEST([1, 2]) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH([1, 2]) - 1) AND pos_2 = (ARRAY_LENGTH([1, 2]) - 1))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[1, 2])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[1, 2]) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[1, 2]) AND _u_2.pos_2 = CARDINALITY(ARRAY[1, 2]))", }, ) self.validate_all( "SELECT POSEXPLODE(ARRAY(2, 3)) AS x", write={ "bigquery": "SELECT IF(pos = pos_2, x, NULL) AS x, IF(pos = pos_2, pos_2, NULL) AS pos_2 FROM UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS x WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.x) AS x, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS 
pos_2 FROM UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(x, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))", }, ) self.validate_all( "SELECT POSEXPLODE(ARRAY('a'))", write={ "duckdb": "SELECT GENERATE_SUBSCRIPTS(['a'], 1) - 1 AS pos, UNNEST(['a']) AS col", "spark": "SELECT POSEXPLODE(ARRAY('a'))", }, ) self.validate_all( "SELECT POSEXPLODE(x) AS (a, b)", write={ "presto": "SELECT IF(_u.pos = _u_2.a, _u_2.b) AS b, IF(_u.pos = _u_2.a, _u_2.a) AS a FROM UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(b, a) WHERE _u.pos = _u_2.a OR (_u.pos > CARDINALITY(x) AND _u_2.a = CARDINALITY(x))", "duckdb": "SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b", "spark": "SELECT POSEXPLODE(x) AS (a, b)", }, ) self.validate_all( "SELECT * FROM POSEXPLODE(ARRAY('a'))", write={ "duckdb": "SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(['a'], 1) - 1 AS pos, UNNEST(['a']) AS col)", "spark": "SELECT * FROM POSEXPLODE(ARRAY('a'))", }, ) self.validate_all( "SELECT * FROM POSEXPLODE(ARRAY('a')) AS (a, b)", write={ "duckdb": "SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(['a'], 1) - 1 AS a, UNNEST(['a']) AS b)", "spark": "SELECT * FROM POSEXPLODE(ARRAY('a')) AS _t0(a, b)", }, ) self.validate_all( "SELECT POSEXPLODE(ARRAY(2, 3)), EXPLODE(ARRAY(4, 5, 6)) FROM tbl", write={ "bigquery": "SELECT IF(pos = pos_2, col, NULL) AS col, IF(pos = pos_2, pos_2, NULL) AS pos_2, IF(pos = pos_3, col_2, NULL) AS col_2 FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH([2, 3]), ARRAY_LENGTH([4, 5, 6])) - 1)) AS pos CROSS JOIN UNNEST([2, 3]) AS col WITH OFFSET AS pos_2 CROSS JOIN UNNEST([4, 5, 6]) AS col_2 WITH OFFSET AS pos_3 WHERE (pos = pos_2 OR (pos > (ARRAY_LENGTH([2, 3]) - 1) AND pos_2 = (ARRAY_LENGTH([2, 3]) - 1))) AND (pos = pos_3 OR (pos > (ARRAY_LENGTH([4, 5, 6]) - 1) AND pos_3 = 
(ARRAY_LENGTH([4, 5, 6]) - 1)))", "presto": "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, IF(_u.pos = _u_2.pos_2, _u_2.pos_2) AS pos_2, IF(_u.pos = _u_3.pos_3, _u_3.col_2) AS col_2 FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3]), CARDINALITY(ARRAY[4, 5, 6])))) AS _u(pos) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_2(col, pos_2) CROSS JOIN UNNEST(ARRAY[4, 5, 6]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE (_u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(ARRAY[2, 3]) AND _u_2.pos_2 = CARDINALITY(ARRAY[2, 3]))) AND (_u.pos = _u_3.pos_3 OR (_u.pos > CARDINALITY(ARRAY[4, 5, 6]) AND _u_3.pos_3 = CARDINALITY(ARRAY[4, 5, 6])))", }, ) self.validate_all( "SELECT col, pos, POSEXPLODE(ARRAY(2, 3)) FROM _u", write={ "presto": "SELECT col, pos, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.col_2) AS col_2, IF(_u_2.pos_2 = _u_3.pos_3, _u_3.pos_3) AS pos_3 FROM _u CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(ARRAY[2, 3])))) AS _u_2(pos_2) CROSS JOIN UNNEST(ARRAY[2, 3]) WITH ORDINALITY AS _u_3(col_2, pos_3) WHERE _u_2.pos_2 = _u_3.pos_3 OR (_u_2.pos_2 > CARDINALITY(ARRAY[2, 3]) AND _u_3.pos_3 = CARDINALITY(ARRAY[2, 3]))", }, ) def test_strip_modifiers(self): """For every member of Dialects, expect CLUSTER BY / DISTRIBUTE BY / SORT BY to be kept when generating for the empty-named dialect, databricks, hive, spark and spark2, and dropped for every other dialect value.""" without_modifiers = "SELECT * FROM t" with_modifiers = f"{without_modifiers} CLUSTER BY y DISTRIBUTE BY x SORT BY z" query = self.parse_one(with_modifiers) for dialect in Dialects: with self.subTest(f"Transpiling query with CLUSTER/DISTRIBUTE/SORT BY to {dialect}"): name = dialect.value if name in ("", "databricks", "hive", "spark", "spark2"): self.assertEqual(query.sql(name), with_modifiers) else: self.assertEqual(query.sql(name), without_modifiers) def test_schema_binding_options(self): """Run validate_identity on CREATE VIEW ... WITH SCHEMA <option> for BINDING, COMPENSATION, TYPE EVOLUTION and EVOLUTION, one subTest per option.""" for schema_binding in ( "BINDING", "COMPENSATION", "TYPE EVOLUTION", "EVOLUTION", ): with self.subTest(f"Test roundtrip of VIEW schema binding {schema_binding}"): self.validate_identity( f"CREATE VIEW emp_v WITH SCHEMA {schema_binding} AS SELECT * FROM emp" ) def test_minus(self): """Expect MINUS between two SELECT statements to be written as EXCEPT for the spark and databricks write targets.""" self.validate_all( "SELECT * FROM
db.table1 MINUS SELECT * FROM db.table2", write={ "spark": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", "databricks": "SELECT * FROM db.table1 EXCEPT SELECT * FROM db.table2", }, ) def test_string(self): """For hive, spark2, spark and databricks, parse STRING(a) and expect the same dialect to generate CAST(a AS STRING).""" for dialect in ("hive", "spark2", "spark", "databricks"): with self.subTest(f"Testing STRING() for {dialect}"): query = parse_one("STRING(a)", dialect=dialect) self.assertEqual(query.sql(dialect), "CAST(a AS STRING)") def test_binary_string(self): """For spark2, spark and databricks, expect the hex literals X'ab' and X'' to come back unchanged after parse_one followed by sql in the same dialect.""" for dialect in ("spark2", "spark", "databricks"): with self.subTest(f"Testing HEX strings for {dialect}"): query = parse_one("X'ab'", dialect=dialect) self.assertEqual(query.sql(dialect), "X'ab'") with self.subTest(f"Testing empty HEX strings for {dialect}"): query = parse_one("X''", dialect=dialect) self.assertEqual(query.sql(dialect), "X''") def test_analyze(self): """Run validate_identity on ANALYZE TABLE / ANALYZE TABLES ... COMPUTE STATISTICS statements: NOSCAN, FOR ALL COLUMNS, FOR COLUMNS, FROM/IN db, and PARTITION variants.""" self.validate_identity("ANALYZE TABLE tbl COMPUTE STATISTICS NOSCAN") self.validate_identity("ANALYZE TABLE tbl COMPUTE STATISTICS FOR ALL COLUMNS") self.validate_identity("ANALYZE TABLE tbl COMPUTE STATISTICS FOR COLUMNS foo, bar") self.validate_identity("ANALYZE TABLE ctlg.db.tbl COMPUTE STATISTICS NOSCAN") self.validate_identity("ANALYZE TABLES COMPUTE STATISTICS NOSCAN") self.validate_identity("ANALYZE TABLES FROM db COMPUTE STATISTICS") self.validate_identity("ANALYZE TABLES IN db COMPUTE STATISTICS") self.validate_identity( "ANALYZE TABLE ctlg.db.tbl PARTITION(foo = 'foo', bar = 'bar') COMPUTE STATISTICS NOSCAN" ) def test_transpile_annotated_exploded_column(self): """Qualify and type-annotate (both with dialect spark) a LATERAL VIEW EXPLODE query over an array of structs, once with and once without the explode_view. column prefix, then compare the generated spark and presto SQL against fixed strings.""" from sqlglot.optimizer.annotate_types import annotate_types from sqlglot.optimizer.qualify import qualify for db_prefix in ("", "explode_view."): with self.subTest(f"Annotated exploded column with prefix: {db_prefix}."): sql = f""" WITH test_table AS ( SELECT 12345 AS id_column, ARRAY( STRUCT('John' AS name, 30 AS age), STRUCT('Mary' AS name, 20 AS age), STRUCT('Mike' AS name, 80 AS age), STRUCT('Dan' AS name, 50 AS age) ) AS struct_column ) SELECT id_column,
{db_prefix}new_column.name, {db_prefix}new_column.age FROM test_table LATERAL VIEW EXPLODE(struct_column) explode_view AS new_column """ expr = self.parse_one(sql) qualified = qualify(expr, dialect="spark") annotated = annotate_types(qualified, dialect="spark") self.assertEqual( annotated.sql("spark"), "WITH `test_table` AS (SELECT 12345 AS `id_column`, ARRAY(STRUCT('John' AS `name`, 30 AS `age`), STRUCT('Mary' AS `name`, 20 AS `age`), STRUCT('Mike' AS `name`, 80 AS `age`), STRUCT('Dan' AS `name`, 50 AS `age`)) AS `struct_column`) SELECT `test_table`.`id_column` AS `id_column`, `explode_view`.`new_column`.`name` AS `name`, `explode_view`.`new_column`.`age` AS `age` FROM `test_table` AS `test_table` LATERAL VIEW EXPLODE(`test_table`.`struct_column`) explode_view AS `new_column`", ) self.assertEqual( annotated.sql("presto"), """WITH "test_table" AS (SELECT 12345 AS "id_column", ARRAY[CAST(ROW('John', 30) AS ROW("name" VARCHAR, "age" INTEGER)), CAST(ROW('Mary', 20) AS ROW("name" VARCHAR, "age" INTEGER)), CAST(ROW('Mike', 80) AS ROW("name" VARCHAR, "age" INTEGER)), CAST(ROW('Dan', 50) AS ROW("name" VARCHAR, "age" INTEGER))] AS "struct_column") SELECT "test_table"."id_column" AS "id_column", "explode_view"."name" AS "name", "explode_view"."age" AS "age" FROM "test_table" AS "test_table" CROSS JOIN UNNEST("test_table"."struct_column") AS "explode_view"("name", "age")""", ) def test_approx_percentile(self): """Pair APPROX_PERCENTILE(DISTINCT ...) as spark/databricks read input with PERCENTILE_APPROX(DISTINCT ...), then assert the identity-checked expression is an exp.ApproxQuantile whose `this` is exp.Distinct and whose quantile (and accuracy, when given) is an exp.Literal.""" self.validate_all( "PERCENTILE_APPROX(DISTINCT col, 0.3)", read={ "spark": "APPROX_PERCENTILE(DISTINCT col, 0.3)", "databricks": "APPROX_PERCENTILE(DISTINCT col, 0.3)", }, ) self.validate_all( "PERCENTILE_APPROX(DISTINCT col, 0.3, 200)", read={ "spark": "APPROX_PERCENTILE(DISTINCT col, 0.3, 200)", "databricks": "APPROX_PERCENTILE(DISTINCT col, 0.3, 200)", }, ) approx_quantile_expr = self.validate_identity("PERCENTILE_APPROX(DISTINCT col, 0.3)") approx_quantile_expr.assert_is(exp.ApproxQuantile) approx_quantile_expr.this.assert_is(exp.Distinct)
def test_set_variable(self):
    """SET VAR / SET VARIABLE statements are generated as SET VARIABLE for Spark and Databricks."""
    # Statements that are expected to come back exactly as written.
    unchanged = (
        "SET VARIABLE v = 5",
        "SET VARIABLE v = (SELECT MAX(c1) FROM VALUES (1), (2) AS T(c1))",
        "SET VARIABLE v = DEFAULT",
        "SET VARIABLE v1 = 1, v2 = '2'",
        "SET VARIABLE (v1, v2) = (SELECT 1, 2)",
    )

    # (input SQL, expected generated SQL); only the abbreviated VAR form is rewritten.
    cases = [("SET VAR v = 5", "SET VARIABLE v = 5")]
    cases.extend((statement, statement) for statement in unchanged)

    for sql, expected in cases:
        self.validate_all(
            sql,
            write={
                "spark": expected,
                "databricks": expected,
            },
        )
self.validate_identity( "SELECT a, SUM(b) OVER (ORDER BY a ROWS BETWEEN -1 PRECEDING AND 1 FOLLOWING) FROM t1 ORDER BY 1" ) self.validate_identity( "SELECT JSON_EXTRACT('[10, 20, [30, 40]]', '$[2]', '$[0]', '$[1]')", ) self.validate_identity( """SELECT item AS "item", some AS "some" FROM data WHERE (item = 'value_1' COLLATE NOCASE) AND (some = 't' COLLATE NOCASE) ORDER BY item ASC LIMIT 1 OFFSET 0""" ) self.validate_identity( "SELECT a FROM t1 WHERE a NOT NULL AND a NOT NULL ORDER BY a", "SELECT a FROM t1 WHERE NOT a IS NULL AND NOT a IS NULL ORDER BY a", ) self.validate_identity( "SELECT a, b FROM t1 WHERE b + a NOT NULL ORDER BY 1", "SELECT a, b FROM t1 WHERE NOT b + a IS NULL ORDER BY 1", ) self.validate_identity( "SELECT * FROM t1, t2", "SELECT * FROM t1 CROSS JOIN t2", ) self.validate_identity( "ALTER TABLE t RENAME a TO b", "ALTER TABLE t RENAME COLUMN a TO b", ) self.validate_all("SELECT LIKE(y, x)", write={"sqlite": "SELECT x LIKE y"}) self.validate_all("SELECT GLOB('*y*', 'xyz')", write={"sqlite": "SELECT 'xyz' GLOB '*y*'"}) self.validate_all( "SELECT LIKE('%y%', 'xyz', '')", write={"sqlite": "SELECT 'xyz' LIKE '%y%' ESCAPE ''"} ) self.validate_all( "SELECT MIN(a, b) FROM t", read={ "postgres": "SELECT LEAST(a, b) FROM t", "sqlite": "SELECT MIN(a, b) FROM t", }, ) self.validate_all( "SELECT MAX(a, b) FROM t", read={ "postgres": "SELECT GREATEST(a, b) FROM t", "sqlite": "SELECT MAX(a, b) FROM t", }, ) self.validate_all( "SELECT JSON_GROUP_ARRAY(name) FROM t", read={ "postgres": "SELECT JSON_AGG(name) FROM t", "sqlite": "SELECT JSON_GROUP_ARRAY(name) FROM t", }, write={ "postgres": "SELECT JSON_AGG(name) FROM t", }, ) self.validate_all( "SELECT JSON_GROUP_OBJECT(name, value) FROM t", read={ "postgres": "SELECT JSON_OBJECT_AGG(name, value) FROM t", "sqlite": "SELECT JSON_GROUP_OBJECT(name, value) FROM t", }, write={ "postgres": "SELECT JSON_OBJECT_AGG(name, value) FROM t", }, ) self.validate_all( "CURRENT_DATE", read={ "": "CURRENT_DATE", "snowflake": 
"CURRENT_DATE()", }, ) self.validate_all( "CURRENT_TIME", read={ "": "CURRENT_TIME", }, ) self.validate_all( "CURRENT_TIMESTAMP", read={ "": "CURRENT_TIMESTAMP", "snowflake": "CURRENT_TIMESTAMP()", }, ) self.validate_all( "SELECT DATE('2020-01-01 16:03:05')", read={ "snowflake": "SELECT CAST('2020-01-01 16:03:05' AS DATE)", }, ) self.validate_all( "SELECT CAST([a].[b] AS SMALLINT) FROM foo", write={ "sqlite": 'SELECT CAST("a"."b" AS INTEGER) FROM foo', "spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo", }, ) self.validate_all( "EDITDIST3(col1, col2)", read={ "sqlite": "EDITDIST3(col1, col2)", "spark": "LEVENSHTEIN(col1, col2)", }, write={ "sqlite": "EDITDIST3(col1, col2)", "spark": "LEVENSHTEIN(col1, col2)", }, ) self.validate_all( "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", write={ "spark": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", "sqlite": "SELECT fname, lname, age FROM person ORDER BY age DESC NULLS FIRST, fname ASC NULLS LAST, lname", }, ) self.validate_all("x", read={"snowflake": "LEAST(x)"}) self.validate_all("x", read={"postgres": "GREATEST(x)"}) self.validate_all("MIN(x)", read={"snowflake": "MIN(x)"}, write={"snowflake": "MIN(x)"}) self.validate_all( "MIN(x, y, z)", read={"snowflake": "LEAST(x, y, z)"}, write={"snowflake": "LEAST(x, y, z)"}, ) self.validate_all( "UNICODE(x)", write={ "": "UNICODE(x)", "mysql": "ORD(CONVERT(x USING utf32))", "oracle": "ASCII(UNISTR(x))", "postgres": "ASCII(x)", "redshift": "ASCII(x)", "spark": "ASCII(x)", }, ) self.validate_identity( "SELECT * FROM station WHERE city IS NOT ''", "SELECT * FROM station WHERE NOT city IS ''", ) self.validate_identity("SELECT JSON_OBJECT('col1', 1, 'col2', '1')") self.validate_identity( 'CREATE TABLE "foo t" ("foo t id" TEXT NOT NULL, PRIMARY KEY ("foo t id"))', 'CREATE TABLE "foo t" ("foo t id" TEXT NOT NULL PRIMARY KEY)', ) self.validate_identity("REPLACE INTO foo (x, y) VALUES 
def test_datediff(self):
    """DATEDIFF is generated as a scaled JULIANDAY difference cast to INTEGER."""
    julian_delta = "(JULIANDAY(a) - JULIANDAY(b))"

    # Unit -> arithmetic applied to the day difference before the cast.
    scaling_by_unit = {
        "day": "",
        "hour": " * 24.0",
        "year": " / 365.0",
    }

    for unit, scaling in scaling_by_unit.items():
        self.validate_all(
            f"DATEDIFF(a, b, '{unit}')",
            write={"sqlite": f"CAST({julian_delta}{scaling} AS INTEGER)"},
        )
def test_ddl(self):
    """Check SQLite DDL/DML round-trips and cross-dialect CREATE TABLE generation.

    Covers table-level ``ON CONFLICT`` clauses on UNIQUE constraints,
    ``INSERT OR <action>``, primary-key ordering, temporary tables,
    foreign-key actions (pretty printed), auto-increment columns and the
    NVARCHAR type mapping.
    """
    # The SQL must interpolate the loop variable: with a hard-coded action every
    # iteration would validate the same statement and only the subTest label
    # would change.
    for conflict_action in ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"):
        with self.subTest(f"ON CONFLICT {conflict_action}"):
            self.validate_identity(
                f"CREATE TABLE a (b, c, UNIQUE (b, c) ON CONFLICT {conflict_action})"
            )

    self.validate_identity("CREATE TABLE over (x, y)")
    self.validate_identity("INSERT OR ABORT INTO foo (x, y) VALUES (1, 2)")
    self.validate_identity("INSERT OR FAIL INTO foo (x, y) VALUES (1, 2)")
    self.validate_identity("INSERT OR IGNORE INTO foo (x, y) VALUES (1, 2)")
    self.validate_identity("INSERT OR REPLACE INTO foo (x, y) VALUES (1, 2)")
    self.validate_identity("INSERT OR ROLLBACK INTO foo (x, y) VALUES (1, 2)")
    self.validate_identity("CREATE TABLE foo (id INTEGER PRIMARY KEY ASC)")
    self.validate_identity("CREATE TEMPORARY TABLE foo (id INTEGER)")

    # NOTE(review): the line breaks and indentation inside the two literals below
    # were reconstructed (the expected output is compared with pretty=True, so it
    # cannot be a single line) -- confirm against the generator's actual output.
    self.validate_all(
        """
        CREATE TABLE "Track"
        (
            CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
            FOREIGN KEY ("AlbumId") REFERENCES "Album" (
                "AlbumId"
            ) ON DELETE NO ACTION ON UPDATE NO ACTION,
            FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
            FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
        )
        """,
        write={
            "sqlite": """CREATE TABLE "Track" (
  CONSTRAINT "PK_Track" FOREIGN KEY ("TrackId"),
  FOREIGN KEY ("AlbumId") REFERENCES "Album" (
    "AlbumId"
  ) ON DELETE NO ACTION ON UPDATE NO ACTION,
  FOREIGN KEY ("AlbumId") ON DELETE CASCADE ON UPDATE RESTRICT,
  FOREIGN KEY ("AlbumId") ON DELETE SET NULL ON UPDATE SET DEFAULT
)""",
        },
        pretty=True,
    )

    # Auto-increment / identity columns across SQLite, MySQL and Postgres.
    self.validate_all(
        "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
        read={
            "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
            "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)",
        },
        write={
            "sqlite": "CREATE TABLE z (a INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT)",
            "mysql": "CREATE TABLE z (a INT UNIQUE PRIMARY KEY AUTO_INCREMENT)",
            "postgres": "CREATE TABLE z (a INT GENERATED BY DEFAULT AS IDENTITY NOT NULL UNIQUE PRIMARY KEY)",
        },
    )

    # NVARCHAR is written as TEXT for SQLite and VARCHAR for MySQL.
    self.validate_all(
        """CREATE TABLE "x" ("Name" NVARCHAR(200) NOT NULL)""",
        write={
            "sqlite": """CREATE TABLE "x" ("Name" TEXT(200) NOT NULL)""",
            "mysql": "CREATE TABLE `x` (`Name` VARCHAR(200) NOT NULL)",
        },
    )

    self.validate_identity(
        "CREATE TABLE store (store_id INTEGER PRIMARY KEY AUTOINCREMENT, mgr_id INTEGER NOT NULL UNIQUE REFERENCES staff ON UPDATE CASCADE)"
    )
def test_starrocks(self):
    """General StarRocks round-trips plus a few checks on the parsed expression types."""
    # The subscript written as 1 is expected to compare equal to the number literal 0.
    bracket = self.validate_identity("arr[1]")
    self.assertEqual(bracket.expressions[0], exp.Literal.number(0))

    round_trips = (
        "SELECT ARRAY_JOIN([1, 3, 5, NULL], '_', 'NULL')",
        "SELECT ARRAY_JOIN([1, 3, 5, NULL], '_')",
        "ALTER TABLE a SWAP WITH b",
        "SELECT ARRAY_AGG(a) FROM x",
        "SELECT ST_POINT(10, 20)",
        "SELECT ST_DISTANCE_SPHERE(10.1, 20.2, 30.3, 40.4)",
    )
    for sql in round_trips:
        self.validate_identity(sql)

    flatten = self.validate_identity("ARRAY_FLATTEN(arr)")
    flatten.assert_is(exp.Flatten)

    bare_condition = "SELECT * FROM t WHERE cond"
    self.validate_all(
        bare_condition,
        read={
            "": "SELECT * FROM t WHERE cond IS TRUE",
            "starrocks": bare_condition,
        },
    )

    self.validate_identity("CURRENT_VERSION()")
col2) PARTITION BY RANGE (col1) (START ('2024-01-01') END ('2024-01-31') EVERY (INTERVAL 1 DAY)) DISTRIBUTED BY HASH (col1)", "UNIQUE KEY (col1, col2) PARTITION BY RANGE (col1, col2) (START ('1') END ('10') EVERY (1), START ('10') END ('100') EVERY (10)) DISTRIBUTED BY HASH (col1)", "ORDER BY (col1, col2)", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(event_day, siteid), r2(event_day, citycode), r3(event_day))", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(col2))", "DISTRIBUTED BY HASH (col1) ROLLUP (`r1`(`col2`))", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(col2) FROM base_index)", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(col2) PROPERTIES ('storage_type'='column'))", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(col2) FROM base_index PROPERTIES ('k'='v'))", "DISTRIBUTED BY HASH (col1) ROLLUP (r1(col2) PROPERTIES ('k1'='v1', 'k2'='v2'))", ] for properties in ddl_sqls: with self.subTest(f"Testing create scheme: {properties}"): self.validate_identity(f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) {properties}") self.validate_identity( f"CREATE TABLE foo (col1 BIGINT, col2 BIGINT) ENGINE=OLAP {properties}" ) # Test the different wider DECIMAL types self.validate_identity( "CREATE TABLE foo (col0 DECIMAL(9, 1), col1 DECIMAL32(9, 1), col2 DECIMAL64(18, 10), col3 DECIMAL128(38, 10)) DISTRIBUTED BY HASH (col1) BUCKETS 1" ) self.validate_identity( "CREATE TABLE foo (col1 LARGEINT) DISTRIBUTED BY HASH (col1) BUCKETS 1" ) self.validate_identity( "CREATE VIEW foo (foo_col1) SECURITY NONE AS SELECT bar_col1 FROM bar" ) # Test ROLLUP property self.validate_all( "CREATE TABLE foo (col1 BIGINT, col2 BIGINT) ROLLUP (r1(col1, col2), r2(col1))", write={ "starrocks": "CREATE TABLE foo (col1 BIGINT, col2 BIGINT) ROLLUP (r1(col1, col2), r2(col1))", "spark": "CREATE TABLE foo (col1 BIGINT, col2 BIGINT)", "duckdb": "CREATE TABLE foo (col1 BIGINT, col2 BIGINT)", "postgres": "CREATE TABLE foo (col1 BIGINT, col2 BIGINT)", }, ) multi_column_cluster = exp.Cluster( expressions=[ exp.column("c"), exp.column("d"), ] ) 
def test_identity(self):
    """Statements that must round-trip, plus one whose column name gets backtick-quoted."""
    unchanged = (
        "SELECT CAST(`a`.`b` AS INT) FROM foo",
        "SELECT APPROX_COUNT_DISTINCT(a) FROM x",
        "SELECT [1, 2, 3]",
        """SELECT CAST(PARSE_JSON(fieldvalue) -> '00000000-0000-0000-0000-00000000' AS VARCHAR) AS `code` FROM (SELECT '{"00000000-0000-0000-0000-00000000":"code01"}') AS t(fieldvalue)""",
    )
    for sql in unchanged:
        self.validate_identity(sql)

    # `text` is expected to come back quoted.
    quoted_output = "SELECT `text` FROM example_table"
    self.validate_identity("SELECT text FROM example_table", write_sql=quoted_output)
def test_regex(self):
    """StarRocks REGEXP and MySQL REGEXP_LIKE are translated into each other."""
    starrocks_sql = "SELECT REGEXP(abc, '%foo%')"
    mysql_sql = "SELECT REGEXP_LIKE(abc, '%foo%')"

    self.validate_all(
        starrocks_sql,
        read={
            "mysql": mysql_sql,
            "starrocks": starrocks_sql,
        },
        write={
            "mysql": mysql_sql,
        },
    )
def test_between(self):
    """BETWEEN is kept in SELECT statements but written as >= / <= pairs in DELETE."""
    select_sql = "SELECT * FROM t WHERE a BETWEEN 1 AND 5"
    self.validate_all(
        select_sql,
        write={
            "starrocks": select_sql,
            "mysql": select_sql,
        },
    )
    self.validate_identity("SELECT a BETWEEN 1 AND 5 FROM t")

    # (DELETE as written, expected regenerated SQL)
    delete_rewrites = (
        (
            "DELETE FROM t WHERE a BETWEEN b AND c",
            "DELETE FROM t WHERE a >= b AND a <= c",
        ),
        (
            "DELETE FROM t WHERE a BETWEEN 1 AND 10 AND b BETWEEN 20 AND 30 OR c BETWEEN 'x' AND 'z'",
            "DELETE FROM t WHERE a >= 1 AND a <= 10 AND b >= 20 AND b <= 30 OR c >= 'x' AND c <= 'z'",
        ),
    )
    for written, expected in delete_rewrites:
        self.validate_identity(written, expected)
class TestTableau(Validator):
    """Transpilation checks for the Tableau dialect."""

    dialect = "tableau"

    def test_tableau(self):
        """Run each Tableau expression through ``validate_all`` with its read/write expectations."""
        # (Tableau SQL, keyword arguments forwarded to validate_all)
        cases = (
            (
                "[x]",
                {"write": {"hive": "`x`", "tableau": "[x]"}},
            ),
            (
                '"x"',
                {"write": {"hive": "'x'", "tableau": "'x'"}},
            ),
            (
                "IF x = 'a' THEN y ELSE NULL END",
                {
                    "read": {"presto": "IF(x = 'a', y, NULL)"},
                    "write": {
                        "presto": "IF(x = 'a', y, NULL)",
                        "hive": "IF(x = 'a', y, NULL)",
                        "tableau": "IF x = 'a' THEN y ELSE NULL END",
                    },
                },
            ),
            (
                "IFNULL(a, 0)",
                {
                    "read": {"presto": "COALESCE(a, 0)"},
                    "write": {
                        "presto": "COALESCE(a, 0)",
                        "hive": "COALESCE(a, 0)",
                        "tableau": "IFNULL(a, 0)",
                    },
                },
            ),
            (
                "COUNTD(a)",
                {
                    "read": {"presto": "COUNT(DISTINCT a)"},
                    "write": {
                        "presto": "COUNT(DISTINCT a)",
                        "hive": "COUNT(DISTINCT a)",
                        "tableau": "COUNTD(a)",
                    },
                },
            ),
            (
                "COUNTD((a))",
                {
                    "read": {"presto": "COUNT(DISTINCT(a))"},
                    "write": {
                        "presto": "COUNT(DISTINCT (a))",
                        "hive": "COUNT(DISTINCT (a))",
                        "tableau": "COUNTD((a))",
                    },
                },
            ),
            (
                "COUNT(a)",
                {
                    "read": {"presto": "COUNT(a)"},
                    "write": {
                        "presto": "COUNT(a)",
                        "hive": "COUNT(a)",
                        "tableau": "COUNT(a)",
                    },
                },
            ),
        )

        for sql, expectations in cases:
            self.validate_all(sql, **expectations)
def test_create(self):
    """Check Teradata CREATE/REPLACE statements.

    Covers REPLACE VIEW normalisation, index and partition clauses, table
    options that follow the table name (journal, checksum, block size, ...),
    VOLATILE tables across dialects, and column FORMAT / COMPRESS attributes.
    """
    # REPLACE VIEW is regenerated as CREATE OR REPLACE VIEW.
    self.validate_identity(
        "REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b",
        "CREATE OR REPLACE VIEW view_b (COL1, COL2) AS LOCKING ROW FOR ACCESS SELECT COL1, COL2 FROM table_b",
    )

    # Statements expected to round-trip unchanged. Each one is listed once;
    # repeating an identical assertion adds no coverage.
    round_trips = (
        "CREATE TABLE x (y INT) PRIMARY INDEX (y) PARTITION BY y INDEX (y)",
        "CREATE TABLE x (y INT) PARTITION BY y INDEX (y)",
        "CREATE MULTISET VOLATILE TABLE my_table (id INT) PRIMARY INDEX (id) ON COMMIT PRESERVE ROWS",
        "CREATE SET VOLATILE TABLE my_table (id INT) PRIMARY INDEX (id) ON COMMIT DELETE ROWS",
        "CREATE TABLE a (b INT) PRIMARY INDEX (y) PARTITION BY RANGE_N(b BETWEEN 'a', 'b' AND 'c' EACH '1')",
        "CREATE TABLE a (b INT) PARTITION BY RANGE_N(b BETWEEN 0, 1 AND 2 EACH 1)",
        "CREATE TABLE a (b INT) PARTITION BY RANGE_N(b BETWEEN *, 1 AND * EACH b) INDEX (a)",
        "CREATE TABLE a, NO FALLBACK PROTECTION, NO LOG, NO JOURNAL, CHECKSUM=ON, NO MERGEBLOCKRATIO, BLOCKCOMPRESSION=ALWAYS (a INT)",
        "CREATE TABLE a, WITH JOURNAL TABLE=x.y.z, CHECKSUM=OFF, MERGEBLOCKRATIO=1, DATABLOCKSIZE=10 KBYTES (a INT)",
        "CREATE TABLE a, BEFORE JOURNAL, AFTER JOURNAL, FREESPACE=1, DEFAULT DATABLOCKSIZE, BLOCKCOMPRESSION=DEFAULT (a INT)",
        "CREATE TABLE a, DUAL JOURNAL, DUAL AFTER JOURNAL, MERGEBLOCKRATIO=1 PERCENT, DATABLOCKSIZE=10 KILOBYTES (a INT)",
        "CREATE TABLE a, DUAL BEFORE JOURNAL, LOCAL AFTER JOURNAL, MAXIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=AUTOTEMP(c1 INT) (a INT)",
        "CREATE VOLATILE MULTISET TABLE a, NOT LOCAL AFTER JOURNAL, FREESPACE=1 PERCENT, DATABLOCKSIZE=10 BYTES, WITH NO CONCURRENT ISOLATED LOADING FOR ALL (a INT)",
        "CREATE VOLATILE SET TABLE example1 AS (SELECT col1, col2, col3 FROM table1) WITH DATA PRIMARY INDEX (col1) ON COMMIT PRESERVE ROWS",
        "CREATE SET GLOBAL TEMPORARY TABLE a, NO BEFORE JOURNAL, NO AFTER JOURNAL, MINIMUM DATABLOCKSIZE, BLOCKCOMPRESSION=NEVER (a INT)",
    )
    for sql in round_trips:
        self.validate_identity(sql)

    # NOTE(review): the line breaks inside this input literal were reconstructed;
    # only the whitespace layout of the SQL text is affected -- confirm.
    self.validate_all(
        """
        CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL,
        CHECKSUM = DEFAULT (x INT, y INT, z CHAR(30), a INT, b DATE, e INT)
        PRIMARY INDEX (a),
        INDEX(x, y)
        """,
        write={
            "teradata": "CREATE SET TABLE test, NO FALLBACK, NO BEFORE JOURNAL, NO AFTER JOURNAL, CHECKSUM=DEFAULT (x INT, y INT, z CHAR(30), a INT, b DATE, e INT) PRIMARY INDEX (a) INDEX (x, y)",
        },
    )
    self.validate_all(
        "REPLACE VIEW a AS (SELECT b FROM c)",
        write={"teradata": "CREATE OR REPLACE VIEW a AS (SELECT b FROM c)"},
    )

    # VOLATILE is kept for Teradata and dropped for every other listed dialect.
    dialects_without_volatile = (
        "bigquery",
        "clickhouse",
        "databricks",
        "drill",
        "duckdb",
        "hive",
        "mysql",
        "oracle",
        "postgres",
        "presto",
        "redshift",
        "snowflake",
        "spark",
        "sqlite",
        "starrocks",
        "tableau",
        "trino",
        "tsql",
    )
    self.validate_all(
        "CREATE VOLATILE TABLE a",
        write={
            "teradata": "CREATE VOLATILE TABLE a",
            **dict.fromkeys(dialects_without_volatile, "CREATE TABLE a"),
        },
    )

    # Column-level FORMAT and COMPRESS attributes.
    self.validate_identity(
        "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS)"
    )
    self.validate_identity(
        "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS (1, 2, 3))"
    )
    self.validate_identity(
        "CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD' COMPRESS (CAST('9999-09-09' AS DATE)), measurement INT)"
    )
b", "mysql": "POWER(a, b)"}) def test_abbrev(self): self.validate_identity("a LT b", "a < b") self.validate_identity("a LE b", "a <= b") self.validate_identity("a GT b", "a > b") self.validate_identity("a GE b", "a >= b") self.validate_identity("a ^= b", "a <> b") self.validate_identity("a NE b", "a <> b") self.validate_identity("a NOT= b", "a <> b") self.validate_identity("a EQ b", "a = b") self.validate_identity("SEL a FROM b", "SELECT a FROM b") self.validate_identity( "SELECT col1, col2 FROM dbc.table1 WHERE col1 EQ 'value1' MINUS SELECT col1, col2 FROM dbc.table2", "SELECT col1, col2 FROM dbc.table1 WHERE col1 = 'value1' EXCEPT SELECT col1, col2 FROM dbc.table2", ) self.validate_identity("UPD a SET b = 1", "UPDATE a SET b = 1") self.validate_identity("DEL FROM a", "DELETE FROM a") def test_datatype(self): self.validate_all( "CREATE TABLE z (a ST_GEOMETRY(1))", write={ "teradata": "CREATE TABLE z (a ST_GEOMETRY(1))", "redshift": "CREATE TABLE z (a GEOMETRY(1))", }, ) self.validate_identity("CREATE TABLE z (a SYSUDTLIB.INT)") def test_cast(self): self.validate_all( "CAST('1992-01' AS DATE FORMAT 'YYYY-DD')", read={ "bigquery": "CAST('1992-01' AS DATE FORMAT 'YYYY-DD')", }, write={ "teradata": "CAST('1992-01' AS DATE FORMAT 'YYYY-DD')", "bigquery": "PARSE_DATE('%Y-%d', '1992-01')", "databricks": "TO_DATE('1992-01', 'yyyy-dd')", "mysql": "STR_TO_DATE('1992-01', '%Y-%d')", "spark": "TO_DATE('1992-01', 'yyyy-dd')", "": "STR_TO_DATE('1992-01', '%Y-%d')", }, ) self.validate_identity("CAST('1992-01' AS FORMAT 'YYYY-DD')") self.validate_all( "TRYCAST('-2.5' AS DECIMAL(5, 2))", read={ "snowflake": "TRY_CAST('-2.5' AS DECIMAL(5, 2))", }, write={ "snowflake": "TRY_CAST('-2.5' AS DECIMAL(5, 2))", "teradata": "TRYCAST('-2.5' AS DECIMAL(5, 2))", }, ) def test_format_override(self): # Teradata column format overrides use the `(FORMAT )` syntax. 
# https://docs.teradata.com/r/Enterprise_IntelliFlex_VMware/SQL-Data-Types-and-Literals/Data-Type-Formats-and-Format-Phrases/FORMAT self.validate_identity("SELECT ('a' || 'b') (FORMAT '...')") self.validate_identity("SELECT Col1 (FORMAT '+9999') FROM Test1") self.validate_identity("SELECT date_col (FORMAT 'YYYY-MM-DD') FROM t") self.validate_identity( "SELECT CAST(Col1 AS INTEGER) FROM Test1", "SELECT CAST(Col1 AS INT) FROM Test1", ) def test_time(self): self.validate_identity("CAST(CURRENT_TIMESTAMP(6) AS TIMESTAMP WITH TIME ZONE)") self.validate_all( "CURRENT_TIMESTAMP", read={ "teradata": "CURRENT_TIMESTAMP", "snowflake": "CURRENT_TIMESTAMP()", }, ) self.validate_all( "SELECT '2023-01-01' + INTERVAL '5' YEAR", read={ "teradata": "SELECT '2023-01-01' + INTERVAL '5' YEAR", "snowflake": "SELECT DATEADD(YEAR, 5, '2023-01-01')", }, ) self.validate_all( "SELECT '2023-01-01' - INTERVAL '5' YEAR", read={ "teradata": "SELECT '2023-01-01' - INTERVAL '5' YEAR", "snowflake": "SELECT DATEADD(YEAR, -5, '2023-01-01')", }, ) self.validate_all( "SELECT '2023-01-01' - INTERVAL '5' YEAR", read={ "teradata": "SELECT '2023-01-01' - INTERVAL '5' YEAR", "sqlite": "SELECT DATE_SUB('2023-01-01', 5, YEAR)", }, ) self.validate_all( "SELECT '2023-01-01' + INTERVAL '5' YEAR", read={ "teradata": "SELECT '2023-01-01' + INTERVAL '5' YEAR", "sqlite": "SELECT DATE_SUB('2023-01-01', -5, YEAR)", }, ) self.validate_all( "SELECT (90 * INTERVAL '1' DAY)", read={ "teradata": "SELECT (90 * INTERVAL '1' DAY)", "snowflake": "SELECT INTERVAL '1' QUARTER", }, ) self.validate_all( "SELECT (7 * INTERVAL '1' DAY)", read={ "teradata": "SELECT (7 * INTERVAL '1' DAY)", "snowflake": "SELECT INTERVAL '1' WEEK", }, ) self.validate_all( "SELECT '2023-01-01' + (90 * INTERVAL '5' DAY)", read={ "teradata": "SELECT '2023-01-01' + (90 * INTERVAL '5' DAY)", "snowflake": "SELECT DATEADD(QUARTER, 5, '2023-01-01')", }, ) self.validate_all( "SELECT '2023-01-01' + (7 * INTERVAL '5' DAY)", read={ "teradata": "SELECT 
'2023-01-01' + (7 * INTERVAL '5' DAY)", "snowflake": "SELECT DATEADD(WEEK, 5, '2023-01-01')", }, ) self.validate_all( "CAST(TO_CHAR(x, 'Q') AS INT)", read={ "teradata": "CAST(TO_CHAR(x, 'Q') AS INT)", "snowflake": "DATE_PART(QUARTER, x)", "bigquery": "EXTRACT(QUARTER FROM x)", }, ) self.validate_all( "EXTRACT(MONTH FROM x)", read={ "teradata": "EXTRACT(MONTH FROM x)", "snowflake": "DATE_PART(MONTH, x)", "bigquery": "EXTRACT(MONTH FROM x)", }, ) self.validate_all( "CAST(TO_CHAR(x, 'Q') AS INT)", read={ "snowflake": "quarter(x)", "teradata": "CAST(TO_CHAR(x, 'Q') AS INT)", }, ) def test_query_band(self): self.validate_identity("SET QUERY_BAND = 'app=myapp;' FOR SESSION") self.validate_identity("SET QUERY_BAND = 'app=myapp;user=john;' FOR TRANSACTION") self.validate_identity("SET QUERY_BAND = 'priority=high;' UPDATE FOR SESSION") self.validate_identity("SET QUERY_BAND = 'workload=batch;' UPDATE FOR TRANSACTION") self.validate_identity("SET QUERY_BAND = 'org=Finance;report=Fin123;' FOR SESSION") self.validate_identity("SET QUERY_BAND = NONE FOR SESSION") self.validate_identity("SET QUERY_BAND = NONE FOR SESSION VOLATILE") self.validate_identity("SET QUERY_BAND = 'priority=high;' UPDATE FOR SESSION VOLATILE") self.validate_identity( "SET QUERY_BAND = 'NONE' FOR SESSION" ) # quoted NONE should remain quoted self.validate_identity("SET QUERY_BAND = '' FOR SESSION") ================================================ FILE: tests/dialects/test_trino.py ================================================ from tests.dialects.test_dialect import Validator class TestTrino(Validator): dialect = "trino" def test_trino(self): self.validate_identity("REFRESH MATERIALIZED VIEW mynamespace.test_view") self.validate_identity("JSON_QUERY(m.properties, 'lax $.area' OMIT QUOTES NULL ON ERROR)") self.validate_identity("JSON_EXTRACT(content, json_path)") self.validate_identity("JSON_QUERY(content, 'lax $.HY.*')") self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITH WRAPPER)") 
self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITH ARRAY WRAPPER)") self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITH UNCONDITIONAL WRAPPER)") self.validate_identity("JSON_QUERY(content, 'strict $.HY.*' WITHOUT CONDITIONAL WRAPPER)") self.validate_identity("JSON_QUERY(description, 'strict $.comment' KEEP QUOTES)") self.validate_identity( "JSON_QUERY(description, 'strict $.comment' OMIT QUOTES ON SCALAR STRING)" ) self.validate_identity( "JSON_QUERY(content, 'strict $.HY.*' WITH UNCONDITIONAL WRAPPER KEEP QUOTES)" ) self.validate_identity( "SELECT TIMESTAMP '2012-10-31 01:00 -2'", "SELECT CAST('2012-10-31 01:00 -2' AS TIMESTAMP WITH TIME ZONE)", ) self.validate_identity( "SELECT TIMESTAMP '2012-10-31 01:00 +2'", "SELECT CAST('2012-10-31 01:00 +2' AS TIMESTAMP WITH TIME ZONE)", ) self.validate_all( "SELECT TIMESTAMP '2012-10-31 01:00:00 +02:00'", write={ "duckdb": "SELECT CAST('2012-10-31 01:00:00 +02:00' AS TIMESTAMPTZ)", "trino": "SELECT CAST('2012-10-31 01:00:00 +02:00' AS TIMESTAMP WITH TIME ZONE)", }, ) self.validate_all( "SELECT FORMAT('%s', 123)", write={ "duckdb": "SELECT FORMAT('{}', 123)", "snowflake": "SELECT TO_CHAR(123)", "trino": "SELECT FORMAT('%s', 123)", }, ) self.validate_identity( "SELECT * FROM tbl MATCH_RECOGNIZE (PARTITION BY id ORDER BY col MEASURES FIRST(col, 2) AS col1, LAST(col, 2) AS col2 PATTERN (B* A) DEFINE A AS col = 1)" ) self.validate_identity("SELECT VERSION()") def test_listagg(self): self.validate_identity( "SELECT LISTAGG(DISTINCT col, ',') WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col, '; ' ON OVERFLOW ERROR) WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE WITH COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE WITHOUT COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col, '; ' ON 
OVERFLOW TRUNCATE '...' WITH COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col, '; ' ON OVERFLOW TRUNCATE '...' WITHOUT COUNT) WITHIN GROUP (ORDER BY col ASC) FROM tbl" ) self.validate_identity( "SELECT LISTAGG(col) WITHIN GROUP (ORDER BY col DESC) FROM tbl", "SELECT LISTAGG(col, ',') WITHIN GROUP (ORDER BY col DESC) FROM tbl", ) def test_trim(self): self.validate_identity("SELECT TRIM('!' FROM '!foo!')") self.validate_identity("SELECT TRIM(BOTH '$' FROM '$var$')") self.validate_identity("SELECT TRIM(TRAILING 'ER' FROM UPPER('worker'))") self.validate_identity( "SELECT TRIM(LEADING FROM ' abcd')", "SELECT LTRIM(' abcd')", ) self.validate_identity( "SELECT TRIM('!foo!', '!')", "SELECT TRIM('!' FROM '!foo!')", ) def test_ddl(self): self.validate_identity("ALTER TABLE users RENAME TO people") self.validate_identity("ALTER TABLE IF EXISTS users RENAME TO people") self.validate_identity("ALTER TABLE users ADD COLUMN zip VARCHAR") self.validate_identity("ALTER TABLE IF EXISTS users ADD COLUMN IF NOT EXISTS zip VARCHAR") self.validate_identity("ALTER TABLE users DROP COLUMN zip") self.validate_identity("ALTER TABLE IF EXISTS users DROP COLUMN IF EXISTS zip") self.validate_identity("ALTER TABLE users RENAME COLUMN id TO user_id") self.validate_identity("ALTER TABLE IF EXISTS users RENAME COLUMN IF EXISTS id TO user_id") self.validate_identity("ALTER TABLE users ALTER COLUMN id SET DATA TYPE BIGINT") self.validate_identity("ALTER TABLE users ALTER COLUMN id DROP NOT NULL") self.validate_identity( "ALTER TABLE people SET AUTHORIZATION alice", check_command_warning=True ) self.validate_identity( "ALTER TABLE people SET AUTHORIZATION ROLE PUBLIC", check_command_warning=True ) self.validate_identity( "ALTER TABLE people SET PROPERTIES x = 'y'", check_command_warning=True ) self.validate_identity( "ALTER TABLE people SET PROPERTIES foo = 123, 'foo bar' = 456", check_command_warning=True, ) self.validate_identity( "ALTER TABLE people 
SET PROPERTIES x = DEFAULT", check_command_warning=True ) self.validate_identity("ALTER VIEW people RENAME TO users") self.validate_identity( "ALTER VIEW people SET AUTHORIZATION alice", check_command_warning=True ) self.validate_identity("CREATE SCHEMA foo WITH (LOCATION='s3://bucket/foo')") self.validate_identity( "CREATE TABLE foo.bar WITH (LOCATION='s3://bucket/foo/bar') AS SELECT 1" ) # Hive connector syntax (partitioned_by) self.validate_identity( "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONED_BY=ARRAY['a', 'b'])" ) self.validate_identity( 'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONED_BY=ARRAY[\'a\', \'b\'])', identify=True, ) # Iceberg connector syntax (partitioning, can contain Iceberg transform expressions) self.validate_identity( "CREATE TABLE foo (a VARCHAR, b INTEGER, c DATE) WITH (PARTITIONING=ARRAY['a', 'bucket(4, b)', 'month(c)'])", ) self.validate_identity( 'CREATE TABLE "foo" ("a" VARCHAR, "b" INTEGER, "c" DATE) WITH (PARTITIONING=ARRAY[\'a\', \'bucket(4, b)\', \'month(c)\'])', identify=True, ) def test_analyze(self): self.validate_identity("ANALYZE tbl") self.validate_identity("ANALYZE tbl WITH (prop1=val1, prop2=val2)") def test_json_value(self): self.validate_identity( "JSON_VALUE(jl.extra_attributes, 'lax $.amount_source' RETURNING VARCHAR)" ) json_doc = """'{"item": "shoes", "price": "49.95"}'""" self.validate_identity(f"""SELECT JSON_VALUE({json_doc}, 'strict $.price')""") self.validate_identity( f"""SELECT JSON_VALUE({json_doc}, 'lax $.price' RETURNING DECIMAL(4, 2))""" ) for on_option in ("NULL", "ERROR", "DEFAULT 1"): self.validate_identity( f"""SELECT JSON_VALUE({json_doc}, 'lax $.price' RETURNING DECIMAL(4, 2) {on_option} ON EMPTY {on_option} ON ERROR) AS price""" ) def test_array_first(self): self.validate_identity("SELECT ARRAY_FIRST(ARRAY['a', 'b']) FROM tbl") self.validate_identity("SELECT ARRAY_FIRST(ARRAY['a', 'b'], x -> x = 'b') FROM tbl") 
================================================
FILE: tests/dialects/test_tsql.py
================================================
from sqlglot import exp, parse_one
from sqlglot.errors import ParseError, UnsupportedError
from sqlglot.optimizer.annotate_types import annotate_types
from tests.dialects.test_dialect import Validator


# Test cases for the "tsql" dialect, built on the imported Validator base class.
# NOTE(review): Validator's helpers are defined in tests.dialects.test_dialect, not here; the
# comments below presume "input -> expected output" semantics for the SQL pairs.
class TestTSQL(Validator):
    dialect = "tsql"

    def test_tsql(self):
        # Broad collection of T-SQL checks: TOP / OFFSET-FETCH, boolean expansion, CTE placement,
        # SELECT ... INTO, temp tables (#name), CREATE TABLE / INDEX options, OUTPUT clauses,
        # DECLARE, STRING_AGG, HASHBYTES, procedures and JSON_ARRAYAGG.
        self.validate_all(
            "WITH x AS (SELECT 1 AS [1]) SELECT TOP 0 * FROM (SELECT * FROM x UNION SELECT * FROM x) AS _l_0 ORDER BY 1",
            read={
                "": "WITH x AS (SELECT 1) SELECT * FROM x UNION SELECT * FROM x ORDER BY 1 LIMIT 0",
            },
        )

        # https://learn.microsoft.com/en-us/previous-versions/sql/sql-server-2008-r2/ms187879(v=sql.105)?redirectedfrom=MSDN
        # tsql allows .. which means use the default schema
        self.validate_identity("SELECT * FROM a..b")

        self.validate_identity("SELECT ATN2(x, y)")
        self.validate_identity("SELECT EXP(1)")
        self.validate_identity("SELECT SYSDATETIMEOFFSET()")
        self.validate_identity("SELECT COMPRESS('Hello World')")
        # A bare GO must be an exp.Command, while `go` in a projection must be an exp.Column.
        self.validate_identity("GO").assert_is(exp.Command)
        self.validate_identity("SELECT go").selects[0].assert_is(exp.Column)
        self.validate_identity("CREATE view a.b.c", "CREATE VIEW b.c")
        self.validate_identity("DROP view a.b.c", "DROP VIEW b.c")
        self.validate_identity("ROUND(x, 1, 0)")
        self.validate_identity(
            "EXEC MyProc @id = 7, @name = 'Lochristi'",
            "EXECUTE MyProc @id = 7, @name = 'Lochristi'",
        )
        self.validate_identity("SELECT TRIM(' test ') AS Result")
        # NOTE(review): whitespace inside the literal below is kept as it appears in this chunk;
        # the original spacing may have differed.
        self.validate_identity("SELECT TRIM('.,! ' FROM ' # test .') AS Result")
        self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
        self.validate_identity("SELECT * FROM t TABLESAMPLE (20 ROWS)")
        self.validate_identity("SELECT * FROM t TABLESAMPLE (10 PERCENT) REPEATABLE (123)")
        self.validate_identity("SELECT CONCAT(column1, column2)")
        self.validate_identity("SELECT TestSpecialChar.Test# FROM TestSpecialChar")
        self.validate_identity("SELECT TestSpecialChar.Test@ FROM TestSpecialChar")
        self.validate_identity("SELECT TestSpecialChar.Test$ FROM TestSpecialChar")
        self.validate_identity("SELECT TestSpecialChar.Test_ FROM TestSpecialChar")
        self.validate_identity("SELECT TOP (2 + 1) 1")
        # Bare values used as conditions are expected with an explicit `<> 0` comparison, and
        # `true` as (1 = 1).
        self.validate_identity("SELECT * FROM t WHERE NOT c", "SELECT * FROM t WHERE NOT c <> 0")
        self.validate_identity("1 AND true", "1 <> 0 AND (1 = 1)")
        self.validate_identity("CAST(x AS int) OR y", "CAST(x AS INTEGER) <> 0 OR y <> 0")
        self.validate_identity("TRUNCATE TABLE t1 WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))")
        self.validate_identity(
            "WITH t1 AS (SELECT 1 AS a), t2 AS (SELECT 1 AS a) SELECT TOP 10 a FROM t1 UNION ALL SELECT TOP 10 a FROM t2"
        )
        self.validate_identity(
            "SELECT TOP 10 s.RECORDID, n.c.VALUE('(/*:FORM_ROOT/*:SOME_TAG)[1]', 'float') AS SOME_TAG_VALUE FROM source_table.dbo.source_data AS s(nolock) CROSS APPLY FormContent.nodes('/*:FORM_ROOT') AS N(C)"
        )
        self.validate_identity(
            "CREATE CLUSTERED INDEX [IX_OfficeTagDetail_TagDetailID] ON [dbo].[OfficeTagDetail]([TagDetailID] ASC)"
        )
        self.validate_identity(
            "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON X([y])"
        )
        self.validate_identity(
            "CREATE INDEX [x] ON [y]([z] ASC) WITH (allow_page_locks=on) ON PRIMARY"
        )
        self.validate_identity(
            "COPY INTO test_1 FROM 'path' WITH (FORMAT_NAME = test, FILE_TYPE = 'CSV', CREDENTIAL = (IDENTITY='Shared Access Signature', SECRET='token'), FIELDTERMINATOR = ';', ROWTERMINATOR = '0X0A', ENCODING = 'UTF8', DATEFORMAT = 'ymd', MAXERRORS = 10, ERRORFILE = 'errorsfolder', IDENTITY_INSERT = 'ON')"
        )
        self.validate_identity(
            "WITH t1 AS (SELECT 1 AS a), t2 AS (SELECT 1 AS a) SELECT TOP 10 a FROM t1 UNION ALL SELECT TOP 10 a FROM t2 ORDER BY a DESC"
        )
        self.validate_identity(
            "WITH t1 AS (SELECT 1 AS a), t2 AS (SELECT 1 AS a) SELECT COUNT(*) FROM (SELECT TOP 10 a FROM t1 UNION ALL SELECT TOP 10 a FROM t2 ORDER BY a DESC) AS t"
        )
        # A closing bracket inside a bracket-quoted identifier is expected doubled (]]).
        self.validate_identity(
            'SELECT 1 AS "[x]"',
            "SELECT 1 AS [[x]]]",
        )
        # A CTE written after INSERT INTO is expected in front of the INSERT.
        self.validate_identity(
            "INSERT INTO foo.bar WITH cte AS (SELECT 1 AS one) SELECT * FROM cte",
            "WITH cte AS (SELECT 1 AS one) INSERT INTO foo.bar SELECT * FROM cte",
        )
        # The SQL generated after annotate_types must equal the input text.
        self.assertEqual(
            annotate_types(self.validate_identity("SELECT 1 WHERE EXISTS(SELECT 1)")).sql("tsql"),
            "SELECT 1 WHERE EXISTS(SELECT 1)",
        )

        self.validate_all(
            "CREATE TABLE test_table([ID] [BIGINT] NOT NULL,[EffectiveFrom] [DATETIME2] (3) NOT NULL)",
            write={
                "spark": "CREATE TABLE test_table (`ID` BIGINT NOT NULL, `EffectiveFrom` TIMESTAMP NOT NULL)",
                "tsql": "CREATE TABLE test_table ([ID] BIGINT NOT NULL, [EffectiveFrom] DATETIME2(3) NOT NULL)",
            },
        )
        self.validate_all(
            "SELECT CONVERT(DATETIME, '2006-04-25T15:50:59.997', 126)",
            write={
                "duckdb": "SELECT STRPTIME('2006-04-25T15:50:59.997', '%Y-%m-%dT%H:%M:%S.%f')",
                "tsql": "SELECT CONVERT(DATETIME, '2006-04-25T15:50:59.997', 126)",
            },
        )
        self.validate_all(
            "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
            read={
                "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH C AS (WITH A AS (SELECT 2 AS value) SELECT * FROM A) SELECT * FROM C",
                "tsql": "WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * INTO TEMP_NESTED_WITH FROM (SELECT * FROM C) AS temp",
            },
            write={
                "snowflake": "CREATE TABLE TEMP_NESTED_WITH AS WITH A AS (SELECT 2 AS value), C AS (SELECT * FROM A) SELECT * FROM (SELECT * FROM C) AS temp",
            },
        )
        self.validate_all(
            "SELECT IIF(cond <> 0, 'True', 'False')",
            read={
                "spark": "SELECT IF(cond, 'True', 'False')",
                "sqlite": "SELECT IIF(cond, 'True', 'False')",
                "tsql": "SELECT IIF(cond <> 0, 'True', 'False')",
            },
        )
        self.validate_all(
            "SELECT TRIM(BOTH 'a' FROM a)",
            read={
                "mysql": "SELECT TRIM(BOTH 'a' FROM a)",
            },
            write={
                "mysql": "SELECT TRIM(BOTH 'a' FROM a)",
                "tsql": "SELECT TRIM(BOTH 'a' FROM a)",
            },
        )
        self.validate_all(
            "SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)",
            read={
                "duckdb": "SELECT MAKE_TIME(23, 59, 59)",
                "mysql": "SELECT MAKETIME(23, 59, 59)",
                "postgres": "SELECT MAKE_TIME(23, 59, 59)",
                "snowflake": "SELECT TIME_FROM_PARTS(23, 59, 59)",
            },
            write={
                "tsql": "SELECT TIMEFROMPARTS(23, 59, 59, 0, 0)",
            },
        )
        self.validate_all(
            "SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)",
            read={
                # The nanoseconds are ignored since T-SQL doesn't support that precision
                "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 987654321)"
            },
            write={
                "duckdb": "SELECT MAKE_TIMESTAMP(2013, 4, 5, 12, 00, 00 + (0 / 1000.0))",
                "snowflake": "SELECT TIMESTAMP_FROM_PARTS(2013, 4, 5, 12, 00, 00, 0 * 1000000)",
                "tsql": "SELECT DATETIMEFROMPARTS(2013, 4, 5, 12, 00, 00, 0)",
            },
        )
        self.validate_all(
            "SELECT TOP 1 * FROM (SELECT x FROM t1 UNION ALL SELECT x FROM t2) AS _l_0",
            read={
                "": "SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1",
            },
        )

        # CREATE TABLE AS / SELECT ... INTO conversions, including TEMPORARY and UNLOGGED targets.
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
            read={
                "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t",
            },
        )
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
            write={
                "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
                "postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) AS temp",
                "oracle": "WITH t(c) AS (SELECT 1) SELECT * INTO foo FROM (SELECT c AS c FROM t) temp",
            },
        )
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED #foo FROM (SELECT c AS c FROM t) AS temp",
            write={
                "duckdb": "CREATE TEMPORARY TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
                "postgres": "WITH t(c) AS (SELECT 1) SELECT * INTO TEMPORARY foo FROM (SELECT c AS c FROM t) AS temp",
            },
        )
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT c INTO #foo FROM t",
            read={
                "tsql": "WITH t(c) AS (SELECT 1) SELECT c INTO #foo FROM t",
                "postgres": "WITH t(c) AS (SELECT 1) SELECT c INTO TEMPORARY foo FROM t",
            },
            write={
                "tsql": "WITH t(c) AS (SELECT 1) SELECT c INTO #foo FROM t",
                "postgres": "WITH t(c) AS (SELECT 1) SELECT c INTO TEMPORARY foo FROM t",
                "duckdb": "CREATE TEMPORARY TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t",
                "snowflake": "CREATE TEMPORARY TABLE foo AS WITH t(c) AS (SELECT 1) SELECT c FROM t",
            },
        )
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp",
            write={
                "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
            },
        )
        # NOTE(review): looks identical to the previous call as rendered here; whitespace inside
        # the literals may have been lost, so confirm before deduplicating.
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT * INTO UNLOGGED foo FROM (SELECT c AS c FROM t) AS temp",
            write={
                "duckdb": "CREATE TABLE foo AS WITH t(c) AS (SELECT 1) SELECT * FROM (SELECT c AS c FROM t) AS temp",
            },
        )
        self.validate_all(
            "WITH y AS (SELECT 2 AS c) INSERT INTO #t SELECT * FROM y",
            write={
                "duckdb": "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y",
                "postgres": "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y",
            },
        )
        self.validate_all(
            "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y",
            read={
                "duckdb": "WITH y AS (SELECT 2 AS c) INSERT INTO t SELECT * FROM y",
            },
        )

        # CTEs nested inside subqueries in the inputs appear at the top of the expected statement.
        self.validate_all(
            "WITH t(c) AS (SELECT 1) SELECT 1 AS c UNION (SELECT c FROM t)",
            read={
                "duckdb": "SELECT 1 AS c UNION (WITH t(c) AS (SELECT 1) SELECT c FROM t)",
            },
        )
        self.validate_all(
            "WITH t(c) AS (SELECT 1) MERGE INTO x AS z USING (SELECT c AS c FROM t) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b",
            read={
                "postgres": "MERGE INTO x AS z USING (WITH t(c) AS (SELECT 1) SELECT c FROM t) AS y ON a = b WHEN MATCHED THEN UPDATE SET a = y.b",
            },
        )
        self.validate_all(
            "WITH t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT * FROM (SELECT SUM(n) AS s4 FROM t) AS subq",
            read={
                "duckdb": "SELECT * FROM (WITH RECURSIVE t(n) AS (SELECT 1 AS n UNION ALL SELECT n + 1 AS n FROM t WHERE n < 4) SELECT SUM(n) AS s4 FROM t) AS subq",
            },
        )

        # Temp tables: #name in T-SQL versus TEMPORARY tables in the other listed dialects.
        self.validate_all(
            "CREATE TABLE #mytemptable (a INTEGER)",
            read={
                "duckdb": "CREATE TEMPORARY TABLE mytemptable (a INT)",
            },
            write={
                "tsql": "CREATE TABLE #mytemptable (a INTEGER)",
                "snowflake": "CREATE TEMPORARY TABLE mytemptable (a INT)",
                "duckdb": "CREATE TEMPORARY TABLE mytemptable (a INT)",
                "oracle": "CREATE GLOBAL TEMPORARY TABLE mytemptable (a INT)",
                "hive": "CREATE TEMPORARY TABLE mytemptable (a INT)",
                "spark2": "CREATE TEMPORARY TABLE mytemptable (a INT) USING PARQUET",
                "spark": "CREATE TEMPORARY TABLE mytemptable (a INT) USING PARQUET",
                "databricks": "CREATE TEMPORARY TABLE mytemptable (a INT) USING PARQUET",
            },
        )
        self.validate_all(
            "CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIME(4), d FLOAT(24))",
            write={
                "spark": "CREATE TEMPORARY TABLE mytemp (a INT, b CHAR(2), c TIMESTAMP, d FLOAT) USING PARQUET",
                "tsql": "CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIME(4), d FLOAT(24))",
            },
        )

        # NOTE(review): the triple-quoted inputs below were presumably multi-line in the original
        # file; their internal whitespace is kept exactly as it appears in this chunk.
        self.validate_all(
            """CREATE TABLE [dbo].[mytable]( [email] [varchar](255) NOT NULL, CONSTRAINT [UN_t_mytable] UNIQUE NONCLUSTERED ( [email] ASC ) )""",
            write={
                "hive": "CREATE TABLE `dbo`.`mytable` (`email` VARCHAR(255) NOT NULL)",
                "spark2": "CREATE TABLE `dbo`.`mytable` (`email` VARCHAR(255) NOT NULL)",
                "spark": "CREATE TABLE `dbo`.`mytable` (`email` VARCHAR(255) NOT NULL)",
                "databricks": "CREATE TABLE `dbo`.`mytable` (`email` VARCHAR(255) NOT NULL)",
            },
        )
        self.validate_all(
            "CREATE TABLE x ( A INTEGER NOT NULL, B INTEGER NULL )",
            write={
                "tsql": "CREATE TABLE x (A INTEGER NOT NULL, B INTEGER NULL)",
                "hive": "CREATE TABLE x (A INT NOT NULL, B INT)",
            },
        )
        self.validate_identity(
            'CREATE TABLE x (CONSTRAINT "pk_mytable" UNIQUE NONCLUSTERED (a DESC)) ON b (c)',
            "CREATE TABLE x (CONSTRAINT [pk_mytable] UNIQUE NONCLUSTERED (a DESC)) ON b (c)",
        )
        self.validate_all(
            """CREATE TABLE x ([zip_cd] VARCHAR(5) NULL NOT FOR REPLICATION, [zip_cd_mkey] VARCHAR(5) NOT NULL, CONSTRAINT [pk_mytable] PRIMARY KEY CLUSTERED ([zip_cd_mkey] ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON [INDEX]) ON [SECONDARY]""",
            write={
                "tsql": "CREATE TABLE x ([zip_cd] VARCHAR(5) NULL NOT FOR REPLICATION, [zip_cd_mkey] VARCHAR(5) NOT NULL, CONSTRAINT [pk_mytable] PRIMARY KEY CLUSTERED ([zip_cd_mkey] ASC) WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF) ON [INDEX]) ON [SECONDARY]",
                "spark2": "CREATE TABLE x (`zip_cd` VARCHAR(5), `zip_cd_mkey` VARCHAR(5) NOT NULL, CONSTRAINT `pk_mytable` PRIMARY KEY (`zip_cd_mkey`))",
            },
        )
        self.validate_identity("CREATE TABLE x (A INTEGER NOT NULL, B INTEGER NULL)")
        # NOTE(review): as rendered here the hive expectation below repeats one already asserted
        # for the same input earlier in this method; confirm before deduplicating.
        self.validate_all(
            "CREATE TABLE x ( A INTEGER NOT NULL, B INTEGER NULL )",
            write={
                "hive": "CREATE TABLE x (A INT NOT NULL, B INT)",
            },
        )
        self.validate_identity(
            "CREATE TABLE tbl (a AS (x + 1) PERSISTED, b AS (y + 2), c AS (y / 3) PERSISTED NOT NULL)"
        )
        self.validate_identity(
            "CREATE TABLE [db].[tbl]([a] [int])",
            "CREATE TABLE [db].[tbl] ([a] INTEGER)",
        )
        # `SELECT a = 1` must yield an exp.Alias whose "alias" arg is an exp.Identifier.
        self.validate_identity("SELECT a = 1", "SELECT 1 AS a").selects[0].assert_is(
            exp.Alias
        ).args["alias"].assert_is(exp.Identifier)

        self.validate_all(
            "IF OBJECT_ID('tempdb.dbo.#TempTableName', 'U') IS NOT NULL BEGIN DROP TABLE #TempTableName; END",
            write={
                "tsql": "IF NOT OBJECT_ID('tempdb.dbo.#TempTableName', 'U') IS NULL BEGIN DROP TABLE #TempTableName; END",
                "spark": "DROP TABLE IF EXISTS TempTableName",
            },
        )
        self.validate_all(
            "IF OBJECT_ID('tempdb.dbo.#TempTableName') IS NOT NULL BEGIN DROP TABLE #TempTableName; END",
            write={
                "tsql": "IF NOT OBJECT_ID('tempdb.dbo.#TempTableName') IS NULL BEGIN DROP TABLE #TempTableName; END",
                "spark": "DROP TABLE IF EXISTS TempTableName",
            },
        )

        self.validate_identity(
            "MERGE INTO mytable WITH (HOLDLOCK) AS T USING mytable_merge AS S "
            "ON (T.user_id = S.user_id) WHEN NOT MATCHED THEN INSERT (c1, c2) VALUES (S.c1, S.c2)"
        )
        self.validate_identity("UPDATE STATISTICS x", check_command_warning=True)
        self.validate_identity("UPDATE x SET y = 1 OUTPUT x.a, x.b INTO @y FROM y")
        self.validate_identity("UPDATE x SET y = 1 OUTPUT x.a, x.b FROM y")
        self.validate_identity("INSERT INTO x (y) OUTPUT x.a, x.b INTO l SELECT * FROM z")
        self.validate_identity("INSERT INTO x (y) OUTPUT x.a, x.b SELECT * FROM z")
        self.validate_identity("DELETE x OUTPUT x.a FROM z")
        self.validate_identity("SELECT * FROM t WITH (TABLOCK, INDEX(myindex))")
        self.validate_identity("SELECT * FROM t WITH (NOWAIT)")
        self.validate_identity("SELECT CASE WHEN a > 1 THEN b END")
        self.validate_identity("SELECT * FROM taxi ORDER BY 1 OFFSET 0 ROWS FETCH NEXT 3 ROWS ONLY")
        self.validate_identity("END")
        self.validate_identity("@x")
        self.validate_identity("#x")
        self.validate_identity("PRINT @TestVariable", check_command_warning=True)
        self.validate_identity("SELECT Employee_ID, Department_ID FROM @MyTableVar")
        self.validate_identity("INSERT INTO @TestTable VALUES (1, 'Value1', 12, 20)")
        self.validate_identity("SELECT * FROM #foo")
        self.validate_identity("SELECT * FROM ##foo")
        self.validate_identity("SELECT a = 1", "SELECT 1 AS a")
        self.validate_identity(
            "DECLARE @TestVariable AS VARCHAR(100) = 'Save Our Planet'",
            "DECLARE @TestVariable VARCHAR(100) = 'Save Our Planet'",
        )
        self.validate_identity(
            "SELECT a = 1 UNION ALL SELECT a = b", "SELECT 1 AS a UNION ALL SELECT b AS a"
        )
        self.validate_identity(
            "SELECT x FROM @MyTableVar AS m JOIN Employee ON m.EmployeeID = Employee.EmployeeID"
        )
        self.validate_identity(
            "SELECT DISTINCT DepartmentName, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY BaseRate) OVER (PARTITION BY DepartmentName) AS MedianCont FROM dbo.DimEmployee"
        )
        self.validate_identity(
            'SELECT "x"."y" FROM foo',
            "SELECT [x].[y] FROM foo",
        )

        # OFFSET without ORDER BY in the inputs is expected with ORDER BY (SELECT NULL) added.
        self.validate_all(
            "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS",
            read={
                "postgres": "SELECT * FROM t OFFSET 2",
            },
            write={
                "postgres": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST OFFSET 2",
                "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 2 ROWS",
            },
        )
        self.validate_all(
            "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY",
            read={
                "duckdb": "SELECT * FROM t LIMIT 10 OFFSET 5",
                "sqlite": "SELECT * FROM t LIMIT 5, 10",
                "tsql": "SELECT * FROM t ORDER BY (SELECT NULL) OFFSET 5 ROWS FETCH FIRST 10 ROWS ONLY",
            },
            write={
                "duckdb": "SELECT * FROM t ORDER BY (SELECT NULL) NULLS FIRST LIMIT 10 OFFSET 5",
                "sqlite": "SELECT * FROM t ORDER BY (SELECT NULL) LIMIT 10 OFFSET 5",
            },
        )
        self.validate_all(
            "SELECT CAST([a].[b] AS SMALLINT) FROM foo",
            write={
                "tsql": "SELECT CAST([a].[b] AS SMALLINT) FROM foo",
                "spark": "SELECT CAST(`a`.`b` AS SMALLINT) FROM foo",
            },
        )
        self.validate_all(
            "CONVERT(INT, CONVERT(NUMERIC, '444.75'))",
            write={
                "mysql": "CAST(CAST('444.75' AS DECIMAL) AS SIGNED)",
                "tsql": "CONVERT(INTEGER, CONVERT(NUMERIC, '444.75'))",
            },
        )
        self.validate_all(
            "STRING_AGG(x, y) WITHIN GROUP (ORDER BY z DESC)",
            write={
                "tsql": "STRING_AGG(x, y) WITHIN GROUP (ORDER BY z DESC)",
                "mysql": "GROUP_CONCAT(x ORDER BY z DESC SEPARATOR y)",
                "sqlite": "GROUP_CONCAT(x, y)",
                "postgres": "STRING_AGG(x, y ORDER BY z DESC NULLS LAST)",
            },
        )
        self.validate_all(
            "STRING_AGG(x, '|') WITHIN GROUP (ORDER BY z ASC)",
            write={
                "tsql": "STRING_AGG(x, '|') WITHIN GROUP (ORDER BY z ASC)",
                "mysql": "GROUP_CONCAT(x ORDER BY z ASC SEPARATOR '|')",
                "sqlite": "GROUP_CONCAT(x, '|')",
                "postgres": "STRING_AGG(x, '|' ORDER BY z ASC NULLS FIRST)",
            },
        )
        self.validate_all(
            "STRING_AGG(x, '|')",
            write={
                "tsql": "STRING_AGG(x, '|')",
                "mysql": "GROUP_CONCAT(x SEPARATOR '|')",
                "sqlite": "GROUP_CONCAT(x, '|')",
                "postgres": "STRING_AGG(x, '|')",
            },
        )

        # HASHBYTES with a named algorithm versus the dedicated hash functions of other dialects.
        self.validate_all(
            "HASHBYTES('SHA1', x)",
            read={
                "snowflake": "SHA1(x)",
                "spark": "SHA(x)",
            },
            write={
                "snowflake": "SHA1(x)",
                "spark": "SHA(x)",
                "tsql": "HASHBYTES('SHA1', x)",
            },
        )
        self.validate_all(
            "HASHBYTES('SHA2_256', x)",
            read={
                "spark": "SHA2(x, 256)",
            },
            write={
                "tsql": "HASHBYTES('SHA2_256', x)",
                "spark": "SHA2(x, 256)",
            },
        )
        self.validate_all(
            "HASHBYTES('SHA2_512', x)",
            read={
                "spark": "SHA2(x, 512)",
            },
            write={
                "tsql": "HASHBYTES('SHA2_512', x)",
                "spark": "SHA2(x, 512)",
            },
        )
        self.validate_all(
            "HASHBYTES('MD5', 'x')",
            read={
                "spark": "MD5('x')",
            },
            write={
                "tsql": "HASHBYTES('MD5', 'x')",
                "spark": "MD5('x')",
            },
        )
        self.validate_identity("HASHBYTES('MD2', 'x')")
        self.validate_identity("LOG(n)")
        self.validate_identity("LOG(n, b)")

        self.validate_all(
            "STDEV(x)",
            read={
                "": "STDDEV(x)",
            },
            write={
                "": "STDDEV(x)",
                "tsql": "STDEV(x)",
            },
        )

        # Check that TRUE and FALSE don't get expanded to (1=1) or (1=0) when used in a VALUES expression
        self.validate_identity(
            "SELECT val FROM (VALUES ((TRUE), (FALSE), (NULL))) AS t(val)",
            write_sql="SELECT val FROM (VALUES ((1), (0), (NULL))) AS t(val)",
        )
        self.validate_identity("'a' + 'b'")
        self.validate_identity(
            "'a' || 'b'",
            "'a' + 'b'",
        )
        self.validate_identity(
            "CREATE TABLE db.t1 (a INTEGER, b VARCHAR(50), CONSTRAINT c PRIMARY KEY (a DESC))",
        )
        self.validate_identity(
            "CREATE TABLE db.t1 (a INTEGER, b INTEGER, CONSTRAINT c PRIMARY KEY (a DESC, b))"
        )
        self.validate_all(
            "SCHEMA_NAME(id)",
            write={
                "sqlite": "'main'",
                "mysql": "SCHEMA()",
                "postgres": "CURRENT_SCHEMA",
                "tsql": "SCHEMA_NAME(id)",
            },
        )

        # Parsing `SELECT begin` with read="tsql" must raise ParseError.
        with self.assertRaises(ParseError):
            parse_one("SELECT begin", read="tsql")

        self.validate_identity("CREATE PROCEDURE test(@v1 INTEGER = 1, @v2 CHAR(1) = 'c')")
        self.validate_identity(
            "DECLARE @v1 AS INTEGER = 1, @v2 AS CHAR(1) = 'c'",
            "DECLARE @v1 INTEGER = 1, @v2 CHAR(1) = 'c'",
        )

        # Each parameter modifier keyword is tried both after a default value and directly
        # after the type.
        for output in ("OUT", "OUTPUT", "READONLY"):
            self.validate_identity(
                f"CREATE PROCEDURE test(@v1 INTEGER = 1 {output}, @v2 CHAR(1) {output})"
            )

        self.validate_identity(
            "CREATE PROCEDURE test(@v1 AS INTEGER = 1, @v2 AS CHAR(1) = 'c')",
            "CREATE PROCEDURE test(@v1 INTEGER = 1, @v2 CHAR(1) = 'c')",
        )

        # Every combination of optional ORDER BY and NULL-handling clause, one sub-test each.
        for order_by in ("", " ORDER BY c"):
            for json_clause in ("", " NULL ON NULL", " ABSENT ON NULL"):
                with self.subTest(f"Testing JSON_ARRAYAGG with options: {order_by}, {json_clause}"):
                    self.validate_identity(f"JSON_ARRAYAGG(c{order_by}{json_clause})")

        self.validate_all(
            "JSON_ARRAYAGG(c1 ORDER BY c1)",
            write={
                "tsql": "JSON_ARRAYAGG(c1 ORDER BY c1)",
                "postgres": "JSON_AGG(c1 ORDER BY c1 NULLS FIRST)",
            },
        )
        self.validate_identity("CEILING(2)")
        self.validate_identity("OBJECT_ID('foo')")
        self.validate_identity("OBJECT_ID('foo', 'U')")

    def test_option(self):
        # Appends OPTION(<hint>) to each statement for every listed hint and checks the parsed
        # "options" arg.
        # NOTE(review): this method continues past the end of this chunk; only the visible part
        # is reproduced and described.
        possible_options = [
            "HASH GROUP",
            "ORDER GROUP",
            "CONCAT UNION",
            "HASH UNION",
            "MERGE UNION",
            "LOOP JOIN",
            "MERGE JOIN",
            "HASH JOIN",
            "DISABLE_OPTIMIZED_PLAN_FORCING",
            "EXPAND VIEWS",
            "FAST 15",
            "FORCE ORDER",
            "FORCE EXTERNALPUSHDOWN",
            "DISABLE EXTERNALPUSHDOWN",
            "FORCE SCALEOUTEXECUTION",
            "DISABLE SCALEOUTEXECUTION",
            "IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX",
            "KEEP PLAN",
            "KEEPFIXED PLAN",
            "MAX_GRANT_PERCENT = 5",
            "MIN_GRANT_PERCENT = 10",
            "MAXDOP 13",
            "MAXRECURSION 8",
            "NO_PERFORMANCE_SPOOL",
            "OPTIMIZE FOR UNKNOWN",
            "PARAMETERIZATION SIMPLE",
            "PARAMETERIZATION FORCED",
            "QUERYTRACEON 99",
            "RECOMPILE",
            "ROBUST PLAN",
            "USE PLAN N''",
            "LABEL = 'MyLabel'",
        ]

        possible_statements = [
            # These should be un-commented once support for the OPTION clause is added for DELETE, MERGE and UPDATE
            # "DELETE FROM Table1",
            # "MERGE INTO Locations AS T USING locations_stage AS S ON T.LocationID = S.LocationID WHEN MATCHED THEN UPDATE SET LocationName = S.LocationName",
            # "UPDATE Customers SET ContactName = 'Alfred Schmidt', City = 'Frankfurt' WHERE CustomerID = 1",
            "SELECT * FROM Table1",
            "SELECT * FROM Table1 WHERE id = 2",
            "UPDATE t1 SET k = t2.k FROM t2",
        ]

        for statement in possible_statements:
            for option in possible_options:
                query = f"{statement} OPTION({option})"
                result = self.validate_identity(query)
                # The parsed statement must carry a list under "options" in which every entry
                # is an exp.QueryOption.
                options = result.args.get("options")
                self.assertIsInstance(options, list, f"When parsing query {query}")
                is_query_options = map(lambda o: isinstance(o, exp.QueryOption), options)
                self.assertTrue(all(is_query_options), f"When parsing query {query}")

                self.validate_identity(
                    f"{statement} 
OPTION(RECOMPILE, USE PLAN N'', MAX_GRANT_PERCENT = 5)" ) raising_queries = [ # Missing parentheses "SELECT * FROM Table1 OPTION HASH GROUP", # Must be followed by 'PLAN" "SELECT * FROM Table1 OPTION(KEEPFIXED)", # Missing commas "SELECT * FROM Table1 OPTION(HASH GROUP HASH GROUP)", ] for query in raising_queries: with self.assertRaises(ParseError, msg=f"When running '{query}'"): self.parse_one(query) self.validate_all( "SELECT col FROM t OPTION(LABEL = 'foo')", write={ "tsql": "SELECT col FROM t OPTION(LABEL = 'foo')", "databricks": UnsupportedError, }, ) def test_for_xml(self): xml_possible_options = [ "RAW('ElementName')", "RAW('ElementName'), BINARY BASE64", "RAW('ElementName'), TYPE", "RAW('ElementName'), ROOT('RootName')", "RAW('ElementName'), BINARY BASE64, TYPE", "RAW('ElementName'), BINARY BASE64, ROOT('RootName')", "RAW('ElementName'), TYPE, ROOT('RootName')", "RAW('ElementName'), BINARY BASE64, TYPE, ROOT('RootName')", "RAW('ElementName'), XMLDATA", "RAW('ElementName'), XMLSCHEMA('TargetNameSpaceURI')", "RAW('ElementName'), XMLDATA, ELEMENTS XSINIL", "RAW('ElementName'), XMLSCHEMA('TargetNameSpaceURI'), ELEMENTS ABSENT", "RAW('ElementName'), XMLDATA, ELEMENTS ABSENT", "RAW('ElementName'), XMLSCHEMA('TargetNameSpaceURI'), ELEMENTS XSINIL", "AUTO", "AUTO, BINARY BASE64", "AUTO, TYPE", "AUTO, ROOT('RootName')", "AUTO, BINARY BASE64, TYPE", "AUTO, TYPE, ROOT('RootName')", "AUTO, BINARY BASE64, TYPE, ROOT('RootName')", "AUTO, XMLDATA", "AUTO, XMLSCHEMA('TargetNameSpaceURI')", "AUTO, XMLDATA, ELEMENTS XSINIL", "AUTO, XMLSCHEMA('TargetNameSpaceURI'), ELEMENTS ABSENT", "AUTO, XMLDATA, ELEMENTS ABSENT", "AUTO, XMLSCHEMA('TargetNameSpaceURI'), ELEMENTS XSINIL", "EXPLICIT", "EXPLICIT, BINARY BASE64", "EXPLICIT, TYPE", "EXPLICIT, ROOT('RootName')", "EXPLICIT, BINARY BASE64, TYPE", "EXPLICIT, TYPE, ROOT('RootName')", "EXPLICIT, BINARY BASE64, TYPE, ROOT('RootName')", "EXPLICIT, XMLDATA", "EXPLICIT, XMLDATA, BINARY BASE64", "EXPLICIT, XMLDATA, TYPE", "EXPLICIT, 
XMLDATA, ROOT('RootName')", "EXPLICIT, XMLDATA, BINARY BASE64, TYPE", "EXPLICIT, XMLDATA, BINARY BASE64, TYPE, ROOT('RootName')", "PATH('ElementName')", "PATH('ElementName'), BINARY BASE64", "PATH('ElementName'), TYPE", "PATH('ElementName'), ROOT('RootName')", "PATH('ElementName'), BINARY BASE64, TYPE", "PATH('ElementName'), TYPE, ROOT('RootName')", "PATH('ElementName'), BINARY BASE64, TYPE, ROOT('RootName')", "PATH('ElementName'), ELEMENTS XSINIL", "PATH('ElementName'), ELEMENTS ABSENT", "PATH('ElementName'), BINARY BASE64, ELEMENTS XSINIL", "PATH('ElementName'), TYPE, ELEMENTS ABSENT", "PATH('ElementName'), ROOT('RootName'), ELEMENTS XSINIL", "PATH('ElementName'), BINARY BASE64, TYPE, ROOT('RootName'), ELEMENTS ABSENT", ] for xml_option in xml_possible_options: with self.subTest(f"Testing FOR XML option: {xml_option}"): self.validate_identity(f"SELECT * FROM t FOR XML {xml_option}") self.validate_identity( "SELECT * FROM t FOR XML PATH, BINARY BASE64, ELEMENTS XSINIL", """SELECT * FROM t FOR XML PATH, BINARY BASE64, ELEMENTS XSINIL""", pretty=True, ) def test_types(self): self.validate_identity("CAST(x AS XML)") self.validate_identity("CAST(x AS UNIQUEIDENTIFIER)") self.validate_identity("CAST(x AS MONEY)") self.validate_identity("CAST(x AS SMALLMONEY)") self.validate_identity("CAST(x AS IMAGE)") self.validate_identity("CAST(x AS SQL_VARIANT)") self.validate_identity("CAST(x AS BIT)") self.validate_all( "CAST(x AS DATETIME2(6))", write={ "hive": "CAST(x AS TIMESTAMP)", }, ) self.validate_all( "CAST(x AS ROWVERSION)", read={ "tsql": "CAST(x AS TIMESTAMP)", }, write={ "tsql": "CAST(x AS ROWVERSION)", "hive": "CAST(x AS BINARY)", }, ) for temporal_type in ("SMALLDATETIME", "DATETIME", "DATETIME2"): self.validate_all( f"CAST(x AS {temporal_type})", read={ "": f"CAST(x AS {temporal_type})", }, write={ "mysql": "CAST(x AS DATETIME)", "duckdb": "CAST(x AS TIMESTAMP)", "tsql": f"CAST(x AS {temporal_type})", }, ) def test_types_ints(self): self.validate_all( "CAST(X AS 
INT)", write={ "hive": "CAST(X AS INT)", "spark2": "CAST(X AS INT)", "spark": "CAST(X AS INT)", "tsql": "CAST(X AS INTEGER)", }, ) self.validate_all( "CAST(X AS BIGINT)", write={ "hive": "CAST(X AS BIGINT)", "spark2": "CAST(X AS BIGINT)", "spark": "CAST(X AS BIGINT)", "tsql": "CAST(X AS BIGINT)", }, ) self.validate_all( "CAST(X AS SMALLINT)", write={ "hive": "CAST(X AS SMALLINT)", "spark2": "CAST(X AS SMALLINT)", "spark": "CAST(X AS SMALLINT)", "tsql": "CAST(X AS SMALLINT)", }, ) self.validate_all( "CAST(X AS TINYINT)", read={ "duckdb": "CAST(X AS UTINYINT)", }, write={ "duckdb": "CAST(X AS UTINYINT)", "hive": "CAST(X AS SMALLINT)", "spark2": "CAST(X AS SMALLINT)", "spark": "CAST(X AS SMALLINT)", "tsql": "CAST(X AS TINYINT)", }, ) def test_types_decimals(self): self.validate_all( "CAST(x as FLOAT)", write={ "spark": "CAST(x AS FLOAT)", "tsql": "CAST(x AS FLOAT)", }, ) self.validate_all( "CAST(x as FLOAT(32))", write={"tsql": "CAST(x AS FLOAT(32))", "hive": "CAST(x AS FLOAT)"}, ) self.validate_all( "CAST(x as FLOAT(64))", write={"tsql": "CAST(x AS FLOAT(64))", "spark": "CAST(x AS DOUBLE)"}, ) self.validate_all( "CAST(x as FLOAT(6))", write={"tsql": "CAST(x AS FLOAT(6))", "hive": "CAST(x AS FLOAT)"} ) self.validate_all( "CAST(x as FLOAT(36))", write={"tsql": "CAST(x AS FLOAT(36))", "hive": "CAST(x AS DOUBLE)"}, ) self.validate_all( "CAST(x as FLOAT(99))", write={"tsql": "CAST(x AS FLOAT(99))", "hive": "CAST(x AS DOUBLE)"}, ) self.validate_all( "CAST(x as DOUBLE)", write={ "spark": "CAST(x AS DOUBLE)", "tsql": "CAST(x AS FLOAT)", }, ) self.validate_all( "CAST(x as DECIMAL(15, 4))", write={ "spark": "CAST(x AS DECIMAL(15, 4))", "tsql": "CAST(x AS NUMERIC(15, 4))", }, ) self.validate_all( "CAST(x as NUMERIC(13,3))", write={ "spark": "CAST(x AS DECIMAL(13, 3))", "tsql": "CAST(x AS NUMERIC(13, 3))", }, ) self.validate_all( "CAST(x as MONEY)", write={ "spark": "CAST(x AS DECIMAL(15, 4))", "tsql": "CAST(x AS MONEY)", }, ) self.validate_all( "CAST(x as SMALLMONEY)", write={ 
"spark": "CAST(x AS DECIMAL(6, 4))", "tsql": "CAST(x AS SMALLMONEY)", }, ) self.validate_all( "CAST(x as REAL)", write={ "spark": "CAST(x AS FLOAT)", "tsql": "CAST(x AS FLOAT)", }, ) def test_types_string(self): self.validate_all( "CAST(x as CHAR(1))", write={ "spark": "CAST(x AS CHAR(1))", "tsql": "CAST(x AS CHAR(1))", }, ) self.validate_all( "CAST(x as VARCHAR(2))", write={ "spark": "CAST(x AS VARCHAR(2))", "tsql": "CAST(x AS VARCHAR(2))", }, ) self.validate_all( "CAST(x as NCHAR(1))", write={ "spark": "CAST(x AS CHAR(1))", "tsql": "CAST(x AS NCHAR(1))", }, ) self.validate_all( "CAST(x as NVARCHAR(2))", write={ "spark": "CAST(x AS VARCHAR(2))", "tsql": "CAST(x AS NVARCHAR(2))", }, ) self.validate_all( "CAST(x as UNIQUEIDENTIFIER)", write={ "spark": "CAST(x AS STRING)", "tsql": "CAST(x AS UNIQUEIDENTIFIER)", }, ) def test_types_date(self): self.validate_all( "CAST(x as DATE)", write={ "spark": "CAST(x AS DATE)", "tsql": "CAST(x AS DATE)", }, ) self.validate_all( "CAST(x as DATE)", write={ "spark": "CAST(x AS DATE)", "tsql": "CAST(x AS DATE)", }, ) self.validate_all( "CAST(x as TIME(4))", write={ "spark": "CAST(x AS TIMESTAMP)", "tsql": "CAST(x AS TIME(4))", }, ) self.validate_all( "CAST(x as DATETIME2)", write={ "spark": "CAST(x AS TIMESTAMP)", "tsql": "CAST(x AS DATETIME2)", }, ) self.validate_all( "CAST(x as DATETIMEOFFSET)", write={ "spark": "CAST(x AS TIMESTAMP)", "tsql": "CAST(x AS DATETIMEOFFSET)", }, ) self.validate_all( "CREATE TABLE t (col1 DATETIME2(2))", read={ "snowflake": "CREATE TABLE t (col1 TIMESTAMP_NTZ(2))", }, write={ "tsql": "CREATE TABLE t (col1 DATETIME2(2))", }, ) def test_types_bin(self): self.validate_all( "CAST(x as BIT)", write={ "spark": "CAST(x AS BOOLEAN)", "tsql": "CAST(x AS BIT)", }, ) self.validate_all( "CAST(x as VARBINARY)", write={ "spark": "CAST(x AS BINARY)", "tsql": "CAST(x AS VARBINARY)", }, ) self.validate_all( "CAST(x AS BOOLEAN)", write={"tsql": "CAST(x AS BIT)"}, ) self.validate_all("a = TRUE", write={"tsql": "a = 1"}) 
self.validate_all("a != FALSE", write={"tsql": "a <> 0"})
        self.validate_all("a IS TRUE", write={"tsql": "a = 1"})
        self.validate_all("a IS NOT FALSE", write={"tsql": "NOT a = 0"})
        self.validate_all(
            "CASE WHEN a IN (TRUE) THEN 'y' ELSE 'n' END",
            write={"tsql": "CASE WHEN a IN (1) THEN 'y' ELSE 'n' END"},
        )
        self.validate_all(
            "CASE WHEN a NOT IN (FALSE) THEN 'y' ELSE 'n' END",
            write={"tsql": "CASE WHEN NOT a IN (0) THEN 'y' ELSE 'n' END"},
        )
        self.validate_all("SELECT TRUE, FALSE", write={"tsql": "SELECT 1, 0"})
        self.validate_all("SELECT TRUE AS a, FALSE AS b", write={"tsql": "SELECT 1 AS a, 0 AS b"})
        self.validate_all(
            "SELECT 1 FROM a WHERE TRUE", write={"tsql": "SELECT 1 FROM a WHERE (1 = 1)"}
        )
        self.validate_all(
            "CASE WHEN TRUE THEN 'y' WHEN FALSE THEN 'n' ELSE NULL END",
            write={"tsql": "CASE WHEN (1 = 1) THEN 'y' WHEN (1 = 0) THEN 'n' ELSE NULL END"},
        )

    def test_ddl(self):
        """Validate T-SQL DDL statements.

        Covers columnstore indexes, view attributes, ALTER TABLE/VIEW options,
        a pretty-printed INSERT ... FROM (MERGE ...), temp tables, identity
        columns, the ``IF NOT EXISTS (...) EXEC(...)`` form expected when reading
        ``CREATE ... IF NOT EXISTS`` from other dialects, and multi-constraint
        ``ADD CONSTRAINT``.
        """
        for colstore in ("NONCLUSTERED COLUMNSTORE", "CLUSTERED COLUMNSTORE"):
            self.validate_identity(f"CREATE {colstore} INDEX index_name ON foo.bar")

        for view_attr in ("ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"):
            self.validate_identity(f"CREATE VIEW a.b WITH {view_attr} AS SELECT * FROM x")

        # The parsed statement must be an exp.Alter whose first action is an exp.Drop.
        self.validate_identity("ALTER TABLE dbo.DocExe DROP CONSTRAINT FK_Column_B").assert_is(
            exp.Alter
        ).args["actions"][0].assert_is(exp.Drop)

        for clustered_keyword in ("CLUSTERED", "NONCLUSTERED"):
            self.validate_identity(
                'CREATE TABLE "dbo"."benchmark" ('
                '"name" CHAR(7) NOT NULL, '
                '"internal_id" VARCHAR(10) NOT NULL, '
                f'UNIQUE {clustered_keyword} ("internal_id" ASC))',
                "CREATE TABLE [dbo].[benchmark] ("
                "[name] CHAR(7) NOT NULL, "
                "[internal_id] VARCHAR(10) NOT NULL, "
                f"UNIQUE {clustered_keyword} ([internal_id] ASC))",
            )

        self.validate_identity("CREATE SCHEMA testSchema")
        self.validate_identity("CREATE VIEW t AS WITH cte AS (SELECT 1 AS c) SELECT c FROM cte")
        self.validate_identity("ALTER TABLE tbl SET (SYSTEM_VERSIONING=OFF)")
        self.validate_identity("ALTER TABLE tbl SET (FILESTREAM_ON = 'test')")
        self.validate_identity("ALTER TABLE tbl SET (DATA_DELETION=ON)")
        self.validate_identity("ALTER TABLE tbl SET (DATA_DELETION=OFF)")
        self.validate_identity(
            "ALTER TABLE t1 WITH CHECK ADD CONSTRAINT ctr FOREIGN KEY (c1) REFERENCES t2 (c2)"
        )
        self.validate_identity(
            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS))"
        )
        self.validate_identity(
            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, HISTORY_RETENTION_PERIOD=INFINITE))"
        )
        self.validate_identity(
            "ALTER TABLE tbl SET (DATA_DELETION=ON(FILTER_COLUMN=col, RETENTION_PERIOD=5 MONTHS))"
        )

        self.validate_identity("ALTER VIEW v AS SELECT a, b, c, d FROM foo")
        self.validate_identity("ALTER VIEW v AS SELECT * FROM foo WHERE c > 100")
        self.validate_identity(
            "ALTER VIEW v WITH SCHEMABINDING AS SELECT * FROM foo WHERE c > 100",
            check_command_warning=True,
        )
        self.validate_identity(
            "ALTER VIEW v WITH ENCRYPTION AS SELECT * FROM foo WHERE c > 100",
            check_command_warning=True,
        )
        self.validate_identity(
            "ALTER VIEW v WITH VIEW_METADATA AS SELECT * FROM foo WHERE c > 100",
            check_command_warning=True,
        )
        self.validate_identity(
            "CREATE COLUMNSTORE INDEX index_name ON foo.bar",
            "CREATE NONCLUSTERED COLUMNSTORE INDEX index_name ON foo.bar",
        )
        self.validate_identity(
            "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < CURRENT_TIMESTAMP - 7; END",
            "CREATE PROCEDURE foo AS BEGIN DELETE FROM bla WHERE foo < GETDATE() - 7; END",
        )

        self.validate_identity(
            "INSERT INTO Production.UpdatedInventory SELECT ProductID, LocationID, NewQty, PreviousQty FROM (MERGE INTO Production.ProductInventory AS pi USING (SELECT ProductID, SUM(OrderQty) FROM Sales.SalesOrderDetail AS sod INNER JOIN Sales.SalesOrderHeader AS soh ON sod.SalesOrderID = soh.SalesOrderID AND soh.OrderDate BETWEEN '20030701' AND '20030731' GROUP BY ProductID) AS src(ProductID, OrderQty) ON pi.ProductID = src.ProductID WHEN MATCHED AND pi.Quantity - src.OrderQty >= 0 THEN UPDATE SET pi.Quantity = pi.Quantity - src.OrderQty WHEN MATCHED AND pi.Quantity - src.OrderQty <= 0 THEN DELETE OUTPUT $action, Inserted.ProductID, Inserted.LocationID, Inserted.Quantity AS NewQty, Deleted.Quantity AS PreviousQty) AS Changes(Action, ProductID, LocationID, NewQty, PreviousQty) WHERE Action = 'UPDATE'",
            """INSERT INTO Production.UpdatedInventory SELECT ProductID, LocationID, NewQty, PreviousQty FROM ( MERGE INTO Production.ProductInventory AS pi USING ( SELECT ProductID, SUM(OrderQty) FROM Sales.SalesOrderDetail AS sod INNER JOIN Sales.SalesOrderHeader AS soh ON sod.SalesOrderID = soh.SalesOrderID AND soh.OrderDate BETWEEN '20030701' AND '20030731' GROUP BY ProductID ) AS src(ProductID, OrderQty) ON pi.ProductID = src.ProductID WHEN MATCHED AND pi.Quantity - src.OrderQty >= 0 THEN UPDATE SET pi.Quantity = pi.Quantity - src.OrderQty WHEN MATCHED AND pi.Quantity - src.OrderQty <= 0 THEN DELETE OUTPUT $action, Inserted.ProductID, Inserted.LocationID, Inserted.Quantity AS NewQty, Deleted.Quantity AS PreviousQty ) AS Changes(Action, ProductID, LocationID, NewQty, PreviousQty) WHERE Action = 'UPDATE'""",
            pretty=True,
        )

        self.validate_all(
            "CREATE TABLE [#temptest] (name INTEGER)",
            read={
                "duckdb": "CREATE TEMPORARY TABLE 'temptest' (name INTEGER)",
                "tsql": "CREATE TABLE [#temptest] (name INTEGER)",
            },
        )
        self.validate_all(
            "CREATE TABLE tbl (id INTEGER IDENTITY PRIMARY KEY)",
            read={
                "mysql": "CREATE TABLE tbl (id INT AUTO_INCREMENT PRIMARY KEY)",
                "tsql": "CREATE TABLE tbl (id INTEGER IDENTITY PRIMARY KEY)",
            },
        )
        self.validate_all(
            "CREATE TABLE tbl (id INTEGER NOT NULL IDENTITY(10, 1) PRIMARY KEY)",
            read={
                "postgres": "CREATE TABLE tbl (id INT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10) PRIMARY KEY)",
                "tsql": "CREATE TABLE tbl (id INTEGER NOT NULL IDENTITY(10, 1) PRIMARY KEY)",
            },
            write={
                "databricks": "CREATE TABLE tbl (id BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 1) PRIMARY KEY)",
                "postgres": "CREATE TABLE tbl (id INT NOT NULL GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 1) PRIMARY KEY)",
            },
        )
        self.validate_all(
            "CREATE TABLE x (a UNIQUEIDENTIFIER, b VARBINARY)",
            write={
                "duckdb": "CREATE TABLE x (a UUID, b BLOB)",
                "presto": "CREATE TABLE x (a UUID, b VARBINARY)",
                "spark": "CREATE TABLE x (a STRING, b BINARY)",
                "postgres": "CREATE TABLE x (a UUID, b BYTEA)",
            },
        )
        # CREATE TABLE ... AS SELECT from other dialects is expected as SELECT ... INTO.
        self.validate_all(
            "SELECT * INTO foo.bar.baz FROM (SELECT * FROM a.b.c) AS temp",
            read={
                "": "CREATE TABLE foo.bar.baz AS SELECT * FROM a.b.c",
                "duckdb": "CREATE TABLE foo.bar.baz AS (SELECT * FROM a.b.c)",
            },
        )
        # CREATE ... IF NOT EXISTS is expected as a guarded EXEC('...') statement.
        self.validate_all(
            "IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id('db.tbl') AND name = 'idx') EXEC('CREATE INDEX idx ON db.tbl')",
            read={
                "": "CREATE INDEX IF NOT EXISTS idx ON db.tbl",
            },
        )
        self.validate_all(
            "IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = 'foo') EXEC('CREATE SCHEMA foo')",
            read={
                "": "CREATE SCHEMA IF NOT EXISTS foo",
            },
        )
        self.validate_all(
            "IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'baz' AND TABLE_SCHEMA = 'bar' AND TABLE_CATALOG = 'foo') EXEC('CREATE TABLE foo.bar.baz (a INTEGER)')",
            read={
                "": "CREATE TABLE IF NOT EXISTS foo.bar.baz (a INTEGER)",
            },
        )
        self.validate_all(
            "IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'baz' AND TABLE_SCHEMA = 'bar' AND TABLE_CATALOG = 'foo') EXEC('SELECT * INTO foo.bar.baz FROM (SELECT ''2020'' AS z FROM a.b.c) AS temp')",
            read={
                "": "CREATE TABLE IF NOT EXISTS foo.bar.baz AS SELECT '2020' AS z FROM a.b.c",
            },
        )
        self.validate_all(
            "IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'baz' AND TABLE_SCHEMA = 'bar' AND TABLE_CATALOG = 'foo') EXEC('WITH cte1 AS (SELECT 1 AS col_a), cte2 AS (SELECT 1 AS col_b) SELECT * INTO foo.bar.baz FROM (SELECT col_a FROM cte1 UNION ALL SELECT col_b FROM cte2) AS temp')",
            read={
                "": "CREATE TABLE IF NOT EXISTS foo.bar.baz AS WITH cte1 AS (SELECT 1 AS col_a), cte2 AS (SELECT 1 AS col_b) SELECT col_a FROM cte1 UNION ALL SELECT col_b FROM cte2"
            },
        )
        self.validate_all(
            "CREATE OR ALTER VIEW a.b AS SELECT 1",
            read={
                "": "CREATE OR REPLACE VIEW a.b AS SELECT 1",
            },
            write={
                "tsql": "CREATE OR ALTER VIEW a.b AS SELECT 1",
            },
        )
        self.validate_all(
            "ALTER TABLE a ADD b INTEGER, c INTEGER",
            read={
                "": "ALTER TABLE a ADD COLUMN b INT, ADD COLUMN c INT",
            },
            write={
                "": "ALTER TABLE a ADD COLUMN b INT, ADD COLUMN c INT",
                "tsql": "ALTER TABLE a ADD b INTEGER, c INTEGER",
            },
        )
        self.validate_all(
            "ALTER TABLE a ALTER COLUMN b INTEGER",
            read={
                "": "ALTER TABLE a ALTER COLUMN b INT",
            },
            write={
                "": "ALTER TABLE a ALTER COLUMN b SET DATA TYPE INT",
                "tsql": "ALTER TABLE a ALTER COLUMN b INTEGER",
            },
        )
        self.validate_all(
            "CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIME(4), d FLOAT(24))",
            write={
                "spark": "CREATE TEMPORARY TABLE mytemp (a INT, b CHAR(2), c TIMESTAMP, d FLOAT) USING PARQUET",
                "tsql": "CREATE TABLE #mytemp (a INTEGER, b CHAR(2), c TIME(4), d FLOAT(24))",
            },
        )

        # Both constraints of a single ADD must end up under one exp.AddConstraint node.
        constraint = self.validate_identity(
            "ALTER TABLE tbl ADD CONSTRAINT cnstr PRIMARY KEY CLUSTERED (ID), CONSTRAINT cnstr2 UNIQUE CLUSTERED (ID)"
        ).find(exp.AddConstraint)
        assert constraint
        assert len(list(constraint.find_all(exp.Constraint))) == 2

    def test_transaction(self):
        """Validate BEGIN TRANSACTION variants; ``BEGIN TRAN`` is expected as ``BEGIN TRANSACTION``."""
        self.validate_identity("BEGIN TRANSACTION")
        self.validate_all("BEGIN TRAN", write={"tsql": "BEGIN TRANSACTION"})
        self.validate_identity("BEGIN TRANSACTION transaction_name")
        self.validate_identity("BEGIN TRANSACTION @tran_name_variable")
        self.validate_identity("BEGIN TRANSACTION transaction_name WITH MARK 'description'")

    def test_commit(self):
        """Validate COMMIT variants; ``COMMIT`` and ``COMMIT TRAN`` are expected as ``COMMIT TRANSACTION``."""
        self.validate_all("COMMIT", write={"tsql": "COMMIT TRANSACTION"})
        self.validate_all("COMMIT TRAN", write={"tsql": "COMMIT TRANSACTION"})
        self.validate_identity("COMMIT TRANSACTION")
        self.validate_identity("COMMIT TRANSACTION transaction_name")
        self.validate_identity("COMMIT TRANSACTION @tran_name_variable")

        self.validate_identity(
            "COMMIT TRANSACTION @tran_name_variable WITH (DELAYED_DURABILITY = ON)"
        )
        self.validate_identity(
            "COMMIT TRANSACTION transaction_name WITH (DELAYED_DURABILITY = OFF)"
        )

    def test_rollback(self):
        """Validate ROLLBACK variants; ``ROLLBACK`` and ``ROLLBACK TRAN`` are expected as ``ROLLBACK TRANSACTION``."""
        self.validate_all("ROLLBACK", write={"tsql": "ROLLBACK TRANSACTION"})
        self.validate_all("ROLLBACK TRAN", write={"tsql": "ROLLBACK TRANSACTION"})
        self.validate_identity("ROLLBACK TRANSACTION")
        self.validate_identity("ROLLBACK TRANSACTION transaction_name")
        self.validate_identity("ROLLBACK TRANSACTION @tran_name_variable")

    def test_udf(self):
        """Validate DECLARE, CREATE PROCEDURE/PROC (with ``WITH`` options) and CREATE FUNCTION."""
        self.validate_identity(
            "DECLARE @DWH_DateCreated AS DATETIME2 = CONVERT(DATETIME2, GETDATE(), 104)",
            "DECLARE @DWH_DateCreated DATETIME2 = CONVERT(DATETIME2, GETDATE(), 104)",
        )
        self.validate_identity(
            "CREATE PROCEDURE foo @a INTEGER, @b INTEGER AS SELECT @a = SUM(bla) FROM baz AS bar"
        )
        self.validate_identity(
            "CREATE PROC foo @ID INTEGER, @AGE INTEGER AS SELECT DB_NAME(@ID) AS ThatDB"
        )
        self.validate_identity("CREATE PROC foo AS SELECT BAR() AS baz")
        self.validate_identity("CREATE PROCEDURE foo AS SELECT BAR() AS baz")

        self.validate_identity("CREATE PROCEDURE foo WITH ENCRYPTION AS SELECT 1")
        self.validate_identity("CREATE PROCEDURE foo WITH RECOMPILE AS SELECT 1")
        self.validate_identity("CREATE PROCEDURE foo WITH SCHEMABINDING AS SELECT 1")
        self.validate_identity("CREATE PROCEDURE foo WITH NATIVE_COMPILATION AS SELECT 1")
        self.validate_identity("CREATE PROCEDURE foo WITH EXECUTE AS OWNER AS SELECT 1")
        self.validate_identity("CREATE PROCEDURE foo WITH EXECUTE AS 'username' AS SELECT 1")
        self.validate_identity(
            "CREATE PROCEDURE foo WITH EXECUTE AS OWNER, SCHEMABINDING, NATIVE_COMPILATION AS SELECT 1"
        )

        self.validate_identity("CREATE FUNCTION foo(@bar INTEGER) RETURNS TABLE AS RETURN SELECT 1")
        self.validate_identity("CREATE FUNCTION dbo.ISOweek(@DATE DATETIME2) RETURNS INTEGER")

        # The following two cases don't necessarily correspond to valid TSQL, but they are used to verify
        # that the syntax RETURNS @return_variable TABLE ... is parsed correctly.
#
        # See also "Transact-SQL Multi-Statement Table-Valued Function Syntax"
        # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16
        self.validate_identity(
            "CREATE FUNCTION foo(@bar INTEGER) RETURNS @foo TABLE (x INTEGER, y NUMERIC) AS RETURN SELECT 1"
        )
        self.validate_identity(
            "CREATE FUNCTION foo() RETURNS @contacts TABLE (first_name VARCHAR(50), phone VARCHAR(25)) AS SELECT @fname, @phone"
        )

        self.validate_all(
            """ CREATE FUNCTION udfProductInYear ( @model_year INT ) RETURNS TABLE AS RETURN SELECT product_name, model_year, list_price FROM production.products WHERE model_year = @model_year """,
            write={
                "tsql": """CREATE FUNCTION udfProductInYear( @model_year INTEGER ) RETURNS TABLE AS RETURN SELECT product_name, model_year, list_price FROM production.products WHERE model_year = @model_year""",
            },
            pretty=True,
        )

    def test_procedure_keywords(self):
        """Validate ``BEGIN``, ``END`` and ``SET XACT_ABORT ON`` as identity statements."""
        self.validate_identity("BEGIN")
        self.validate_identity("END")
        self.validate_identity("SET XACT_ABORT ON")

    def test_charindex(self):
        """Validate CHARINDEX against Spark's LOCATE, with and without a start position."""
        self.validate_identity(
            "SELECT CAST(SUBSTRING('ABCD~1234', CHARINDEX('~', 'ABCD~1234') + 1, LEN('ABCD~1234')) AS BIGINT)"
        )

        self.validate_all(
            "CHARINDEX(x, y, 9)",
            read={
                "spark": "LOCATE(x, y, 9)",
            },
            write={
                "spark": "LOCATE(x, y, 9)",
                "tsql": "CHARINDEX(x, y, 9)",
            },
        )
        self.validate_all(
            "CHARINDEX(x, y)",
            read={
                "spark": "LOCATE(x, y)",
            },
            write={
                "spark": "LOCATE(x, y)",
                "tsql": "CHARINDEX(x, y)",
            },
        )
        self.validate_all(
            "CHARINDEX('sub', 'testsubstring', 3)",
            read={
                "spark": "LOCATE('sub', 'testsubstring', 3)",
            },
            write={
                "spark": "LOCATE('sub', 'testsubstring', 3)",
                "tsql": "CHARINDEX('sub', 'testsubstring', 3)",
            },
        )
        self.validate_all(
            "CHARINDEX('sub', 'testsubstring')",
            read={
                "spark": "LOCATE('sub', 'testsubstring')",
            },
            write={
                "spark": "LOCATE('sub', 'testsubstring')",
                "tsql": "CHARINDEX('sub', 'testsubstring')",
            },
        )

    def test_len(self):
        """Validate LEN, RIGHT and LEFT against Spark.

        The expected Spark output wraps non-string arguments in
        ``CAST(... AS STRING)`` and leaves a string literal as is.
        """
        self.validate_all(
            "LEN(x)", read={"": "LENGTH(x)"}, write={"spark": "LENGTH(CAST(x AS STRING))"}
        )
        self.validate_all(
            "RIGHT(x, 1)",
            read={"": "RIGHT(CAST(x AS STRING), 1)"},
            write={"spark": "RIGHT(CAST(x AS STRING), 1)"},
        )
        self.validate_all(
            "LEFT(x, 1)",
            read={"": "LEFT(CAST(x AS STRING), 1)"},
            write={"spark": "LEFT(CAST(x AS STRING), 1)"},
        )
        self.validate_all("LEN(1)", write={"tsql": "LEN(1)", "spark": "LENGTH(CAST(1 AS STRING))"})
        self.validate_all("LEN('x')", write={"tsql": "LEN('x')", "spark": "LENGTH('x')"})

    def test_replicate(self):
        """Validate that REPLICATE is expected as REPEAT for Spark."""
        self.validate_all(
            "REPLICATE('x', 2)",
            write={
                "spark": "REPEAT('x', 2)",
                "tsql": "REPLICATE('x', 2)",
            },
        )

    def test_isnull(self):
        """Validate ISNULL as an identity and as COALESCE for Spark."""
        self.validate_identity("ISNULL(x, y)")
        self.validate_all("ISNULL(x, y)", write={"spark": "COALESCE(x, y)"})

    def test_json(self):
        """Validate JSON_QUERY and JSON_VALUE.

        The expected T-SQL output for both is
        ``ISNULL(JSON_QUERY(...), JSON_VALUE(...))``; the expected Spark output
        is ``GET_JSON_OBJECT``.
        """
        self.validate_identity(
            """JSON_QUERY(REPLACE(REPLACE(x , '''', '"'), '""', '"'))""",
            """ISNULL(JSON_QUERY(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'), JSON_VALUE(REPLACE(REPLACE(x, '''', '"'), '""', '"'), '$'))""",
        )

        self.validate_all(
            "JSON_QUERY(r.JSON, '$.Attr_INT')",
            write={
                "spark": "GET_JSON_OBJECT(r.JSON, '$.Attr_INT')",
                "tsql": "ISNULL(JSON_QUERY(r.JSON, '$.Attr_INT'), JSON_VALUE(r.JSON, '$.Attr_INT'))",
            },
        )
        self.validate_all(
            "JSON_VALUE(r.JSON, '$.Attr_INT')",
            write={
                "spark": "GET_JSON_OBJECT(r.JSON, '$.Attr_INT')",
                "tsql": "ISNULL(JSON_QUERY(r.JSON, '$.Attr_INT'), JSON_VALUE(r.JSON, '$.Attr_INT'))",
            },
        )

    def test_datefromparts(self):
        """Validate that DATEFROMPARTS is expected as MAKE_DATE for Spark."""
        self.validate_all(
            "SELECT DATEFROMPARTS('2020', 10, 01)",
            write={
                "spark": "SELECT MAKE_DATE('2020', 10, 01)",
                "tsql": "SELECT DATEFROMPARTS('2020', 10, 01)",
            },
        )

    def test_datename(self):
        """Validate DATENAME for the ``mm`` and ``dw`` parts (FORMAT in T-SQL, DATE_FORMAT in Spark)."""
        self.validate_all(
            "SELECT DATENAME(mm, '1970-01-01')",
            write={
                "spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'MMMM')",
                "tsql": "SELECT FORMAT(CAST('1970-01-01' AS DATETIME2), 'MMMM')",
            },
        )
        self.validate_all(
            "SELECT DATENAME(dw, '1970-01-01')",
            write={
                "spark": "SELECT DATE_FORMAT(CAST('1970-01-01' AS TIMESTAMP), 'EEEE')",
                "tsql": "SELECT FORMAT(CAST('1970-01-01' AS DATETIME2), 'dddd')",
            },
        )

    def test_datepart(self):
        """Validate DATEPART.

        Every spelling or abbreviation of a date part is expected to come back
        as the canonical name paired with it in the tables below; DATEPART is
        expected as EXTRACT for Postgres and Spark.
        """
        # Each entry is (accepted spellings, expected canonical part name).
        datepart_formats = (
            (("QUARTER", "qq", "q"), "QUARTER"),
            (("YEAR", "yy", "yyyy"), "YEAR"),
            (("HOUR", "hh"), "HOUR"),
            (("MINUTE", "mi", "n"), "MINUTE"),
            (("SECOND", "ss", "s"), "SECOND"),
            (("MILLISECOND", "ms"), "MILLISECOND"),
            (("MICROSECOND", "mcs"), "MICROSECOND"),
            (("NANOSECOND", "ns"), "NANOSECOND"),
            (("WEEKDAY", "dw"), "WEEKDAY"),
            (("TZOFFSET", "tz"), "TZOFFSET"),
            (("MONTH", "mm", "m"), "MONTH"),
            (("DAYOFYEAR", "dy", "y"), "DAYOFYEAR"),
            (("DAY", "dd", "d"), "DAY"),
        )

        for formats, canonical in datepart_formats:
            for fmt in formats:
                with self.subTest(f"Testing DATEPART where part is: {fmt}"):
                    self.validate_identity(
                        f"DATEPART({fmt}, x)",
                        f"DATEPART({canonical}, x)",
                    )

        # Week-based parts use the same (spellings, canonical) layout, inside a SELECT.
        select_datepart_formats = (
            (("WEEK", "WW", "WK"), "WEEK"),
            (("ISOWK", "ISOWW", "ISO_WEEK"), "ISO_WEEK"),
        )

        for formats, canonical in select_datepart_formats:
            for fmt in formats:
                with self.subTest(f"Testing DATEPART where part is: {fmt}"):
                    self.validate_identity(
                        f"SELECT DATEPART({fmt}, '2024-11-21')",
                        f"SELECT DATEPART({canonical}, '2024-11-21')",
                    )

        self.validate_all(
            "SELECT DATEPART(month,'1970-01-01')",
            write={
                "spark": "SELECT EXTRACT(month FROM '1970-01-01')",
                "tsql": "SELECT DATEPART(month, '1970-01-01')",
            },
        )
        self.validate_all(
            "SELECT DATEPART(YEAR, CAST('2017-01-01' AS DATE))",
            read={
                "postgres": "SELECT DATE_PART('YEAR', '2017-01-01'::DATE)",
            },
            write={
                "postgres": "SELECT EXTRACT(YEAR FROM CAST('2017-01-01' AS DATE))",
                "spark": "SELECT EXTRACT(YEAR FROM CAST('2017-01-01' AS DATE))",
                "tsql": "SELECT DATEPART(YEAR, CAST('2017-01-01' AS DATE))",
            },
        )
        self.validate_all(
            "SELECT DATEPART(month, CAST('2017-03-01' AS DATE))",
            read={
                "postgres": "SELECT DATE_PART('month', '2017-03-01'::DATE)",
            },
            write={
                "postgres": "SELECT EXTRACT(month FROM CAST('2017-03-01' AS DATE))",
                "spark": "SELECT EXTRACT(month FROM CAST('2017-03-01' AS DATE))",
                "tsql": "SELECT DATEPART(month, CAST('2017-03-01' AS DATE))",
            },
        )
        self.validate_all(
            "SELECT DATEPART(day, CAST('2017-01-02' AS DATE))",
            read={
                "postgres": "SELECT DATE_PART('day', '2017-01-02'::DATE)",
            },
            write={
                "postgres": "SELECT EXTRACT(day FROM CAST('2017-01-02' AS DATE))",
                "spark": "SELECT EXTRACT(day FROM CAST('2017-01-02' AS DATE))",
                "tsql": "SELECT DATEPART(day, CAST('2017-01-02' AS DATE))",
            },
        )

        self.validate_identity(
            'SELECT DATEPART("dd", x)',
            "SELECT DATEPART(DAY, x)",
        )

    def test_convert(self):
        """Validate CONVERT, TRY_CONVERT and TRY_CAST against Spark and MySQL.

        Covers character types with and without a length (including ``MAX``),
        style codes 121 and 120 with their expected date format strings, and
        conversions to DATE/DATETIME/DATETIME2/INT.
        """
        self.validate_all(
            "CONVERT(NVARCHAR(200), x)",
            write={
                "spark": "CAST(x AS VARCHAR(200))",
                "tsql": "CONVERT(NVARCHAR(200), x)",
            },
        )
        self.validate_all(
            "CONVERT(NVARCHAR, x)",
            write={
                "spark": "CAST(x AS VARCHAR(30))",
                "tsql": "CONVERT(NVARCHAR, x)",
            },
        )
        self.validate_all(
            "CONVERT(NVARCHAR(MAX), x)",
            write={
                "spark": "CAST(x AS STRING)",
                "tsql": "CONVERT(NVARCHAR(MAX), x)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR(200), x)",
            write={
                "spark": "CAST(x AS VARCHAR(200))",
                "tsql": "CONVERT(VARCHAR(200), x)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR, x)",
            write={
                "spark": "CAST(x AS VARCHAR(30))",
                "tsql": "CONVERT(VARCHAR, x)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR(MAX), x)",
            write={
                "spark": "CAST(x AS STRING)",
                "tsql": "CONVERT(VARCHAR(MAX), x)",
            },
        )
        self.validate_all(
            "CONVERT(CHAR(40), x)",
            write={
                "spark": "CAST(x AS CHAR(40))",
                "tsql": "CONVERT(CHAR(40), x)",
            },
        )
        self.validate_all(
            "CONVERT(CHAR, x)",
            write={
                "spark": "CAST(x AS CHAR(30))",
                "tsql": "CONVERT(CHAR, x)",
            },
        )
        self.validate_all(
            "CONVERT(NCHAR(40), x)",
            write={
                "spark": "CAST(x AS CHAR(40))",
                "tsql": "CONVERT(NCHAR(40), x)",
            },
        )
        self.validate_all(
            "CONVERT(NCHAR, x)",
            write={
                "spark": "CAST(x AS CHAR(30))",
                "tsql": "CONVERT(NCHAR, x)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR, x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(30))",
                "tsql": "CONVERT(VARCHAR, x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR(40), x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(40))",
                "tsql": "CONVERT(VARCHAR(40), x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(VARCHAR(MAX), x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS STRING)",
                "tsql": "CONVERT(VARCHAR(MAX), x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(NVARCHAR, x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(30))",
                "tsql": "CONVERT(NVARCHAR, x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(NVARCHAR(40), x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(40))",
                "tsql": "CONVERT(NVARCHAR(40), x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(NVARCHAR(MAX), x, 121)",
            write={
                "spark": "CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS STRING)",
                "tsql": "CONVERT(NVARCHAR(MAX), x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(DATE, x, 121)",
            write={
                "spark": "TO_DATE(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS')",
                "tsql": "CONVERT(DATE, x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(DATETIME, x, 121)",
            write={
                "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS')",
                "tsql": "CONVERT(DATETIME, x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(DATETIME2, x, 121)",
            write={
                "spark": "TO_TIMESTAMP(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS')",
                "tsql": "CONVERT(DATETIME2, x, 121)",
            },
        )
        self.validate_all(
            "CONVERT(INT, x)",
            write={
                "spark": "CAST(x AS INT)",
                "tsql": "CONVERT(INTEGER, x)",
            },
        )
        # For an INT target the expected Spark output ignores the style argument.
        self.validate_all(
            "CONVERT(INT, x, 121)",
            write={
                "spark": "CAST(x AS INT)",
                "tsql": "CONVERT(INTEGER, x, 121)",
            },
        )
        self.validate_all(
            "TRY_CONVERT(NVARCHAR, x, 121)",
            write={
                "spark": "TRY_CAST(DATE_FORMAT(x, 'yyyy-MM-dd HH:mm:ss.SSSSSS') AS VARCHAR(30))",
                "tsql": "TRY_CONVERT(NVARCHAR, x, 121)",
            },
        )
        self.validate_all(
            "TRY_CONVERT(INT, x)",
            write={
                "spark": "TRY_CAST(x AS INT)",
                "tsql": "TRY_CONVERT(INTEGER, x)",
            },
        )
        self.validate_all(
            "TRY_CAST(x AS INT)",
            write={
                "spark": "TRY_CAST(x AS INT)",
                "tsql": "TRY_CAST(x AS INTEGER)",
            },
        )
        self.validate_all(
            "SELECT CONVERT(VARCHAR(10), testdb.dbo.test.x, 120) y FROM testdb.dbo.test",
            write={
                "mysql": "SELECT CAST(DATE_FORMAT(testdb.dbo.test.x, '%Y-%m-%d %T') AS CHAR(10)) AS y FROM testdb.dbo.test",
                "spark": "SELECT CAST(DATE_FORMAT(testdb.dbo.test.x, 'yyyy-MM-dd HH:mm:ss') AS VARCHAR(10)) AS y FROM testdb.dbo.test",
                "tsql": "SELECT CONVERT(VARCHAR(10), testdb.dbo.test.x, 120) AS y FROM testdb.dbo.test",
            },
        )
        self.validate_all(
            "SELECT CONVERT(VARCHAR(10), y.x) z FROM testdb.dbo.test y",
            write={
                "mysql": "SELECT CAST(y.x AS CHAR(10)) AS z FROM testdb.dbo.test AS y",
                "spark": "SELECT CAST(y.x AS VARCHAR(10)) AS z FROM testdb.dbo.test AS y",
                "tsql": "SELECT CONVERT(VARCHAR(10), y.x) AS z FROM testdb.dbo.test AS y",
            },
        )
        self.validate_all(
            "SELECT CAST((SELECT x FROM y) AS VARCHAR) AS test",
            write={
                "spark": "SELECT CAST((SELECT x FROM y) AS STRING) AS test",
                "tsql": "SELECT CAST((SELECT x FROM y) AS VARCHAR) AS test",
            },
        )

    def test_add_date(self):
        """Validate DATEADD against the expected BigQuery, Spark and Databricks output."""
        self.validate_identity("SELECT DATEADD(YEAR, 1, '2017/08/25')")

        self.validate_all(
            "DATEADD(year, 50, '2006-07-31')",
            write={"bigquery": "DATE_ADD('2006-07-31', INTERVAL 50 YEAR)"},
        )
        self.validate_all(
            "SELECT DATEADD(year, 1, '2017/08/25')",
            write={"spark": "SELECT ADD_MONTHS('2017/08/25', 12)"},
        )
        self.validate_all(
            "SELECT DATEADD(qq, 1, '2017/08/25')",
            write={"spark": "SELECT ADD_MONTHS('2017/08/25', 3)"},
        )
        self.validate_all(
            "SELECT DATEADD(wk, 1, '2017/08/25')",
            write={
                "spark": "SELECT DATE_ADD('2017/08/25', 7)",
                "databricks": "SELECT DATEADD(WEEK, 1, '2017/08/25')",
            },
        )

    def test_date_diff(self):
        """Validate DATEDIFF and DATEDIFF_BIG against several dialects.

        Integer start values are expected to come back as date literals counted
        from 1900-01-01 (``0`` -> ``'1900-01-01'``, ``1`` -> ``'1900-01-02'``,
        ``-3`` -> ``'1899-12-29'``). Both function names run through the same
        cases, each in its own ``subTest``.
        """
        self.validate_identity("SELECT DATEDIFF(HOUR, 1.5, '2021-01-01')")
        self.validate_identity("SELECT DATEDIFF_BIG(HOUR, 1.5, '2021-01-01')")

        for fnc in ["DATEDIFF", "DATEDIFF_BIG"]:
            with self.subTest(f"Transpiling T-SQL's {fnc}"):
                self.validate_all(
                    f"SELECT {fnc}(quarter, 0, '2021-01-01')",
                    write={
                        "tsql": f"SELECT {fnc}(QUARTER, CAST('1900-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
                        "spark": "SELECT DATEDIFF(QUARTER, CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
                        "duckdb": "SELECT DATE_DIFF('QUARTER', CAST('1900-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
                    },
                )
                self.validate_all(
                    f"SELECT {fnc}(day, 1, '2021-01-01')",
                    write={
                        "tsql": f"SELECT {fnc}(DAY, CAST('1900-01-02' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
                        "spark": "SELECT DATEDIFF(DAY, CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
                        "duckdb": "SELECT DATE_DIFF('DAY', CAST('1900-01-02' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
                    },
                )
                self.validate_all(
                    f"SELECT {fnc}(year, '2020-01-01', '2021-01-01')",
                    write={
                        "tsql": f"SELECT {fnc}(YEAR, CAST('2020-01-01' AS DATETIME2), CAST('2021-01-01' AS DATETIME2))",
                        "spark": "SELECT DATEDIFF(YEAR, CAST('2020-01-01' AS TIMESTAMP), CAST('2021-01-01' AS TIMESTAMP))",
                        "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('2021-01-01' AS TIMESTAMP), CAST('2020-01-01' AS TIMESTAMP)) / 12 AS INT)",
                    },
                )
                self.validate_all(
                    f"SELECT {fnc}(mm, 'start', 'end')",
                    write={
                        "databricks": "SELECT DATEDIFF(MONTH, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
                        "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) AS INT)",
                        "tsql": f"SELECT {fnc}(MONTH, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
                    },
                )
                self.validate_all(
                    f"SELECT {fnc}(quarter, 'start', 'end')",
                    write={
                        "databricks": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
                        "spark": "SELECT DATEDIFF(QUARTER, CAST('start' AS TIMESTAMP), CAST('end' AS TIMESTAMP))",
                        "spark2": "SELECT CAST(MONTHS_BETWEEN(CAST('end' AS TIMESTAMP), CAST('start' AS TIMESTAMP)) / 3 AS INT)",
                        "tsql": f"SELECT {fnc}(QUARTER, CAST('start' AS DATETIME2), CAST('end' AS DATETIME2))",
                    },
                )

                # Check superfluous casts aren't added. ref: https://github.com/TobikoData/sqlmesh/issues/2672
                self.validate_all(
                    f"SELECT {fnc}(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
                    write={
                        "tsql": f"SELECT {fnc}(DAY, CAST(a AS DATETIME2), CAST(b AS DATETIME2)) AS x FROM foo",
                        "clickhouse": "SELECT DATE_DIFF(DAY, CAST(CAST(a AS Nullable(DateTime)) AS DateTime64(6)), CAST(CAST(b AS Nullable(DateTime)) AS DateTime64(6))) AS x FROM foo",
                    },
                )

                self.validate_identity(
                    f"SELECT DATEADD(DAY, {fnc}(DAY, -3, GETDATE()), '08:00:00')",
                    f"SELECT DATEADD(DAY, {fnc}(DAY, CAST('1899-12-29' AS DATETIME2), CAST(GETDATE() AS DATETIME2)), '08:00:00')",
                )

    def test_lateral_subquery(self): self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) t(v, y)", write={ "spark": "SELECT x.a, x.b, t.v, t.y FROM x INNER JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)", "postgres": "SELECT x.a, x.b, t.v, t.y FROM x INNER JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) ON TRUE", "tsql": "SELECT x.a, x.b, t.v, t.y FROM x CROSS APPLY (SELECT v, y FROM t) AS t(v, y)", }, ) self.validate_all( "SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y)", write={ "spark": "SELECT x.a, x.b, t.v, t.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y)", "postgres": "SELECT x.a, x.b, t.v, t.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) ON TRUE", "tsql": "SELECT x.a, x.b, t.v, t.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y)", }, ) self.validate_all( "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) t(v, y) OUTER APPLY (SELECT v, y FROM t) s(v, y) LEFT JOIN z ON z.id = s.id", write={ "spark": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) LEFT JOIN LATERAL (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id", "postgres": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x LEFT JOIN LATERAL (SELECT v, y FROM t) AS t(v, y) ON TRUE LEFT JOIN LATERAL (SELECT v, y FROM t) AS s(v, y) ON TRUE LEFT JOIN z ON
z.id = s.id", "tsql": "SELECT x.a, x.b, t.v, t.y, s.v, s.y FROM x OUTER APPLY (SELECT v, y FROM t) AS t(v, y) OUTER APPLY (SELECT v, y FROM t) AS s(v, y) LEFT JOIN z ON z.id = s.id", }, ) def test_lateral_table_valued_function(self): self.validate_all( "SELECT t.x, y.z FROM x CROSS APPLY tvfTest(t.x) y(z)", write={ "spark": "SELECT t.x, y.z FROM x INNER JOIN LATERAL TVFTEST(t.x) AS y(z)", "postgres": "SELECT t.x, y.z FROM x INNER JOIN LATERAL TVFTEST(t.x) AS y(z) ON TRUE", "tsql": "SELECT t.x, y.z FROM x CROSS APPLY TVFTEST(t.x) AS y(z)", }, ) self.validate_all( "SELECT t.x, y.z FROM x OUTER APPLY tvfTest(t.x)y(z)", write={ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL TVFTEST(t.x) AS y(z)", "postgres": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL TVFTEST(t.x) AS y(z) ON TRUE", "tsql": "SELECT t.x, y.z FROM x OUTER APPLY TVFTEST(t.x) AS y(z)", }, ) self.validate_all( "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x)y(z)", write={ "spark": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z)", "postgres": "SELECT t.x, y.z FROM x LEFT JOIN LATERAL a.b.tvfTest(t.x) AS y(z) ON TRUE", "tsql": "SELECT t.x, y.z FROM x OUTER APPLY a.b.tvfTest(t.x) AS y(z)", }, ) def test_top(self): self.validate_all( "SELECT DISTINCT TOP 3 * FROM A", read={ "spark": "SELECT DISTINCT * FROM A LIMIT 3", }, write={ "spark": "SELECT DISTINCT * FROM A LIMIT 3", "teradata": "SELECT DISTINCT TOP 3 * FROM A", "tsql": "SELECT DISTINCT TOP 3 * FROM A", }, ) self.validate_all( "SELECT TOP (3) * FROM A", write={ "spark": "SELECT * FROM A LIMIT 3", }, ) self.validate_identity( "CREATE TABLE schema.table AS SELECT a, id FROM (SELECT a, (SELECT id FROM tb ORDER BY t DESC LIMIT 1) as id FROM tbl) AS _subquery", "SELECT * INTO schema.table FROM (SELECT a AS a, id AS id FROM (SELECT a AS a, (SELECT TOP 1 id FROM tb ORDER BY t DESC) AS id FROM tbl) AS _subquery) AS temp", ) self.validate_identity("SELECT TOP 10 PERCENT") self.validate_identity("SELECT TOP 10 PERCENT WITH TIES") def 
test_format(self): self.validate_identity("SELECT FORMAT(foo, 'dddd', 'de-CH')") self.validate_identity("SELECT FORMAT(EndOfDayRate, 'N', 'en-us')") self.validate_identity("SELECT FORMAT('01-01-1991', 'd.mm.yyyy')") self.validate_identity("SELECT FORMAT(12345, '###.###.###')") self.validate_identity("SELECT FORMAT(1234567, 'f')") self.validate_all( "SELECT FORMAT(1000000.01,'###,###.###')", write={ "spark": "SELECT FORMAT_NUMBER(1000000.01, '###,###.###')", "tsql": "SELECT FORMAT(1000000.01, '###,###.###')", }, ) self.validate_all( "SELECT FORMAT(1234567, 'f')", write={ "spark": "SELECT FORMAT_NUMBER(1234567, 'f')", "tsql": "SELECT FORMAT(1234567, 'f')", }, ) self.validate_all( "SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')", write={ "spark": "SELECT DATE_FORMAT('01-01-1991', 'dd.mm.yyyy')", "tsql": "SELECT FORMAT('01-01-1991', 'dd.mm.yyyy')", }, ) self.validate_all( "SELECT FORMAT(date_col, 'dd.mm.yyyy')", write={ "spark": "SELECT DATE_FORMAT(date_col, 'dd.mm.yyyy')", "tsql": "SELECT FORMAT(date_col, 'dd.mm.yyyy')", }, ) self.validate_all( "SELECT FORMAT(date_col, 'm')", write={ "spark": "SELECT DATE_FORMAT(date_col, 'MMMM d')", "tsql": "SELECT FORMAT(date_col, 'MMMM d')", }, ) self.validate_all( "SELECT FORMAT(num_col, 'c')", write={ "spark": "SELECT FORMAT_NUMBER(num_col, 'c')", "tsql": "SELECT FORMAT(num_col, 'c')", }, ) def test_string(self): self.validate_all( "SELECT N'test'", write={"spark": "SELECT 'test'"}, ) self.validate_all( "SELECT n'test'", write={"spark": "SELECT 'test'"}, ) self.validate_all( "SELECT '''test'''", write={"spark": r"SELECT '\'test\''"}, ) def test_eomonth(self): self.validate_all( "EOMONTH(GETDATE())", read={ "spark": "LAST_DAY(CURRENT_TIMESTAMP())", }, write={ "bigquery": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS DATE))", "clickhouse": "LAST_DAY(CAST(CURRENT_TIMESTAMP() AS Nullable(DATE)))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE))", "mysql": "LAST_DAY(DATE(CURRENT_TIMESTAMP()))", "postgres": "CAST(DATE_TRUNC('MONTH', 
CAST(CURRENT_TIMESTAMP AS DATE)) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE))", "redshift": "LAST_DAY(CAST(GETDATE() AS DATE))", "snowflake": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "spark": "LAST_DAY(TO_DATE(CURRENT_TIMESTAMP()))", "tsql": "EOMONTH(CAST(GETDATE() AS DATE))", }, ) self.validate_all( "EOMONTH(GETDATE(), -1)", write={ "bigquery": "LAST_DAY(DATE_ADD(CAST(CURRENT_TIMESTAMP() AS DATE), INTERVAL -1 MONTH))", "clickhouse": "LAST_DAY(DATE_ADD(MONTH, -1, CAST(CURRENT_TIMESTAMP() AS Nullable(DATE))))", "duckdb": "LAST_DAY(CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL (-1) MONTH)", "mysql": "LAST_DAY(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MONTH))", "postgres": "CAST(DATE_TRUNC('MONTH', CAST(CURRENT_TIMESTAMP AS DATE) + INTERVAL '-1 MONTH') + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)", "presto": "LAST_DAY_OF_MONTH(DATE_ADD('MONTH', -1, CAST(CAST(CURRENT_TIMESTAMP AS TIMESTAMP) AS DATE)))", "redshift": "LAST_DAY(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", "snowflake": "LAST_DAY(DATEADD(MONTH, -1, TO_DATE(CURRENT_TIMESTAMP())))", "spark": "LAST_DAY(ADD_MONTHS(TO_DATE(CURRENT_TIMESTAMP()), -1))", "tsql": "EOMONTH(DATEADD(MONTH, -1, CAST(GETDATE() AS DATE)))", }, ) def test_identifier_prefixes(self): self.assertTrue( self.validate_identity("#x") .assert_is(exp.Column) .this.assert_is(exp.Identifier) .args.get("temporary") ) self.assertTrue( self.validate_identity("##x") .assert_is(exp.Column) .this.assert_is(exp.Identifier) .args.get("global_") ) self.validate_identity("@x").assert_is(exp.Parameter).this.assert_is(exp.Var) self.validate_identity("SELECT * FROM @x").args["from_"].this.assert_is( exp.Table ).this.assert_is(exp.Parameter).this.assert_is(exp.Var) self.validate_all( "SELECT @x", write={ "databricks": "SELECT ${x}", "hive": "SELECT ${x}", "spark": "SELECT ${x}", "tsql": "SELECT @x", }, ) self.validate_all( "SELECT * FROM #mytemptable", write={ "duckdb": 
"SELECT * FROM mytemptable", "spark": "SELECT * FROM mytemptable", "tsql": "SELECT * FROM #mytemptable", }, ) self.validate_all( "SELECT * FROM ##mytemptable", write={ "duckdb": "SELECT * FROM mytemptable", "spark": "SELECT * FROM mytemptable", "tsql": "SELECT * FROM ##mytemptable", }, ) def test_temporal_table(self): self.validate_identity( """CREATE TABLE test ("data" CHAR(7), "valid_from" DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, "valid_to" DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ("valid_from", "valid_to")) WITH(SYSTEM_VERSIONING=ON)""", "CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON)", ) self.validate_identity( """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START HIDDEN NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END HIDDEN NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))""" ) self.validate_identity( """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=ON))""" ) self.validate_identity( """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history], DATA_CONSISTENCY_CHECK=OFF))""" ) self.validate_identity( """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT 
NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))""" ) self.validate_identity( """CREATE TABLE test ([data] CHAR(7), [valid_from] DATETIME2(2) GENERATED ALWAYS AS ROW START NOT NULL, [valid_to] DATETIME2(2) GENERATED ALWAYS AS ROW END NOT NULL, PERIOD FOR SYSTEM_TIME ([valid_from], [valid_to])) WITH(SYSTEM_VERSIONING=ON(HISTORY_TABLE=[dbo].[benchmark_history]))""" ) def test_system_time(self): self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo'") self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME AS OF 'foo' AS alias") self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME FROM c TO d") self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME BETWEEN c AND d") self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME CONTAINED IN (c, d)") self.validate_identity("SELECT [x] FROM [a].[b] FOR SYSTEM_TIME ALL AS alias") def test_current_user(self): self.validate_all( "SUSER_NAME()", write={"spark": "CURRENT_USER()"}, ) self.validate_all( "SUSER_SNAME()", write={"spark": "CURRENT_USER()"}, ) self.validate_all( "SYSTEM_USER()", write={"spark": "CURRENT_USER()"}, ) self.validate_all( "SYSTEM_USER", write={"spark": "CURRENT_USER()"}, ) def test_hints(self): self.validate_all( "SELECT x FROM a INNER HASH JOIN b ON b.id = a.id", write={"spark": "SELECT x FROM a INNER JOIN b ON b.id = a.id"}, ) self.validate_all( "SELECT x FROM a INNER LOOP JOIN b ON b.id = a.id", write={"spark": "SELECT x FROM a INNER JOIN b ON b.id = a.id"}, ) self.validate_all( "SELECT x FROM a INNER REMOTE JOIN b ON b.id = a.id", write={"spark": "SELECT x FROM a INNER JOIN b ON b.id = a.id"}, ) self.validate_all( "SELECT x FROM a INNER MERGE JOIN b ON b.id = a.id", write={"spark": "SELECT x FROM a INNER JOIN b ON b.id = a.id"}, ) self.validate_all( "SELECT x FROM a WITH (NOLOCK)", write={ "spark": "SELECT x FROM 
a", "tsql": "SELECT x FROM a WITH (NOLOCK)", "": "SELECT x FROM a WITH (NOLOCK)", }, ) self.validate_identity("SELECT x FROM a INNER LOOP JOIN b ON b.id = a.id") def test_openjson(self): self.validate_identity("SELECT * FROM OPENJSON(@json)") self.validate_all( """SELECT [key], value FROM OPENJSON(@json,'$.path.to."sub-object"')""", write={ "tsql": """SELECT [key], value FROM OPENJSON(@json, '$.path.to."sub-object"')""", }, ) self.validate_all( "SELECT * FROM OPENJSON(@array) WITH (month VARCHAR(3), temp int, month_id tinyint '$.sql:identity()') as months", write={ "tsql": "SELECT * FROM OPENJSON(@array) WITH (month VARCHAR(3), temp INTEGER, month_id TINYINT '$.sql:identity()') AS months", }, ) self.validate_all( """ SELECT * FROM OPENJSON ( @json ) WITH ( Number VARCHAR(200) '$.Order.Number', Date DATETIME '$.Order.Date', Customer VARCHAR(200) '$.AccountNumber', Quantity INT '$.Item.Quantity', [Order] NVARCHAR(MAX) AS JSON ) """, write={ "tsql": """SELECT * FROM OPENJSON(@json) WITH ( Number VARCHAR(200) '$.Order.Number', Date DATETIME '$.Order.Date', Customer VARCHAR(200) '$.AccountNumber', Quantity INTEGER '$.Item.Quantity', [Order] NVARCHAR(MAX) AS JSON )""" }, pretty=True, ) def test_set(self): self.validate_all( "SET KEY VALUE", write={ "tsql": "SET KEY VALUE", "duckdb": "SET KEY = VALUE", "spark": "SET KEY = VALUE", }, ) self.validate_all( "SET @count = (SELECT COUNT(1) FROM x)", write={ "databricks": "SET count = (SELECT COUNT(1) FROM x)", "tsql": "SET @count = (SELECT COUNT(1) FROM x)", "spark": "SET count = (SELECT COUNT(1) FROM x)", }, ) def test_qualify_derived_table_outputs(self): self.validate_identity( "WITH t AS (SELECT 1) SELECT * FROM t", "WITH t AS (SELECT 1 AS [1]) SELECT * FROM t", ) self.validate_identity( 'WITH t AS (SELECT "c") SELECT * FROM t', "WITH t AS (SELECT [c] AS [c]) SELECT * FROM t", ) self.validate_identity( "SELECT * FROM (SELECT 1) AS subq", "SELECT * FROM (SELECT 1 AS [1]) AS subq", ) self.validate_identity( 'SELECT * FROM 
(SELECT "c") AS subq', "SELECT * FROM (SELECT [c] AS [c]) AS subq", ) self.validate_all( "WITH t1(c) AS (SELECT 1), t2 AS (SELECT CAST(c AS INTEGER) AS c FROM t1) SELECT * FROM t2", read={ "duckdb": "WITH t1(c) AS (SELECT 1), t2 AS (SELECT CAST(c AS INTEGER) FROM t1) SELECT * FROM t2", }, ) def test_declare(self): # supported cases self.validate_identity("DECLARE @X INT", "DECLARE @X INTEGER") self.validate_identity("DECLARE @X INT = 1", "DECLARE @X INTEGER = 1") self.validate_identity( "DECLARE @X INT, @Y VARCHAR(10)", "DECLARE @X INTEGER, @Y VARCHAR(10)" ) self.validate_identity( "declare @X int = (select col from table where id = 1)", "DECLARE @X INTEGER = (SELECT col FROM table WHERE id = 1)", ) self.validate_identity( "declare @X TABLE (Id INT NOT NULL, Name VARCHAR(100) NOT NULL)", "DECLARE @X TABLE (Id INTEGER NOT NULL, Name VARCHAR(100) NOT NULL)", ) self.validate_identity( "declare @X TABLE (Id INT NOT NULL, constraint PK_Id primary key (Id))", "DECLARE @X TABLE (Id INTEGER NOT NULL, CONSTRAINT PK_Id PRIMARY KEY (Id))", ) self.validate_identity( "declare @X UserDefinedTableType", "DECLARE @X UserDefinedTableType", ) self.validate_identity( "DECLARE @MyTableVar TABLE (EmpID INT NOT NULL, PRIMARY KEY CLUSTERED (EmpID), UNIQUE NONCLUSTERED (EmpID), INDEX CustomNonClusteredIndex NONCLUSTERED (EmpID))", check_command_warning=True, ) self.validate_identity( "DECLARE vendor_cursor CURSOR FOR SELECT VendorID, Name FROM Purchasing.Vendor WHERE PreferredVendorStatus = 1 ORDER BY VendorID", check_command_warning=True, ) def test_scope_resolution_op(self): # we still want to support :: casting shorthand for tsql self.validate_identity("x::int", "CAST(x AS INTEGER)") self.validate_identity("x::varchar", "CAST(x AS VARCHAR)") self.validate_identity("x::varchar(MAX)", "CAST(x AS VARCHAR(MAX))") for lhs, rhs in ( ("", "FOO(a, b)"), ("bar", "baZ(1, 2)"), ("LOGIN", "EricKurjan"), ("GEOGRAPHY", "Point(latitude, longitude, 4326)"), ( "GEOGRAPHY", 
"STGeomFromText('POLYGON((-122.358 47.653 , -122.348 47.649, -122.348 47.658, -122.358 47.658, -122.358 47.653))', 4326)", ), ): with self.subTest(f"Scope resolution, LHS: {lhs}, RHS: {rhs}"): expr = self.validate_identity(f"{lhs}::{rhs}") base_sql = expr.sql() self.assertEqual(base_sql, f"SCOPE_RESOLUTION({lhs + ', ' if lhs else ''}{rhs})") self.assertEqual(parse_one(base_sql).sql("tsql"), f"{lhs}::{rhs}") def test_count(self): count = annotate_types(self.validate_identity("SELECT COUNT(1) FROM x")) self.assertEqual(count.expressions[0].type.this, exp.DataType.Type.INT) count_big = annotate_types(self.validate_identity("SELECT COUNT_BIG(1) FROM x")) self.assertEqual(count_big.expressions[0].type.this, exp.DataType.Type.BIGINT) self.validate_all( "SELECT COUNT_BIG(1) FROM x", read={ "duckdb": "SELECT COUNT(1) FROM x", "spark": "SELECT COUNT(1) FROM x", }, write={ "duckdb": "SELECT COUNT(1) FROM x", "spark": "SELECT COUNT(1) FROM x", "tsql": "SELECT COUNT_BIG(1) FROM x", }, ) self.validate_all( "SELECT COUNT(1) FROM x", write={ "duckdb": "SELECT COUNT(1) FROM x", "spark": "SELECT COUNT(1) FROM x", "tsql": "SELECT COUNT(1) FROM x", }, ) def test_grant(self): self.validate_identity("GRANT EXECUTE ON TestProc TO User2") self.validate_identity("GRANT EXECUTE ON TestProc TO TesterRole WITH GRANT OPTION") self.validate_identity( "GRANT EXECUTE ON TestProc TO User2 AS TesterRole", check_command_warning=True ) def test_revoke(self): self.validate_identity("REVOKE EXECUTE ON TestProc FROM User2") self.validate_identity("REVOKE EXECUTE ON TestProc FROM TesterRole") def test_parsename(self): for i in range(4): with self.subTest("Testing PARSENAME <-> SPLIT_PART"): self.validate_all( f"SELECT PARSENAME('1.2.3', {i})", read={ "spark": f"SELECT SPLIT_PART('1.2.3', '.', {4 - i})", "databricks": f"SELECT SPLIT_PART('1.2.3', '.', {4 - i})", }, write={ "spark": f"SELECT SPLIT_PART('1.2.3', '.', {4 - i})", "databricks": f"SELECT SPLIT_PART('1.2.3', '.', {4 - i})", "tsql": f"SELECT 
PARSENAME('1.2.3', {i})", }, ) # Test non-dot delimiter self.validate_all( "SELECT SPLIT_PART('1,2,3', ',', 1)", write={ "spark": "SELECT SPLIT_PART('1,2,3', ',', 1)", "databricks": "SELECT SPLIT_PART('1,2,3', ',', 1)", "tsql": UnsupportedError, }, ) # Test column-type parameters self.validate_all( "WITH t AS (SELECT 'a.b.c' AS value, 1 AS idx) SELECT SPLIT_PART(value, '.', idx) FROM t", write={ "spark": "WITH t AS (SELECT 'a.b.c' AS value, 1 AS idx) SELECT SPLIT_PART(value, '.', idx) FROM t", "databricks": "WITH t AS (SELECT 'a.b.c' AS value, 1 AS idx) SELECT SPLIT_PART(value, '.', idx) FROM t", "tsql": UnsupportedError, }, ) def test_next_value_for(self): self.validate_identity( "SELECT NEXT VALUE FOR db.schema.sequence_name OVER (ORDER BY foo), col" ) self.validate_all( "SELECT NEXT VALUE FOR db.schema.sequence_name", read={ "oracle": "SELECT NEXT VALUE FOR db.schema.sequence_name", "tsql": "SELECT NEXT VALUE FOR db.schema.sequence_name", }, write={ "oracle": "SELECT NEXT VALUE FOR db.schema.sequence_name", }, ) # string literals in the DATETRUNC are casted as DATETIME2 def test_datetrunc(self): self.validate_all( "SELECT DATETRUNC(month, 'foo')", write={ "duckdb": "SELECT DATE_TRUNC('MONTH', CAST('foo' AS TIMESTAMP))", "tsql": "SELECT DATETRUNC(MONTH, CAST('foo' AS DATETIME2))", }, ) self.validate_all( "SELECT DATETRUNC(month, foo)", write={ "duckdb": "SELECT DATE_TRUNC('MONTH', foo)", "tsql": "SELECT DATETRUNC(MONTH, foo)", }, ) self.validate_all( "SELECT DATETRUNC(year, CAST('foo1' AS date))", write={ "duckdb": "SELECT DATE_TRUNC('YEAR', CAST('foo1' AS DATE))", "tsql": "SELECT DATETRUNC(YEAR, CAST('foo1' AS DATE))", }, ) def test_numeric_trunc(self): # T-SQL doesn't have native TRUNC - uses ROUND with third parameter = 1 # Cross-dialect transpilation: other dialects' TRUNC -> T-SQL ROUND(x, n, 1) self.validate_all( "ROUND(3.14159, 2, 1)", read={ "oracle": "TRUNC(3.14159, 2)", "postgres": "TRUNC(3.14159, 2)", "mysql": "TRUNCATE(3.14159, 2)", }, write={ "tsql": 
"ROUND(3.14159, 2, 1)", }, ) def test_collation_parse(self): self.validate_identity("ALTER TABLE a ALTER COLUMN b CHAR(10) COLLATE abc").assert_is( exp.Alter ).args.get("actions")[0].args.get("collate").this.assert_is(exp.Var) def test_odbc_date_literals(self): for value, cls in [ ("{d'2024-01-01'}", exp.Date), ("{t'12:00:00'}", exp.Time), ("{ts'2024-01-01 12:00:00'}", exp.Timestamp), ]: with self.subTest(f"Testing ODBC date literal: {value}"): sql = f"INSERT INTO tab(ds) VALUES ({value})" expr = self.parse_one(sql) self.assertIsInstance(expr, exp.Insert) self.assertIsInstance(expr.expression.expressions[0].expressions[0], cls) def test_create_trigger(self): self.validate_identity( "CREATE TRIGGER reminder ON customers AFTER INSERT AS BEGIN INSERT INTO audit_log (customer_id, action, created_at) SELECT id, 'INSERT', GETDATE() FROM inserted END", check_command_warning=True, ) self.validate_identity( "CREATE TRIGGER updview ON vw_employees INSTEAD OF UPDATE AS BEGIN UPDATE employees SET salary = inserted.salary FROM inserted WHERE employees.id = inserted.id END", check_command_warning=True, ) self.validate_identity( "CREATE TRIGGER ddl_trig ON DATABASE FOR CREATE_TABLE AS BEGIN INSERT INTO schema_changes (event_type, event_time, login_name) VALUES ('CREATE_TABLE', GETDATE(), SYSTEM_USER) END", check_command_warning=True, ) def test_procedures(self): self.validate_identity("SELECT 1; SELECT 2").assert_is(exp.Block) sqls = [ "EXECUTE test @in1 = 100, @in2", "EXECUTE sp_executesql @payload, @param_str, @param1 = value1, @param2 = value2", "EXECUTE sp_executesql @stmt = @payload, @params = param_str, @param1 = value1, @param2 = value2", """ CREATE PROCEDURE test1 AS BEGIN SELECT 1; SELECT 2; SELECT 3; END """, """ CREATE PROCEDURE test2(@in1 INTEGER, @c CHAR(1)) AS BEGIN IF @in1 > 1 AND @c = 'c' BEGIN SELECT col1 FROM t WHERE t.col2 = @in1; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER) AS BEGIN SELECT 1; IF @in1 > 1 BEGIN SELECT 1; SELECT 2; END; ELSE BEGIN 
SELECT 3; SELECT 4; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER) AS BEGIN IF @in1 > 1 BEGIN SELECT col1 FROM t WHERE t.col2 = @in1; SELECT 100; END; IF @in1 > 1 BEGIN SELECT col2 FROM t1; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER) AS BEGIN DECLARE @q1 INTEGER, @q2 INTEGER, @q3 INTEGER; SET @q1 = (SELECT MAX(col1) FROM t1); SET @q2 = (SELECT MIN(col1) FROM t2); IF @in1 > 1 BEGIN SELECT 3; SET @q3 = (SELECT MAX(col2) FROM t1); IF @q3 < 5 BEGIN SELECT 1; SELECT 2; END; END; IF @in1 > 1 BEGIN SELECT 1; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER) AS BEGIN SELECT 1; IF @in1 > 1 BEGIN SELECT 3; END; ELSE BEGIN SELECT 4; SELECT 5; IF @in1 < 0 BEGIN SELECT 1; END; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER, @c CHAR(1)) AS BEGIN WHILE @in1 > 100 BEGIN SELECT col1 FROM t WHERE t.col2 = @in1 AND t.col3 = @c; SET @in1 = @in1 - 1; END; END """, """ CREATE PROCEDURE test(@in1 INTEGER) AS BEGIN DECLARE @temp INTEGER; WHILE @in1 > 100 BEGIN SET @temp = (SELECT MAX(col1) FROM t WHERE t.col2 = @in1); SET @in1 = @in1 - @temp; END; SET @in1 = 50; WHILE @in1 > 5 BEGIN SELECT col2 FROM t1 WHERE t1.col3 = @in1; SET @in1 = @in1 - 1; END; END """, """ CREATE PROCEDURE dbo.test(@in1 INTEGER = 5, @in2 VARCHAR(40) = 'empty', @in3 INTEGER = 1) AS BEGIN INSERT INTO t (id, col1, col2) VALUES (@in1, @in2, @in3); END; CREATE PROCEDURE c.s.test2 AS BEGIN EXECUTE dbo.test; DECLARE @i INTEGER = 0; WHILE @i < 100 BEGIN EXECUTE test @in2 = 'temp_new'; SET @i = @i + 100; END; END """, """ CREATE PROCEDURE DropTableIfExists @TableName NVARCHAR(128) AS BEGIN DECLARE @SQL NVARCHAR(MAX); SET @SQL = N'DROP TABLE IF EXISTS [' + @TableName + ']'; EXECUTE sp_executesql 'SELECT 1 AS c'; EXECUTE sp_executesql N'SELECT 1 AS c'; EXECUTE sp_executesql @SQL; EXECUTE sp_executesql @stmt = @SQL; END """, """ CREATE PROCEDURE test AS BEGIN DECLARE @x INTEGER = 100; IF @x > ANY (SELECT 100) BEGIN SET @x = 100; END; ELSE BEGIN SET @x = 0; END; END """, ] for sql in sqls: ast = 
parse_one(sql, read="tsql") expected_sql = " ".join(line for line in (l.strip() for l in sql.splitlines()) if line) roundtripped_sql = ast.sql("tsql") with self.subTest(f"Testing: {sql}"): self.assertEqual(expected_sql, roundtripped_sql) self.validate_identity( "EXEC sp_executesql @payload", "EXECUTE sp_executesql @payload" ).assert_is(exp.ExecuteSql) sql = """ CREATE procedure [TRANSF].[SP_Merge_Sales_Real] @Loadid INTEGER ,@NumberOfRows INTEGER WITH EXECUTE AS OWNER, SCHEMABINDING, NATIVE_COMPILATION AS BEGIN SET XACT_ABORT ON; DECLARE @DWH_DateCreated AS DATETIME = CONVERT(DATETIME, getdate(), 104); DECLARE @DWH_DateModified DATETIME2 = CONVERT(DATETIME2, GETDATE(), 104); DECLARE @DWH_IdUserCreated INTEGER = SUSER_ID (CURRENT_USER()); DECLARE @DWH_IdUserModified INTEGER = SUSER_ID (SYSTEM_USER); DECLARE @SalesAmountBefore float; SELECT @SalesAmountBefore=SUM(SalesAmount) FROM TRANSF.[Pre_Merge_Sales_Real] S; END """ expected_sqls = [ "CREATE PROCEDURE [TRANSF].[SP_Merge_Sales_Real] @Loadid INTEGER, @NumberOfRows INTEGER WITH EXECUTE AS OWNER, SCHEMABINDING, NATIVE_COMPILATION AS BEGIN SET XACT_ABORT ON", "DECLARE @DWH_DateCreated DATETIME = CONVERT(DATETIME, GETDATE(), 104)", "DECLARE @DWH_DateModified DATETIME2 = CONVERT(DATETIME2, GETDATE(), 104)", "DECLARE @DWH_IdUserCreated INTEGER = SUSER_ID(CURRENT_USER())", "DECLARE @DWH_IdUserModified INTEGER = SUSER_ID(CURRENT_USER())", "DECLARE @SalesAmountBefore FLOAT", "SELECT @SalesAmountBefore = SUM(SalesAmount) FROM TRANSF.[Pre_Merge_Sales_Real] AS S", "END", ] for expr, expected_sql in zip(parse_one(sql, read="tsql").expressions, expected_sqls): self.assertEqual(expr.sql(dialect="tsql"), expected_sql) sql = """ CREATE PROC [dbo].[transform_proc] AS DECLARE @CurrentDate VARCHAR(20); SET @CurrentDate = CONVERT(VARCHAR(20), GETDATE(), 120); CREATE TABLE [target_schema].[target_table] (a INTEGER) WITH (DISTRIBUTION = REPLICATE, HEAP); """ expected_sqls = [ "CREATE PROC [dbo].[transform_proc] AS DECLARE @CurrentDate 
VARCHAR(20)", "SET @CurrentDate = CONVERT(VARCHAR(20), GETDATE(), 120)", "CREATE TABLE [target_schema].[target_table] (a INTEGER) WITH (DISTRIBUTION=REPLICATE, HEAP)", ] for expr, expected_sql in zip(parse_one(sql, read="tsql").expressions, expected_sqls): self.assertEqual(expr.sql(dialect="tsql"), expected_sql) self.validate_identity( "IF ((@x = @y AND GETDATE() = GETDATE()) OR (GETDATE() = @t)) BEGIN SET @query_result = (SELECT MAX(id) + 1 FROM t); END", "IF (@x = @y AND GETDATE() = GETDATE()) OR (GETDATE() = @t) BEGIN SET @query_result = (SELECT MAX(id) + 1 FROM t); END", ) ================================================ FILE: tests/fixtures/identity.sql ================================================ SUM(1) SUM(CASE WHEN x > 1 THEN 1 ELSE 0 END) / y 1 (1) 1. (1.) 1.0 (1.0) 1E2 1E+2 1E-2 1.1E10 1.12e-10 -11.023E7 * 3 0.2 (1 * 2) / (3 - 5) ((TRUE)) '' '''' 'x' '\x' "x" '\z' '\\z' '\\\z' '\\\\z' '\\\\\z' '\\\\\\z' '\n' '\\n' '\\\n' '\\\\n' '\\\\\n' '\\\\\\n' "" """x""" N'abc' x x % 1 x < 1 x <= 1 x > 1 x >= 1 x <> 1 x = y OR x > 1 x & 1 x | 1 x ^ 1 ~x x << 1 x >> 1 x >> 1 | 1 & 1 ^ 1 x || y x[:] x[1:] x[:2] x[1:2] x[-4:-1] 1 - -1 - -5 dec.x + y a.filter a.b.c a.b.c.d a.b.c.d.e a.b.c.d.e[0] a.b.c.d.e[0].f a[0][0].b.c[1].d.e.f[1][1] a[0].b[1] a[0].b.c['d'] a.b.C() a['x'].b.C() a.B() a['x'].C() int.x map.x SELECT update SELECT x.update SELECT call.x SELECT end a.b.INT(1.234) INT(x / 100) time * 100 int * 100 dec + 1 x IN (-1, 1) x IN ('a', 'a''a') x IN ((1)) x BETWEEN -1 AND 1 x BETWEEN 'a' || b AND 'c' || d ((a, b) AS c) NOT x IS NULL x IS TRUE x IS FALSE x IS TRUE IS TRUE x LIKE y IS TRUE TRIM('a' || 'b') MAP() GREATEST(x) LEAST(y) MAX(a, b) MIN(a, b) time zone ARRAY CURRENT_DATE CURRENT_DATE('UTC') CURRENT_DATE AT TIME ZONE 'UTC' CURRENT_DATE AT TIME ZONE zone_column CURRENT_DATE AT TIME ZONE 'UTC' AT TIME ZONE 'Asia/Tokio' f1 AT TIME ZONE INTERVAL '-10:00' AS f1 ARRAY() ARRAY(1, 2) ARRAY(time, foo) ARRAY(foo, time) ARRAY(LENGTH(waiter_name) > 0) 
ARRAY_CONTAINS(x, 1) x.EXTRACT(1) EXTRACT(X FROM y) EXTRACT(DATE FROM y) EXTRACT(WEEK(monday) FROM created_at) CONCAT_WS('-', 'a', 'b') CONCAT_WS('-', 'a', 'b', 'c') POSEXPLODE("x") AS ("a", "b") POSEXPLODE("x") AS ("a", "b", "c") STR_POSITION(haystack, needle) STR_POSITION(haystack, needle, pos) LEVENSHTEIN('gumbo', 'gambol', 2, 1, 1) SPLIT(SPLIT(referrer, 'utm_source=')[OFFSET(1)], "&")[OFFSET(0)] x[ORDINAL(1)][SAFE_OFFSET(2)] x GLOB '??-*' x GLOB y ILIKE(x, 'z') x LIKE SUBSTRING('abc', 1, 1) x LIKE y x LIKE a.y x LIKE '%y%' x ILIKE '%y%' x LIKE '%y%' ESCAPE '\' x ILIKE '%y%' ESCAPE '\' 1 AS escape INTERVAL '1' DAY INTERVAL '1' MONTH INTERVAL '1' YEAR INTERVAL '1' HOUR TO SECOND INTERVAL '-1' CURRENT_DATE INTERVAL '-31' CAST(GETDATE() AS DATE) INTERVAL (1 + 3) DAYS INTERVAL '1' DAY * 5 5 * INTERVAL '1' DAY CASE WHEN TRUE THEN INTERVAL '15' DAYS END CASE WHEN TRUE THEN 1 ELSE interval END CASE WHEN TRUE THEN 1 ELSE "INTERVAL" END SELECT asof FROM x SELECT * WHERE interval IS NULL SELECT * WHERE NOT interval IS NULL SELECT * WHERE INTERVAL "is" > 1 SELECT * WHERE INTERVAL x.is > 1 CAST('45' AS INTERVAL DAYS) CAST(x AS UUID) FILTER(a, x -> x.a.b.c.d.e.f.g) FILTER(a, x -> FOO(x.a.b.c.d.e.f.g) + x.a.b.c.d.e.f.g) TIMESTAMP_FROM_PARTS(2019, 1, 10, 2, 3, 4, 123456789, 'America/Los_Angeles') TIMESTAMPDIFF(CURRENT_TIMESTAMP(), 1, DAY) TIME_FROM_PARTS(14, 30, 45, 123456789) DATETIME_DIFF(CURRENT_DATE, 1, DAY) QUANTILE(x, 0.5) REGEXP_REPLACE('new york', '(\w)(\w*)', x -> UPPER(x[1]) || LOWER(x[2])) REGEXP_LIKE('new york', '.') REGEXP_SPLIT('new york', '.') SPLIT('new york', '.') X((y AS z)).1 X(a.b = 1) (x AS y, y AS z) REPLACE('new york', ' ', '_') REPLACE('new york', ' ') DATE(x) = DATE(y) TIMESTAMP(DATE(x)) TIMESTAMP_TRUNC(COALESCE(time_field, CURRENT_TIMESTAMP()), DAY) MONTHNAME(x) MONTHS_BETWEEN(CAST('2019-03-15' AS DATE), CAST('2019-02-15' AS DATE)) COUNT(DISTINCT CASE WHEN DATE_TRUNC('ISOWEEK', DATE(time_field)) = DATE_TRUNC('ISOWEEK', DATE(time_field2)) THEN 
report_id ELSE NULL END) COUNT(a, b) x[y - 1] CASE WHEN SUM(x) > 3 THEN 1 END OVER (PARTITION BY x) ANY(x) OVER (PARTITION BY x) SUM(ROW() OVER (PARTITION BY x)) SUM(ROW() OVER (PARTITION BY x + 1)) SUM(ROW() OVER (PARTITION BY x AND y)) SUM(x) OVER (w ORDER BY y) (ROW() OVER ()) CASE WHEN (x > 1) THEN 1 ELSE 0 END CASE (1) WHEN 1 THEN 1 ELSE 0 END CASE 1 WHEN 1 THEN 1 ELSE 0 END CASE 1 WHEN 1 THEN timestamp ELSE date END x AT TIME ZONE 'UTC' CAST('2025-11-20 00:00:00+00' AS TIMESTAMP) AT TIME ZONE 'Africa/Cairo' SET x = 1 SET x = ';' SET variable = value SET GLOBAL variable = value SET LOCAL variable = value @x @"x" COMMIT USE db USE ROLE x USE WAREHOUSE x USE DATABASE x USE SCHEMA x.y USE CATALOG abc NOT 1 NOT NOT 1 SELECT * FROM test SELECT * FROM db.FOO() SELECT *, 1 FROM test SELECT * FROM a.b SELECT * FROM a.b.c SELECT * FROM table SELECT 1 SELECT 1 FROM test SELECT * FROM a, b, (SELECT 1) AS c SELECT a FROM test SELECT 1 AS filter SELECT 1 AS "quoted alias" SELECT SUM(x) AS filter SELECT 1 AS range FROM test SELECT 1 AS count FROM test SELECT 1 AS comment FROM test SELECT 1 AS numeric FROM test SELECT 1 AS number FROM test SELECT COALESCE(offset, 1) SELECT t.count SELECT DISTINCT x FROM test SELECT DISTINCT x, y FROM test SELECT DISTINCT TIMESTAMP_TRUNC(time_field, MONTH) AS time_value FROM "table" SELECT DISTINCT ON (x) x, y FROM z SELECT DISTINCT ON (x, y + 1) * FROM z SELECT DISTINCT ON (x.y) * FROM z SELECT DISTINCT FROM_SOMETHING SELECT top.x SELECT TIMESTAMP(DATE_TRUNC('MONTH', DATE(time_field))) AS time_value FROM "table" SELECT GREATEST((3 + 1), LEAST(3, 4)) SELECT TRANSFORM(a, b -> b) AS x SELECT AGGREGATE(a, (a, b) -> a + b) AS x SELECT COUNT(DISTINCT a, b) SELECT COUNT(DISTINCT a, b + 1) SELECT SUM(DISTINCT x) SELECT TRUNC(a, b) SELECT ARRAY_AGG(STRUCT(x, x AS y) ORDER BY z DESC) AS x SELECT LAG(x) OVER (ORDER BY y) AS x SELECT LEAD(a) OVER (ORDER BY b) AS a SELECT LEAD(a, 1) OVER (PARTITION BY a ORDER BY a) AS x SELECT LEAD(a, 1, b) OVER 
(PARTITION BY a ORDER BY a) AS x SELECT X((a, b) -> a + b, z -> z) AS x SELECT X(a -> a + ("z" - 1)) SELECT test.* FROM test SELECT a AS b FROM test SELECT "a"."b" FROM "a" SELECT "a".b FROM a SELECT a.b FROM "a" SELECT a.b FROM a SELECT '"hi' AS x FROM x SELECT 1 AS "|sum" FROM x SELECT '\"hi' AS x FROM x SELECT 1 AS b FROM test SELECT 1 AS "b" FROM test SELECT 1 + 1 FROM test SELECT 1 - 1 FROM test SELECT 1 * 1 FROM test SELECT 1 % 1 FROM test SELECT 1 / 1 FROM test SELECT 1 < 2 FROM test SELECT 1 <= 2 FROM test SELECT 1 > 2 FROM test SELECT 1 >= 2 FROM test SELECT 1 <> 2 FROM test SELECT JSON_EXTRACT(x, '$.name') SELECT JSON_EXTRACT_SCALAR(x, '$.name') SELECT x LIKE '%x%' FROM test SELECT * FROM test LIMIT 100 SELECT * FROM test LIMIT 1 + 1 SELECT * FROM test LIMIT 100 OFFSET 200 SELECT * FROM test LIMIT (SELECT 1) SELECT * FROM test LIMIT (SELECT 1) OFFSET (SELECT 1) SELECT * FROM test FETCH FIRST ROWS ONLY SELECT * FROM test FETCH FIRST 1 ROWS ONLY SELECT * FROM test ORDER BY id DESC FETCH FIRST 10 ROWS WITH TIES SELECT * FROM test ORDER BY id DESC FETCH FIRST 10 PERCENT ROWS WITH TIES SELECT * FROM test ORDER BY always DESC SELECT * FROM test FETCH NEXT 1 ROWS ONLY SELECT (1 > 2) AS x FROM test SELECT NOT (1 > 2) FROM test SELECT 1 + 2 AS x FROM test SELECT a, b, 1 < 1 FROM test SELECT a FROM test WHERE NOT FALSE SELECT a FROM test WHERE a = 1 SELECT a FROM test WHERE a = 1 AND b = 2 SELECT a FROM test WHERE a IN (SELECT b FROM z) SELECT a FROM test WHERE a IN ((SELECT 1), 2) SELECT * FROM x WHERE y IN ((SELECT 1) EXCEPT (SELECT 2)) SELECT * FROM x WHERE y IN (SELECT 1 UNION SELECT 2) SELECT * FROM x WHERE y IN ((SELECT 1 UNION SELECT 2)) SELECT * FROM x WHERE y IN (WITH z AS (SELECT 1) SELECT * FROM z) SELECT a FROM test WHERE (a > 1) SELECT a FROM test WHERE a > (SELECT 1 FROM x GROUP BY y) SELECT a FROM test WHERE EXISTS(SELECT 1) SELECT a FROM test WHERE EXISTS(SELECT * FROM x UNION SELECT * FROM Y) OR TRUE SELECT a FROM test WHERE TRUE OR NOT 
EXISTS(SELECT * FROM x) SELECT a AS any, b AS some, c AS all, d AS exists FROM test WHERE a = ANY (SELECT 1) SELECT a FROM test WHERE a > ALL (SELECT 1) SELECT a FROM test WHERE (a, b) IN (SELECT 1, 2) SELECT X((SELECT 1) UNION (SELECT 2)) SELECT a FROM test ORDER BY a SELECT a FROM test ORDER BY a, b SELECT x FROM tests ORDER BY a DESC, b DESC, c SELECT a FROM test ORDER BY a > 1 SELECT * FROM test ORDER BY DATE DESC, TIMESTAMP DESC SELECT a, b FROM test GROUP BY 1 SELECT a, b FROM test GROUP BY a SELECT a, b FROM test WHERE a = 1 GROUP BY a HAVING a = 2 SELECT a, b FROM test WHERE a = 1 GROUP BY a HAVING a = 2 ORDER BY a SELECT a, b FROM test WHERE a = 1 GROUP BY CASE 1 WHEN 1 THEN 1 END SELECT a FROM test GROUP BY GROUPING SETS (()) SELECT a FROM test GROUP BY GROUPING SETS (x, ()) SELECT a FROM test GROUP BY GROUPING SETS (x, (x, y), (x, y, z), q) SELECT a FROM test GROUP BY CUBE (x) SELECT a FROM test GROUP BY ROLLUP (x) SELECT t.a FROM test AS t GROUP BY ROLLUP (t.x) SELECT a FROM test GROUP BY GROUPING SETS ((x, y)), ROLLUP (b) SELECT a FROM test GROUP BY CUBE (x), ROLLUP (x, y, z) SELECT CASE WHEN a < b THEN 1 WHEN a < c THEN 2 ELSE 3 END FROM test SELECT CASE 1 WHEN 1 THEN 1 ELSE 2 END SELECT CASE 1 WHEN 1 THEN MAP('a', 'b') ELSE MAP('b', 'c') END['a'] SELECT CASE 1 + 2 WHEN 1 THEN 1 ELSE 2 END SELECT CASE TEST(1) + x[0] WHEN 1 THEN 1 ELSE 2 END SELECT CASE x[0] WHEN 1 THEN 1 ELSE 2 END SELECT CASE a.b WHEN 1 THEN 1 ELSE 2 END SELECT CASE CASE x > 1 WHEN TRUE THEN 1 END WHEN 1 THEN 1 ELSE 2 END SELECT a FROM (SELECT a FROM test) AS x SELECT a FROM (SELECT a FROM (SELECT a FROM test) AS y) AS x SELECT a FROM test WHERE a IN (1, 2, 3) OR b BETWEEN 1 AND 4 SELECT a FROM test AS x TABLESAMPLE (BUCKET 1 OUT OF 5) SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5) SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5 ON x) SELECT a FROM test TABLESAMPLE (BUCKET 1 OUT OF 5 ON RAND()) SELECT a FROM test TABLESAMPLE (0.1 PERCENT) SELECT a FROM test TABLESAMPLE (100 
ROWS) SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) SELECT 1 FROM a.b.table1 AS t UNPIVOT((c3) FOR c4 IN (a, b)) SELECT a FROM test PIVOT(SOMEAGG(x, y, z) FOR q IN (1)) SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) PIVOT(MAX(b) FOR c IN ('d')) SELECT a FROM (SELECT a, b FROM test) PIVOT(SUM(x) FOR y IN ('z', 'q')) SELECT a FROM test UNPIVOT(x FOR y IN (z, q)) AS x SELECT a FROM test PIVOT(SUM(x) FOR y IN ('z', 'q')) UNPIVOT(x FOR y IN (z, q)) AS x SELECT ABS(a) FROM test SELECT AVG(a) FROM test SELECT CEIL(a) FROM test SELECT CEIL(a, b) FROM test SELECT COUNT(a) FROM test SELECT COUNT(1) FROM test SELECT COUNT(*) FROM test SELECT COUNT() FROM test SELECT COUNT(DISTINCT a) FROM test SELECT EXP(a) FROM test SELECT FLOOR(a) FROM test SELECT FLOOR(a, b) FROM test SELECT FIRST_VALUE(a) FROM test SELECT GREATEST(a, b, c) FROM test SELECT LAST_VALUE(a) FROM test SELECT LAST_VALUE(a) IGNORE NULLS OVER () + 1 SELECT LN(a) FROM test SELECT MAX(a) FROM test SELECT MIN(a) FROM test SELECT POWER(a, 2) FROM test SELECT QUANTILE(a, 0.95) FROM test SELECT ROUND(a) FROM test SELECT ROUND(a, 2) FROM test SELECT SUM(a) FROM test SELECT SQRT(a) FROM test SELECT STDDEV(a) FROM test SELECT STDDEV_POP(a) FROM test SELECT STDDEV_SAMP(a) FROM test SELECT VARIANCE(a) FROM test SELECT VARIANCE_POP(a) FROM test SELECT CAST(a AS INT) FROM test SELECT CAST(a AS DATETIME) FROM test SELECT CAST(a AS VARCHAR) FROM test SELECT CAST(a < 1 AS INT) FROM test SELECT CAST(a IS NULL AS INT) FROM test SELECT COUNT(CAST(1 < 2 AS INT)) FROM test SELECT COUNT(CASE WHEN CAST(1 < 2 AS BOOLEAN) THEN 1 END) FROM test SELECT CAST(a AS DECIMAL) FROM test SELECT CAST(a AS DECIMAL(1)) FROM test SELECT CAST(a AS DECIMAL(1, 2)) FROM test SELECT CAST(a AS MAP) FROM test SELECT CAST(a AS TIMESTAMP) FROM test SELECT CAST(a AS DATE) FROM test SELECT CAST(a AS ARRAY) FROM test SELECT CAST(a AS VARIANT) FROM test SELECT TRY_CAST(a AS INT) FROM test SELECT COALESCE(a, b, c) FROM test SELECT ANY_VALUE(a) FROM 
test SELECT 1 FROM a JOIN b ON a.x = b.x SELECT 1 FROM a JOIN b AS c ON a.x = b.x SELECT 1 FROM a INNER JOIN b ON a.x = b.x SELECT 1 FROM a LEFT JOIN b ON a.x = b.x SELECT 1 FROM a RIGHT JOIN b ON a.x = b.x SELECT 1 FROM a CROSS JOIN b ON a.x = b.x SELECT 1 FROM a SEMI JOIN b ON a.x = b.x SELECT 1 FROM a LEFT SEMI JOIN b ON a.x = b.x SELECT 1 FROM a LEFT ANTI JOIN b ON a.x = b.x SELECT 1 FROM a RIGHT SEMI JOIN b ON a.x = b.x SELECT 1 FROM a RIGHT ANTI JOIN b ON a.x = b.x SELECT 1 FROM a JOIN b USING (x) SELECT 1 FROM a JOIN b USING (x, y, z) SELECT 1 FROM a JOIN (SELECT a FROM c) AS b ON a.x = b.x AND a.x < 2 SELECT 1 FROM a UNION SELECT 2 FROM b SELECT 1 FROM a UNION ALL SELECT 2 FROM b SELECT 1 FROM a JOIN b ON a.foo = b.bar JOIN c ON a.foo = c.bar SELECT 1 FROM a LEFT JOIN b ON a.foo = b.bar JOIN c ON a.foo = c.bar SELECT 1 FROM a LEFT INNER JOIN b ON a.foo = b.bar SELECT 1 FROM a LEFT OUTER JOIN b ON a.foo = b.bar SELECT 1 FROM a NATURAL JOIN b SELECT 1 FROM a NATURAL LEFT JOIN b SELECT 1 FROM a NATURAL LEFT OUTER JOIN b SELECT 1 FROM a OUTER JOIN b ON a.foo = b.bar SELECT 1 FROM a FULL JOIN b ON a.foo = b.bar SELECT 1 FROM a JOIN b JOIN c ON b.id = c.id ON a.id = b.id SELECT * FROM a JOIN b JOIN c USING (id) USING (id) SELECT 1 UNION ALL SELECT 2 SELECT 1 EXCEPT SELECT 2 SELECT 1 EXCEPT SELECT 2 SELECT 1 INTERSECT SELECT 2 SELECT 1 INTERSECT SELECT 2 SELECT 1 AS delete, 2 AS alter SELECT * FROM (x) SELECT * FROM ((x)) SELECT * FROM (((x))) SELECT * FROM ((SELECT 1)) SELECT * FROM (x CROSS JOIN foo LATERAL VIEW EXPLODE(y)) SELECT * FROM (SELECT 1) AS x SELECT * FROM (SELECT 1 UNION SELECT 2) AS x SELECT * FROM (SELECT 1 UNION ALL SELECT 2) AS x SELECT * FROM (SELECT 1 UNION ALL SELECT 2) SELECT * FROM ((SELECT 1) AS a UNION ALL (SELECT 2) AS b) SELECT * FROM ((SELECT 1) AS a(b)) SELECT * FROM ((SELECT 1) UNION (SELECT 2) UNION (SELECT 3)) SELECT * FROM (table1 AS t1 LEFT JOIN table2 AS t2 ON 1 = 1) SELECT * FROM (tbl1 LEFT JOIN tbl2 ON 1 = 1) SELECT * FROM 
(tbl1, tbl2 JOIN tbl3 ON TRUE) SELECT * FROM (tbl1 CROSS JOIN tbl2) SELECT * FROM (tbl1 CROSS JOIN tbl2) AS t SELECT * FROM (tbl AS tbl) AS t SELECT * FROM (tbl1 JOIN (tbl2 CROSS JOIN tbl3) ON bla = foo) SELECT * FROM (tbl1, LATERAL (SELECT * FROM bla) AS tbl) SELECT * FROM x AS y(a, b) SELECT * EXCEPT (a, b) SELECT * EXCEPT (a, b) FROM y SELECT * REPLACE (a AS b, b AS C) SELECT * REPLACE (a + 1 AS b, b AS C) SELECT * EXCEPT (a, b) REPLACE (a AS b, b AS C) SELECT * EXCEPT (a, b) REPLACE (a AS b, b AS C) FROM y SELECT a.* EXCEPT (a, b), b.* REPLACE (a AS b, b AS C) SELECT a.* EXCEPT (a, b), b.* REPLACE (a AS b, b AS C) FROM x SELECT A.* EXCEPT (A.COL_1) FROM TABLE_1 AS A SELECT zoo, animals FROM (VALUES ('oakland', ARRAY('a', 'b')), ('sf', ARRAY('b', 'c'))) AS t(zoo, animals) SELECT zoo, animals FROM UNNEST(ARRAY(STRUCT('oakland' AS zoo, ARRAY('a', 'b') AS animals), STRUCT('sf' AS zoo, ARRAY('b', 'c') AS animals))) AS t(zoo, animals) WITH a AS (SELECT 1) SELECT 1 UNION ALL SELECT 2 WITH a AS (SELECT 1) SELECT 1 UNION SELECT 2 WITH a AS (SELECT 1) SELECT 1 INTERSECT SELECT 2 WITH a AS (SELECT 1) SELECT 1 EXCEPT SELECT 2 WITH a AS (SELECT 1) SELECT 1 EXCEPT SELECT 2 (SELECT 1) UNION (SELECT 2) (SELECT 1) UNION SELECT 2 SELECT 1 UNION (SELECT 2) (SELECT 1) ORDER BY x LIMIT 1 OFFSET 1 (SELECT 1 UNION SELECT 2) UNION (SELECT 2 UNION ALL SELECT 3) (SELECT 1 UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1 SELECT 1 UNION (SELECT 2) ORDER BY x (SELECT 1) UNION SELECT 2 ORDER BY x SELECT * FROM (((SELECT 1) UNION SELECT 2) ORDER BY x LIMIT 1 OFFSET 1) SELECT * FROM ((SELECT 1 AS x) CROSS JOIN (SELECT 2 AS y)) AS z ((SELECT 1) EXCEPT (SELECT 2)) ((SELECT 1)) LIMIT 1 VALUES (1) UNION SELECT * FROM x WITH a AS (SELECT 1) SELECT a.* FROM a WITH a AS (SELECT 1), b AS (SELECT 2) SELECT a.*, b.* FROM a CROSS JOIN b WITH a AS (WITH b AS (SELECT 1 AS x) SELECT b.x FROM b) SELECT a.x FROM a WITH RECURSIVE T(n) AS (VALUES (1) UNION ALL SELECT n + 1 FROM t WHERE n < 100) SELECT SUM(n) FROM t 
WITH RECURSIVE T(n, m) AS (VALUES (1, 2) UNION ALL SELECT n + 1, n + 2 FROM t) SELECT SUM(n) FROM t WITH baz AS (SELECT 1 AS col) UPDATE bar SET cid = baz.col1 FROM baz SELECT * FROM (WITH y AS (SELECT 1 AS z) SELECT z FROM y) AS x SELECT RANK() OVER () FROM x SELECT RANK() OVER () AS y FROM x SELECT RANK() OVER (PARTITION BY a) FROM x SELECT RANK() OVER (PARTITION BY a, b) FROM x SELECT RANK() OVER (ORDER BY a) FROM x SELECT RANK() OVER (ORDER BY a, b) FROM x SELECT RANK() OVER (PARTITION BY a ORDER BY a) FROM x SELECT RANK() OVER (PARTITION BY a, b ORDER BY a, b DESC) FROM x SELECT SUM(x) OVER (PARTITION BY a) AS y FROM x SELECT SUM(x) OVER (PARTITION BY a ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) SELECT SUM(x) OVER (PARTITION BY a ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) SELECT SUM(x) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN INTERVAL '1' DAY PRECEDING AND CURRENT ROW) SELECT SUM(x) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN INTERVAL '1' DAY PRECEDING AND INTERVAL '2' DAYS FOLLOWING) SELECT SUM(x) OVER (PARTITION BY a ORDER BY b RANGE BETWEEN INTERVAL '1' DAY PRECEDING AND UNBOUNDED FOLLOWING) SELECT SUM(x) OVER (PARTITION BY a ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING) SELECT SUM(x) OVER (PARTITION BY a ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) SELECT SUM(x) OVER (PARTITION BY a ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) SELECT SUM(x) OVER (PARTITION BY a RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) SELECT SUM(x) OVER (PARTITION BY a RANGE BETWEEN 1 AND 3) SELECT SUM(x) OVER (PARTITION BY a RANGE BETWEEN 1 FOLLOWING AND 3) SELECT SUM(x) OVER (PARTITION BY a RANGE BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) SELECT AVG(x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) FROM t SELECT SUM(x) OVER (PARTITION BY a ORDER BY date ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) SELECT LISTAGG(x) 
WITHIN GROUP (ORDER BY x) AS y SELECT LISTAGG(x) WITHIN GROUP (ORDER BY x DESC) SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) SELECT PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x) SELECT SUM(x) FILTER(WHERE x > 1) SELECT SUM(x) FILTER(WHERE x > 1) OVER (ORDER BY y) SELECT COUNT(DISTINCT a) OVER (PARTITION BY c ORDER BY d ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) SELECT a['1'], b[0], x.c[0], "x".d['1'] FROM x SELECT ARRAY(1, 2, 3) FROM x SELECT ARRAY(ARRAY(1), ARRAY(2)) FROM x SELECT MAP[ARRAY(1), ARRAY(2)] FROM x SELECT MAP(ARRAY(1), ARRAY(2)) FROM x SELECT MAX(ARRAY(1, 2, 3)) FROM x SELECT ARRAY(ARRAY(0))[0][0] FROM x SELECT MAP[ARRAY('x'), ARRAY(0)]['x'] FROM x SELECT student, score FROM tests LATERAL VIEW EXPLODE(scores) SELECT student, score FROM tests LATERAL VIEW EXPLODE(scores) AS score SELECT student, score FROM tests LATERAL VIEW EXPLODE(scores) t AS score SELECT student, score FROM tests LATERAL VIEW EXPLODE(scores) t AS score, name SELECT student, score FROM tests LATERAL VIEW OUTER EXPLODE(scores) t AS score, name SELECT tf.* FROM (SELECT 0) AS t LATERAL VIEW STACK(1, 2) tf SELECT tf.* FROM (SELECT 0) AS t LATERAL VIEW STACK(1, 2) tf AS col0, col1, col2 SELECT student, score FROM tests CROSS JOIN UNNEST(scores) AS t(score) SELECT student, score FROM tests CROSS JOIN UNNEST(scores) AS t(a, b) SELECT student, score FROM tests CROSS JOIN UNNEST(scores) WITH ORDINALITY AS t(a, b) SELECT student, score FROM tests CROSS JOIN UNNEST(x.scores) AS t(score) SELECT student, score FROM tests CROSS JOIN UNNEST(ARRAY(x.scores)) AS t(score) CREATE TABLE foo AS (SELECT 1) UNION ALL (SELECT 2) CREATE TABLE foo (id INT PRIMARY KEY ASC) CREATE TABLE a.b AS SELECT 1 CREATE TABLE a.b AS SELECT 1 WITH DATA AND STATISTICS CREATE TABLE a.b AS SELECT 1 WITH NO DATA AND NO STATISTICS CREATE TABLE a.b AS (SELECT 1) NO PRIMARY INDEX CREATE TABLE a.b AS (SELECT 1) UNIQUE PRIMARY INDEX index1 (a) UNIQUE INDEX index2 (b) CREATE TABLE a.b AS (SELECT 1) PRIMARY AMP INDEX index1 
(a) UNIQUE INDEX index2 (b) CREATE TABLE a.b AS SELECT a FROM a.c CREATE TABLE IF NOT EXISTS x AS SELECT a FROM d CREATE TABLE z (a INT, b VARCHAR, c VARCHAR(100), d DECIMAL(5, 3)) CREATE TABLE z (end INT) CREATE TABLE z (bucket INT) CREATE TABLE z (truncate INT) CREATE TABLE z (a ARRAY, b MAP, c DECIMAL(5, 3)) CREATE TABLE z (a INT, b VARCHAR COMMENT 'z', c VARCHAR(100) COMMENT 'z', d DECIMAL(5, 3)) CREATE TABLE z (a INT(11) DEFAULT UUID()) CREATE TABLE z (n INT DEFAULT 0 NOT NULL) CREATE TABLE z (a INT(11) DEFAULT NULL COMMENT '客户id') CREATE TABLE z (a INT(11) NOT NULL DEFAULT 1) CREATE TABLE z (a INT(11) NOT NULL DEFAULT -1) CREATE TABLE z (a INT(11) NOT NULL COLLATE utf8_bin AUTO_INCREMENT) CREATE TABLE z (a INT, PRIMARY KEY (a)) CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1 CREATE TABLE z WITH (FORMAT='ORC', x='2') AS SELECT 1 CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='parquet') AS SELECT 1 CREATE TABLE z WITH (TABLE_FORMAT='iceberg', FORMAT='ORC', x='2') AS SELECT 1 CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT, y INT)) CREATE TABLE z (z INT) WITH (PARTITIONED_BY=(x INT)) AS SELECT 1 CREATE TABLE z AS (WITH cte AS (SELECT 1) SELECT * FROM cte) CREATE TABLE z AS ((WITH cte AS (SELECT 1) SELECT * FROM cte)) CREATE TABLE z (a INT UNIQUE) CREATE TABLE z (a INT AUTO_INCREMENT) CREATE TABLE z (a INT UNIQUE AUTO_INCREMENT) CREATE TABLE z (a INT REFERENCES parent (b, c)) CREATE TABLE z (a INT PRIMARY KEY, b INT REFERENCES foo (id)) CREATE TABLE z (a INT, FOREIGN KEY (a) REFERENCES parent (b, c)) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON DELETE NO ACTION) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON DELETE CASCADE) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON DELETE SET NULL) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON DELETE SET DEFAULT) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON UPDATE NO ACTION) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON UPDATE CASCADE) CREATE TABLE foo (bar INT REFERENCES 
baz (baz_id) ON UPDATE SET NULL) CREATE TABLE foo (bar INT REFERENCES baz (baz_id) ON UPDATE SET DEFAULT) CREATE TABLE asd AS SELECT asd FROM asd WITH NO DATA CREATE TABLE asd AS SELECT asd FROM asd WITH DATA CREATE TABLE products (x INT GENERATED BY DEFAULT AS IDENTITY) CREATE TABLE products (x INT GENERATED BY DEFAULT ON NULL AS IDENTITY) CREATE TABLE products (x INT GENERATED ALWAYS AS IDENTITY) CREATE TABLE konyvszerzo (szerzo_azon INT CONSTRAINT konyvszerzo_szerzo_fk REFERENCES szerzo) CREATE TABLE IF NOT EXISTS customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (INCREMENT BY 1)) CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 1 MINVALUE -1 MAXVALUE 1 NO CYCLE)) CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 10)) CREATE TABLE customer (pk BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY (CYCLE)) CREATE TABLE customer (period INT NOT NULL) CREATE TABLE foo (baz_id INT REFERENCES baz (id) DEFERRABLE) CREATE TABLE foo (baz CHAR(4) CHARACTER SET LATIN UPPERCASE NOT CASESPECIFIC COMPRESS 'a') CREATE TABLE db.foo (id INT NOT NULL, valid_date DATE FORMAT 'YYYY-MM-DD', measurement INT COMPRESS) CREATE TABLE foo (baz DATE FORMAT 'YYYY/MM/DD' TITLE 'title' INLINE LENGTH 1 COMPRESS ('a', 'b')) CREATE TABLE t (title TEXT) CREATE TABLE foo (baz INT, inline TEXT) CREATE ALGORITHM=UNDEFINED DEFINER=foo@% VIEW a SQL SECURITY DEFINER AS (SELECT a FROM b) CREATE TEMPORARY TABLE x AS SELECT a FROM d CREATE TEMPORARY TABLE IF NOT EXISTS x AS SELECT a FROM d CREATE TABLE a (b INT) ON COMMIT PRESERVE ROWS CREATE VIEW x AS SELECT a FROM b CREATE VIEW IF NOT EXISTS x AS SELECT a FROM b CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d CREATE VIEW IF NOT EXISTS z (a, b COMMENT 'b', c COMMENT 'c') AS SELECT a, b, c FROM d CREATE OR REPLACE VIEW x AS SELECT * CREATE OR REPLACE TEMPORARY VIEW x AS SELECT * CREATE TEMPORARY VIEW x AS SELECT a FROM d CREATE TEMPORARY VIEW IF NOT 
EXISTS x AS SELECT a FROM d CREATE TEMPORARY VIEW x AS WITH y AS (SELECT 1) SELECT * FROM y CREATE MATERIALIZED VIEW x.y.z AS SELECT a FROM b CREATE VIEW z (a, b) CREATE VIEW z (a, b COMMENT 'b', c COMMENT 'c') CREATE VIEW z AS LOCKING ROW FOR ACCESS SELECT a FROM b CREATE TEMPORARY FUNCTION f CREATE TEMPORARY FUNCTION f AS 'g' CREATE FUNCTION f CREATE FUNCTION f AS 'g' CREATE FUNCTION a(b INT, c VARCHAR) AS 'SELECT 1' CREATE FUNCTION a() LANGUAGE sql CREATE FUNCTION a() LANGUAGE sql RETURNS INT CREATE FUNCTION a.b(x INT) RETURNS INT AS RETURN x + 1 CREATE FUNCTION a.b(x TEXT) RETURNS TEXT CONTAINS SQL AS RETURN x CREATE FUNCTION a.b(x TEXT) RETURNS TEXT LANGUAGE SQL MODIFIES SQL DATA AS RETURN x CREATE FUNCTION a.b(x TEXT) LANGUAGE SQL READS SQL DATA RETURNS TEXT AS RETURN x CREATE FUNCTION a.b.c() CREATE INDEX abc ON t(a) CREATE INDEX "abc" ON t(a) CREATE INDEX abc ON t(a, b, b) CREATE INDEX abc ON t(a NULLS LAST) CREATE INDEX pointloc ON points USING GIST(BOX(location, location)) CREATE UNIQUE INDEX abc ON t(a, b, b) CREATE UNIQUE INDEX IF NOT EXISTS my_idx ON tbl(a, b) CREATE SCHEMA x CREATE SCHEMA IF NOT EXISTS y CREATE DATABASE x CREATE DATABASE IF NOT EXISTS y CREATE PROCEDURE IF NOT EXISTS a.b.c() AS 'DECLARE BEGIN; END' CREATE TABLE T3 AS (SELECT DISTINCT A FROM T1 EXCEPT (SELECT A FROM T2) LIMIT 1) DESCRIBE x DESCRIBE EXTENDED a.b DESCRIBE FORMATTED a.b DESCRIBE SELECT 1 DROP INDEX a.b.c DROP FUNCTION a.b.c (INT) DROP MATERIALIZED VIEW x.y.z CACHE TABLE x CACHE LAZY TABLE x CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') CACHE LAZY TABLE x OPTIONS(N'storageLevel' = 'value') CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1 CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE LAZY TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE TABLE x AS WITH a AS (SELECT 1) SELECT a.* FROM a CACHE TABLE x AS (SELECT 1 AS y) DROP PROCEDURE a.b.c (INT) INSERT OVERWRITE TABLE a.b PARTITION(ds) 
SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds = 'YYYY-MM-DD') SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds, hour) SELECT x FROM y INSERT OVERWRITE TABLE a.b PARTITION(ds = 'YYYY-MM-DD', hour = 'hh') SELECT x FROM y INSERT INTO a.b PARTITION(DAY = '2024-04-14') (col1, col2) SELECT x FROM y DELETE FROM x WHERE y > 1 DELETE FROM y DELETE FROM event USING sales WHERE event.eventid = sales.eventid DELETE FROM event USING sales, bla WHERE event.eventid = sales.eventid DELETE FROM event USING sales AS s WHERE event.eventid = s.eventid DELETE FROM event AS event USING sales AS s WHERE event.eventid = s.eventid DROP TABLE a DROP TABLE a.b DROP TABLE IF EXISTS a DROP TABLE IF EXISTS a.b DROP TABLE a CASCADE DROP TABLE s_hajo CASCADE CONSTRAINTS DROP TABLE a PURGE DROP VIEW a DROP VIEW a.b DROP VIEW IF EXISTS a DROP VIEW IF EXISTS a.b USE db BEGIN ROLLBACK ROLLBACK TO b INSERT INTO x SELECT * FROM y INSERT INTO x (SELECT * FROM y) INSERT INTO x WITH y AS (SELECT 1) SELECT * FROM y INSERT INTO x.z IF EXISTS SELECT * FROM y INSERT INTO x VALUES (1, 'a', 2.0) INSERT INTO x VALUES (1, 'a', 2.0), (1, 'a', 3.0), (X(), y[1], z.x) INSERT INTO y (a, b, c) SELECT a, b, c FROM x INSERT INTO y (SELECT 1) UNION (SELECT 2) INSERT INTO result_table (WITH test AS (SELECT * FROM source_table) SELECT * FROM test) INSERT INTO "tests_user" ("username", "first_name", "last_name") VALUES ('fiara', 'Fiara', 'Ironhide') RETURNING "tests_user"."id" INSERT INTO t1 (tc1 /* tc1 */, tc2 /* tc2 */) SELECT c1 /* sc1 */, c2 /* sc2 */ FROM t INSERT INTO t1 ("tc1" /* tc1 */, "tc2" /* tc2 */) SELECT "c1" /* sc1 */, "c2" /* sc2 */ FROM t INSERT OVERWRITE TABLE x IF EXISTS SELECT * FROM y INSERT OVERWRITE TABLE a.b IF EXISTS SELECT * FROM y INSERT OVERWRITE DIRECTORY 'x' SELECT 1 INSERT OVERWRITE LOCAL DIRECTORY 'x' SELECT 1 INSERT OVERWRITE LOCAL DIRECTORY 'x' ROW FORMAT DELIMITED FIELDS TERMINATED BY '1' COLLECTION ITEMS TERMINATED BY '2' MAP KEYS TERMINATED BY '3' LINES TERMINATED BY '4' 
NULL DEFINED AS '5' SELECT 1 LOAD DATA INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') INPUTFORMAT 'y' LOAD DATA LOCAL INPATH 'x' INTO TABLE y PARTITION(ds = 'yyyy') INPUTFORMAT 'y' SERDE 'z' LOAD DATA INPATH 'x' INTO TABLE y INPUTFORMAT 'y' SERDE 'z' LOAD DATA INPATH 'x' INTO TABLE y.b INPUTFORMAT 'y' SERDE 'z' SELECT 1 FROM PARQUET_SCAN('/x/y/*') AS y UNCACHE TABLE x UNCACHE TABLE IF EXISTS x UPDATE tbl_name SET foo = 123 UPDATE tbl_name SET foo = 123, bar = 345 UPDATE db.tbl_name SET foo = 123 WHERE tbl_name.bar = 234 UPDATE db.tbl_name SET foo = 123, foo_1 = 234 WHERE tbl_name.bar = 234 UPDATE products SET price = price * 1.10 WHERE price <= 99.99 RETURNING name, price AS new_price UPDATE t1 AS a, t2 AS b, t3 AS c LEFT JOIN t4 AS d ON c.id = d.id SET a.id = 1 COMMENT ON COLUMN my_schema.my_table.my_column IS 'Employee ID number' COMMENT ON DATABASE my_database IS 'Development Database' COMMENT ON PROCEDURE my_proc(integer, integer) IS 'Runs a report' COMMENT ON TABLE my_schema.my_table IS 'Employee Information' COMMENT ON TABLE my_schema.my_table IS N'National String' WITH a AS (SELECT 1) INSERT INTO b SELECT * FROM a WITH a AS (SELECT * FROM b) UPDATE a SET col = 1 WITH a AS (SELECT * FROM b) CREATE TABLE b AS SELECT * FROM a WITH a AS (SELECT * FROM b) DELETE FROM a SELECT ? AS ? FROM x WHERE b BETWEEN ? AND ? GROUP BY ?, 1 LIMIT ? SELECT :hello, ? FROM x LIMIT :my_limit SELECT a FROM b WHERE c IS ? 
SELECT * FROM x OFFSET @skip FETCH NEXT @take ROWS ONLY WITH a AS ((SELECT b.foo AS foo, b.bar AS bar FROM b) UNION ALL (SELECT c.foo AS foo, c.bar AS bar FROM c)) SELECT * FROM a WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 1 AS b)) SELECT * FROM a SELECT (WITH x AS (SELECT 1 AS y) SELECT * FROM x) AS z SELECT ((SELECT 1) + 1) SELECT ((SELECT 0) UNION (SELECT 1) ORDER BY 1 OFFSET 1) SELECT * FROM x WHERE y IN ((SELECT 1) UNION (SELECT 2) OFFSET 2) SELECT * FROM project.dataset.INFORMATION_SCHEMA.TABLES SELECT CAST(x AS INT) /* comment */ FROM foo SELECT c /* c1 */ AS alias /* c2 */ SELECT a /* x */, b /* x */ SELECT a /* x */ /* y */ /* z */, b /* k */ /* m */ SELECT * FROM foo /* x */, bla /* x */ SELECT 1 /* comment */ + 1 SELECT 1 /* c1 */ + 2 /* c2 */ SELECT 1 /* c1 */ + /* c2 */ 2 /* c3 */ SELECT 1 /* c1 */ + 2 /* c2 */ + 3 /* c3 */ SELECT 1 /* c1 */ + 2 /* c2 */, 3 /* c3 */ SELECT x FROM a.b.c /* x */, e.f.g /* x */ SELECT FOO(x /* c */) /* FOO */, b /* b */ SELECT FOO(x /* c1 */ + y /* c2 */ + BLA(5 /* c3 */)) FROM (VALUES (1 /* c4 */, "test" /* c5 */)) /* c6 */ INSERT INTO foo SELECT * FROM bar /* comment */ /* c */ WITH x AS (SELECT 1) SELECT * FROM x SELECT a FROM x WHERE a COLLATE 'utf8_general_ci' = 'b' SELECT x AS INTO FROM bla ALTER TABLE integers ADD COLUMN k INT ALTER TABLE integers ADD COLUMN k INT FIRST ALTER TABLE integers ADD COLUMN k INT AFTER m ALTER TABLE integers ADD COLUMN IF NOT EXISTS k INT ALTER TABLE IF EXISTS integers ADD COLUMN k INT ALTER TABLE integers ADD COLUMN l INT DEFAULT 10 ALTER TABLE measurements ADD COLUMN mtime TIMESTAMPTZ DEFAULT NOW() ALTER TABLE integers DROP COLUMN k ALTER TABLE integers DROP COLUMN IF EXISTS k ALTER TABLE integers DROP COLUMN k CASCADE ALTER TABLE integers ALTER COLUMN i SET DATA TYPE VARCHAR ALTER TABLE integers ALTER COLUMN i SET DATA TYPE VARCHAR USING CONCAT(i, '_', j) ALTER TABLE integers ALTER COLUMN i SET DEFAULT 10 ALTER TABLE integers ALTER COLUMN i DROP DEFAULT ALTER TABLE ingredients ALTER 
COLUMN amount COMMENT 'tablespoons' ALTER TABLE mydataset.mytable DROP COLUMN A, DROP COLUMN IF EXISTS B ALTER TABLE mydataset.mytable ADD COLUMN A TEXT, ADD COLUMN IF NOT EXISTS B INT ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN') ALTER TABLE orders DROP IF EXISTS PARTITION(dt = '2014-05-14', country = 'IN') ALTER TABLE orders DROP PARTITION(dt = '2014-05-14', country = 'IN'), PARTITION(dt = '2014-05-15', country = 'IN') ALTER TABLE mydataset.mytable DELETE WHERE x = 1 ALTER TABLE table1 RENAME COLUMN c1 TO c2 ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2 ALTER TABLE table1 RENAME TO table2 ALTER VIEW view1 AS SELECT a, b, c FROM table1 ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 ALTER VIEW view1 AS SELECT a, b, c FROM table1 UNION ALL SELECT a, b, c FROM table2 LIMIT 100 SELECT div.a FROM test_table AS div WITH view AS (SELECT 1 AS x) SELECT * FROM view ARRAY>> STRUCT SELECT CAST(NULL AS ARRAY) IS NULL AS array_is_null ALTER TABLE "schema"."tablename" ADD CONSTRAINT "CHK_Name" CHECK (NOT "IdDwh" IS NULL AND "IdDwh" <> (0)) ALTER TABLE persons ADD CONSTRAINT persons_pk PRIMARY KEY (first_name, last_name) ALTER TABLE pets ADD CONSTRAINT pets_persons_fk FOREIGN KEY (owner_first_name, owner_last_name) REFERENCES persons ALTER TABLE pets ADD CONSTRAINT pets_name_not_cute_chk CHECK (LENGTH(name) < 20) ALTER TABLE people10m ADD CONSTRAINT dateWithinRange CHECK (birthDate > '1900-01-01') ALTER TABLE people10m ADD CONSTRAINT validIds CHECK (id > 1 AND id < 99999999) ENFORCED ALTER TABLE ct ADD CONSTRAINT ct_id_fk FOREIGN KEY (id) REFERENCES et (fid) DEFERRABLE INITIALLY DEFERRED ALTER TABLE baa ADD CONSTRAINT boo PRIMARY KEY (x, y) NOT ENFORCED DEFERRABLE INITIALLY DEFERRED NORELY ALTER TABLE baa ADD CONSTRAINT boo PRIMARY KEY (x, y) NOT ENFORCED DEFERRABLE INITIALLY DEFERRED NORELY ALTER TABLE baa ADD CONSTRAINT boo FOREIGN KEY (x, y) REFERENCES persons ON UPDATE NO ACTION ON DELETE NO ACTION MATCH FULL 
ALTER TABLE a ADD PRIMARY KEY (x, y) NOT ENFORCED ALTER TABLE a ADD FOREIGN KEY (x, y) REFERENCES bla ALTER TABLE s_ut ADD CONSTRAINT s_ut_uq UNIQUE hajo SELECT partition FROM a SELECT end FROM a SELECT id FROM b.a AS a QUALIFY ROW_NUMBER() OVER (PARTITION BY br ORDER BY sadf DESC) = 1 SELECT * FROM x WHERE a GROUP BY a HAVING b SORT BY s ORDER BY c LIMIT d SELECT LEFT.FOO FROM BLA AS LEFT SELECT RIGHT.FOO FROM BLA AS RIGHT SELECT LEFT FROM LEFT LEFT JOIN RIGHT RIGHT JOIN LEFT SELECT * FROM x WHERE name ILIKE ANY XXX('a', 'b') SELECT * FROM x WHERE name LIKE ANY XXX('a', 'b') a OVERLAPS b PRAGMA quick_check PRAGMA QUICK_CHECK(0) PRAGMA QUICK_CHECK('sqlite_master') PRAGMA schema.quick_check PRAGMA schema.QUICK_CHECK(0) PRAGMA schema.QUICK_CHECK('sqlite_master') PRAGMA synchronous = 2 PRAGMA synchronous = FULL PRAGMA memory_limit = '1GB' PRAGMA schema.synchronous = 2 PRAGMA schema.synchronous = FULL PRAGMA schema.memory_limit = '1GB' JSON_OBJECT() JSON_OBJECT(*) JSON_OBJECT('key1': 1, 'key2': TRUE) JSON_OBJECT('id': '5', 'fld1': 'bla', 'fld2': 'bar') JSON_OBJECT('x': NULL, 'y': 1 NULL ON NULL) JSON_OBJECT('x': NULL, 'y': 1 WITH UNIQUE KEYS) JSON_OBJECT('x': NULL, 'y': 1 ABSENT ON NULL WITH UNIQUE KEYS) JSON_OBJECT('x': 1 RETURNING VARCHAR(100)) JSON_OBJECT('x': 1 RETURNING VARBINARY FORMAT JSON ENCODING UTF8) PRIOR AS x SELECT if.x SELECT PERCENTILE_CONT(x, 0.5) OVER () WITH my_cte AS (SELECT 'a' AS desc) SELECT desc AS description FROM my_cte WITH my_cte AS (SELECT 'a' AS asc) SELECT asc AS description FROM my_cte SELECT * FROM case SELECT * FROM schema.case SELECT * FROM current_date SELECT * FROM schema.current_date SELECT /*+ SOME_HINT(foo) */ 1 SELECT /*+ REBALANCE */ * FROM foo SELECT * FROM (tbl1 CROSS JOIN (SELECT * FROM tbl2) AS t1) /* comment1 */ INSERT INTO x /* comment2 */ VALUES (1, 2, 3) /* comment1 */ UPDATE tbl /* comment2 */ SET x = 2 WHERE x < 2 /* comment1 */ DELETE FROM x /* comment2 */ WHERE y > 1 /* comment */ CREATE TABLE foo AS SELECT 1 SELECT 
next, transform, if SELECT "any", "case", "if", "next" SELECT x FROM y ORDER BY x ASC KILL '123' KILL CONNECTION 123 KILL QUERY '123' CHR(97) SELECT * FROM UNNEST(x) WITH ORDINALITY UNION ALL SELECT * FROM UNNEST(y) WITH ORDINALITY SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1 SELECT x FROM t1 UNION ALL SELECT x FROM t2 UNION ALL SELECT x FROM t3 LIMIT 1 WITH use(use) AS (SELECT 1) SELECT use FROM use SELECT recursive FROM t SELECT (ROW_NUMBER() OVER (PARTITION BY user ORDER BY date ASC) - ROW_NUMBER() OVER (PARTITION BY user, segment ORDER BY date ASC)) AS group_id FROM example_table CAST(foo AS BPCHAR) values SELECT values SELECT values AS values FROM t WHERE values + 1 > 3 SELECT truncate SELECT only TRUNC(a, b) SELECT enum SELECT unlogged SELECT name SELECT copy SELECT rollup SELECT unnest SELECT cube, cube.x FROM cube SELECT * FROM a STRAIGHT_JOIN b SELECT COUNT(DISTINCT "foo bar") FROM (SELECT 1 AS "foo bar") AS t SELECT vector WITH all AS (SELECT 1 AS count) SELECT all.count FROM all SELECT rename GRANT SELECT ON TABLE tbl TO user GRANT SELECT, INSERT ON FUNCTION tbl TO user GRANT SELECT ON orders TO ROLE PUBLIC GRANT SELECT ON nation TO alice WITH GRANT OPTION GRANT DELETE ON SCHEMA finance TO bob REVOKE SELECT ON TABLE tbl FROM user REVOKE SELECT, INSERT ON FUNCTION tbl FROM user REVOKE SELECT ON orders FROM ROLE PUBLIC REVOKE GRANT OPTION FOR SELECT ON nation FROM alice REVOKE DELETE ON SCHEMA finance FROM bob CASCADE REVOKE INSERT ON TABLE orders FROM user RESTRICT SELECT attach SELECT detach SELECT 1 OFFSET 1 SELECT 1 LIMIT 1 CAST(x AS INT128) CAST(x AS UINT128) CAST(x AS UINT256) SELECT export SELECT ARG_MAX(DISTINCT selected_col, filtered_col) FROM table SELECT ARG_MIN(DISTINCT selected_col, filtered_col) FROM table a.b.c.D() a.b.c.d.e.f.G() SELECT CUME_DIST() OVER (ORDER BY 1) FROM (SELECT 1) SELECT DENSE_RANK() OVER (ORDER BY 1) FROM (SELECT 1) SELECT NTILE(1) OVER (ORDER BY 1) FROM (SELECT 1) SELECT RANK() OVER (ORDER BY 1) FROM (SELECT 1) 
SELECT PERCENT_RANK() OVER (ORDER BY 1) FROM (SELECT 1) SELECT ACOS(x) SELECT ACOSH(x) SELECT ASIN(x) SELECT ASINH(x) SELECT ATAN(x) SELECT ATANH(x) SELECT ATAN2(x, y) SELECT COT(x) SELECT COTH(x) SELECT CSC(x) SELECT CSCH(x) SELECT RADIANS(x) SELECT SEC(x) SELECT SECH(x) SELECT UTC_DATE SELECT UTC_TIME SELECT UTC_TIMESTAMP SELECT SIN(x) SELECT SINH(x) SELECT TANH(x) SELECT COSINE_DISTANCE(v1, v2) SELECT EUCLIDEAN_DISTANCE(v1, v2) FOO(values.c) case.* SELECT unknown SELECT test.Unknown FROM test SELECT lock SELECT a FROM test GROUP BY GROUPING SETS ((x + y, z)) SELECT (LEAD(foo1, 1, 0)) OVER (PARTITION BY foo2 ORDER BY foo3) FROM t SELECT LAST_VALUE(CASE WHEN interval <> 'foo' THEN interval END) IGNORE NULLS FROM t WITH UNNEST AS (SELECT 1 AS UNNEST) SELECT UNNEST FROM UNNEST SELECT * FROM tbl GROUP BY GROUPING SETS ((a + 1, b * 1), c, CUBE (a, b), ROLLUP (c, d), (a + y, b * 1), ()) SELECT * FROM tbl GROUP BY GROUPING SETS (GROUPING SETS (course), GROUPING SETS (type), CUBE (a), ROLLUP (b)) SELECT analyze FROM (SELECT 1 AS analyze) SELECT 'Ac' ILIKE 'a%c' ESCAPE NULL SELECT CURRENT_DATABASE() SELECT CURRENT_SCHEMAS(arg_bool) SELECT UNIFORM(1, 10, 5) SELECT UNIFORM(1, 10) SELECT CURRENT_TIMEZONE() SELECT NUMRANGE(1.1, 2.2) -|- NUMRANGE(2.2, 3.3) CREATE TABLE t (a VARCHAR, check INT) ================================================ FILE: tests/fixtures/jsonpath/LICENSE ================================================ jsonpath-compliance-test-suite The BSD-2 license (the "License") set forth below applies to all parts of the jsonpath-compliance-test-suite project. You may not use this file except in compliance with the License. BSD-2 License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: tests/fixtures/jsonpath/cts.json ================================================ { "description": "JSONPath Compliance Test Suite. 
This file is autogenerated, do not edit.", "tests": [ { "name": "basic, root", "selector": "$", "document": [ "first", "second" ], "result": [ [ "first", "second" ] ] }, { "name": "basic, no leading whitespace", "selector": " $", "invalid_selector": true }, { "name": "basic, no trailing whitespace", "selector": "$ ", "invalid_selector": true }, { "name": "basic, name shorthand", "selector": "$.a", "document": { "a": "A", "b": "B" }, "result": [ "A" ] }, { "name": "basic, name shorthand, extended unicode ☺", "selector": "$.☺", "document": { "☺": "A", "b": "B" }, "result": [ "A" ] }, { "name": "basic, name shorthand, underscore", "selector": "$._", "document": { "_": "A", "_foo": "B" }, "result": [ "A" ] }, { "name": "basic, name shorthand, symbol", "selector": "$.&", "invalid_selector": true }, { "name": "basic, name shorthand, number", "selector": "$.1", "invalid_selector": true }, { "name": "basic, name shorthand, absent data", "selector": "$.c", "document": { "a": "A", "b": "B" }, "result": [] }, { "name": "basic, name shorthand, array data", "selector": "$.a", "document": [ "first", "second" ], "result": [] }, { "name": "basic, wildcard shorthand, object data", "selector": "$.*", "document": { "a": "A", "b": "B" }, "result": [ "A", "B" ] }, { "name": "basic, wildcard shorthand, array data", "selector": "$.*", "document": [ "first", "second" ], "result": [ "first", "second" ] }, { "name": "basic, wildcard selector, array data", "selector": "$[*]", "document": [ "first", "second" ], "result": [ "first", "second" ] }, { "name": "basic, wildcard shorthand, then name shorthand", "selector": "$.*.a", "document": { "x": { "a": "Ax", "b": "Bx" }, "y": { "a": "Ay", "b": "By" } }, "result": [ "Ax", "Ay" ] }, { "name": "basic, multiple selectors", "selector": "$[0,2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 2 ] }, { "name": "basic, multiple selectors, space instead of comma", "selector": "$[0 2]", "invalid_selector": true }, { "name": "basic, multiple 
selectors, name and index, array data", "selector": "$['a',1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1 ] }, { "name": "basic, multiple selectors, name and index, object data", "selector": "$['a',1]", "document": { "a": 1, "b": 2 }, "result": [ 1 ] }, { "name": "basic, multiple selectors, index and slice", "selector": "$[1,5:7]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 5, 6 ] }, { "name": "basic, multiple selectors, index and slice, overlapping", "selector": "$[1,0:3]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 0, 1, 2 ] }, { "name": "basic, multiple selectors, duplicate index", "selector": "$[1,1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 1 ] }, { "name": "basic, multiple selectors, wildcard and index", "selector": "$[*,1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1 ] }, { "name": "basic, multiple selectors, wildcard and name", "selector": "$[*,'a']", "document": { "a": "A", "b": "B" }, "result": [ "A", "B", "A" ] }, { "name": "basic, multiple selectors, wildcard and slice", "selector": "$[*,0:2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 ] }, { "name": "basic, multiple selectors, multiple wildcards", "selector": "$[*,*]", "document": [ 0, 1, 2 ], "result": [ 0, 1, 2, 0, 1, 2 ] }, { "name": "basic, empty segment", "selector": "$[]", "invalid_selector": true }, { "name": "basic, descendant segment, index", "selector": "$..[1]", "document": { "o": [ 0, 1, [ 2, 3 ] ] }, "result": [ 1, 3 ] }, { "name": "basic, descendant segment, name shorthand", "selector": "$..a", "document": { "o": [ { "a": "b" }, { "a": "c" } ] }, "result": [ "b", "c" ] }, { "name": "basic, descendant segment, wildcard shorthand, array data", "selector": "$..*", "document": [ 0, 1 ], "result": [ 0, 1 ] }, { "name": "basic, descendant segment, wildcard selector, array data", "selector": "$..[*]", "document": [ 0, 1 ], 
"result": [ 0, 1 ] }, { "name": "basic, descendant segment, wildcard shorthand, object data", "selector": "$..*", "document": { "a": "b" }, "result": [ "b" ] }, { "name": "basic, descendant segment, wildcard shorthand, nested data", "selector": "$..*", "document": { "o": [ { "a": "b" } ] }, "result": [ [ { "a": "b" } ], { "a": "b" }, "b" ] }, { "name": "basic, descendant segment, multiple selectors", "selector": "$..['a','d']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ "b", "e", "c", "f" ] }, { "name": "basic, bald descendant segment", "selector": "$..", "invalid_selector": true }, { "name": "filter, existence", "selector": "$[?@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, existence, present with null", "selector": "$[?@.a]", "document": [ { "a": null, "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": null, "d": "e" } ] }, { "name": "filter, equals string, single quotes", "selector": "$[?@.a=='b']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, equals numeric string, single quotes", "selector": "$[?@.a=='1']", "document": [ { "a": "1", "d": "e" }, { "a": 1, "d": "f" } ], "result": [ { "a": "1", "d": "e" } ] }, { "name": "filter, equals string, double quotes", "selector": "$[?@.a==\"b\"]", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, equals numeric string, double quotes", "selector": "$[?@.a==\"1\"]", "document": [ { "a": "1", "d": "e" }, { "a": 1, "d": "f" } ], "result": [ { "a": "1", "d": "e" } ] }, { "name": "filter, equals number", "selector": "$[?@.a==1]", "document": [ { "a": 1, "d": "e" }, { "a": "c", "d": "f" }, { "a": 2, "d": "f" }, { "a": "1", "d": "f" } ], "result": [ { "a": 1, "d": "e" } ] }, { "name": "filter, equals null", "selector": "$[?@.a==null]", "document": [ { "a": null, 
"d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": null, "d": "e" } ] }, { "name": "filter, equals null, absent from data", "selector": "$[?@.a==null]", "document": [ { "d": "e" }, { "a": "c", "d": "f" } ], "result": [] }, { "name": "filter, equals true", "selector": "$[?@.a==true]", "document": [ { "a": true, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": true, "d": "e" } ] }, { "name": "filter, equals false", "selector": "$[?@.a==false]", "document": [ { "a": false, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": false, "d": "e" } ] }, { "name": "filter, deep equality, arrays", "selector": "$[?@.a==@.b]", "document": [ { "a": false, "b": [ 1, 2 ] }, { "a": [ [ 1, [ 2 ] ] ], "b": [ [ 1, [ 2 ] ] ] }, { "a": [ [ 1, [ 2 ] ] ], "b": [ [ [ 2 ], 1 ] ] }, { "a": [ [ 1, [ 2 ] ] ], "b": [ [ 1, 2 ] ] } ], "result": [ { "a": [ [ 1, [ 2 ] ] ], "b": [ [ 1, [ 2 ] ] ] } ] }, { "name": "filter, deep equality, objects", "selector": "$[?@.a==@.b]", "document": [ { "a": false, "b": { "x": 1, "y": { "z": 1 } } }, { "a": { "x": 1, "y": { "z": 1 } }, "b": { "x": 1, "y": { "z": 1 } } }, { "a": { "x": 1, "y": { "z": 1 } }, "b": { "y": { "z": 1 }, "x": 1 } }, { "a": { "x": 1, "y": { "z": 1 } }, "b": { "x": 1 } }, { "a": { "x": 1, "y": { "z": 1 } }, "b": { "x": 1, "y": { "z": 2 } } } ], "result": [ { "a": { "x": 1, "y": { "z": 1 } }, "b": { "x": 1, "y": { "z": 1 } } }, { "a": { "x": 1, "y": { "z": 1 } }, "b": { "y": { "z": 1 }, "x": 1 } } ] }, { "name": "filter, not-equals string, single quotes", "selector": "$[?@.a!='b']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals numeric string, single quotes", "selector": "$[?@.a!='1']", "document": [ { "a": "1", "d": "e" }, { "a": 1, "d": "f" } ], "result": [ { "a": 1, "d": "f" } ] }, { "name": "filter, not-equals string, single quotes, different type", "selector": "$[?@.a!='b']", "document": [ { "a": "b", "d": "e" }, { "a": 1, "d": "f" 
} ], "result": [ { "a": 1, "d": "f" } ] }, { "name": "filter, not-equals string, double quotes", "selector": "$[?@.a!=\"b\"]", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals numeric string, double quotes", "selector": "$[?@.a!=\"1\"]", "document": [ { "a": "1", "d": "e" }, { "a": 1, "d": "f" } ], "result": [ { "a": 1, "d": "f" } ] }, { "name": "filter, not-equals string, double quotes, different types", "selector": "$[?@.a!=\"b\"]", "document": [ { "a": "b", "d": "e" }, { "a": 1, "d": "f" } ], "result": [ { "a": 1, "d": "f" } ] }, { "name": "filter, not-equals number", "selector": "$[?@.a!=1]", "document": [ { "a": 1, "d": "e" }, { "a": 2, "d": "f" }, { "a": "1", "d": "f" } ], "result": [ { "a": 2, "d": "f" }, { "a": "1", "d": "f" } ] }, { "name": "filter, not-equals number, different types", "selector": "$[?@.a!=1]", "document": [ { "a": 1, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals null", "selector": "$[?@.a!=null]", "document": [ { "a": null, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals null, absent from data", "selector": "$[?@.a!=null]", "document": [ { "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "d": "e" }, { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals true", "selector": "$[?@.a!=true]", "document": [ { "a": true, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, not-equals false", "selector": "$[?@.a!=false]", "document": [ { "a": false, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, less than string, single quotes", "selector": "$[?@.a<'c']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, less than string, double quotes", "selector": "$[?@.a<\"c\"]", 
"document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, less than number", "selector": "$[?@.a<10]", "document": [ { "a": 1, "d": "e" }, { "a": 10, "d": "e" }, { "a": "c", "d": "f" }, { "a": 20, "d": "f" } ], "result": [ { "a": 1, "d": "e" } ] }, { "name": "filter, less than null", "selector": "$[?@.a'c']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "d", "d": "f" } ] }, { "name": "filter, greater than string, double quotes", "selector": "$[?@.a>\"c\"]", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "d", "d": "f" } ] }, { "name": "filter, greater than number", "selector": "$[?@.a>10]", "document": [ { "a": 1, "d": "e" }, { "a": 10, "d": "e" }, { "a": "c", "d": "f" }, { "a": 20, "d": "f" } ], "result": [ { "a": 20, "d": "f" } ] }, { "name": "filter, greater than null", "selector": "$[?@.a>null]", "document": [ { "a": null, "d": "e" }, { "a": "c", "d": "f" } ], "result": [] }, { "name": "filter, greater than true", "selector": "$[?@.a>true]", "document": [ { "a": true, "d": "e" }, { "a": "c", "d": "f" } ], "result": [] }, { "name": "filter, greater than false", "selector": "$[?@.a>false]", "document": [ { "a": false, "d": "e" }, { "a": "c", "d": "f" } ], "result": [] }, { "name": "filter, greater than or equal to string, single quotes", "selector": "$[?@.a>='c']", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ] }, { "name": "filter, greater than or equal to string, double quotes", "selector": "$[?@.a>=\"c\"]", "document": [ { "a": "b", "d": "e" }, { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ] }, { "name": "filter, greater than or equal to number", "selector": "$[?@.a>=10]", "document": [ { "a": 1, "d": "e" }, { "a": 
10, "d": "e" }, { "a": "c", "d": "f" }, { "a": 20, "d": "f" } ], "result": [ { "a": 10, "d": "e" }, { "a": 20, "d": "f" } ] }, { "name": "filter, greater than or equal to null", "selector": "$[?@.a>=null]", "document": [ { "a": null, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": null, "d": "e" } ] }, { "name": "filter, greater than or equal to true", "selector": "$[?@.a>=true]", "document": [ { "a": true, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": true, "d": "e" } ] }, { "name": "filter, greater than or equal to false", "selector": "$[?@.a>=false]", "document": [ { "a": false, "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": false, "d": "e" } ] }, { "name": "filter, exists and not-equals null, absent from data", "selector": "$[?@.a&&@.a!=null]", "document": [ { "d": "e" }, { "a": "c", "d": "f" } ], "result": [ { "a": "c", "d": "f" } ] }, { "name": "filter, and", "selector": "$[?@.a>0&&@.a<10]", "document": [ { "a": -10, "d": "e" }, { "a": 5, "d": "f" }, { "a": 20, "d": "f" } ], "result": [ { "a": 5, "d": "f" } ] }, { "name": "filter, or", "selector": "$[?@.a=='b'||@.a=='d']", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "c", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ] }, { "name": "filter, not expression", "selector": "$[?!(@.a=='b')]", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "a", "d": "e" }, { "a": "d", "d": "f" } ] }, { "name": "filter, not exists", "selector": "$[?!@.a]", "document": [ { "a": "a", "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "filter, not exists, data null", "selector": "$[?!@.a]", "document": [ { "a": null, "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "filter, non-singular query in comparison, slice", "selector": "$[?@[0:0]==0]", "invalid_selector": true }, { "name": "filter, 
non-singular query in comparison, all children", "selector": "$[?@[*]==0]", "invalid_selector": true }, { "name": "filter, non-singular query in comparison, descendants", "selector": "$[?@..a==0]", "invalid_selector": true }, { "name": "filter, non-singular query in comparison, combined", "selector": "$[?@.a[*].a==0]", "invalid_selector": true }, { "name": "filter, nested", "selector": "$[?@[?@>1]]", "document": [ [ 0 ], [ 0, 1 ], [ 0, 1, 2 ], [ 42 ] ], "result": [ [ 0, 1, 2 ], [ 42 ] ] }, { "name": "filter, relative non-singular query, index, equal", "selector": "$[?(@[0, 0]==42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, index, not equal", "selector": "$[?(@[0, 0]!=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, index, less-or-equal", "selector": "$[?(@[0, 0]<=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, name, equal", "selector": "$[?(@['a', 'a']==42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, name, not equal", "selector": "$[?(@['a', 'a']!=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, name, less-or-equal", "selector": "$[?(@['a', 'a']<=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, combined, equal", "selector": "$[?(@[0, '0']==42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, combined, not equal", "selector": "$[?(@[0, '0']!=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, combined, less-or-equal", "selector": "$[?(@[0, '0']<=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, wildcard, equal", "selector": "$[?(@.*==42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, wildcard, not equal", "selector": "$[?(@.*!=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, wildcard, less-or-equal", 
"selector": "$[?(@.*<=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, slice, equal", "selector": "$[?(@[0:0]==42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, slice, not equal", "selector": "$[?(@[0:0]!=42)]", "invalid_selector": true }, { "name": "filter, relative non-singular query, slice, less-or-equal", "selector": "$[?(@[0:0]<=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, index, equal", "selector": "$[?($[0, 0]==42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, index, not equal", "selector": "$[?($[0, 0]!=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, index, less-or-equal", "selector": "$[?($[0, 0]<=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, name, equal", "selector": "$[?($['a', 'a']==42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, name, not equal", "selector": "$[?($['a', 'a']!=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, name, less-or-equal", "selector": "$[?($['a', 'a']<=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, combined, equal", "selector": "$[?($[0, '0']==42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, combined, not equal", "selector": "$[?($[0, '0']!=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, combined, less-or-equal", "selector": "$[?($[0, '0']<=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, wildcard, equal", "selector": "$[?($.*==42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, wildcard, not equal", "selector": "$[?($.*!=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, wildcard, less-or-equal", "selector": "$[?($.*<=42)]", "invalid_selector": true }, { 
"name": "filter, absolute non-singular query, slice, equal", "selector": "$[?($[0:0]==42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, slice, not equal", "selector": "$[?($[0:0]!=42)]", "invalid_selector": true }, { "name": "filter, absolute non-singular query, slice, less-or-equal", "selector": "$[?($[0:0]<=42)]", "invalid_selector": true }, { "name": "filter, multiple selectors", "selector": "$[?@.a,?@.b]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ] }, { "name": "filter, multiple selectors, comparison", "selector": "$[?@.a=='b',?@.b=='x']", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "filter, multiple selectors, overlapping", "selector": "$[?@.a,?@.d]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" }, { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ] }, { "name": "filter, multiple selectors, filter and index", "selector": "$[?@.a,1]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ] }, { "name": "filter, multiple selectors, filter and wildcard", "selector": "$[?@.a,*]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" }, { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ] }, { "name": "filter, multiple selectors, filter and slice", "selector": "$[?@.a,1:]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" }, { "g": "h" } ], "result": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" }, { "g": "h" } ] }, { "name": "filter, multiple selectors, comparison filter, index and slice", "selector": "$[1, ?@.a=='b', 1:]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "b": "c", "d": "f" }, { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ] }, { "name": "filter, equals number, zero and 
negative zero", "selector": "$[?@.a==-0]", "document": [ { "a": 0, "d": "e" }, { "a": 0.1, "d": "f" }, { "a": "0", "d": "g" } ], "result": [ { "a": 0, "d": "e" } ] }, { "name": "filter, equals number, with and without decimal fraction", "selector": "$[?@.a==1.0]", "document": [ { "a": 1, "d": "e" }, { "a": 2, "d": "f" }, { "a": "1", "d": "g" } ], "result": [ { "a": 1, "d": "e" } ] }, { "name": "filter, equals number, exponent", "selector": "$[?@.a==1e2]", "document": [ { "a": 100, "d": "e" }, { "a": 100.1, "d": "f" }, { "a": "100", "d": "g" } ], "result": [ { "a": 100, "d": "e" } ] }, { "name": "filter, equals number, positive exponent", "selector": "$[?@.a==1e+2]", "document": [ { "a": 100, "d": "e" }, { "a": 100.1, "d": "f" }, { "a": "100", "d": "g" } ], "result": [ { "a": 100, "d": "e" } ] }, { "name": "filter, equals number, negative exponent", "selector": "$[?@.a==1e-2]", "document": [ { "a": 0.01, "d": "e" }, { "a": 0.02, "d": "f" }, { "a": "0.01", "d": "g" } ], "result": [ { "a": 0.01, "d": "e" } ] }, { "name": "filter, equals number, decimal fraction", "selector": "$[?@.a==1.1]", "document": [ { "a": 1.1, "d": "e" }, { "a": 1, "d": "f" }, { "a": "1.1", "d": "g" } ], "result": [ { "a": 1.1, "d": "e" } ] }, { "name": "filter, equals number, decimal fraction, no fractional digit", "selector": "$[?@.a==1.]", "invalid_selector": true }, { "name": "filter, equals number, decimal fraction, exponent", "selector": "$[?@.a==1.1e2]", "document": [ { "a": 110, "d": "e" }, { "a": 110.1, "d": "f" }, { "a": "110", "d": "g" } ], "result": [ { "a": 110, "d": "e" } ] }, { "name": "filter, equals number, decimal fraction, positive exponent", "selector": "$[?@.a==1.1e+2]", "document": [ { "a": 110, "d": "e" }, { "a": 110.1, "d": "f" }, { "a": "110", "d": "g" } ], "result": [ { "a": 110, "d": "e" } ] }, { "name": "filter, equals number, decimal fraction, negative exponent", "selector": "$[?@.a==1.1e-2]", "document": [ { "a": 0.011, "d": "e" }, { "a": 0.012, "d": "f" }, { "a": 
"0.011", "d": "g" } ], "result": [ { "a": 0.011, "d": "e" } ] }, { "name": "filter, equals, special nothing", "selector": "$.values[?length(@.a) == value($..c)]", "document": { "c": "cd", "values": [ { "a": "ab" }, { "c": "d" }, { "a": null } ] }, "result": [ { "c": "d" }, { "a": null } ] }, { "name": "index selector, first element", "selector": "$[0]", "document": [ "first", "second" ], "result": [ "first" ] }, { "name": "index selector, second element", "selector": "$[1]", "document": [ "first", "second" ], "result": [ "second" ] }, { "name": "index selector, out of bound", "selector": "$[2]", "document": [ "first", "second" ], "result": [] }, { "name": "index selector, overflowing index", "selector": "$[231584178474632390847141970017375815706539969331281128078915168015826259279872]", "invalid_selector": true }, { "name": "index selector, not actually an index, overflowing index leads into general text", "selector": "$[231584178474632390847141970017375815706539969331281128078915168SomeRandomText]", "invalid_selector": true }, { "name": "index selector, negative", "selector": "$[-1]", "document": [ "first", "second" ], "result": [ "second" ] }, { "name": "index selector, more negative", "selector": "$[-2]", "document": [ "first", "second" ], "result": [ "first" ] }, { "name": "index selector, negative out of bound", "selector": "$[-3]", "document": [ "first", "second" ], "result": [] }, { "name": "index selector, on object", "selector": "$[0]", "document": { "foo": 1 }, "result": [] }, { "name": "index selector, leading 0", "selector": "$[01]", "invalid_selector": true }, { "name": "index selector, leading -0", "selector": "$[-01]", "invalid_selector": true }, { "name": "name selector, double quotes", "selector": "$[\"a\"]", "document": { "a": "A", "b": "B" }, "result": [ "A" ] }, { "name": "name selector, double quotes, absent data", "selector": "$[\"c\"]", "document": { "a": "A", "b": "B" }, "result": [] }, { "name": "name selector, double quotes, array data", 
"selector": "$[\"a\"]", "document": [ "first", "second" ], "result": [] }, { "name": "name selector, double quotes, embedded U+0000", "selector": "$[\"\u0000\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0001", "selector": "$[\"\u0001\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0002", "selector": "$[\"\u0002\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0003", "selector": "$[\"\u0003\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0004", "selector": "$[\"\u0004\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0005", "selector": "$[\"\u0005\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0006", "selector": "$[\"\u0006\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0007", "selector": "$[\"\u0007\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0008", "selector": "$[\"\b\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0009", "selector": "$[\"\t\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000A", "selector": "$[\"\n\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000B", "selector": "$[\"\u000b\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000C", "selector": "$[\"\f\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000D", "selector": "$[\"\r\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000E", "selector": "$[\"\u000e\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+000F", "selector": "$[\"\u000f\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0010", "selector": "$[\"\u0010\"]", 
"invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0011", "selector": "$[\"\u0011\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0012", "selector": "$[\"\u0012\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0013", "selector": "$[\"\u0013\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0014", "selector": "$[\"\u0014\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0015", "selector": "$[\"\u0015\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0016", "selector": "$[\"\u0016\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0017", "selector": "$[\"\u0017\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0018", "selector": "$[\"\u0018\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0019", "selector": "$[\"\u0019\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001A", "selector": "$[\"\u001a\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001B", "selector": "$[\"\u001b\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001C", "selector": "$[\"\u001c\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001D", "selector": "$[\"\u001d\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001E", "selector": "$[\"\u001e\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+001F", "selector": "$[\"\u001f\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded U+0020", "selector": "$[\" \"]", "document": { " ": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped double quote", "selector": "$[\"\\\"\"]", 
"document": { "\"": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped reverse solidus", "selector": "$[\"\\\\\"]", "document": { "\\": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped solidus", "selector": "$[\"\\/\"]", "document": { "/": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped backspace", "selector": "$[\"\\b\"]", "document": { "\b": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped form feed", "selector": "$[\"\\f\"]", "document": { "\f": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped line feed", "selector": "$[\"\\n\"]", "document": { "\n": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped carriage return", "selector": "$[\"\\r\"]", "document": { "\r": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped tab", "selector": "$[\"\\t\"]", "document": { "\t": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped ☺, upper case hex", "selector": "$[\"\\u263A\"]", "document": { "☺": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, escaped ☺, lower case hex", "selector": "$[\"\\u263a\"]", "document": { "☺": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, surrogate pair 𝄞", "selector": "$[\"\\uD834\\uDD1E\"]", "document": { "𝄞": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, surrogate pair 😀", "selector": "$[\"\\uD83D\\uDE00\"]", "document": { "😀": "A" }, "result": [ "A" ] }, { "name": "name selector, double quotes, invalid escaped single quote", "selector": "$[\"\\'\"]", "invalid_selector": true }, { "name": "name selector, double quotes, embedded double quote", "selector": "$[\"\"\"]", "invalid_selector": true }, { "name": "name selector, double quotes, incomplete escape", "selector": "$[\"\\\"]", "invalid_selector": true }, { "name": "name selector, single quotes", "selector": "$['a']", 
"document": { "a": "A", "b": "B" }, "result": [ "A" ] }, { "name": "name selector, single quotes, absent data", "selector": "$['c']", "document": { "a": "A", "b": "B" }, "result": [] }, { "name": "name selector, single quotes, array data", "selector": "$['a']", "document": [ "first", "second" ], "result": [] }, { "name": "name selector, single quotes, embedded U+0000", "selector": "$['\u0000']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0001", "selector": "$['\u0001']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0002", "selector": "$['\u0002']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0003", "selector": "$['\u0003']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0004", "selector": "$['\u0004']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0005", "selector": "$['\u0005']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0006", "selector": "$['\u0006']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0007", "selector": "$['\u0007']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0008", "selector": "$['\b']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0009", "selector": "$['\t']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+000A", "selector": "$['\n']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+000B", "selector": "$['\u000b']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+000C", "selector": "$['\f']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+000D", "selector": "$['\r']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+000E", "selector": "$['\u000e']", "invalid_selector": true 
}, { "name": "name selector, single quotes, embedded U+000F", "selector": "$['\u000f']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0010", "selector": "$['\u0010']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0011", "selector": "$['\u0011']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0012", "selector": "$['\u0012']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0013", "selector": "$['\u0013']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0014", "selector": "$['\u0014']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0015", "selector": "$['\u0015']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0016", "selector": "$['\u0016']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0017", "selector": "$['\u0017']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0018", "selector": "$['\u0018']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0019", "selector": "$['\u0019']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001A", "selector": "$['\u001a']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001B", "selector": "$['\u001b']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001C", "selector": "$['\u001c']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001D", "selector": "$['\u001d']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001E", "selector": "$['\u001e']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+001F", "selector": "$['\u001f']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded U+0020", 
"selector": "$[' ']", "document": { " ": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped single quote", "selector": "$['\\'']", "document": { "'": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped reverse solidus", "selector": "$['\\\\']", "document": { "\\": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped solidus", "selector": "$['\\/']", "document": { "/": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped backspace", "selector": "$['\\b']", "document": { "\b": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped form feed", "selector": "$['\\f']", "document": { "\f": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped line feed", "selector": "$['\\n']", "document": { "\n": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped carriage return", "selector": "$['\\r']", "document": { "\r": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped tab", "selector": "$['\\t']", "document": { "\t": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped ☺, upper case hex", "selector": "$['\\u263A']", "document": { "☺": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, escaped ☺, lower case hex", "selector": "$['\\u263a']", "document": { "☺": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, surrogate pair 𝄞", "selector": "$['\\uD834\\uDD1E']", "document": { "𝄞": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, surrogate pair 😀", "selector": "$['\\uD83D\\uDE00']", "document": { "😀": "A" }, "result": [ "A" ] }, { "name": "name selector, single quotes, invalid escaped double quote", "selector": "$['\\\"']", "invalid_selector": true }, { "name": "name selector, single quotes, embedded single quote", "selector": "$[''']", "invalid_selector": true }, { "name": "name selector, single quotes, 
incomplete escape", "selector": "$['\\']", "invalid_selector": true }, { "name": "name selector, double quotes, empty", "selector": "$[\"\"]", "document": { "a": "A", "b": "B", "": "C" }, "result": [ "C" ] }, { "name": "name selector, single quotes, empty", "selector": "$['']", "document": { "a": "A", "b": "B", "": "C" }, "result": [ "C" ] }, { "name": "slice selector, slice selector", "selector": "$[1:3]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 2 ] }, { "name": "slice selector, slice selector with step", "selector": "$[1:6:2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 3, 5 ] }, { "name": "slice selector, slice selector with everything omitted, short form", "selector": "$[:]", "document": [ 0, 1, 2, 3 ], "result": [ 0, 1, 2, 3 ] }, { "name": "slice selector, slice selector with everything omitted, long form", "selector": "$[::]", "document": [ 0, 1, 2, 3 ], "result": [ 0, 1, 2, 3 ] }, { "name": "slice selector, slice selector with start omitted", "selector": "$[:2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 1 ] }, { "name": "slice selector, slice selector with start and end omitted", "selector": "$[::2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 2, 4, 6, 8 ] }, { "name": "slice selector, negative step with default start and end", "selector": "$[::-1]", "document": [ 0, 1, 2, 3 ], "result": [ 3, 2, 1, 0 ] }, { "name": "slice selector, negative step with default start", "selector": "$[:0:-1]", "document": [ 0, 1, 2, 3 ], "result": [ 3, 2, 1 ] }, { "name": "slice selector, negative step with default end", "selector": "$[2::-1]", "document": [ 0, 1, 2, 3 ], "result": [ 2, 1, 0 ] }, { "name": "slice selector, larger negative step", "selector": "$[::-2]", "document": [ 0, 1, 2, 3 ], "result": [ 3, 1 ] }, { "name": "slice selector, negative range with default step", "selector": "$[-1:-3]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [] }, { "name": "slice selector, negative range 
with negative step", "selector": "$[-1:-3:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 8 ] }, { "name": "slice selector, negative range with larger negative step", "selector": "$[-1:-6:-2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 7, 5 ] }, { "name": "slice selector, larger negative range with larger negative step", "selector": "$[-1:-7:-2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 7, 5 ] }, { "name": "slice selector, negative from, positive to", "selector": "$[-5:7]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 5, 6 ] }, { "name": "slice selector, negative from", "selector": "$[-2:]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 8, 9 ] }, { "name": "slice selector, positive from, negative to", "selector": "$[1:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1, 2, 3, 4, 5, 6, 7, 8 ] }, { "name": "slice selector, negative from, positive to, negative step", "selector": "$[-1:1:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 8, 7, 6, 5, 4, 3, 2 ] }, { "name": "slice selector, positive from, negative to, negative step", "selector": "$[7:-5:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 7, 6 ] }, { "name": "slice selector, too many colons", "selector": "$[1:2:3:4]", "invalid_selector": true }, { "name": "slice selector, non-integer array index", "selector": "$[1:2:a]", "invalid_selector": true }, { "name": "slice selector, zero step", "selector": "$[1:2:0]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [] }, { "name": "slice selector, empty range", "selector": "$[2:2]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [] }, { "name": "slice selector, slice selector with everything omitted with empty array", "selector": "$[:]", "document": [], "result": [] }, { "name": "slice selector, negative step with empty array", "selector": "$[::-1]", "document": [], "result": [] }, { "name": "slice selector, maximal range 
with positive step", "selector": "$[0:10]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] }, { "name": "slice selector, maximal range with negative step", "selector": "$[9:0:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 8, 7, 6, 5, 4, 3, 2, 1 ] }, { "name": "slice selector, excessively large to value", "selector": "$[2:113667776004]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 2, 3, 4, 5, 6, 7, 8, 9 ] }, { "name": "slice selector, excessively small from value", "selector": "$[-113667776004:1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 0 ] }, { "name": "slice selector, excessively large from value with negative step", "selector": "$[113667776004:0:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9, 8, 7, 6, 5, 4, 3, 2, 1 ] }, { "name": "slice selector, excessively small to value with negative step", "selector": "$[3:-113667776004:-1]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 3, 2, 1, 0 ] }, { "name": "slice selector, excessively large step", "selector": "$[1:10:113667776004]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 1 ] }, { "name": "slice selector, excessively small step", "selector": "$[-1:-10:-113667776004]", "document": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ], "result": [ 9 ] }, { "name": "slice selector, overflowing to value", "selector": "$[2:231584178474632390847141970017375815706539969331281128078915168015826259279872]", "invalid_selector": true }, { "name": "slice selector, underflowing from value", "selector": "$[-231584178474632390847141970017375815706539969331281128078915168015826259279872:1]", "invalid_selector": true }, { "name": "slice selector, overflowing from value with negative step", "selector": "$[231584178474632390847141970017375815706539969331281128078915168015826259279872:0:-1]", "invalid_selector": true }, { "name": "slice selector, underflowing to value with negative step", "selector": 
"$[3:-231584178474632390847141970017375815706539969331281128078915168015826259279872:-1]", "invalid_selector": true }, { "name": "slice selector, overflowing step", "selector": "$[1:10:231584178474632390847141970017375815706539969331281128078915168015826259279872]", "invalid_selector": true }, { "name": "slice selector, underflowing step", "selector": "$[-1:-10:-231584178474632390847141970017375815706539969331281128078915168015826259279872]", "invalid_selector": true }, { "name": "functions, count, count function", "selector": "$[?count(@..*)>2]", "document": [ { "a": [ 1, 2, 3 ] }, { "a": [ 1 ], "d": "f" }, { "a": 1, "d": "f" } ], "result": [ { "a": [ 1, 2, 3 ] }, { "a": [ 1 ], "d": "f" } ] }, { "name": "functions, count, single-node arg", "selector": "$[?count(@.a)>1]", "document": [ { "a": [ 1, 2, 3 ] }, { "a": [ 1 ], "d": "f" }, { "a": 1, "d": "f" } ], "result": [] }, { "name": "functions, count, multiple-selector arg", "selector": "$[?count(@['a','d'])>1]", "document": [ { "a": [ 1, 2, 3 ] }, { "a": [ 1 ], "d": "f" }, { "a": 1, "d": "f" } ], "result": [ { "a": [ 1 ], "d": "f" }, { "a": 1, "d": "f" } ] }, { "name": "functions, count, non-query arg, number", "selector": "$[?count(1)>2]", "invalid_selector": true }, { "name": "functions, count, non-query arg, string", "selector": "$[?count('string')>2]", "invalid_selector": true }, { "name": "functions, count, non-query arg, true", "selector": "$[?count(true)>2]", "invalid_selector": true }, { "name": "functions, count, non-query arg, false", "selector": "$[?count(false)>2]", "invalid_selector": true }, { "name": "functions, count, non-query arg, null", "selector": "$[?count(null)>2]", "invalid_selector": true }, { "name": "functions, count, result must be compared", "selector": "$[?count(@..*)]", "invalid_selector": true }, { "name": "functions, count, no params", "selector": "$[?count()==1]", "invalid_selector": true }, { "name": "functions, count, too many params", "selector": "$[?count(@.a,@.b)==1]", 
"invalid_selector": true }, { "name": "functions, length, string data", "selector": "$[?length(@.a)>=2]", "document": [ { "a": "ab" }, { "a": "d" } ], "result": [ { "a": "ab" } ] }, { "name": "functions, length, string data, unicode", "selector": "$[?length(@)==2]", "document": [ "☺", "☺☺", "☺☺☺", "ж", "жж", "жжж", "磨", "阿美", "形声字" ], "result": [ "☺☺", "жж", "阿美" ] }, { "name": "functions, length, array data", "selector": "$[?length(@.a)>=2]", "document": [ { "a": [ 1, 2, 3 ] }, { "a": [ 1 ] } ], "result": [ { "a": [ 1, 2, 3 ] } ] }, { "name": "functions, length, missing data", "selector": "$[?length(@.a)>=2]", "document": [ { "d": "f" } ], "result": [] }, { "name": "functions, length, number arg", "selector": "$[?length(1)>=2]", "document": [ { "d": "f" } ], "result": [] }, { "name": "functions, length, true arg", "selector": "$[?length(true)>=2]", "document": [ { "d": "f" } ], "result": [] }, { "name": "functions, length, false arg", "selector": "$[?length(false)>=2]", "document": [ { "d": "f" } ], "result": [] }, { "name": "functions, length, null arg", "selector": "$[?length(null)>=2]", "document": [ { "d": "f" } ], "result": [] }, { "name": "functions, length, result must be compared", "selector": "$[?length(@.a)]", "invalid_selector": true }, { "name": "functions, length, no params", "selector": "$[?length()==1]", "invalid_selector": true }, { "name": "functions, length, too many params", "selector": "$[?length(@.a,@.b)==1]", "invalid_selector": true }, { "name": "functions, length, non-singular query arg", "selector": "$[?length(@.*)<3]", "invalid_selector": true }, { "name": "functions, length, arg is a function expression", "selector": "$.values[?length(@.a)==length(value($..c))]", "document": { "c": "cd", "values": [ { "a": "ab" }, { "a": "d" } ] }, "result": [ { "a": "ab" } ] }, { "name": "functions, length, arg is special nothing", "selector": "$[?length(value(@.a))>0]", "document": [ { "a": "ab" }, { "c": "d" }, { "a": null } ], "result": [ { "a": "ab" 
} ] }, { "name": "functions, match, found match", "selector": "$[?match(@.a, 'a.*')]", "document": [ { "a": "ab" } ], "result": [ { "a": "ab" } ] }, { "name": "functions, match, double quotes", "selector": "$[?match(@.a, \"a.*\")]", "document": [ { "a": "ab" } ], "result": [ { "a": "ab" } ] }, { "name": "functions, match, regex from the document", "selector": "$.values[?match(@, $.regex)]", "document": { "regex": "b.?b", "values": [ "abc", "bcd", "bab", "bba", "bbab", "b", true, [], {} ] }, "result": [ "bab" ] }, { "name": "functions, match, don't select match", "selector": "$[?!match(@.a, 'a.*')]", "document": [ { "a": "ab" } ], "result": [] }, { "name": "functions, match, not a match", "selector": "$[?match(@.a, 'a.*')]", "document": [ { "a": "bc" } ], "result": [] }, { "name": "functions, match, select non-match", "selector": "$[?!match(@.a, 'a.*')]", "document": [ { "a": "bc" } ], "result": [ { "a": "bc" } ] }, { "name": "functions, match, non-string first arg", "selector": "$[?match(1, 'a.*')]", "document": [ { "a": "bc" } ], "result": [] }, { "name": "functions, match, non-string second arg", "selector": "$[?match(@.a, 1)]", "document": [ { "a": "bc" } ], "result": [] }, { "name": "functions, match, filter, match function, unicode char class, uppercase", "selector": "$[?match(@, '\\\\p{Lu}')]", "document": [ "ж", "Ж", "1", "жЖ", true, [], {} ], "result": [ "Ж" ] }, { "name": "functions, match, filter, match function, unicode char class negated, uppercase", "selector": "$[?match(@, '\\\\P{Lu}')]", "document": [ "ж", "Ж", "1", true, [], {} ], "result": [ "ж", "1" ] }, { "name": "functions, match, filter, match function, unicode, surrogate pair", "selector": "$[?match(@, 'a.b')]", "document": [ "a𐄁b", "ab", "1", true, [], {} ], "result": [ "a𐄁b" ] }, { "name": "functions, match, result cannot be compared", "selector": "$[?match(@.a, 'a.*')==true]", "invalid_selector": true }, { "name": "functions, match, too few params", "selector": "$[?match(@.a)==1]", 
"invalid_selector": true }, { "name": "functions, match, too many params", "selector": "$[?match(@.a,@.b,@.c)==1]", "invalid_selector": true }, { "name": "functions, match, arg is a function expression", "selector": "$.values[?match(@.a, value($..['regex']))]", "document": { "regex": "a.*", "values": [ { "a": "ab" }, { "a": "ba" } ] }, "result": [ { "a": "ab" } ] }, { "name": "functions, search, at the end", "selector": "$[?search(@.a, 'a.*')]", "document": [ { "a": "the end is ab" } ], "result": [ { "a": "the end is ab" } ] }, { "name": "functions, search, double quotes", "selector": "$[?search(@.a, \"a.*\")]", "document": [ { "a": "the end is ab" } ], "result": [ { "a": "the end is ab" } ] }, { "name": "functions, search, at the start", "selector": "$[?search(@.a, 'a.*')]", "document": [ { "a": "ab is at the start" } ], "result": [ { "a": "ab is at the start" } ] }, { "name": "functions, search, in the middle", "selector": "$[?search(@.a, 'a.*')]", "document": [ { "a": "contains two matches" } ], "result": [ { "a": "contains two matches" } ] }, { "name": "functions, search, regex from the document", "selector": "$.values[?search(@, $.regex)]", "document": { "regex": "b.?b", "values": [ "abc", "bcd", "bab", "bba", "bbab", "b", true, [], {} ] }, "result": [ "bab", "bba", "bbab" ] }, { "name": "functions, search, don't select match", "selector": "$[?!search(@.a, 'a.*')]", "document": [ { "a": "contains two matches" } ], "result": [] }, { "name": "functions, search, not a match", "selector": "$[?search(@.a, 'a.*')]", "document": [ { "a": "bc" } ], "result": [] }, { "name": "functions, search, select non-match", "selector": "$[?!search(@.a, 'a.*')]", "document": [ { "a": "bc" } ], "result": [ { "a": "bc" } ] }, { "name": "functions, search, non-string first arg", "selector": "$[?search(1, 'a.*')]", "document": [ { "a": "bc" } ], "result": [] }, { "name": "functions, search, non-string second arg", "selector": "$[?search(@.a, 1)]", "document": [ { "a": "bc" } ], 
"result": [] }, { "name": "functions, search, filter, search function, unicode char class, uppercase", "selector": "$[?search(@, '\\\\p{Lu}')]", "document": [ "ж", "Ж", "1", "жЖ", true, [], {} ], "result": [ "Ж", "жЖ" ] }, { "name": "functions, search, filter, search function, unicode char class negated, uppercase", "selector": "$[?search(@, '\\\\P{Lu}')]", "document": [ "ж", "Ж", "1", true, [], {} ], "result": [ "ж", "1" ] }, { "name": "functions, search, filter, search function, unicode, surrogate pair", "selector": "$[?search(@, 'a.b')]", "document": [ "a𐄁bc", "abc", "1", true, [], {} ], "result": [ "a𐄁bc" ] }, { "name": "functions, search, result cannot be compared", "selector": "$[?search(@.a, 'a.*')==true]", "invalid_selector": true }, { "name": "functions, search, too few params", "selector": "$[?search(@.a)]", "invalid_selector": true }, { "name": "functions, search, too many params", "selector": "$[?search(@.a,@.b,@.c)]", "invalid_selector": true }, { "name": "functions, search, arg is a function expression", "selector": "$.values[?search(@, value($..['regex']))]", "document": { "regex": "b.?b", "values": [ "abc", "bcd", "bab", "bba", "bbab", "b", true, [], {} ] }, "result": [ "bab", "bba", "bbab" ] }, { "name": "functions, value, single-value nodelist", "selector": "$[?value(@.*)==4]", "document": [ [ 4 ], { "foo": 4 }, [ 5 ], { "foo": 5 }, 4 ], "result": [ [ 4 ], { "foo": 4 } ] }, { "name": "functions, value, multi-value nodelist", "selector": "$[?value(@.*)==4]", "document": [ [ 4, 4 ], { "foo": 4, "bar": 4 } ], "result": [] }, { "name": "functions, value, too few params", "selector": "$[?value()==4]", "invalid_selector": true }, { "name": "functions, value, too many params", "selector": "$[?value(@.a,@.b)==4]", "invalid_selector": true }, { "name": "functions, value, result must be compared", "selector": "$[?value(@.a)]", "invalid_selector": true }, { "name": "whitespace, filter, space between question mark and expression", "selector": "$[? 
@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, newline between question mark and expression", "selector": "$[?\n@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, tab between question mark and expression", "selector": "$[?\t@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, return between question mark and expression", "selector": "$[?\r@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, space between question mark and parenthesized expression", "selector": "$[? (@.a)]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, newline between question mark and parenthesized expression", "selector": "$[?\n(@.a)]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, tab between question mark and parenthesized expression", "selector": "$[?\t(@.a)]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, return between question mark and parenthesized expression", "selector": "$[?\r(@.a)]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, space between parenthesized expression and bracket", "selector": "$[?(@.a) ]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, newline between parenthesized expression and bracket", "selector": "$[?(@.a)\n]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } 
], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, tab between parenthesized expression and bracket", "selector": "$[?(@.a)\t]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, return between parenthesized expression and bracket", "selector": "$[?(@.a)\r]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, space between bracket and question mark", "selector": "$[ ?@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, newline between bracket and question mark", "selector": "$[\n?@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, tab between bracket and question mark", "selector": "$[\t?@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, filter, return between bracket and question mark", "selector": "$[\r?@.a]", "document": [ { "a": "b", "d": "e" }, { "b": "c", "d": "f" } ], "result": [ { "a": "b", "d": "e" } ] }, { "name": "whitespace, functions, space between function name and parenthesis", "selector": "$[?count (@.*)==1]", "invalid_selector": true }, { "name": "whitespace, functions, newline between function name and parenthesis", "selector": "$[?count\n(@.*)==1]", "invalid_selector": true }, { "name": "whitespace, functions, tab between function name and parenthesis", "selector": "$[?count\t(@.*)==1]", "invalid_selector": true }, { "name": "whitespace, functions, return between function name and parenthesis", "selector": "$[?count\r(@.*)==1]", "invalid_selector": true }, { "name": "whitespace, functions, space between parenthesis and arg", "selector": "$[?count( @.*)==1]", "document": [ { "a": 1 }, { "b": 2 }, { 
"a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, newline between parenthesis and arg", "selector": "$[?count(\n@.*)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, tab between parenthesis and arg", "selector": "$[?count(\t@.*)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, return between parenthesis and arg", "selector": "$[?count(\r@.*)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, space between arg and comma", "selector": "$[?search(@ ,'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, newline between arg and comma", "selector": "$[?search(@\n,'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, tab between arg and comma", "selector": "$[?search(@\t,'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, return between arg and comma", "selector": "$[?search(@\r,'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, space between comma and arg", "selector": "$[?search(@, '[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, newline between comma and arg", "selector": "$[?search(@,\n'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, tab between comma and arg", "selector": "$[?search(@,\t'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, return between comma and arg", "selector": "$[?search(@,\r'[a-z]+')]", "document": [ "foo", "123" ], "result": [ "foo" ] }, { "name": "whitespace, functions, space between arg and 
parenthesis", "selector": "$[?count(@.* )==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, newline between arg and parenthesis", "selector": "$[?count(@.*\n)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, tab between arg and parenthesis", "selector": "$[?count(@.*\t)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, return between arg and parenthesis", "selector": "$[?count(@.*\r)==1]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, functions, spaces in a relative singular selector", "selector": "$[?length(@ .a .b) == 3]", "document": [ { "a": { "b": "foo" } }, {} ], "result": [ { "a": { "b": "foo" } } ] }, { "name": "whitespace, functions, newlines in a relative singular selector", "selector": "$[?length(@\n.a\n.b) == 3]", "document": [ { "a": { "b": "foo" } }, {} ], "result": [ { "a": { "b": "foo" } } ] }, { "name": "whitespace, functions, tabs in a relative singular selector", "selector": "$[?length(@\t.a\t.b) == 3]", "document": [ { "a": { "b": "foo" } }, {} ], "result": [ { "a": { "b": "foo" } } ] }, { "name": "whitespace, functions, returns in a relative singular selector", "selector": "$[?length(@\r.a\r.b) == 3]", "document": [ { "a": { "b": "foo" } }, {} ], "result": [ { "a": { "b": "foo" } } ] }, { "name": "whitespace, functions, spaces in an absolute singular selector", "selector": "$..[?length(@)==length($ [0] .a)]", "document": [ { "a": "foo" }, {} ], "result": [ "foo" ] }, { "name": "whitespace, functions, newlines in an absolute singular selector", "selector": "$..[?length(@)==length($\n[0]\n.a)]", "document": [ { "a": "foo" }, {} ], "result": [ "foo" ] }, { "name": "whitespace, functions, tabs in an 
absolute singular selector", "selector": "$..[?length(@)==length($\t[0]\t.a)]", "document": [ { "a": "foo" }, {} ], "result": [ "foo" ] }, { "name": "whitespace, functions, returns in an absolute singular selector", "selector": "$..[?length(@)==length($\r[0]\r.a)]", "document": [ { "a": "foo" }, {} ], "result": [ "foo" ] }, { "name": "whitespace, operators, space before ||", "selector": "$[?@.a ||@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, newline before ||", "selector": "$[?@.a\n||@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, tab before ||", "selector": "$[?@.a\t||@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, return before ||", "selector": "$[?@.a\r||@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, space after ||", "selector": "$[?@.a|| @.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, newline after ||", "selector": "$[?@.a||\n@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, tab after ||", "selector": "$[?@.a||\t@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, return after ||", "selector": "$[?@.a||\r@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "c": 3 } ], "result": [ { "a": 1 }, { "b": 2 } ] }, { "name": "whitespace, operators, space before &&", "selector": "$[?@.a &&@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before &&", "selector": "$[?@.a\n&&@.b]", "document": [ { "a": 1 
}, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before &&", "selector": "$[?@.a\t&&@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before &&", "selector": "$[?@.a\r&&@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space after &&", "selector": "$[?@.a&& @.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after &&", "selector": "$[?@.a&&\n@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab after &&", "selector": "$[?@.a&&\t@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after &&", "selector": "$[?@.a&&\r@.b]", "document": [ { "a": 1 }, { "b": 2 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space before ==", "selector": "$[?@.a ==@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, newline before ==", "selector": "$[?@.a\n==@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, tab before ==", "selector": "$[?@.a\t==@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, return before ==", "selector": "$[?@.a\r==@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, space after ==", "selector": "$[?@.a== @.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] 
}, { "name": "whitespace, operators, newline after ==", "selector": "$[?@.a==\n@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, tab after ==", "selector": "$[?@.a==\t@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, return after ==", "selector": "$[?@.a==\r@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 1 } ] }, { "name": "whitespace, operators, space before !=", "selector": "$[?@.a !=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before !=", "selector": "$[?@.a\n!=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before !=", "selector": "$[?@.a\t!=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before !=", "selector": "$[?@.a\r!=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space after !=", "selector": "$[?@.a!= @.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after !=", "selector": "$[?@.a!=\n@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab after !=", "selector": "$[?@.a!=\t@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after !=", "selector": "$[?@.a!=\r@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space before <", "selector": "$[?@.a <@.b]", 
"document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before <", "selector": "$[?@.a\n<@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before <", "selector": "$[?@.a\t<@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before <", "selector": "$[?@.a\r<@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space after <", "selector": "$[?@.a< @.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after <", "selector": "$[?@.a<\n@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab after <", "selector": "$[?@.a<\t@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after <", "selector": "$[?@.a<\r@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space before >", "selector": "$[?@.b >@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before >", "selector": "$[?@.b\n>@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before >", "selector": "$[?@.b\t>@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before >", "selector": "$[?@.b\r>@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": 
"whitespace, operators, space after >", "selector": "$[?@.b> @.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after >", "selector": "$[?@.b>\n@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab after >", "selector": "$[?@.b>\t@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after >", "selector": "$[?@.b>\r@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ], "result": [ { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space before <=", "selector": "$[?@.a <=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before <=", "selector": "$[?@.a\n<=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before <=", "selector": "$[?@.a\t<=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before <=", "selector": "$[?@.a\r<=@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space after <=", "selector": "$[?@.a<= @.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after <=", "selector": "$[?@.a<=\n@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab 
after <=", "selector": "$[?@.a<=\t@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after <=", "selector": "$[?@.a<=\r@.b]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space before >=", "selector": "$[?@.b >=@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline before >=", "selector": "$[?@.b\n>=@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab before >=", "selector": "$[?@.b\t>=@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return before >=", "selector": "$[?@.b\r>=@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space after >=", "selector": "$[?@.b>= @.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, newline after >=", "selector": "$[?@.b>=\n@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, tab after >=", "selector": "$[?@.b>=\t@.a]", "document": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, return after >=", "selector": "$[?@.b>=\r@.a]", "document": [ { "a": 1, "b": 1 }, { 
"a": 1, "b": 2 }, { "a": 2, "b": 1 } ], "result": [ { "a": 1, "b": 1 }, { "a": 1, "b": 2 } ] }, { "name": "whitespace, operators, space between logical not and test expression", "selector": "$[?! @.a]", "document": [ { "a": "a", "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "whitespace, operators, newline between logical not and test expression", "selector": "$[?!\n@.a]", "document": [ { "a": "a", "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "whitespace, operators, tab between logical not and test expression", "selector": "$[?!\t@.a]", "document": [ { "a": "a", "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "whitespace, operators, return between logical not and test expression", "selector": "$[?!\r@.a]", "document": [ { "a": "a", "d": "e" }, { "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "d": "f" } ] }, { "name": "whitespace, operators, space between logical not and parenthesized expression", "selector": "$[?! 
(@.a=='b')]", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "a", "d": "e" }, { "a": "d", "d": "f" } ] }, { "name": "whitespace, operators, newline between logical not and parenthesized expression", "selector": "$[?!\n(@.a=='b')]", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "a", "d": "e" }, { "a": "d", "d": "f" } ] }, { "name": "whitespace, operators, tab between logical not and parenthesized expression", "selector": "$[?!\t(@.a=='b')]", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "a", "d": "e" }, { "a": "d", "d": "f" } ] }, { "name": "whitespace, operators, return between logical not and parenthesized expression", "selector": "$[?!\r(@.a=='b')]", "document": [ { "a": "a", "d": "e" }, { "a": "b", "d": "f" }, { "a": "d", "d": "f" } ], "result": [ { "a": "a", "d": "e" }, { "a": "d", "d": "f" } ] }, { "name": "whitespace, selectors, space between root and bracket", "selector": "$ ['a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, newline between root and bracket", "selector": "$\n['a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, tab between root and bracket", "selector": "$\t['a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, return between root and bracket", "selector": "$\r['a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, space between bracket and bracket", "selector": "$['a'] ['b']", "document": { "a": { "b": "ab" } }, "result": [ "ab" ] }, { "name": "whitespace, selectors, newline between root and bracket", "selector": "$['a'] \n['b']", "document": { "a": { "b": "ab" } }, "result": [ "ab" ] }, { "name": "whitespace, selectors, tab between root and bracket", "selector": "$['a'] \t['b']", "document": { "a": { "b": "ab" } }, 
"result": [ "ab" ] }, { "name": "whitespace, selectors, return between root and bracket", "selector": "$['a'] \r['b']", "document": { "a": { "b": "ab" } }, "result": [ "ab" ] }, { "name": "whitespace, selectors, space between root and dot", "selector": "$ .a", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, newline between root and dot", "selector": "$\n.a", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, tab between root and dot", "selector": "$\t.a", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, return between root and dot", "selector": "$\r.a", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, space between dot and name", "selector": "$. a", "invalid_selector": true }, { "name": "whitespace, selectors, newline between dot and name", "selector": "$.\na", "invalid_selector": true }, { "name": "whitespace, selectors, tab between dot and name", "selector": "$.\ta", "invalid_selector": true }, { "name": "whitespace, selectors, return between dot and name", "selector": "$.\ra", "invalid_selector": true }, { "name": "whitespace, selectors, space between recursive descent and name", "selector": "$.. 
a", "invalid_selector": true }, { "name": "whitespace, selectors, newline between recursive descent and name", "selector": "$..\na", "invalid_selector": true }, { "name": "whitespace, selectors, tab between recursive descent and name", "selector": "$..\ta", "invalid_selector": true }, { "name": "whitespace, selectors, return between recursive descent and name", "selector": "$..\ra", "invalid_selector": true }, { "name": "whitespace, selectors, space between bracket and selector", "selector": "$[ 'a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, newline between bracket and selector", "selector": "$[\n'a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, tab between bracket and selector", "selector": "$[\t'a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, return between bracket and selector", "selector": "$[\r'a']", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, space between selector and bracket", "selector": "$['a' ]", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, newline between selector and bracket", "selector": "$['a'\n]", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, tab between selector and bracket", "selector": "$['a'\t]", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, return between selector and bracket", "selector": "$['a'\r]", "document": { "a": "ab" }, "result": [ "ab" ] }, { "name": "whitespace, selectors, space between selector and comma", "selector": "$['a' ,'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, newline between selector and comma", "selector": "$['a'\n,'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, tab between selector and comma", "selector": "$['a'\t,'b']", "document": 
{ "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, return between selector and comma", "selector": "$['a'\r,'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, space between comma and selector", "selector": "$['a', 'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, newline between comma and selector", "selector": "$['a',\n'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, tab between comma and selector", "selector": "$['a',\t'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, selectors, return between comma and selector", "selector": "$['a',\r'b']", "document": { "a": "ab", "b": "bc" }, "result": [ "ab", "bc" ] }, { "name": "whitespace, slice, space between start and colon", "selector": "$[1 :5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, newline between start and colon", "selector": "$[1\n:5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, tab between start and colon", "selector": "$[1\t:5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, return between start and colon", "selector": "$[1\r:5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, space between colon and end", "selector": "$[1: 5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, newline between colon and end", "selector": "$[1:\n5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, tab between colon and end", "selector": "$[1:\t5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, return between colon and end", "selector": "$[1:\r5:2]", "document": [ 1, 2, 3, 4, 5, 6 ], 
"result": [ 2, 4 ] }, { "name": "whitespace, slice, space between end and colon", "selector": "$[1:5 :2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, newline between end and colon", "selector": "$[1:5\n:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, tab between end and colon", "selector": "$[1:5\t:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, return between end and colon", "selector": "$[1:5\r:2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, space between colon and step", "selector": "$[1:5: 2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, newline between colon and step", "selector": "$[1:5:\n2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, tab between colon and step", "selector": "$[1:5:\t2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] }, { "name": "whitespace, slice, return between colon and step", "selector": "$[1:5:\r2]", "document": [ 1, 2, 3, 4, 5, 6 ], "result": [ 2, 4 ] } ] } ================================================ FILE: tests/fixtures/optimizer/annotate_functions.sql ================================================ -------------------------------------- -- Dialect -------------------------------------- ABS(1); INT; ABS(1.5); DOUBLE; GREATEST(1, 2, 3); INT; GREATEST(1, 2.5, 3); DOUBLE; LEAST(1, 2, 3); INT; LEAST(1, 2.5, 3); DOUBLE; CURRENT_TIME(); TIME; LOCALTIME(); TIME; TIME_ADD(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR); TIME; TIME_SUB(CAST('09:05:03' AS TIME), INTERVAL 2 HOUR); TIME; SORT_ARRAY(ARRAY(tbl.str_col)); ARRAY; SORT_ARRAY(ARRAY(tbl.double_col)); ARRAY; SORT_ARRAY(ARRAY(tbl.bigint_col)); ARRAY; tbl.bigint || tbl.str_col; VARCHAR; tbl.str_col || tbl.bigint; VARCHAR; ARRAY_REVERSE(['a', 'b']); ARRAY; ARRAY_REVERSE([1, 1.5]); ARRAY; ARRAY_SLICE([1, 1.5], 1, 2); ARRAY; 
FROM_BASE32(tbl.str_col); BINARY; FROM_BASE64(tbl.str_col); BINARY; ANY_VALUE(tbl.bool_col); BOOLEAN; ANY_VALUE(tbl.bigint_col); BIGINT; ANY_VALUE(tbl.date_col); DATE; ANY_VALUE(tbl.str_col); STRING; ANY_VALUE(tbl.array_col); ARRAY; CURRENT_SCHEMA(); VARCHAR; MONTHNAME(tbl.date_col); VARCHAR; CHR(65); VARCHAR; COUNTIF(tbl.bigint_col > 1); BIGINT; LAST_VALUE(tbl.bigint_col) OVER (ORDER BY tbl.bigint_col); BIGINT; TO_BASE32(tbl.bytes_col); VARCHAR; TO_BASE64(tbl.bytes_col); VARCHAR; UNIX_SECONDS(tbl.timestamp_col); BIGINT; STARTS_WITH(tbl.str_col, prefix); BOOLEAN; ENDS_WITH(tbl.str_col, suffix); BOOLEAN; ASCII('A'); INT; UNICODE('bcd'); INT; LAST_DAY(tbl.timestamp_col); DATE; # dialect: snowflake NEXT_DAY(tbl.date_col, 'MONDAY'); DATE; JUSTIFY_DAYS(INTERVAL '1' DAY); INTERVAL; JUSTIFY_HOURS(INTERVAL '1' HOUR); INTERVAL; JUSTIFY_INTERVAL(INTERVAL '1' HOUR); INTERVAL; UNIX_MICROS(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP)); BIGINT; UNIX_MILLIS(CAST('2008-12-25 15:30:00+00' AS TIMESTAMP)); BIGINT; KURTOSIS(tbl.double_col); DOUBLE; KURTOSIS(tbl.int_col); DOUBLE; LENGTH(tbl.str_col); INT; LENGTH(tbl.bin_col); INT; DAYNAME(tbl.date_col); VARCHAR; CBRT(tbl.int_col); DOUBLE; CBRT(tbl.double_col); DOUBLE; ISINF(tbl.float_col); BOOLEAN; ISNAN(tbl.float_col); BOOLEAN; CURRENT_CATALOG(); VARCHAR; CURRENT_USER(); VARCHAR; SESSION_USER(); VARCHAR; RAND(); DOUBLE; DEGREES(tbl.double_col); DOUBLE; # dialect: snowflake TO_BINARY('test'); BINARY; # dialect: snowflake TO_BINARY('test', 'HEX'); BINARY; ARRAY_CONTAINS(tbl.array_col, '1'); BOOLEAN; -------------------------------------- -- Spark2 / Spark3 / Databricks -------------------------------------- # dialect: spark2, spark, databricks SUBSTRING(tbl.str_col, 0, 0); STRING; # dialect: spark2, spark, databricks SUBSTRING(tbl.bin_col, 0, 0); BINARY; # dialect: spark2, spark, databricks CONCAT(tbl.bin_col, tbl.bin_col); BINARY; # dialect: spark2, spark, databricks CONCAT(tbl.bin_col, tbl.str_col); STRING; # dialect: spark2, spark, 
databricks CONCAT(tbl.str_col, tbl.bin_col); STRING; # dialect: spark2, spark, databricks CONCAT(tbl.str_col, tbl.str_col); STRING; # dialect: spark2, spark, databricks CONCAT(tbl.str_col, unknown); STRING; # dialect: spark2, spark, databricks CONCAT(tbl.bin_col, unknown); UNKNOWN; # dialect: spark2, spark, databricks CONCAT(unknown, unknown); UNKNOWN; # dialect: spark2, spark, databricks LPAD(tbl.bin_col, 1, tbl.bin_col); BINARY; # dialect: spark2, spark, databricks RPAD(tbl.bin_col, 1, tbl.bin_col); BINARY; # dialect: spark2, spark, databricks LPAD(tbl.bin_col, 1, tbl.str_col); STRING; # dialect: spark2, spark, databricks RPAD(tbl.bin_col, 1, tbl.str_col); STRING; # dialect: spark2, spark, databricks LPAD(tbl.str_col, 1, tbl.bin_col); STRING; # dialect: spark2, spark, databricks RPAD(tbl.str_col, 1, tbl.bin_col); STRING; # dialect: spark2, spark, databricks LPAD(tbl.str_col, 1, tbl.str_col); STRING; # dialect: spark2, spark, databricks RPAD(tbl.str_col, 1, tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks IF(cond, tbl.double_col, tbl.bigint_col); DOUBLE; # dialect: hive, spark2, spark, databricks IF(cond, tbl.bigint_col, tbl.double_col); DOUBLE; # dialect: hive, spark2, spark IF(cond, tbl.double_col, tbl.str_col); STRING; # dialect: hive, spark2, spark IF(cond, tbl.str_col, tbl.double_col); STRING; # dialect: databricks IF(cond, tbl.str_col, tbl.double_col); DOUBLE; # dialect: databricks IF(cond, tbl.double_col, tbl.str_col); DOUBLE; # dialect: hive, spark2, spark IF(cond, tbl.date_col, tbl.str_col); STRING; # dialect: hive, spark2, spark IF(cond, tbl.str_col, tbl.date_col); STRING; # dialect: databricks IF(cond, tbl.date_col, tbl.str_col); DATE; # dialect: databricks IF(cond, tbl.str_col, tbl.date_col); DATE; # dialect: hive, spark2, spark, databricks IF(cond, tbl.date_col, tbl.timestamp_col); TIMESTAMP; # dialect: hive, spark2, spark, databricks IF(cond, tbl.timestamp_col, tbl.date_col); TIMESTAMP; # dialect: hive, spark2, spark, databricks 
IF(cond, NULL, tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks IF(cond, tbl.str_col, NULL); STRING; # dialect: hive, spark2, spark COALESCE(tbl.str_col, tbl.date_col, tbl.bigint_col); STRING; # dialect: hive, spark2, spark COALESCE(tbl.date_col, tbl.str_col, tbl.bigint_col); STRING; # dialect: hive, spark2, spark COALESCE(tbl.date_col, tbl.bigint_col, tbl.str_col); STRING; # dialect: hive, spark2, spark COALESCE(tbl.str_col, tbl.date_col, tbl.bigint_col); STRING; # dialect: hive, spark2, spark COALESCE(tbl.date_col, tbl.str_col, tbl.bigint_col); STRING; # dialect: hive, spark2, spark COALESCE(tbl.date_col, NULL, tbl.bigint_col, tbl.str_col); STRING; # dialect: databricks COALESCE(tbl.str_col, tbl.bigint_col); BIGINT; # dialect: databricks COALESCE(tbl.bigint_col, tbl.str_col); BIGINT; # dialect: databricks COALESCE(tbl.str_col, NULL, tbl.bigint_col); BIGINT; # dialect: databricks COALESCE(tbl.bigint_col, NULL, tbl.str_col); BIGINT; # dialect: databricks COALESCE(tbl.bool_col, tbl.str_col); BOOLEAN; # dialect: hive, spark2, spark COALESCE(tbl.interval_col, tbl.str_col); STRING; # dialect: databricks COALESCE(tbl.interval_col, tbl.str_col); INTERVAL; # dialect: databricks COALESCE(tbl.bin_col, tbl.str_col); BINARY; # dialect: spark, databricks LOCALTIMESTAMP(); TIMESTAMPNTZ; # dialect: hive, spark2, spark, databricks ENCODE(tbl.str_col, tbl.str_col); BINARY; # dialect: hive, spark2, spark, databricks ENCODE(tbl.bin_col, tbl.bin_col); BINARY; # dialect: spark, databricks CURRENT_TIMEZONE(); STRING; # dialect: hive, spark2, spark, databricks UNIX_TIMESTAMP(); BIGINT; # dialect: hive, spark2, spark, databricks ACOS(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks ACOS(tbl.double_col); DOUBLE; # dialect: spark2, spark, databricks ATAN2(tbl.int_col, tbl.int_col); DOUBLE; # dialect: spark2, spark, databricks ATAN2(tbl.double_col, tbl.double_col); DOUBLE; # dialect: spark2, spark, databricks ATAN2(tbl.double_col, tbl.int_col); DOUBLE; # 
dialect: spark, databricks ACOSH(tbl.double_col); DOUBLE; # dialect: spark, databricks ACOSH(tbl.int_col); DOUBLE; # dialect: spark2, spark, databricks COT(tbl.int_col); DOUBLE; # dialect: spark2, spark, databricks COT(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks COSH(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks COSH(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks SINH(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks SINH(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks TANH(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks TANH(tbl.int_col); DOUBLE; # dialect: spark, databricks TO_BINARY(tbl.str_col, tbl.str_col); BINARY; # dialect: spark, databricks TO_BINARY(tbl.int_col, tbl.str_col); BINARY; # dialect: spark, databricks TO_BINARY(tbl.double_col, tbl.str_col); BINARY; # dialect: hive, spark2, spark, databricks SHA(tbl.str_col); VARCHAR; # dialect: hive, spark2, spark, databricks SHA1(tbl.str_col); VARCHAR; # dialect: hive, spark2, spark, databricks SHA2(tbl.str_col, tbl.int_col); VARCHAR; # dialect: hive, spark2, spark, databricks SPACE(tbl.int_col); VARCHAR; # dialect: spark2, spark, databricks RANDN(); DOUBLE; # dialect: spark2, spark, databricks BIT_LENGTH(tbl.str_col); INT; # dialect: hive, spark2, spark, databricks ASIN(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks ASIN(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks SIN(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks SIN(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks COS(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks COS(tbl.double_col); DOUBLE; # dialect: spark, databricks ASINH(tbl.int_col); DOUBLE; # dialect: spark, databricks ASINH(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks ATAN(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks ATAN(tbl.double_col); 
DOUBLE; # dialect: hive, spark2, spark, databricks TAN(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks TAN(tbl.double_col); DOUBLE; # dialect: spark, databricks ATANH(tbl.double_col); DOUBLE; # dialect: spark, databricks ATANH(tbl.int_col); DOUBLE; # dialect: spark, databricks SEC(tbl.int_col); DOUBLE; # dialect: spark, databricks SEC(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks CORR(tbl.double_col, tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks CORR(tbl.int_col, tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks CBRT(tbl.double_col); DOUBLE; # dialect: hive, spark2, spark, databricks CBRT(tbl.int_col); DOUBLE; # dialect: hive, spark2, spark, databricks CURRENT_CATALOG(); STRING; # dialect: hive, spark2, spark, databricks CURRENT_DATABASE(); STRING; # dialect: spark, databricks DATE_FROM_UNIX_DATE(tbl.int_col); DATE; # dialect: hive, spark2, spark, databricks MONTHS_BETWEEN(tbl.timestamp_col, tbl.timestamp_col); DOUBLE; # dialect: hive, spark2, spark, databricks MONTHS_BETWEEN(tbl.timestamp_col, tbl.timestamp_col, tbl.bool_col); DOUBLE; # dialect: hive, spark2, spark, databricks MONTH(tbl.date_col); INT; # dialect: spark, databricks MONTHNAME(tbl.date_col); STRING; # dialect: hive, spark, databricks CURRENT_SCHEMA(); STRING; # dialect: hive, spark2, spark, databricks CURRENT_USER(); STRING; # dialect: hive, spark2, spark, databricks UNHEX(tbl.str_col); BINARY; # dialect: hive, spark2, spark, databricks HEX(tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks HEX(tbl.int_col); STRING; # dialect: hive, spark2, spark, databricks SOUNDEX(tbl.str_col); STRING; # dialect: spark, databricks SESSION_USER(); STRING; # dialect: hive, spark2, spark, databricks FACTORIAL(tbl.int_col); BIGINT; # dialect: spark, databricks ARRAY_SIZE(tbl.array_col); INT; # dialect: hive, spark2, spark, databricks QUARTER(tbl.date_col); INT; # dialect: hive, spark2, spark, databricks SECOND(tbl.timestamp_col); 
INT; # dialect: hive, spark2, spark, databricks MD5(tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks HOUR(tbl.timestamp_col); INT; # dialect: spark, databricks BITMAP_COUNT(tbl.bin_col); BIGINT; # dialect: spark, databricks RANDSTR(tbl.int_col); STRING; # dialect: spark, databricks RANDSTR(tbl.int_col, tbl.int_col); STRING; # dialect: spark, databricks COLLATION(tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks REPEAT(tbl.str_col, tbl.int_col); STRING; # dialect: spark2, spark, databricks FORMAT_STRING(tbl.str_col, tbl.int_col, tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks REPLACE(tbl.str_col, tbl.str_col, tbl.str_col); STRING; # dialect: spark, databricks OVERLAY(tbl.str_col PLACING tbl.str_col FROM tbl.int_col); STRING; # dialect: spark, databricks OVERLAY(tbl.bin_col PLACING tbl.bin_col FROM tbl.int_col FOR tbl.int_col); BINARY; # dialect: spark, databricks UNIX_DATE(tbl.date_col); INT; # dialect: hive, spark2, spark, databricks REVERSE(tbl.str_col); STRING; # dialect: hive, spark2, spark, databricks REVERSE(tbl.array_col); ARRAY; # dialect: spark2, spark, databricks RIGHT(tbl.str_col, tbl.int_col); STRING; # dialect: spark2, spark, databricks NEXT_DAY(tbl.date_col, tbl.str_col); DATE; # dialect: hive NEXT_DAY(tbl.date_col, tbl.str_col); VARCHAR; # dialect: hive, spark2, spark, databricks DAYOFWEEK(tbl.date_col); INT; # dialect: hive, spark2, spark, databricks DAYOFMONTH(tbl.date_col); INT; # dialect: hive, spark2, spark, databricks TRANSLATE(tbl.str_col, tbl.str_col, tbl.str_col); STRING; # dialect: spark, databricks ARRAY_COMPACT(tbl.array_col); ARRAY; # dialect: spark, databricks ARRAY_COMPACT(array(1, 2, 3)); ARRAY; # dialect: hive, spark2, spark, databricks SPLIT(tbl.str_col, tbl.str_col, tbl.int_col); ARRAY; # dialect: hive, spark2, spark, databricks SPLIT(tbl.str_col, tbl.str_col); ARRAY; # dialect: spark2, spark, databricks FROM_UTC_TIMESTAMP(tbl.timestamp_col, tbl.str_col); TIMESTAMP; # dialect: spark2, 
spark, databricks ADD_MONTHS(tbl.date_col, tbl.int_col); DATE; # dialect: hive ADD_MONTHS(tbl.date_col, tbl.int_col); STRING; # dialect: spark2, spark, databricks FILTER(tbl.array_col, x -> x > 2); ARRAY; # dialect: spark, databricks ARRAY_INSERT(array(1, 2, 3, 4), 5, 5); ARRAY; # dialect: spark, databricks ARRAY_INSERT(tbl.array_col, tbl.int_col, tbl.str_col); ARRAY; # dialect: hive, spark2, spark, databricks ARRAY_INTERSECT(tbl.array_col, tbl.array_col); ARRAY; # dialect: hive, spark2, spark, databricks ARRAY_INTERSECT(array(1, 2, 3), array(1, 3, 5)); ARRAY; # dialect: hive PERCENTILE_APPROX(3, 0.2); DOUBLE; # dialect: hive PERCENTILE_APPROX(3, array(0.2, 0.3)); ARRAY; # dialect: hive PERCENTILE_APPROX(3.1, 0.2); DOUBLE; # dialect: hive PERCENTILE_APPROX(3.1, array(0.2, 0.3)); ARRAY; # dialect: hive, spark2, spark, databricks PERCENTILE(3, 0.2); DOUBLE; # dialect: hive, spark2, spark, databricks PERCENTILE(3, array(0.2, 0.3)); ARRAY; # dialect: spark2, spark, databricks PERCENTILE(3.1, 0.2); DOUBLE; # dialect: spark2, spark, databricks PERCENTILE(3.1, array(0.2, 0.3)); ARRAY; # dialect: spark2, spark, databricks PERCENTILE_APPROX(3.1, 0.2); DOUBLE; # dialect: spark2, spark, databricks PERCENTILE_APPROX(3, 0.2); INT; # dialect: spark2, spark, databricks PERCENTILE_APPROX(3.1, array(0.2, 0.3)); ARRAY; # dialect: spark2, spark, databricks PERCENTILE_APPROX(3, array(0.2, 0.3)); ARRAY; # dialect: spark2, spark, databricks APPROX_PERCENTILE(3.1, array(0.2, 0.3)); ARRAY; # dialect: spark2, spark, databricks APPROX_PERCENTILE(3, array(0.2, 0.3)); ARRAY; # dialect: spark, databricks BIT_OR(tbl.int_col); INT; # dialect: spark, databricks BIT_OR(tbl.bigint_col); BIGINT; # dialect: spark2, spark, databricks ELEMENT_AT(ARRAY(1, 2, 3), 1); INT; # dialect: spark2, spark, databricks ELEMENT_AT(ARRAY('1', '2','3'), 1); STRING; # dialect: spark2, spark, databricks ELEMENT_AT(MAP('a', 1, 'b', 2,'c', 3), 'b'); INT; # dialect: spark2, spark, databricks ELEMENT_AT(MAP('a', 'k1', 'b', 
'k2', 'c', 'k3'), 'b'); STRING; # dialect: spark, databricks BIT_AND(tbl.int_col); INT; # dialect: spark, databricks BIT_AND(tbl.bigint_col); BIGINT; # dialect: spark, databricks BIT_XOR(tbl.int_col); INT; # dialect: spark, databricks BIT_XOR(tbl.bigint_col); BIGINT; # dialect: hive, spark2, spark, databricks ARRAY_DISTINCT(tbl.array_col); ARRAY; # dialect: hive, spark2, spark, databricks ARRAY_DISTINCT(array(1, 2, 3, null, 3)); ARRAY; # dialect: hive, spark2, spark, databricks ARRAY_EXCEPT(array(1, 2, 3), array(1, 3, 5)); ARRAY; # dialect: hive, spark2, spark, databricks ARRAY_EXCEPT(tbl.array_col, tbl.array_col); ARRAY; -------------------------------------- -- BigQuery -------------------------------------- # dialect: bigquery SIGN(1); INT; # dialect: bigquery SIGN(1.5); DOUBLE; # dialect: bigquery CEIL(1); DOUBLE; # dialect: bigquery CEIL(5.5); DOUBLE; # dialect: bigquery CEIL(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery FLOOR(1); DOUBLE; # dialect: bigquery FLOOR(5.5); DOUBLE; # dialect: bigquery FLOOR(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery SQRT(1); DOUBLE; # dialect: bigquery SQRT(5.5); DOUBLE; # dialect: bigquery SQRT(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery LN(1); DOUBLE; # dialect: bigquery LN(5.5); DOUBLE; # dialect: bigquery LN(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery LOG(1); DOUBLE; # dialect: bigquery LOG(5.5); DOUBLE; # dialect: bigquery LOG(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery ROUND(1); DOUBLE; # dialect: bigquery ROUND(5.5); DOUBLE; # dialect: bigquery ROUND(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery EXP(1); DOUBLE; # dialect: bigquery EXP(5.5); DOUBLE; # dialect: bigquery EXP(tbl.bignum_col); BIGDECIMAL; # dialect: bigquery AVG(1); FLOAT64; # dialect: bigquery AVG(5.5); FLOAT64; # dialect: bigquery AVG(tbl.bignum_col); BIGNUMERIC; # dialect: bigquery SAFE_DIVIDE(tbl.int_col, tbl.int_col); FLOAT64; # dialect: bigquery SAFE_DIVIDE(tbl.int_col, tbl.bignum_col); BIGNUMERIC; # dialect: bigquery 
SAFE_DIVIDE(tbl.int_col, tbl.double_col); FLOAT64; # dialect: bigquery SAFE_DIVIDE(tbl.bignum_col, tbl.int_col); BIGNUMERIC; # dialect: bigquery SAFE_DIVIDE(tbl.bignum_col, tbl.bignum_col); BIGNUMERIC; # dialect: bigquery SAFE_DIVIDE(tbl.bignum_col, tbl.double_col); FLOAT64; # dialect: bigquery SAFE_DIVIDE(tbl.double_col, tbl.int_col); FLOAT64; # dialect: bigquery SAFE_DIVIDE(tbl.double_col, tbl.bignum_col); FLOAT64; # dialect: bigquery SAFE_DIVIDE(tbl.double_col, tbl.double_col); FLOAT64; # dialect: bigquery SAFE.TIMESTAMP(tbl.str_col); TIMESTAMPTZ; # dialect: bigquery TIMESTAMP(tbl.str_col); TIMESTAMPTZ; # dialect: bigquery SAFE.PARSE_DATE('%Y-%m-%d', '2024-01-15'); DATE; # dialect: bigquery PARSE_DATE('%Y-%m-%d', '2024-01-15'); DATE; # dialect: bigquery SAFE.PARSE_DATETIME('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00'); DATETIME; # dialect: bigquery SAFE.PARSE_TIME('%H:%M:%S', '10:30:00'); TIME; # dialect: bigquery SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00'); TIMESTAMPTZ; # dialect: bigquery PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%S', '2024-01-15 10:30:00'); TIMESTAMPTZ; # dialect: bigquery CONCAT(tbl.str_col, tbl.str_col); STRING; # dialect: bigquery CONCAT(tbl.bin_col, tbl.bin_col); BINARY; # dialect: bigquery CONCAT(0, tbl.str_col); STRING; # dialect: bigquery CONCAT(tbl.str_col, 0); STRING; # dialect: bigquery LEFT(tbl.str_col, 1); STRING; # dialect: bigquery LEFT(tbl.bin_col, 1); BINARY; # dialect: bigquery RIGHT(tbl.str_col, 1); STRING; # dialect: bigquery RIGHT(tbl.bin_col, 1); BINARY; # dialect: bigquery LOWER(tbl.str_col); STRING; # dialect: bigquery LOWER(tbl.bin_col); BINARY; # dialect: bigquery UPPER(tbl.str_col); STRING; # dialect: bigquery UPPER(tbl.bin_col); BINARY; # dialect: bigquery LPAD(tbl.str_col, 1, tbl.str_col); STRING; # dialect: bigquery LPAD(tbl.bin_col, 1, tbl.bin_col); BINARY; # dialect: bigquery RPAD(tbl.str_col, 1, tbl.str_col); STRING; # dialect: bigquery RPAD(tbl.bin_col, 1, tbl.bin_col); BINARY; # dialect: bigquery 
LTRIM(tbl.str_col); STRING; # dialect: bigquery LTRIM(tbl.bin_col, tbl.bin_col); BINARY; # dialect: bigquery RTRIM(tbl.str_col); STRING; # dialect: bigquery RTRIM(tbl.bin_col, tbl.bin_col); BINARY; # dialect: bigquery TRIM(tbl.str_col); STRING; # dialect: bigquery TRIM(tbl.bin_col, tbl.bin_col); BINARY; # dialect: bigquery REGEXP_EXTRACT(tbl.str_col, pattern); STRING; # dialect: bigquery REGEXP_EXTRACT(tbl.bin_col, pattern); BINARY; # dialect: bigquery REGEXP_REPLACE(tbl.str_col, pattern, replacement); STRING; # dialect: bigquery REGEXP_REPLACE(tbl.bin_col, pattern, replacement); BINARY; # dialect: bigquery REPEAT(tbl.str_col, 1); STRING; # dialect: bigquery REPEAT(tbl.bin_col, 1); BINARY; # dialect: bigquery SUBSTRING(tbl.str_col, 1); STRING; # dialect: bigquery SUBSTRING(tbl.bin_col, 1); BINARY; # dialect: bigquery SPLIT(tbl.str_col, delim); ARRAY; # dialect: bigquery SPLIT(tbl.bin_col, delim); ARRAY; # dialect: bigquery STRING(json_expr); STRING; # dialect: bigquery STRING(timestamp_expr, timezone); STRING; # dialect: bigquery ARRAY_CONCAT(['a'], ['b']); ARRAY; # dialect: bigquery ARRAY_CONCAT_AGG(tbl.array_col); ARRAY; # dialect: bigquery ARRAY_TO_STRING(['a'], ['b'], ','); STRING; # dialect: bigquery ARRAY_FIRST(['a', 'b']); STRING; # dialect: bigquery ARRAY_LAST(['a', 'b']); STRING; # dialect: bigquery ARRAY_FIRST([1, 1.5]); DOUBLE; # dialect: bigquery ARRAY_LAST([1, 1.5]); DOUBLE; # dialect: bigquery GENERATE_ARRAY(1, 5, 0.3); ARRAY; # dialect: bigquery GENERATE_ARRAY(1, 5); ARRAY; # dialect: bigquery GENERATE_ARRAY(1, 2.5); ARRAY; # dialect: bigquery INT64(JSON '999'); BIGINT; # dialect: bigquery LOGICAL_AND(tbl.bool_col); BOOLEAN; # dialect: bigquery LOGICAL_OR(tbl.bool_col); BOOLEAN; # dialect: bigquery MAKE_INTERVAL(1, 6, 15); INTERVAL; # dialect: bigquery SHA1(tbl.str_col); BINARY; # dialect: bigquery SHA256(tbl.str_col); BINARY; # dialect: bigquery SHA512(tbl.str_col); BINARY; # dialect: bigquery CORR(tbl.double_col, tbl.double_col); DOUBLE; # dialect: 
bigquery COVAR_POP(tbl.double_col, tbl.double_col); DOUBLE; # dialect: bigquery COVAR_SAMP(tbl.double_col, tbl.double_col); DOUBLE; # dialect: bigquery DATETIME(2025, 1, 1, 12, 0, 0); DATETIME; # dialect: bigquery LAG(tbl.bigint_col, 1 , 2.5) OVER (ORDER BY tbl.bigint_col); DOUBLE; # dialect: bigquery LAG(tbl.bigint_col, 1 , 2) OVER (ORDER BY tbl.bigint_col); BIGINT; # dialect: bigquery ASCII('A'); BIGINT; # dialect: bigquery UNICODE('bcd'); BIGINT; # dialect: bigquery BIT_AND(tbl.bin_col); BIGINT; # dialect: bigquery BIT_OR(tbl.bin_col); BIGINT; # dialect: bigquery BIT_XOR(tbl.bin_col); BIGINT; # dialect: bigquery BIT_COUNT(tbl.bin_col); BIGINT; # dialect: bigquery JSON_ARRAY(10); JSON; # dialect: bigquery JSON_ARRAY(10, [1, 2]); JSON; # dialect: bigquery JSON_VALUE(JSON '{"foo": "1" }', '$.foo'); STRING; # dialect: bigquery JSON_EXTRACT_SCALAR(JSON '["a","b"]'); STRING; # dialect: bigquery JSON_VALUE_ARRAY(JSON '["a","b"]'); ARRAY; # dialect: bigquery JSON_EXTRACT_STRING_ARRAY(JSON '["a","b"]'); ARRAY; # dialect: bigquery JSON_TYPE(JSON '1'); STRING; # dialect: bigquery GENERATE_TIMESTAMP_ARRAY('2016-10-05', '2016-10-07', INTERVAL '1' DAY); ARRAY; # dialect: bigquery TIME(15, 30, 00); TIME; # dialect: bigquery TIME(TIMESTAMP "2008-12-25 15:30:00"); TIME; # dialect: bigquery TIME(DATETIME "2008-12-25 15:30:00"); TIME; # dialect: bigquery TIME_TRUNC(TIME "15:30:00", HOUR); TIME; # dialect: bigquery DATE_FROM_UNIX_DATE(1); DATE; # dialect: bigquery DATE_TRUNC(DATE '2008-12-25', MONTH); DATE; # dialect: bigquery DATE_TRUNC(TIMESTAMP '2008-12-25', MONTH); TIMESTAMP; # dialect: bigquery DATE_TRUNC(DATETIME '2008-12-25', MONTH); DATETIME; # dialect: bigquery TIMESTAMP_TRUNC(TIMESTAMP "2008-12-25 15:30:00+00", DAY, "UTC"); TIMESTAMP; # dialect: bigquery TIMESTAMP_TRUNC(DATETIME "2008-12-25 15:30:00", DAY); DATETIME; # dialect: bigquery PARSE_DATETIME('%a %b %e %I:%M:%S %Y', 'Thu Dec 25 07:30:00 2008'); DATETIME; # dialect: bigquery FORMAT_TIME("%R", TIME "15:30:00"); 
STRING; # dialect: bigquery PARSE_TIME("%I:%M:%S", "07:30:00"); TIME; # dialect: bigquery BYTE_LENGTH("foo"); BIGINT; # dialect: bigquery CODE_POINTS_TO_STRING([65, 255, 513, 1024]); STRING; # dialect: bigquery REVERSE("abc"); STRING; # dialect: bigquery REVERSE(tbl.bin_col); BINARY; # dialect: bigquery REVERSE(b'1a3'); BINARY; # dialect: bigquery REGEXP_EXTRACT_ALL('Try `func(x)` or `func(y)`', '`(.+?)`'); ARRAY; # dialect: bigquery REGEXP_EXTRACT_ALL(b'\x48\x65\x6C\x6C\x6F', b'(\x6C+)'); ARRAY; # dialect: bigquery REPLACE ('cherry', 'pie', 'cobbler'); STRING; # dialect: bigquery REPLACE(b'\x48\x65\x6C\x6C\x6F', b'\x6C\x6C', b'\x59\x59'); BINARY; # dialect: bigquery TRANSLATE('AaBbCc', 'abc', '1'); STRING; # dialect: bigquery TRANSLATE(b'AaBbCc', b'abc', b'123'); BINARY; # dialect: bigquery SOUNDEX('foo'); STRING; # dialect: bigquery MD5('foo'); BINARY; # dialect: bigquery TO_HEX(MD5('foo')); STRING; # dialect: bigquery MAX_BY(tbl.str_col, tbl.bigint_col); STRING; # dialect: bigquery MAX_BY(tbl.bigint_col, tbl.str_col); BIGINT; # dialect: bigquery MIN_BY(tbl.str_col, tbl.bigint_col); STRING; # dialect: bigquery MIN_BY(tbl.bigint_col, tbl.str_col); BIGINT; # dialect: bigquery GROUPING(tbl.str_col); BIGINT; # dialect: bigquery GROUPING(tbl.bigint_col); BIGINT; # dialect: bigquery FARM_FINGERPRINT('foo'); BIGINT; # dialect: bigquery FARM_FINGERPRINT(b'foo'); BIGINT; # dialect: bigquery APPROX_TOP_COUNT(tbl.str_col, 2); ARRAY>; # dialect: bigquery APPROX_TOP_COUNT(tbl.bigint_col, 2); ARRAY>; # dialect: bigquery APPROX_TOP_SUM(tbl.str_col, 1.5, 2); ARRAY>; # dialect: bigquery APPROX_TOP_SUM(tbl.bigint_col, 1.5, 2); ARRAY>; # dialect: bigquery APPROX_QUANTILES(tbl.bigint_col, 2); ARRAY; # dialect: bigquery APPROX_QUANTILES(tbl.str_col, 2); ARRAY; # dialect: bigquery APPROX_QUANTILES(DISTINCT tbl.bigint_col, 2); ARRAY; # dialect: bigquery APPROX_QUANTILES(DISTINCT tbl.str_col, 2); ARRAY; # dialect: bigquery SAFE_CONVERT_BYTES_TO_STRING(b'\xc2'); STRING; # dialect: 
bigquery FROM_HEX('foo'); BINARY; # dialect: bigquery TO_HEX(b'foo'); STRING; # dialect: bigquery TO_CODE_POINTS('foo'); ARRAY; # dialect: bigquery TO_CODE_POINTS(b'\x66\x6f\x6f'); ARRAY; # dialect: bigquery CODE_POINTS_TO_BYTES([65, 98]); BINARY; # dialect: bigquery PARSE_BIGNUMERIC('1.2'); BIGDECIMAL; # dialect: bigquery PARSE_NUMERIC('1.2'); DECIMAL; # dialect: bigquery BOOL(PARSE_JSON('true')); BOOLEAN; # dialect: bigquery FLOAT64(PARSE_JSON('9.8')); FLOAT64; # dialect: bigquery FLOAT64(PARSE_JSON('9.8'), wide_number_mode => 'round'); FLOAT64; # dialect: bigquery CONTAINS_SUBSTR('aa', 'a'); BOOLEAN; # dialect: bigquery CONTAINS_SUBSTR(PARSE_JSON('{"lunch":"soup"}'), 'lunch', json_scope => 'JSON_VALUES'); BOOLEAN; # dialect: bigquery NORMALIZE('\u00ea'); STRING; # dialect: bigquery NORMALIZE('\u00ea', NFKC); STRING; # dialect: bigquery NORMALIZE_AND_CASEFOLD('\u00ea', NFKC); STRING; # dialect: bigquery NORMALIZE_AND_CASEFOLD('\u00ea', NFKC); STRING; # dialect: bigquery OCTET_LENGTH("foo"); BIGINT; # dialect: bigquery REGEXP_INSTR('ab@cd-ef', '@[^-]*'); BIGINT; # dialect: bigquery REGEXP_INSTR('a@cd-ef', '@[^-]*', 1, 1, 0); BIGINT; # dialect: bigquery ROW_NUMBER() OVER (ORDER BY 1); BIGINT; # dialect: bigquery FIRST_VALUE(tbl.bigint_col) OVER (ORDER BY 1); BIGINT; # dialect: bigquery FIRST_VALUE(tbl.str_col) OVER (ORDER BY 1); STRING; # dialect: bigquery FIRST_VALUE(tbl.bigint_col RESPECT NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery FIRST_VALUE(tbl.bigint_col IGNORE NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery FIRST_VALUE(tbl.str_col RESPECT NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery FIRST_VALUE(tbl.str_col IGNORE NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery NTH_VALUE(tbl.bigint_col, 2) OVER (ORDER BY 1); BIGINT; # dialect: bigquery NTH_VALUE(tbl.str_col, 2) OVER (ORDER BY 1); STRING; # dialect: bigquery NTH_VALUE(tbl.bigint_col, 2 RESPECT NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery NTH_VALUE(tbl.str_col, 2 RESPECT 
NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery NTH_VALUE(tbl.bigint_col, 2 IGNORE NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery NTH_VALUE(tbl.str_col, 2 IGNORE NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery PERCENTILE_DISC(tbl.bigint_col, 0.5) OVER (ORDER BY 1); BIGINT; # dialect: bigquery PERCENTILE_DISC(tbl.str_col, 0.5) OVER (ORDER BY 1); STRING; # dialect: bigquery PERCENTILE_DISC(tbl.bigint_col, 0.5 RESPECT NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery PERCENTILE_DISC(tbl.str_col, 0.5 RESPECT NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery PERCENTILE_DISC(tbl.bigint_col, 0.5 IGNORE NULLS) OVER (ORDER BY 1); BIGINT; # dialect: bigquery PERCENTILE_DISC(tbl.str_col, 0.5 IGNORE NULLS) OVER (ORDER BY 1); STRING; # dialect: bigquery LEAD(tbl.bigint_col); BIGINT; # dialect: bigquery LEAD(tbl.str_col); STRING; # dialect: bigquery LEAD(tbl.bigint_col, 2); BIGINT; # dialect: bigquery LEAD(tbl.str_col, 2); STRING; # dialect: bigquery FORMAT('%f %E %f %f', 1.1, 2.2, 3.4, 4.4); STRING; # dialect: bigquery NET.HOST('http://example.com'); STRING; # dialect: bigquery NET.REG_DOMAIN('http://example.com'); STRING; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS NUMERIC), CAST(1 AS NUMERIC)) OVER (ORDER BY 1); NUMERIC; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS NUMERIC), CAST(1 AS BIGNUMERIC)) OVER (ORDER BY 1); BIGNUMERIC; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS NUMERIC), CAST(1 AS FLOAT64)) OVER (ORDER BY 1); FLOAT64; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS BIGNUMERIC), CAST(1 AS NUMERIC)) OVER (ORDER BY 1); BIGNUMERIC; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS BIGNUMERIC), CAST(1 AS BIGNUMERIC)) OVER (ORDER BY 1); BIGNUMERIC; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS BIGNUMERIC), CAST(1 AS FLOAT64)) OVER (ORDER BY 1); FLOAT64; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS FLOAT64), CAST(1 AS NUMERIC)) OVER (ORDER BY 1); FLOAT64; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS FLOAT64), CAST(1 AS BIGNUMERIC)) OVER (ORDER 
BY 1); FLOAT64; # dialect: bigquery PERCENTILE_CONT(CAST(1 AS FLOAT64), CAST(1 AS FLOAT64)) OVER (ORDER BY 1); FLOAT64; # dialect: bigquery CUME_DIST() OVER (ORDER BY 1); DOUBLE; # dialect: bigquery DENSE_RANK() OVER (ORDER BY 1); BIGINT; # dialect: bigquery NTILE(1) OVER (ORDER BY 1); BIGINT; # dialect: bigquery RANK() OVER (ORDER BY 1); BIGINT; # dialect: bigquery PERCENT_RANK() OVER (ORDER BY 1); DOUBLE; # dialect: bigquery JSON_OBJECT('foo', 10, 'bar', TRUE); JSON; # dialect: bigquery JSON_QUERY('{"fruits": ["apples", "oranges", "grapes"]}', '$.fruits'); STRING; # dialect: bigquery JSON_QUERY(JSON_OBJECT('fruits', ['apples', 'oranges', 'grapes']), '$.fruits'); JSON; # dialect: bigquery JSON_EXTRACT('{"fruits": ["apples", "oranges", "grapes"]}', '$.fruits'); STRING; # dialect: bigquery JSON_EXTRACT(JSON_OBJECT('fruits', ['apples', 'oranges', 'grapes']), '$.fruits'); JSON; # dialect: bigquery JSON_QUERY_ARRAY('{"fruits": ["apples", "oranges", "grapes"]}', '$.fruits'); ARRAY; # dialect: bigquery JSON_QUERY_ARRAY(JSON_OBJECT('fruits', ['apples', 'oranges', 'grapes']), '$.fruits'); ARRAY; # dialect: bigquery JSON_EXTRACT_ARRAY('{"fruits": ["apples", "oranges", "grapes"]}', '$.fruits'); ARRAY; # dialect: bigquery JSON_EXTRACT_ARRAY(JSON_OBJECT('fruits', ['apples', 'oranges', 'grapes']), '$.fruits'); ARRAY; # dialect: bigquery JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', 1); JSON; # dialect: bigquery JSON_ARRAY_APPEND(PARSE_JSON('["a", "b", "c"]'), '$', [1, 2], append_each_element => FALSE); JSON; # dialect: bigquery JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1); JSON; # dialect: bigquery JSON_ARRAY_INSERT(PARSE_JSON('["a", "b", "c"]'), '$[1]', [1, 2], insert_each_element => FALSE); JSON; # dialect: bigquery JSON_ARRAY_INSERT(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', 1); JSON; # dialect: bigquery JSON_ARRAY_INSERT(PARSE_JSON('["a", "b", "c"]'), '$[1]', [1, 2], insert_each_element => FALSE); JSON; # dialect: bigquery 
JSON_KEYS(PARSE_JSON('{"a": {"b":1}}')); ARRAY; # dialect: bigquery JSON_KEYS(PARSE_JSON('{"a": {"b":1}}'), 1); ARRAY; # dialect: bigquery JSON_KEYS(PARSE_JSON('{"a": {"b":1}}'), 1, node => 'lax'); ARRAY; # dialect: bigquery JSON_REMOVE(PARSE_JSON('["a", ["b", "c"], "d"]'), '$[1]', '$[1]'); JSON; # dialect: bigquery JSON_SET(PARSE_JSON('{"a": 1}'), '$', PARSE_JSON('{"b": 2, "c": 3}')); JSON; # dialect: bigquery JSON_SET(PARSE_JSON('{"a": 1}'), '$.b', 999, create_if_missing => FALSE); JSON; # dialect: bigquery JSON_STRIP_NULLS(PARSE_JSON('[1, null, 2, null, [null]]')); JSON; # dialect: bigquery JSON_STRIP_NULLS(PARSE_JSON('[1, null, 2, null]'), include_arrays => FALSE); JSON; # dialect: bigquery JSON_STRIP_NULLS(PARSE_JSON('{"a": {"b": {"c": null}}, "d": [null], "e": [], "f": 1}'), include_arrays => FALSE, remove_empty => TRUE); JSON; # dialect: bigquery LAX_BOOL(PARSE_JSON('true')); BOOLEAN; # dialect: bigquery LAX_FLOAT64(PARSE_JSON('9.8')); DOUBLE; # dialect: bigquery LAX_INT64(PARSE_JSON('10')); BIGINT; # dialect: bigquery LAX_STRING(PARSE_JSON('"str"')); STRING; # dialect: bigquery TO_JSON_STRING(STRUCT(1 AS id, [10, 20] AS cords)); STRING; # dialect: bigquery TO_JSON(STRUCT(1 AS id, [10, 20] AS cords)); JSON; # dialect: bigquery ABS(CAST(-1 AS INT64)); INT64; # dialect: bigquery ABS(CAST(-1 AS NUMERIC)); NUMERIC; # dialect: bigquery ABS(CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery ABS(CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery IS_INF(1); BOOLEAN; # dialect: bigquery IS_NAN(1); BOOLEAN; # dialect: bigquery CBRT(27); DOUBLE; # dialect: bigquery RAND(); DOUBLE; # dialect: bigquery ACOS(0.5); DOUBLE; # dialect: bigquery ACOSH(0.5); DOUBLE; # dialect: bigquery ASIN(1); DOUBLE; # dialect: bigquery ASINH(1); DOUBLE; # dialect: bigquery ATAN(0.5); DOUBLE; # dialect: bigquery ATANH(0.5); DOUBLE; # dialect: bigquery ATAN2(0.5, 0.3); DOUBLE; # dialect: bigquery COT(1); DOUBLE; # dialect: bigquery COTH(1); DOUBLE; # dialect: bigquery CSC(1); DOUBLE; # 
dialect: bigquery CSCH(1); DOUBLE; # dialect: bigquery SEC(1); DOUBLE; # dialect: bigquery SECH(1); DOUBLE; # dialect: bigquery SIN(1); DOUBLE; # dialect: bigquery SINH(1); DOUBLE; # dialect: bigquery COSINE_DISTANCE([1.0, 2.0], [3.0, 4.0]); DOUBLE; #dialect: bigquery EUCLIDEAN_DISTANCE([1.0, 2.0], [3.0, 4.0]); DOUBLE; # dialect: bigquery RANGE_BUCKET(20, [0, 10, 20, 30, 40]); BIGINT; # dialect: bigquery SAFE_ADD(CAST(1 AS INT64), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS INT64), CAST(1 AS INT64)); INT64; # dialect: bigquery SAFE_ADD(CAST(1 AS INT64), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS INT64), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS NUMERIC), CAST(1 AS INT64)); NUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS NUMERIC), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS NUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS NUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS BIGNUMERIC), CAST(1 AS INT64)); BIGNUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS BIGNUMERIC), CAST(1 AS NUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS BIGNUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_ADD(CAST(1 AS BIGNUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS FLOAT64), CAST(1 AS INT64)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS FLOAT64), CAST(1 AS NUMERIC)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS FLOAT64), CAST(1 AS BIGNUMERIC)); FLOAT64; # dialect: bigquery SAFE_ADD(CAST(1 AS FLOAT64), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS INT64), CAST(1 AS INT64)); INT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS INT64), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS INT64), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS INT64), CAST(1 AS FLOAT64)); 
FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS NUMERIC), CAST(1 AS INT64)); NUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS NUMERIC), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS NUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS NUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS BIGNUMERIC), CAST(1 AS INT64)); BIGNUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS BIGNUMERIC), CAST(1 AS NUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS BIGNUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS BIGNUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS FLOAT64), CAST(1 AS INT64)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS FLOAT64), CAST(1 AS NUMERIC)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS FLOAT64), CAST(1 AS BIGNUMERIC)); FLOAT64; # dialect: bigquery SAFE_MULTIPLY(CAST(1 AS FLOAT64), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS INT64), CAST(1 AS INT64)); INT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS INT64), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS INT64), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS INT64), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS NUMERIC), CAST(1 AS INT64)); NUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS NUMERIC), CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS NUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS NUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS BIGNUMERIC), CAST(1 AS INT64)); BIGNUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS BIGNUMERIC), CAST(1 AS NUMERIC)); BIGNUMERIC; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS BIGNUMERIC), CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: 
bigquery SAFE_SUBTRACT(CAST(1 AS BIGNUMERIC), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS FLOAT64), CAST(1 AS INT64)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS FLOAT64), CAST(1 AS NUMERIC)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS FLOAT64), CAST(1 AS BIGNUMERIC)); FLOAT64; # dialect: bigquery SAFE_SUBTRACT(CAST(1 AS FLOAT64), CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_NEGATE(CAST(1 AS FLOAT64)); FLOAT64; # dialect: bigquery SAFE_NEGATE(CAST(1 AS NUMERIC)); NUMERIC; # dialect: bigquery SAFE_NEGATE(CAST(1 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery STRING_AGG(tbl.str_col); STRING; # dialect: bigquery STRING_AGG(tbl.bin_col); BINARY; # dialect: bigquery STRING_AGG(DISTINCT tbl.str_col); STRING; # dialect: bigquery STRING_AGG(tbl.str_col ORDER BY tbl.str_col); STRING; # dialect: bigquery STRING_AGG(DISTINCT tbl.str_col, ',' ORDER BY tbl.str_col); STRING; # dialect: bigquery STRING_AGG(DISTINCT tbl.bin_col ORDER BY tbl.bin_col); BINARY; # dialect: bigquery STRING_AGG(tbl.str_col, ',' LIMIT 10); STRING; # dialect: bigquery STRING_AGG(tbl.str_col, ',' ORDER BY tbl.str_col LIMIT 10); STRING; # dialect: bigquery STRING_AGG(DISTINCT tbl.str_col, ',' ORDER BY tbl.str_col LIMIT 10); STRING; # dialect: bigquery STRING_AGG(DISTINCT tbl.bin_col ORDER BY tbl.bin_col LIMIT 10); BINARY; # dialect: bigquery ARRAY_AGG(tbl.int_col LIMIT 10); ARRAY; # dialect: bigquery ARRAY_AGG(DISTINCT tbl.str_col ORDER BY tbl.str_col LIMIT 10); ARRAY; # dialect: bigquery DATETIME_TRUNC(DATETIME "2008-12-25 15:30:00", DAY); DATETIME; # dialect: bigquery DATETIME_TRUNC(TIMESTAMP "2008-12-25 15:30:00", DAY); TIMESTAMP; # dialect: bigquery GENERATE_UUID(); STRING; # dialect: bigquery STRUCT(tbl.str_col); STRUCT; # dialect: bigquery LENGTH(tbl.str_col); BIGINT; # dialect: bigquery LENGTH(tbl.bin_col); BIGINT; # dialect: bigquery IF(TRUE, '2010-01-01', DATE '2020-02-02'); DATE; # dialect: bigquery IF(TRUE, DATETIME '2010-01-01 
00:00:00', '2020-02-02 00:00:00'); DATETIME; # dialect: bigquery IF(TRUE, '00:00:00', TIME '00:01:00'); TIME; # dialect: bigquery IF(TRUE, 1, CAST(2.5 AS BIGNUMERIC)); BIGNUMERIC; # dialect: bigquery IF(TRUE, 1.5, 2.5); FLOAT64; # dialect: bigquery IF(TRUE, '2010-01-01 00:00:00', TIMESTAMP '2020-02-02 00:00:00'); TIMESTAMP; # dialect: bigquery COALESCE('2010-01-01', DATE '2020-02-02'); DATE; # dialect: bigquery COALESCE(DATETIME '2010-01-01 00:00:00', '2020-02-02 00:00:00'); DATETIME; # dialect: bigquery IFNULL('00:00:00', TIME '00:01:00'); TIME; # dialect: bigquery IFNULL(TIMESTAMP '2010-01-01 00:00:00', '2020-02-02 00:00:00'); TIMESTAMP; # dialect: bigquery ANY_VALUE(c2::STRING HAVING MIN c1::INT64); STRING; # dialect: bigquery ANY_VALUE(c2::STRING HAVING MAX c1::INT64); STRING; # dialect: bigquery r'a'; STRING; # dialect: bigquery DATE_ADD(DATE '2008-12-25', INTERVAL 5 DAY); DATE; # dialect: bigquery DATE_ADD(DATE '2008-12-25', INTERVAL 2 WEEK); DATE; # dialect: bigquery DATE_ADD(DATE '2008-12-25', INTERVAL 3 MONTH); DATE; # dialect: bigquery DATE_ADD(DATE '2008-12-25', INTERVAL 1 QUARTER); DATE; # dialect: bigquery DATE_ADD(DATE '2008-12-25', INTERVAL 2 YEAR); DATE; # dialect: bigquery DATE_ADD(TIMESTAMP '2008-12-25 15:30:00', INTERVAL 5 DAY); TIMESTAMP; # dialect: bigquery DATE_ADD(TIMESTAMP '2008-12-25 15:30:00', INTERVAL 2 HOUR); TIMESTAMP; # dialect: bigquery DATE_ADD(TIMESTAMP '2008-12-25 15:30:00', INTERVAL 30 MINUTE); TIMESTAMP; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 5 DAY); DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 2 WEEK); DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 3 MONTH); DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 1 QUARTER); DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 2 YEAR); DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 2 HOUR); 
DATETIME; # dialect: bigquery DATE_ADD(DATETIME '2008-12-25 15:30:00', INTERVAL 30 MINUTE); DATETIME; # dialect: bigquery UNIX_DATE(tbl.date_col); BIGINT; -------------------------------------- -- Snowflake -------------------------------------- # dialect: snowflake ABS(tbl.bigint_col); BIGINT; # dialect: snowflake ABS(tbl.double_col); DOUBLE; # dialect: snowflake ADD_MONTHS(tbl.date_col, 2); DATE; # dialect: snowflake ADD_MONTHS(tbl.timestamp_col, -1); TIMESTAMP; # dialect: snowflake ARRAY_CONSTRUCT(); ARRAY; # dialect: snowflake ARRAY_CONSTRUCT_COMPACT(); ARRAY; # dialect: snowflake ARRAY_CONSTRUCT_COMPACT(1, null, 2); ARRAY; # dialect: snowflake ARRAY_COMPACT([1, null, 2]); ARRAY; # dialect: snowflake ARRAY_APPEND([1, 2, 3], 4); ARRAY; # dialect: snowflake ARRAY_CAT([1, 2], [3, 4]); ARRAY; # dialect: snowflake ARRAY_PREPEND([2, 3, 4], 1); ARRAY; # dialect: snowflake ARRAY_REMOVE([1, 2, 3], 2); ARRAY; # dialect: snowflake ARRAYS_ZIP([1, 2], [3, 4]); ARRAY; # dialect: snowflake ASIN(tbl.double_col); DOUBLE; # dialect: snowflake ASINH(tbl.double_col); DOUBLE; # dialect: snowflake ATAN(tbl.double_col); DOUBLE; # dialect: snowflake ATAN2(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake ATANH(tbl.double_col); DOUBLE; # dialect: snowflake CBRT(tbl.double_col); DOUBLE; # dialect: snowflake CBRT(tbl.decfloat_col); DECFLOAT; # dialect: snowflake CBRT(tbl.int_col); DOUBLE; # dialect: snowflake COVAR_POP(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake COVAR_SAMP(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake COVAR_POP(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake COVAR_SAMP(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake AI_AGG('foo', 'bar'); VARCHAR; # dialect: snowflake AI_AGG(null, 'bar'); VARCHAR; # dialect: snowflake AI_SUMMARIZE_AGG('foo'); VARCHAR; # dialect: snowflake AI_SUMMARIZE_AGG(null); VARCHAR; # dialect: snowflake AI_CLASSIFY('text', ['travel', 
'cooking']); VARCHAR; # dialect: snowflake AI_CLASSIFY('text', ['travel', 'cooking'], {'output_mode': 'multi'}); VARCHAR; # dialect: snowflake ASCII('A'); INT; # dialect: snowflake ASCII(''); INT; # dialect: snowflake ASCII(NULL); INT; # dialect: snowflake BASE64_DECODE_BINARY('SGVsbG8='); BINARY; # dialect: snowflake BASE64_DECODE_STRING('SGVsbG8gV29ybGQ='); VARCHAR; # dialect: snowflake BASE64_DECODE_STRING('SGVsbG8gV29ybGQ=', '+/='); VARCHAR; # dialect: snowflake BASE64_ENCODE(tbl.bin_col); VARCHAR; # dialect: snowflake BASE64_ENCODE('Hello World'); VARCHAR; # dialect: snowflake BASE64_ENCODE('Hello World', 76); VARCHAR; # dialect: snowflake BASE64_ENCODE('Hello World', 76, '+/='); VARCHAR; # dialect: snowflake BIT_LENGTH('abc'); INT; # dialect: snowflake BITMAP_BIT_POSITION(tbl.int_col); BIGINT; # dialect: snowflake BITMAP_BUCKET_NUMBER(tbl.int_col); BIGINT; # dialect: snowflake BITMAP_CONSTRUCT_AGG(tbl.int_col); BINARY; # dialect: snowflake BITMAP_COUNT(BITMAP_CONSTRUCT_AGG(tbl.int_col)); BIGINT; # dialect: snowflake BIT_LENGTH(tbl.str_col); INT; # dialect: snowflake BIT_LENGTH(tbl.bin_col); INT; # dialect: snowflake BITNOT(5); INT; # dialect: snowflake BITNOT(tbl.bin_col); BINARY; # dialect: snowflake BIT_NOT(5); INT; # dialect: snowflake BITAND(2, 4); INT; # dialect: snowflake BITAND(tbl.bin_col, tbl.bin_col); BINARY; # dialect: snowflake BIT_AND(2, 4); INT; # dialect: snowflake BITOR(2, 4); INT; # dialect: snowflake BITOR(tbl.bin_col, tbl.bin_col); BINARY; # dialect: snowflake BITSHIFTLEFT(2, 1); INT; # dialect: snowflake BITSHIFTLEFT(tbl.bin_col, 4); BINARY; # dialect: snowflake BITSHIFTRIGHT(24, 1); INT; # dialect: snowflake BITSHIFTRIGHT(tbl.bin_col, 4); BINARY; # dialect: snowflake BITXOR(5, 3); INT; # dialect: snowflake BITXOR(tbl.bin_col, tbl.bin_col); BINARY; # dialect: snowflake BITANDAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITAND_AGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BIT_AND_AGG(tbl.int_col); NUMBER(38, 0); # dialect: 
snowflake BIT_ANDAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITORAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITOR_AGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BIT_OR_AGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BIT_ORAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITXORAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITXOR_AGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BIT_XOR_AGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BIT_XORAGG(tbl.int_col); NUMBER(38, 0); # dialect: snowflake BITMAP_OR_AGG(tbl.bin_col); BINARY; # dialect: snowflake BOOLXOR_AGG(tbl.bool_col); BOOLEAN; # dialect: snowflake BOOLNOT(tbl.int_col); BOOLEAN; # dialect: snowflake BOOLNOT(NULL); BOOLEAN; # dialect: snowflake BOOLAND(1, -2); BOOLEAN; # dialect: snowflake BOOLOR(1, 0); BOOLEAN; # dialect: snowflake BOOLXOR(2, 0); BOOLEAN; # dialect: snowflake BOOLAND_AGG(tbl.bool_col); BOOLEAN; # dialect: snowflake BOOLOR_AGG(tbl.bool_col); BOOLEAN; # dialect: snowflake TO_BOOLEAN('true'); BOOLEAN; # dialect: snowflake TO_BOOLEAN(1); BOOLEAN; # dialect: snowflake TO_BOOLEAN(tbl.varchar_col); BOOLEAN; # dialect: snowflake ARRAY_AGG(tbl.bin_col); ARRAY; # dialect: snowflake ARRAY_AGG(tbl.bool_col); ARRAY; # dialect: snowflake ARRAY_AGG(tbl.date_col); ARRAY; # dialect: snowflake ARRAY_AGG(tbl.double_col); ARRAY; # dialect: snowflake ARRAY_AGG(tbl.str_col); ARRAY; # dialect: snowflake ARRAY_UNIQUE_AGG(tbl.bin_col); ARRAY; # dialect: snowflake ARRAY_UNIQUE_AGG(tbl.bool_col); ARRAY; # dialect: snowflake ARRAY_UNIQUE_AGG(tbl.date_col); ARRAY; # dialect: snowflake ARRAY_UNIQUE_AGG(tbl.double_col); ARRAY; # dialect: snowflake ARRAY_UNIQUE_AGG(tbl.str_col); ARRAY; # dialect: snowflake ARRAY_UNION_AGG(tbl.array_col); ARRAY; # dialect: snowflake CHARINDEX('world', 'hello world'); INT; # dialect: snowflake CHARINDEX('world', 'hello world', 1); INT; # dialect: snowflake CASE WHEN score >= 90 THEN 100 WHEN score >= 80 THEN 220 END; INT; # dialect: 
snowflake CASE WHEN score >= 90 THEN 'A' WHEN score >= 80 THEN 'B' ELSE 'C' END; VARCHAR; # dialect: snowflake CASE WHEN score >= 90 THEN TRUE WHEN score >= 80 THEN FALSE ELSE NULL END; BOOLEAN; # dialect: snowflake CEIL(3.14); DOUBLE; # dialect: snowflake CEIL(3.14::FLOAT, 1); FLOAT; # dialect: snowflake CEIL(3.14, 1); DOUBLE; # dialect: snowflake CEIL(10::NUMERIC); NUMBER; # dialect: snowflake CHAR(65); VARCHAR; # dialect: snowflake CHR(8364); VARCHAR; # dialect: snowflake CHECK_JSON('{"key": "value", "array": [1, 2, 3]}'); VARCHAR; # dialect: snowflake CHECK_XML('value'); VARCHAR; # dialect: snowflake CHECK_XML('value', TRUE); VARCHAR; # dialect: snowflake COLLATE('hello', 'utf8'); VARCHAR; # dialect: snowflake COSH(1.5); DOUBLE; # dialect: snowflake COALESCE(42, 0, 100); INT; # dialect: snowflake COALESCE(1.5, 2.7); DOUBLE; # dialect: snowflake COALESCE(1::BIGINT, 2::BIGINT); BIGINT; # dialect: snowflake COALESCE('hello', 'world'); VARCHAR; # dialect: snowflake COALESCE(CAST('2024-01-01' AS DATE), CAST('2024-12-31' AS DATE)); DATE; # dialect: snowflake CAST(1.5 AS DECFLOAT); DECFLOAT; # dialect: snowflake CAST(1 AS VARCHAR); VARCHAR; # dialect: snowflake CAST('123' AS INT); INT; # dialect: snowflake COALESCE(TRUE, FALSE); BOOLEAN; # dialect: snowflake COUNT(*); BIGINT; # dialect: snowflake COUNT(DISTINCT tbl.str_col); BIGINT; # dialect: snowflake COMPRESS('Hello World', 'SNAPPY'); BINARY; # dialect: snowflake COMPRESS('Hello World', 'zlib(1)'); BINARY; # dialect: snowflake DATE_PART('year', tbl.date_col); INT; # dialect: snowflake DATE_PART('month', tbl.timestamp_col); INT; # dialect: snowflake DATE_PART('day', tbl.date_col); INT; # dialect: snowflake DATEADD(HOUR, 3, TO_TIME('05:00:00')); TIME; # dialect: snowflake DATEADD(YEAR, 1, TO_TIMESTAMP('2022-05-08 14:30:00')); TIMESTAMP; # dialect: snowflake DATEADD(MONTH, 1, '2023-01-31'::DATE); DATE; # dialect: snowflake DATEADD(HOUR, 2, '2022-04-05'::DATE); TIMESTAMPNTZ; # dialect: snowflake DEGREES(PI()/3); 
DOUBLE; # dialect: snowflake DEGREES(1); DOUBLE; # dialect: snowflake DATE_FROM_PARTS(1977, 8, 7); DATE; # dialect: snowflake DECOMPRESS_BINARY('compressed_data', 'SNAPPY'); BINARY; # dialect: snowflake DECOMPRESS_STRING('compressed_data', 'ZSTD'); VARCHAR; # dialect: snowflake DIV0(10, 0); DOUBLE; # dialect: snowflake DIV0(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake DIV0NULL(10, 0); DOUBLE; # dialect: snowflake DIV0NULL(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake LPAD('Hello', 10, '*'); VARCHAR; # dialect: snowflake LPAD(tbl.str_col, 10); VARCHAR; # dialect: snowflake LPAD(tbl.bin_col, 10, 0x20); BINARY; # dialect: snowflake RPAD('Hello', 10, '*'); VARCHAR; # dialect: snowflake RPAD(tbl.str_col, 10); VARCHAR; # dialect: snowflake RPAD(tbl.bin_col, 10, 0x20); BINARY; # dialect: snowflake COLLATION('hello'); VARCHAR; # dialect: snowflake COT(tbl.double_col); DOUBLE; # dialect: snowflake COS(tbl.double_col); DOUBLE; # dialect: snowflake CONCAT('Hello', 'World!'); VARCHAR; # dialect: snowflake CONCAT(tbl.str_col, tbl.str_col, tbl.str_col); VARCHAR; # dialect: snowflake CONCAT_WS(':', 'one'); VARCHAR; # dialect: snowflake CONCAT_WS(',', 'one', 'two', 'three'); VARCHAR; # dialect: snowflake CONCAT_WS(tbl.bin_col, tbl.bin_col); BINARY; # dialect: snowflake CONTAINS('hello world', 'world'); BOOLEAN; # dialect: snowflake CONTAINS(tbl.str_col, 'test'); BOOLEAN; # dialect: snowflake CONTAINS(tbl.bin_col, tbl.bin_col); BOOLEAN; # dialect: snowflake CONTAINS(tbl.bin_col, NULL); BOOLEAN; # dialect: snowflake CONVERT_TIMEZONE('America/New_York', '2024-08-06 09:10:00.000'); TIMESTAMPTZ; # dialect: snowflake CONVERT_TIMEZONE('America/Los_Angeles', 'America/New_York', '2024-08-06 09:10:00.000'); TIMESTAMPNTZ; # dialect: snowflake CURRENT_ACCOUNT(); VARCHAR; # dialect: snowflake CURRENT_ACCOUNT_NAME(); VARCHAR; # dialect: snowflake CURRENT_AVAILABLE_ROLES(); VARCHAR; # dialect: snowflake CURRENT_CLIENT(); VARCHAR; # dialect: snowflake 
CURRENT_IP_ADDRESS(); VARCHAR; # dialect: snowflake CURRENT_DATABASE(); VARCHAR; # dialect: snowflake CURRENT_SCHEMAS(); VARCHAR; # dialect: snowflake CURRENT_SECONDARY_ROLES(); VARCHAR; # dialect: snowflake CURRENT_SESSION(); VARCHAR; # dialect: snowflake CURRENT_STATEMENT(); VARCHAR; # dialect: snowflake CURRENT_VERSION(); VARCHAR; # dialect: snowflake CURRENT_TRANSACTION(); VARCHAR; # dialect: snowflake CURRENT_WAREHOUSE(); VARCHAR; # dialect: snowflake CURRENT_ORGANIZATION_USER(); VARCHAR; # dialect: snowflake CURRENT_REGION(); VARCHAR; # dialect: snowflake CURRENT_ROLE(); VARCHAR; # dialect: snowflake CURRENT_ROLE_TYPE(); VARCHAR; # dialect: snowflake CURRENT_ORGANIZATION_NAME(); VARCHAR; # dialect: snowflake DATEDIFF('year', tbl.date_col, tbl.date_col); INT; # dialect: snowflake DATEDIFF('month', tbl.timestamp_col, tbl.timestamp_col); INT; # dialect: snowflake TIMESTAMPDIFF('year', tbl.date_col, tbl.date_col); INT; # dialect: snowflake TIMESTAMPDIFF('month', tbl.timestamp_col, tbl.timestamp_col); INT; # dialect: snowflake TIMEDIFF('year', tbl.date_col, tbl.date_col); INT; # dialect: snowflake TIMEDIFF('month', tbl.timestamp_col, tbl.timestamp_col); INT; # dialect: snowflake DATE_TRUNC('year', TO_DATE('2024-05-09')); DATE; # dialect: snowflake DATE_TRUNC('minute', TO_TIME('08:50:48')); TIME; # dialect: snowflake DATE_TRUNC('minute', TO_TIMESTAMP('2024-05-09 08:50:57.891')); TIMESTAMP; # dialect: snowflake TIMESTAMP_FROM_PARTS(2024, 5, 9, 14, 30, 45); TIMESTAMP; # dialect: snowflake TIMESTAMP_FROM_PARTS(2024, 5, 9, 14, 30, 45, 123); TIMESTAMP; # dialect: snowflake TIMESTAMP_FROM_PARTS(CAST('2024-05-09' AS DATE), CAST('14:30:45' AS TIME)); TIMESTAMP; # dialect: snowflake TIMESTAMPFROMPARTS(2024, 5, 9, 14, 30, 45); TIMESTAMP; # dialect: snowflake TIMESTAMPFROMPARTS(CAST('2024-05-09' AS DATE), CAST('14:30:45' AS TIME)); TIMESTAMP; # dialect: snowflake TIMESTAMP_LTZ_FROM_PARTS(2024, 5, 9, 14, 30, 45); TIMESTAMPLTZ; # dialect: snowflake 
TIMESTAMP_LTZ_FROM_PARTS(2024, 5, 9, 14, 30, 45, 123); TIMESTAMPLTZ; # dialect: snowflake TIMESTAMP_NTZ_FROM_PARTS(2024, 5, 9, 14, 30, 45); TIMESTAMP; # dialect: snowflake TIMESTAMP_NTZ_FROM_PARTS(2024, 5, 9, 14, 30, 45, 123); TIMESTAMP; # dialect: snowflake TIMESTAMP_TZ_FROM_PARTS(2024, 5, 9, 14, 30, 45, 123, 'UTC'); TIMESTAMPTZ; # dialect: snowflake TIMESTAMP_TZ_FROM_PARTS(2024, 5, 9, 14, 30, 45, 123); TIMESTAMPTZ; # dialect: snowflake EDITDISTANCE('hello', 'world'); INT; # dialect: snowflake EDITDISTANCE(tbl.str_col, 'test'); INT; # dialect: snowflake EDITDISTANCE('hello', 'world', 3); INT; # dialect: snowflake EQUAL_NULL(1, 2); BOOLEAN; # dialect: snowflake EXTRACT(YEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(QUARTER FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(MONTH FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(WEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(WEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(DAY FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(DAYOFMONTH FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(DAYOFWEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(DAYOFWEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(DAYOFYEAR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(YEAROFWEEK FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(YEAROFWEEKISO FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(HOUR FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(MINUTE FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake 
EXTRACT(SECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); INT; # dialect: snowflake EXTRACT(NANOSECOND FROM CAST('2026-01-06 11:45:00.123456789' AS TIMESTAMP_NTZ)); BIGINT; # dialect: snowflake EXTRACT(EPOCH_SECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); BIGINT; # dialect: snowflake EXTRACT(EPOCH_MILLISECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); BIGINT; # dialect: snowflake EXTRACT(EPOCH_MICROSECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); BIGINT; # dialect: snowflake EXTRACT(EPOCH_NANOSECOND FROM CAST('2026-01-06 11:45:00' AS TIMESTAMP_NTZ)); BIGINT; # dialect: snowflake EXTRACT(YEAR FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(QUARTER FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(MONTH FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(WEEK FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(WEEKISO FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(DAY FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(DAYOFMONTH FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(DAYOFWEEK FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(DAYOFWEEKISO FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(DAYOFYEAR FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(YEAROFWEEK FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(YEAROFWEEKISO FROM CAST('2026-01-06' AS DATE)); INT; # dialect: snowflake EXTRACT(HOUR FROM CAST('11:45:00.123456789' AS TIME)); INT; # dialect: snowflake EXTRACT(MINUTE FROM CAST('11:45:00.123456789' AS TIME)); INT; # dialect: snowflake EXTRACT(SECOND FROM CAST('11:45:00.123456789' AS TIME)); INT; # dialect: snowflake YEAR(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake YEAR(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake YEAROFWEEK(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: 
snowflake YEAROFWEEK(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake YEAROFWEEKISO(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake YEAROFWEEKISO(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake DAY(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake DAY(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake DAYOFMONTH(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake DAYOFMONTH(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake DAYOFWEEK(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake DAYOFWEEK(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake DAYOFWEEKISO(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake DAYOFWEEKISO(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake DAYOFYEAR(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake DAYOFYEAR(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake WEEK(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake WEEK(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake WEEKOFYEAR(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake WEEKOFYEAR(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake WEEKISO(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake WEEKISO(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake MONTH(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake MONTH(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake QUARTER(CAST('2024-05-09' AS DATE)); TINYINT; # dialect: snowflake QUARTER(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); TINYINT; # dialect: snowflake EXP(1); DOUBLE; # dialect: snowflake EXP(5.5); DOUBLE; # dialect: snowflake FACTORIAL(5); BIGINT; # dialect: snowflake FLOOR(42); INT; # dialect: snowflake FLOOR(135.135, 1); DOUBLE; # dialect: snowflake FLOOR(tbl.bigint_col, -1); BIGINT; # dialect: 
snowflake GETBIT(11, 3); INT; # dialect: snowflake GROUPING(tbl.str_col); INT; # dialect: snowflake GROUPING(tbl.bigint_col); INT; # dialect: snowflake GROUPING_ID(tbl.str_col); BIGINT; # dialect: snowflake GROUPING_ID(tbl.bigint_col, tbl.str_col); BIGINT; # dialect: snowflake GREATEST(tbl.bigint_col, tbl.bigint_col); BIGINT; # dialect: snowflake GREATEST(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake GREATEST(tbl.str_col, tbl.str_col); VARCHAR; # dialect: snowflake GREATEST(tbl.double_col, tbl.bigint_col); DOUBLE; # dialect: snowflake GREATEST(tbl.bigint_col, tbl.double_col); DOUBLE; # dialect: snowflake ENDSWITH('hello world', 'world'); BOOLEAN; # dialect: snowflake ENDSWITH(tbl.str_col, 'test'); BOOLEAN; # dialect: snowflake ENDSWITH(tbl.bin_col, tbl.bin_col); BOOLEAN; # dialect: snowflake ENDSWITH(tbl.bin_col, NULL); BOOLEAN; # dialect: snowflake GREATEST_IGNORE_NULLS(1, 2, 3); INT; # dialect: snowflake GREATEST_IGNORE_NULLS(1, 2.5, 3); DOUBLE; # dialect: snowflake GREATEST_IGNORE_NULLS('a', 'b', 'c'); VARCHAR; # dialect: snowflake GREATEST_IGNORE_NULLS(CAST('2023-01-01' AS DATE), CAST('2023-01-02' AS DATE)); DATE; # dialect: snowflake HASH_AGG(tbl.str_col); DECIMAL(19, 0); # dialect: snowflake LEAST_IGNORE_NULLS(1, 2, 3); INT; # dialect: snowflake LEAST_IGNORE_NULLS(1, 2.5, 3); DOUBLE; # dialect: snowflake LEAST_IGNORE_NULLS('a', 'b', 'c'); VARCHAR; # dialect: snowflake LEAST_IGNORE_NULLS(CAST('2023-01-01' AS DATE), CAST('2023-01-02' AS DATE)); DATE; # dialect: snowflake HEX_DECODE_BINARY('48656C6C6F'); BINARY; # dialect: snowflake HEX_DECODE_STRING('48656C6C6F'); VARCHAR; # dialect: snowflake HEX_ENCODE('Hello World'); VARCHAR; # dialect: snowflake HEX_ENCODE('Hello World', 'upper'); VARCHAR; # dialect: snowflake HEX_ENCODE('Hello World', 'lower'); VARCHAR; # dialect: snowflake HOUR(CAST('08:50:57' AS TIME)); INT; # dialect: snowflake INITCAP('hello world'); VARCHAR; # dialect: snowflake INITCAP('hello world', ' '); VARCHAR; # dialect: 
snowflake INITCAP(tbl.str_col); VARCHAR; # dialect: snowflake IFF(TRUE, 42, 0); INT; # dialect: snowflake IFF(TRUE, 42, NULL); INT; # dialect: snowflake IFF(col1 > 0, 'yes', 'no'); VARCHAR; # dialect: snowflake IFF(FALSE, 1.5, 2.7); DOUBLE; # dialect: snowflake IFF(TRUE, CAST('2024-01-01' AS DATE), CAST('2024-12-31' AS DATE)); DATE; # dialect: snowflake IFNULL('hello', 'world'); VARCHAR; # dialect: snowflake IFNULL(1, 2); INT; # dialect: snowflake IFNULL(1.5, 2.7); DOUBLE; # dialect: snowflake IFNULL(5::BIGINT, 10::BIGINT); BIGINT; # dialect: snowflake IFNULL(CAST('2024-01-01' AS DATE), CAST('2024-12-31' AS DATE)); DATE; # dialect: snowflake IFNULL(5::BIGINT, 2.71::FLOAT); FLOAT; # dialect: snowflake IS_NULL_VALUE(payload:field); BOOLEAN; # dialect: snowflake 1 IN (1, 2, 3); BOOLEAN; # dialect: snowflake 1 NOT IN (1, 2, 3); BOOLEAN; # dialect: snowflake JAROWINKLER_SIMILARITY('hello', 'world'); INT; # dialect: duckdb JARO_WINKLER_SIMILARITY('hello', 'world'); DOUBLE; # dialect: snowflake INSERT('abc', 1, 2, 'Z'); VARCHAR; # dialect: snowflake INSERT(tbl.bin_col, 1, 2, tbl.bin_col); BINARY; # dialect: snowflake KURTOSIS(tbl.double_col); DOUBLE; # dialect: snowflake KURTOSIS(tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake KURTOSIS(tbl.float_col); DOUBLE; # dialect: snowflake KURTOSIS(tbl.float_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake KURTOSIS(tbl.int_col); NUMBER(38, 12); # dialect: snowflake KURTOSIS(tbl.int_col) OVER (PARTITION BY 1); NUMBER(38, 12); # dialect: snowflake KURTOSIS(tbl.decfloat_col); DECFLOAT; # dialect: snowflake KURTOSIS(tbl.decfloat_col) OVER (PARTITION BY 1); DECFLOAT; # dialect: snowflake LEAST(x::DECIMAL(18, 2)); DECIMAL(18, 2); # dialect: snowflake LEFT('hello world', 5); VARCHAR; # dialect: snowflake LEFT(tbl.str_col, 3); STRING; # dialect: snowflake LEFT(tbl.bin_col, 3); BINARY; # dialect: snowflake LEFT(tbl.bin_col, NULL); BINARY; # dialect: snowflake LAST_DAY(CAST('2024-05-09' AS DATE)); DATE; # dialect: 
snowflake LAST_DAY(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); DATE; # dialect: snowflake LAST_DAY(CAST('2024-02-15' AS DATE), MONTH); DATE; # dialect: snowflake LEN(tbl.str_col); INT; # dialect: snowflake LEN(tbl.bin_col); INT; # dialect: snowflake LOCALTIMESTAMP; TIMESTAMPLTZ; # dialect: snowflake LOCALTIMESTAMP(); TIMESTAMPLTZ; # dialect: snowflake LOCALTIMESTAMP(3); TIMESTAMPLTZ; # dialect: snowflake OCTET_LENGTH(tbl.str_col); INT; # dialect: snowflake OCTET_LENGTH(tbl.bin_col); INT; # dialect: snowflake PARSE_URL('https://example.com/path'); OBJECT; # dialect: snowflake PARSE_URL(tbl.str_col, 0); OBJECT; # dialect: snowflake POSITION('abc' IN 'abcdef'); INT; # dialect: snowflake POSITION('abc', 'abcdef'); INT; # dialect: snowflake POSITION('abc', 'abcdef', 1); INT; # dialect: snowflake PREVIOUS_DAY(CAST('2024-05-09' AS DATE), 'MONDAY'); DATE; # dialect: snowflake PREVIOUS_DAY(CAST('2024-05-09 08:50:57' AS TIMESTAMP), 'MONDAY'); DATE; # dialect: snowflake DECODE(x, 1, 100, 2, 200, 0); INT; # dialect: snowflake DECODE(status, 'A', 'Active', 'I', 'Inactive', 'Neither'); VARCHAR; # dialect: snowflake DECODE(100, 100, 1, 90, 2, 5.5); DOUBLE; # dialect: snowflake DECODE(x, 1, 100, NULL); INT; # dialect: snowflake PI(); DOUBLE; # dialect: snowflake POW(tbl.double_col, 2); DOUBLE; # dialect: snowflake RANDOM(); BIGINT; # dialect: snowflake RANDOM(123); BIGINT; # dialect: snowflake RANDSTR(123, 456); VARCHAR; # dialect: snowflake RANDSTR(123, RANDOM()); VARCHAR; # dialect: snowflake RADIANS(tbl.double_col); DOUBLE; # dialect: snowflake LOWER(tbl.str_col); VARCHAR; # dialect: snowflake LN(tbl.double_col); DOUBLE; # dialect: snowflake LOG(tbl.double_col); DOUBLE; # dialect: snowflake LTRIM(' hello world '); VARCHAR; # dialect: snowflake LTRIM(tbl.str_col); VARCHAR; # dialect: snowflake LTRIM(NULL); VARCHAR; # dialect: snowflake MAP_CAT(CAST(col AS MAP(VARCHAR, VARCHAR)), CAST(col AS MAP(VARCHAR, VARCHAR))); MAP; # dialect: snowflake MAP_CONTAINS_KEY('k1', CAST(col AS 
MAP(VARCHAR, VARCHAR))); BOOLEAN; # dialect: snowflake MAP_DELETE(CAST(col AS MAP(VARCHAR, VARCHAR)), 'b'); MAP; # dialect: snowflake MAP_INSERT(CAST(col AS MAP(VARCHAR, VARCHAR)), 'b', '2'); MAP; # dialect: snowflake MAP_KEYS(CAST(col AS MAP(VARCHAR, VARCHAR))); ARRAY; # dialect: snowflake MAP_PICK(CAST(col AS MAP(VARCHAR, VARCHAR)), 'a', 'c'); MAP; # dialect: snowflake MAP_SIZE(CAST(col AS MAP(VARCHAR, VARCHAR))); INT; # dialect: snowflake MINUTE(CAST('08:50:57' AS TIME)); INT; # dialect: snowflake MEDIAN(2.71::FLOAT); FLOAT; # dialect: snowflake MEDIAN(tbl.bigint_col) OVER (PARTITION BY 1); DECIMAL(38, 3); # dialect: snowflake MEDIAN(CAST(100 AS DECIMAL(10,2))); DECIMAL(13, 5); # dialect: snowflake MONTHNAME(CAST('2024-05-09' AS DATE)); VARCHAR; # dialect: snowflake MONTHNAME(CAST('2024-05-09 08:50:57' AS TIMESTAMP)); VARCHAR; # dialect: snowflake NORMAL(0, 1, RANDOM()); DOUBLE; # dialect: snowflake NVL2(col1, col2, col3); UNKNOWN; # dialect: snowflake NVL('hello', 'world'); VARCHAR; # dialect: snowflake NVL(tbl.int_col, 42); INT; # dialect: snowflake NVL(tbl.date_col, CAST('2024-01-01' AS DATE)); DATE; # dialect: snowflake NVL(1, 3.14); DOUBLE; # dialect: snowflake NVL(5::BIGINT, 2.71::FLOAT); FLOAT; # dialect: snowflake NULLIF(1, 2); INT; # dialect: snowflake NULLIF(1.5, 2.7); DOUBLE; # dialect: snowflake NULLIF(5::BIGINT, 10::BIGINT); BIGINT; # dialect: snowflake NULLIF(CAST('2024-01-01' AS DATE), CAST('2024-12-31' AS DATE)); DATE; # dialect: snowflake NULLIF(1::INT, 2::BIGINT); BIGINT; # dialect: snowflake NULLIF(1::INT, 2.5::DOUBLE); DOUBLE; # dialect: snowflake NULLIFZERO(5); INT; # dialect: snowflake NULLIFZERO(5::BIGINT); BIGINT; # dialect: snowflake NULLIFZERO(5.5); DOUBLE; # dialect: snowflake NULLIFZERO(5.5::FLOAT); FLOAT; # dialect: snowflake MOD(tbl.bigint_col, 3); BIGINT; # dialect: snowflake MOD(tbl.double_col, 2.5); DOUBLE; # dialect: snowflake MOD(42, 7); INT; # dialect: snowflake MONTHS_BETWEEN(tbl.date_col, CAST('2019-01-01' AS DATE)); DOUBLE; 
# dialect: snowflake MONTHS_BETWEEN(tbl.timestamp_col, CAST('2019-02-15 01:00:00' AS TIMESTAMP)); DOUBLE; # dialect: snowflake REGR_AVGX(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_AVGX(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_AVGX(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_AVGY(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_AVGY(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_AVGY(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_COUNT(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_COUNT(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_COUNT(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_COUNT(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_INTERCEPT(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_INTERCEPT(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_INTERCEPT(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_INTERCEPT(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_R2(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_R2(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_R2(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_R2(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_SXX(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_SXX(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_SXX(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_SXX(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_SXY(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_SXY(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_SXY(tbl.int_col, 
tbl.int_col); DOUBLE; # dialect: snowflake REGR_SXY(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_SYY(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_SYY(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_SYY(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_SYY(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_SLOPE(tbl.double_col, tbl.double_col); DOUBLE; # dialect: snowflake REGR_SLOPE(tbl.double_col, tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake REGR_SLOPE(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_SLOPE(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_VALX(NULL, 2.0); DOUBLE; # dialect: snowflake REGR_VALX(NULL, NULL); DOUBLE; # dialect: snowflake REGR_VALX(2.0, NULL); DOUBLE; # dialect: snowflake REGR_VALX(1.0, 2.0); DOUBLE; # dialect: snowflake REGR_VALX(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_VALX(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake REGR_VALY(1.0, 2.0); DOUBLE; # dialect: snowflake REGR_VALY(tbl.int_col, tbl.int_col); DOUBLE; # dialect: snowflake REGR_VALY(tbl.decfloat_col, tbl.decfloat_col); DECFLOAT; # dialect: snowflake 'foo' REGEXP 'bar'; BOOLEAN; # dialect: snowflake 'foo' NOT REGEXP 'bar'; BOOLEAN; # dialect: snowflake 'text123' REGEXP '^[a-z]+[0-9]+$'; BOOLEAN; # dialect: snowflake REGEXP_LIKE('foo', 'bar'); BOOLEAN; # dialect: snowflake REGEXP_LIKE(NULL, 'bar'); BOOLEAN; # dialect: snowflake REGEXP_LIKE('foo', 'bar', 'baz'); BOOLEAN; # dialect: snowflake REGEXP_LIKE('foo', NULL, 'baz'); BOOLEAN; # dialect: snowflake REGEXP_COUNT('hello world', 'l'); DECIMAL(38, 0); # dialect: snowflake REGEXP_COUNT('hello world', 'l', 1); DECIMAL(38, 0); # dialect: snowflake REGEXP_COUNT('hello world', 'l', 1, 'i'); DECIMAL(38, 0); # dialect: snowflake REGEXP_EXTRACT_ALL('hello world', 'world'); ARRAY; # dialect: snowflake 
REGEXP_EXTRACT_ALL('hello world', 'world', 1); ARRAY; # dialect: snowflake REGEXP_EXTRACT_ALL('hello world', 'world', 1, 1); ARRAY; # dialect: snowflake REGEXP_EXTRACT_ALL('hello world', 'world', 1, 1, 'i'); ARRAY; # dialect: snowflake REGEXP_EXTRACT_ALL('hello world', 'world', 1, 1, 'i', 0); ARRAY; # dialect: snowflake REGEXP_INSTR('hello world', 'world'); DECIMAL(38, 0); # dialect: snowflake REGEXP_INSTR('hello world', 'world', 1, 1, 0); DECIMAL(38, 0); # dialect: snowflake REGEXP_INSTR('hello world', 'world', 1, 1, 0, 'i'); DECIMAL(38, 0); # dialect: snowflake REGEXP_INSTR('hello world', 'world', 1, 1, 0, 'i', 1); DECIMAL(38, 0); # dialect: snowflake REGEXP_REPLACE('hello world', 'world', 'universe'); VARCHAR; # dialect: snowflake REGEXP_REPLACE('hello world', 'world', NULL); VARCHAR; # dialect: snowflake REGEXP_REPLACE('hello world', 'world', 'universe', 1, 1, 'i'); VARCHAR; # dialect: snowflake REGEXP_SUBSTR('hello world', 'world'); VARCHAR; # dialect: snowflake REGEXP_SUBSTR(NULL, 'world'); VARCHAR; # dialect: snowflake REGEXP_SUBSTR('hello world', NULL); VARCHAR; # dialect: snowflake REGEXP_SUBSTR('hello world', 'world', 1); VARCHAR; # dialect: snowflake REGEXP_SUBSTR('hello world', 'world', 1, 1, 'e', NULL); VARCHAR; # dialect: snowflake REGEXP_SUBSTR_ALL('hello world', 'world'); ARRAY; # dialect: snowflake REGEXP_SUBSTR_ALL('hello world', 'world', 1); ARRAY; # dialect: snowflake REGEXP_SUBSTR_ALL('hello world', 'world', 1, 1); ARRAY; # dialect: snowflake REGEXP_SUBSTR_ALL('hello world', 'world', 1, 1, 'i'); ARRAY; # dialect: snowflake REGEXP_SUBSTR_ALL('hello world', 'world', 1, 1, 'i', 0); ARRAY; # dialect: snowflake REPEAT('hello', 3); VARCHAR; # dialect: snowflake REPEAT(tbl.str_col, 2); VARCHAR; # dialect: snowflake REPEAT('hello', NULL); VARCHAR; # dialect: snowflake REPLACE(tbl.str_col, 'old', 'new'); VARCHAR; # dialect: snowflake REPLACE('hello', 'old', NULL); VARCHAR; # dialect: snowflake REVERSE('Hello, world!'); VARCHAR; # dialect: snowflake 
REVERSE(tbl.str_col); VARCHAR; # dialect: snowflake REVERSE(tbl.bin_col); BINARY; # dialect: snowflake REVERSE(NULL); VARCHAR; # dialect: snowflake ROUND(42); INT; # dialect: snowflake ROUND(tbl.bigint_col, -1); BIGINT; # dialect: snowflake ROUND(tbl.double_col, 0, 'HALF_TO_EVEN'); DOUBLE; # dialect: snowflake ROUND(CAST(3.14 AS FLOAT), 1); FLOAT; # dialect: snowflake ROUND(CAST(1.5 AS DECFLOAT), 0); DECFLOAT; # dialect: snowflake FLOOR(CAST(3.7 AS DECFLOAT)); DECFLOAT; # dialect: snowflake FLOOR(CAST(3.7 AS FLOAT)); FLOAT; # dialect: snowflake FLOOR(tbl.double_col); DOUBLE; # dialect: snowflake CEIL(CAST(3.2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake CEIL(CAST(3.2 AS FLOAT)); FLOAT; # dialect: snowflake CEIL(tbl.double_col); DOUBLE; # dialect: snowflake SQRT(CAST(16 AS DECFLOAT)); DECFLOAT; # dialect: snowflake SQRT(CAST(16 AS DOUBLE)); DOUBLE; # dialect: snowflake EXP(CAST(2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake EXP(CAST(2 AS DOUBLE)); DOUBLE; # dialect: snowflake LN(CAST(10 AS DECFLOAT)); DECFLOAT; # dialect: snowflake LN(CAST(10 AS DOUBLE)); DOUBLE; # dialect: snowflake LOG(CAST(100 AS DECFLOAT), 10); DECFLOAT; # dialect: snowflake LOG(CAST(100 AS DOUBLE), 10); DOUBLE; # dialect: snowflake POW(CAST(2 AS DECFLOAT), 3); DECFLOAT; # dialect: snowflake POW(CAST(2 AS DOUBLE), 3); DOUBLE; # dialect: snowflake SIN(CAST(1.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake SIN(CAST(1.5 AS DOUBLE)); DOUBLE; # dialect: snowflake COS(CAST(1.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake COS(CAST(1.5 AS DOUBLE)); DOUBLE; # dialect: snowflake TAN(CAST(1.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake TAN(CAST(1.5 AS DOUBLE)); DOUBLE; # dialect: snowflake COT(CAST(1.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake COT(CAST(1.5 AS DOUBLE)); DOUBLE; # dialect: snowflake ASIN(CAST(0.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake ASIN(CAST(0.5 AS DOUBLE)); DOUBLE; # dialect: snowflake ACOS(CAST(0.5 AS DECFLOAT)); DECFLOAT; # dialect: snowflake ACOS(CAST(0.5 AS DOUBLE)); 
DOUBLE; # dialect: snowflake ATAN(CAST(1 AS DECFLOAT)); DECFLOAT; # dialect: snowflake ATAN(CAST(1 AS DOUBLE)); DOUBLE; # dialect: snowflake ATAN2(CAST(1 AS DECFLOAT), 1); DECFLOAT; # dialect: snowflake ATAN2(CAST(1 AS DOUBLE), 1); DOUBLE; # dialect: snowflake DEGREES(CAST(3.14159 AS DECFLOAT)); DECFLOAT; # dialect: snowflake DEGREES(CAST(3.14159 AS DOUBLE)); DOUBLE; # dialect: snowflake RADIANS(CAST(180 AS DECFLOAT)); DECFLOAT; # dialect: snowflake RADIANS(CAST(180 AS DOUBLE)); DOUBLE; # dialect: snowflake TANH(CAST(1 AS DECFLOAT)); DECFLOAT; # dialect: snowflake TANH(CAST(1 AS DOUBLE)); DOUBLE; # dialect: snowflake TO_DECFLOAT('123.456'); DECFLOAT; # dialect: snowflake TO_DECFLOAT('123.456', '999.999'); DECFLOAT; # dialect: snowflake TRY_TO_DECFLOAT('123.456'); DECFLOAT; # dialect: snowflake TRY_TO_DECFLOAT('invalid'); DECFLOAT; # dialect: snowflake TRY_TO_BINARY('48656C6C6F'); BINARY; # dialect: snowflake TRY_TO_BINARY('48656C6C6F', 'HEX'); BINARY; # dialect: snowflake TRY_TO_BOOLEAN('true'); BOOLEAN; # dialect: snowflake TO_DATE('2024-01-31'); DATE; # dialect: snowflake TO_DATE('2024-01-31', 'AUTO'); DATE; # dialect: snowflake TRY_TO_DATE('2024-01-31'); DATE; # dialect: snowflake TRY_TO_DATE('2024-01-31', 'AUTO'); DATE; # dialect: snowflake TO_DECIMAL('123.45'); DECIMAL(38, 0); # dialect: snowflake TO_DECIMAL('123.45', '999.99'); DECIMAL(38, 0); # dialect: snowflake TO_DECIMAL('123.45', '999.99', 10, 2); DECIMAL(38, 0); # dialect: snowflake TRY_TO_DECIMAL('123.45'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_DECIMAL('123.45', '999.99'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_DECIMAL('123.45', '999.99', 10, 2); DECIMAL(38, 0); # dialect: snowflake TO_DOUBLE('123.456'); DOUBLE; # dialect: snowflake TO_DOUBLE('123.456', '999.99'); DOUBLE; # dialect: snowflake TRY_TO_DOUBLE('123.456'); DOUBLE; # dialect: snowflake TRY_TO_DOUBLE('123.456', '999.99'); DOUBLE; # dialect: snowflake TO_FILE(tbl.obj_col); FILE; # dialect: snowflake TO_FILE('file.csv'); FILE; # 
dialect: snowflake TO_FILE('file.csv', '/relativepath/'); FILE; # dialect: snowflake TRY_TO_FILE(tbl.obj_col); FILE; # dialect: snowflake TRY_TO_FILE('file.csv'); FILE; # dialect: snowflake TRY_TO_FILE('file.csv', '/relativepath/'); FILE; # dialect: snowflake TO_NUMBER('123.45'); DECIMAL(38, 0); # dialect: snowflake TO_NUMBER('123.45', '999.99'); DECIMAL(38, 0); # dialect: snowflake TO_NUMBER('123.45', '999.99', 10, 2); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMBER('123.45'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMBER('123.45', '999.99'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMBER('123.45', '999.99', 10, 2); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMERIC('123.45'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMERIC('123.45', '999.99'); DECIMAL(38, 0); # dialect: snowflake TRY_TO_NUMERIC('123.45', '999.99', 10, 2); DECIMAL(38, 0); # dialect: snowflake TO_TIME('12:30:00'); TIME; # dialect: snowflake TO_TIME('12:30:00', 'AUTO'); TIME; # dialect: snowflake TRY_TO_TIME('12:30:00'); TIME; # dialect: snowflake TRY_TO_TIME('12:30:00', 'AUTO'); TIME; # dialect: snowflake TO_TIME('093000', 'HH24MISS'); TIME; # dialect: snowflake TRY_TO_TIME('093000', 'HH24MISS'); TIME; # dialect: snowflake TO_TIMESTAMP('2024-01-15 12:30:00'); TIMESTAMP; # dialect: snowflake TO_TIMESTAMP('2024-01-15 12:30:00', 'AUTO'); TIMESTAMP; # dialect: snowflake TO_TIMESTAMP_LTZ('2024-01-15 12:30:00'); TIMESTAMPLTZ; # dialect: snowflake TO_TIMESTAMP_LTZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPLTZ; # dialect: snowflake TO_TIMESTAMP_NTZ('2024-01-15 12:30:00'); TIMESTAMPNTZ; # dialect: snowflake TO_TIMESTAMP_NTZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPNTZ; # dialect: snowflake TO_TIMESTAMP_TZ('2024-01-15 12:30:00'); TIMESTAMPTZ; # dialect: snowflake TO_TIMESTAMP_TZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPTZ; # dialect: snowflake TRY_TO_TIMESTAMP('2024-01-15 12:30:00'); TIMESTAMP; # dialect: snowflake TRY_TO_TIMESTAMP('2024-01-15 12:30:00', 'AUTO'); TIMESTAMP; # dialect: 
snowflake TRY_TO_TIMESTAMP_LTZ('2024-01-15 12:30:00'); TIMESTAMPLTZ; # dialect: snowflake TRY_TO_TIMESTAMP_LTZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPLTZ; # dialect: snowflake TRY_TO_TIMESTAMP_NTZ('2024-01-15 12:30:00'); TIMESTAMPNTZ; # dialect: snowflake TRY_TO_TIMESTAMP_NTZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPNTZ; # dialect: snowflake TRY_TO_TIMESTAMP_TZ('2024-01-15 12:30:00'); TIMESTAMPTZ; # dialect: snowflake TRY_TO_TIMESTAMP_TZ('2024-01-15 12:30:00', 'AUTO'); TIMESTAMPTZ; # dialect: snowflake ABS(CAST(-123.456 AS DECFLOAT)); DECFLOAT; # dialect: snowflake ABS(CAST(-123.456 AS FLOAT)); FLOAT; # dialect: snowflake MOD(CAST(10 AS DECFLOAT), 3); DECFLOAT; # dialect: snowflake MOD(CAST(10 AS FLOAT), 3); FLOAT; # dialect: snowflake GREATEST(CAST(1 AS FLOAT), CAST(2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake GREATEST(CAST(2 AS DECFLOAT), CAST(2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake GREATEST(CAST(1 AS FLOAT), CAST(2 AS FLOAT)); FLOAT; # dialect: snowflake LEAST(CAST(1 AS FLOAT), CAST(2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake LEAST(CAST(1 AS DECFLOAT), CAST(2 AS DECFLOAT)); DECFLOAT; # dialect: snowflake LEAST(CAST(1 AS FLOAT), CAST(2 AS FLOAT)); FLOAT; # dialect: snowflake SECOND(CAST('08:50:57' AS TIME)); INT; # dialect: snowflake SQUARE(tbl.double_col); DOUBLE; # dialect: snowflake TANH(tbl.double_col); DOUBLE; # dialect: snowflake RIGHT('hello world', 5); VARCHAR; # dialect: snowflake RIGHT(tbl.str_col, 3); STRING; # dialect: snowflake RIGHT(tbl.bin_col, 3); BINARY; # dialect: snowflake RIGHT(tbl.str_col, NULL); STRING; # dialect: snowflake RLIKE('foo', 'bar'); BOOLEAN; # dialect: snowflake RLIKE(NULL, 'bar'); BOOLEAN; # dialect: snowflake RLIKE('foo', 'bar', NULL); BOOLEAN; # dialect: snowflake RTRIM(' hello world '); VARCHAR; # dialect: snowflake RTRIM(tbl.str_col); VARCHAR; # dialect: snowflake RTRIM(NULL); VARCHAR; # dialect: snowflake RTRIMMED_LENGTH(' ABCD '); INT; # dialect: snowflake SHA1('foo'); VARCHAR; # dialect: snowflake 
SHA1(null); VARCHAR; # dialect: snowflake SHA1_BINARY('foo'); BINARY; # dialect: snowflake SHA1_BINARY(null); BINARY; # dialect: snowflake SHA1_HEX('foo'); VARCHAR; # dialect: snowflake SHA1_HEX(null); VARCHAR; # dialect: snowflake SHA2('foo'); VARCHAR; # dialect: snowflake SHA2(null); VARCHAR; # dialect: snowflake SHA2('foo', 256); VARCHAR; # dialect: snowflake SHA2('foo', null); VARCHAR; # dialect: snowflake SHA2_BINARY('foo'); BINARY; # dialect: snowflake SHA2_BINARY(null); BINARY; # dialect: snowflake SHA2_BINARY('foo', 256); BINARY; # dialect: snowflake SHA2_BINARY('foo', null); BINARY; # dialect: snowflake SHA2_HEX('foo'); VARCHAR; # dialect: snowflake SHA2_HEX(null); VARCHAR; # dialect: snowflake SHA2_HEX('foo', 256); VARCHAR; # dialect: snowflake SHA2_HEX('foo', null); VARCHAR; # dialect: snowflake SIN(tbl.double_col); DOUBLE; # dialect: snowflake SINH(1); DOUBLE; # dialect: snowflake SINH(1.5); DOUBLE; # dialect: snowflake SIGN(tbl.double_col); INT; # dialect: snowflake SKEW(tbl.double_col); DOUBLE; # dialect: snowflake SOUNDEX(tbl.str_col); VARCHAR; # dialect: snowflake SOUNDEX_P123('test'); VARCHAR; # dialect: snowflake SPACE(5); VARCHAR; # dialect: snowflake SPACE(tbl.int_col); VARCHAR; # dialect: snowflake SPACE(NULL); VARCHAR; # dialect: snowflake SQRT(tbl.double_col); DOUBLE; # dialect: snowflake SPLIT('hello world', ' '); ARRAY; # dialect: snowflake SPLIT(tbl.str_col, ','); ARRAY; # dialect: snowflake SPLIT(NULL, ','); ARRAY; # dialect: snowflake SPLIT_PART('11.22.33', '.', 1); VARCHAR; # dialect: snowflake STRTOK('hello world'); VARCHAR; # dialect: snowflake STRTOK('hello world', ' '); VARCHAR; # dialect: snowflake STRTOK('a.b.c', '.', 1); VARCHAR; # dialect: snowflake STARTSWITH('hello world', 'hello'); BOOLEAN; # dialect: snowflake STARTSWITH(tbl.str_col, 'test'); BOOLEAN; # dialect: snowflake STARTSWITH(tbl.bin_col, tbl.bin_col); BOOLEAN; # dialect: snowflake STARTSWITH(tbl.bin_col, NULL); BOOLEAN; # dialect: snowflake SEARCH(line, 'king'); 
BOOLEAN; # dialect: snowflake SEARCH((play, line), 'dream'); BOOLEAN; # dialect: snowflake SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER'); BOOLEAN; # dialect: snowflake SEARCH(line, 'king', SEARCH_MODE => 'OR'); BOOLEAN; # dialect: snowflake SEARCH(line, 'king', ANALYZER => 'UNICODE_ANALYZER', SEARCH_MODE => 'AND'); BOOLEAN; # dialect: snowflake SEARCH_IP(col, '192.168.0.0'); BOOLEAN; # dialect: snowflake STDDEV(tbl.double_col); DOUBLE; # dialect: snowflake STDDEV(tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake STDDEV_POP(tbl.double_col); DOUBLE; # dialect: snowflake STDDEV_POP(tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake STDDEV_SAMP(tbl.double_col); DOUBLE; # dialect: snowflake STDDEV_SAMP(tbl.double_col) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake STRTOK_TO_ARRAY('a,b,c', ','); ARRAY; # dialect: snowflake SUBSTR('hello world', 1, 5); VARCHAR; # dialect: snowflake SUBSTR(tbl.str_col, 1, 3); STRING; # dialect: snowflake SUBSTR(tbl.bin_col, 1, 3); BINARY; # dialect: snowflake SUBSTR(tbl.str_col, NULL); STRING; # dialect: snowflake TAN(tbl.double_col); DOUBLE; # dialect: snowflake TIMEADD(hour, 1, CAST('14:30:45' AS TIME)); TIME; # dialect: snowflake TIMEADD(minute, 30, CAST('2024-05-09 14:30:45' AS TIMESTAMP)); TIMESTAMP; # dialect: snowflake TIMEADD(day, 1, CAST('2024-05-09' AS DATE)); DATE; # dialect: snowflake TIMEADD(hour, 1, CAST('2024-05-09' AS DATE)); TIMESTAMPNTZ; # dialect: snowflake TIME_FROM_PARTS(14, 30, 45); TIME; # dialect: snowflake TIME_FROM_PARTS(14, 30, 45, 123); TIME; # dialect: snowflake TIMEFROMPARTS(14, 30, 45); TIME; # dialect: snowflake TIMEFROMPARTS(14, 30, 45, 123); TIME; # dialect: snowflake TIME_SLICE(tbl.timestamp_col, 15, 'minute'); TIMESTAMP; # dialect: snowflake TIME_SLICE(tbl.date_col, 1, 'day', 'start'); DATE; # dialect: snowflake TIMESTAMPADD(DAY, 5, CAST('2008-12-25' AS DATE)); DATE; # dialect: snowflake TIMESTAMPADD(HOUR, 3, TO_TIME('05:00:00')); TIME; # dialect: snowflake 
TIMESTAMPADD(YEAR, 1, TO_TIMESTAMP('2022-05-08 14:30:00')); TIMESTAMP; # dialect: snowflake TRANSLATE('hello world', 'elo', 'XYZ'); VARCHAR; # dialect: snowflake UNICODE('€'); INT; # dialect: snowflake WIDTH_BUCKET(tbl.double_col, 0, 100, 10); INT; # dialect: snowflake ZEROIFNULL(5); INT; # dialect: snowflake ZEROIFNULL(5::BIGINT); BIGINT; # dialect: snowflake ZEROIFNULL(5.5); DOUBLE; # dialect: snowflake ZEROIFNULL(5.5::FLOAT); FLOAT; # dialect: snowflake ZEROIFNULL(5.12::DECIMAL(10,2)); DECIMAL(10, 2); # dialect: snowflake TRIM('hello world'); VARCHAR; # dialect: snowflake TRIM('hello world', 'hello'); VARCHAR; # dialect: snowflake TRIM(tbl.str_col); VARCHAR; # dialect: snowflake TRIM(tbl.str_col, tbl.str_col); VARCHAR; # dialect: snowflake TRIM(NULL); VARCHAR; # dialect: snowflake TRY_BASE64_DECODE_BINARY('SGVsbG8='); BINARY; # dialect: snowflake TRY_BASE64_DECODE_BINARY('SGVsbG8=', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'); BINARY; # dialect: snowflake TRY_BASE64_DECODE_STRING('SGVsbG8gV29ybGQ='); VARCHAR; # dialect: snowflake TRY_BASE64_DECODE_STRING('SGVsbG8gV29ybGQ=', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'); VARCHAR; # dialect: snowflake TRY_HEX_DECODE_BINARY('48656C6C6F'); BINARY; # dialect: snowflake TRY_HEX_DECODE_STRING('48656C6C6F'); VARCHAR; # dialect: snowflake UPPER('Hello, world!'); VARCHAR; # dialect: snowflake UPPER(tbl.str_col); VARCHAR; # dialect: snowflake UUID_STRING(); VARCHAR; # dialect: snowflake UUID_STRING('foo', 'bar'); VARCHAR; # dialect: snowflake UUID_STRING(null, null); VARCHAR; # dialect: snowflake MD5(tbl.str_col); VARCHAR; # dialect: snowflake MD5_HEX(tbl.str_col); VARCHAR; # dialect: snowflake MD5_BINARY(tbl.str_col); BINARY; # dialect: snowflake MD5_NUMBER_LOWER64(tbl.str_col); BIGINT; # dialect: snowflake MD5_NUMBER_UPPER64(tbl.str_col); BIGINT; # dialect: snowflake 'Hello' NOT ILIKE 'h%'; BOOLEAN; # dialect: snowflake 'Hello' ILIKE 'h_llo'; BOOLEAN; # dialect: snowflake 
tbl.str_col NOT ILIKE '%x%'; BOOLEAN; # dialect: snowflake 'Hello' NOT LIKE 'H%'; BOOLEAN; # dialect: snowflake 'Hello' LIKE 'H_llo'; BOOLEAN; # dialect: snowflake tbl.str_col NOT LIKE '%e%'; BOOLEAN; # dialect: snowflake tbl.str_col LIKE ALL ('H%', '%o'); BOOLEAN; # dialect: snowflake tbl.str_col LIKE ANY ('H%', '%o'); BOOLEAN; # dialect: snowflake tbl.str_col ILIKE ANY ('h%', '%x'); BOOLEAN; # dialect: snowflake LIKE(tbl.str_col, 'pattern'); BOOLEAN; # dialect: snowflake ILIKE(tbl.str_col, 'pattern'); BOOLEAN; # dialect: snowflake OBJECT_AGG(tbl.str_col, tbl.variant_col); OBJECT; # dialect: snowflake PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY tbl.int_col); INT; # dialect: snowflake PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY tbl.double_col); DOUBLE; # dialect: snowflake PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY tbl.int_col); INT; # dialect: snowflake PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tbl.double_col); DOUBLE; # dialect: snowflake PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY tbl.bigint_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake PARSE_IP('192.168.1.1', 'INET'); OBJECT; # dialect: snowflake MAX(tbl.bigint_col); BIGINT; # dialect: snowflake MAX(tbl.int_col); INT; # dialect: snowflake MAX(tbl.double_col); DOUBLE; # dialect: snowflake MAX(tbl.str_col); VARCHAR; # dialect: snowflake MAX(tbl.date_col); DATE; # dialect: snowflake MAX(tbl.timestamp_col); TIMESTAMP; # dialect: snowflake MAX_BY('foo', tbl.bigint_col); VARCHAR; # dialect: snowflake MAX_BY('foo', tbl.bigint_col, 3); ARRAY; # dialect: snowflake MIN_BY('foo', tbl.bigint_col); VARCHAR; # dialect: snowflake MIN_BY('foo', tbl.bigint_col, 3); ARRAY; # dialect: snowflake APPROX_PERCENTILE(tbl.bigint_col, 0.5); DOUBLE; # dialect: snowflake APPROX_PERCENTILE(tbl.double_col, 0.5); DOUBLE; # dialect: snowflake APPROX_PERCENTILE(tbl.int_col, 0.9); DOUBLE; # dialect: snowflake APPROX_PERCENTILE(tbl.bigint_col, 0.5) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake 
APPROX_PERCENTILE(tbl.double_col, 0.5) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake APPROX_PERCENTILE(tbl.int_col, 0.9) OVER (PARTITION BY 1); DOUBLE; # dialect: snowflake APPROX_PERCENTILE_COMBINE(tbl.state_col); OBJECT; # dialect: snowflake APPROX_PERCENTILE_ACCUMULATE(tbl.bigint_col); OBJECT; # dialect: snowflake APPROX_PERCENTILE_ACCUMULATE(tbl.double_col); OBJECT; # dialect: snowflake APPROX_PERCENTILE_ACCUMULATE(tbl.int_col); OBJECT; # dialect: snowflake APPROX_PERCENTILE_ESTIMATE(tbl.state_col, 0.5); DOUBLE; # dialect: snowflake APPROX_TOP_K_ACCUMULATE(tbl.str_col, 10); OBJECT; # dialect: snowflake APPROX_TOP_K_COMBINE(tbl.state_col, 10); OBJECT; # dialect: snowflake APPROX_TOP_K_COMBINE(tbl.state_col); OBJECT; # dialect: snowflake APPROX_TOP_K_ESTIMATE(tbl.state_col, 4); ARRAY; # dialect: snowflake APPROX_TOP_K_ESTIMATE(tbl.state_col); ARRAY; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.str_col); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.bigint_col); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.double_col); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(*); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(DISTINCT tbl.str_col); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.str_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.bigint_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake APPROX_COUNT_DISTINCT(tbl.double_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake APPROX_TOP_K(tbl.bigint_col); ARRAY; # dialect: snowflake APPROX_TOP_K(tbl.str_col); ARRAY; # dialect: snowflake APPROX_TOP_K(tbl.str_col, 5); ARRAY; # dialect: snowflake APPROX_TOP_K(tbl.str_col, 5, 1000); ARRAY; # dialect: snowflake MINHASH(5, tbl.int_col); VARIANT; # dialect: snowflake MINHASH(5, tbl.int_col, tbl.str_col); VARIANT; # dialect: snowflake MINHASH(5, *); VARIANT; # dialect: snowflake MINHASH_COMBINE(tbl.variant_col); VARIANT; # dialect: snowflake APPROXIMATE_SIMILARITY(tbl.variant_col); 
DOUBLE; # dialect: snowflake APPROXIMATE_JACCARD_INDEX(tbl.variant_col); DOUBLE; # dialect: snowflake MIN(tbl.double_col); DOUBLE; # dialect: snowflake MIN(tbl.int_col); INT; # dialect: snowflake MIN(tbl.bigint_col); BIGINT; # dialect: snowflake MIN(CAST(100 AS DECIMAL(10,2))); DECIMAL(10, 2); # dialect: snowflake MIN(tbl.bigint_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake VECTOR_COSINE_SIMILARITY([1,2,3], [4,5,6]); DOUBLE; # dialect: snowflake VECTOR_INNER_PRODUCT([1,2,3], [4,5,6]); DOUBLE; # dialect: snowflake VECTOR_L1_DISTANCE([1,2,3], [4,5,6]); DOUBLE; # dialect: snowflake VECTOR_L2_DISTANCE([1,2,3], [4,5,6]); DOUBLE; # dialect: snowflake ZIPF(1, 10, RANDOM()); BIGINT; # dialect: snowflake ZIPF(2, 100, 1234); BIGINT; # dialect: snowflake XMLGET(PARSE_XML('content'), 'level2'); OBJECT; # dialect: snowflake XMLGET(PARSE_XML('ab'), 'item', 1); OBJECT; # dialect: snowflake MODE(tbl.double_col); DOUBLE; # dialect: snowflake MODE(tbl.date_col); DATE; # dialect: snowflake MODE(tbl.timestamp_col); TIMESTAMP; # dialect: snowflake MODE(tbl.bool_col); BOOLEAN; # dialect: snowflake MODE(CAST(100 AS DECIMAL(10,2))); DECIMAL(10, 2); # dialect: snowflake MODE(tbl.bigint_col) OVER (PARTITION BY 1); BIGINT; # dialect: snowflake MODE(CAST(NULL AS INT)); INT; # dialect: snowflake MODE(tbl.str_col) OVER (PARTITION BY tbl.int_col); VARCHAR; # dialect: snowflake VAR_SAMP(tbl.decfloat_col); DECFLOAT; # dialect: snowflake VAR_SAMP(tbl.double_col); DOUBLE; # dialect: snowflake VAR_SAMP(tbl.int_col); NUMBER(38, 6); # dialect: snowflake VARIANCE_SAMP(tbl.decfloat_col); DECFLOAT; # dialect: snowflake VARIANCE_SAMP(tbl.double_col); DOUBLE; # dialect: snowflake VARIANCE_SAMP(tbl.int_col); NUMBER(38, 6); # dialect: snowflake VARIANCE(tbl.decfloat_col); DECFLOAT; # dialect: snowflake VARIANCE(tbl.double_col); DOUBLE; # dialect: snowflake VARIANCE(tbl.int_col); NUMBER(38, 6); # dialect: snowflake VAR_POP(tbl.decfloat_col); DECFLOAT; # dialect: snowflake VAR_POP(tbl.double_col); 
DOUBLE; # dialect: snowflake VAR_POP(tbl.int_col); NUMBER(38, 6); # dialect: snowflake VARIANCE_POP(tbl.decfloat_col); DECFLOAT; # dialect: snowflake VARIANCE_POP(tbl.double_col); DOUBLE; # dialect: snowflake VARIANCE_POP(tbl.int_col); NUMBER(38, 6); # dialect: snowflake VARIANCE_POP(1::NUMBER(38, 6)); NUMBER(38, 12); # dialect: snowflake VARIANCE_POP(1::NUMBER(38, 15)); NUMBER(38, 15); # dialect: snowflake VARIANCE_POP(1::NUMBER(30, 5)); NUMBER(38, 12); # dialect: snowflake ENCRYPT(tbl.str_col, 'passphrase'); BINARY; # dialect: snowflake ENCRYPT(tbl.str_col, 'passphrase', 'aad'); BINARY; # dialect: snowflake ENCRYPT(tbl.str_col, 'passphrase', 'aad', 'AES-GCM'); BINARY; # dialect: snowflake ENCRYPT_RAW(tbl.str_col, tbl.key_col, tbl.iv_col); BINARY; # dialect: snowflake ENCRYPT_RAW(tbl.str_col, tbl.key_col, tbl.iv_col, tbl.aad_col); BINARY; # dialect: snowflake ENCRYPT_RAW(tbl.str_col, tbl.key_col, tbl.iv_col, tbl.aad_col, 'AES-GCM'); BINARY; # dialect: snowflake DECRYPT(tbl.encrypted_col, 'passphrase'); BINARY; # dialect: snowflake DECRYPT(tbl.encrypted_col, 'passphrase', 'aad'); BINARY; # dialect: snowflake DECRYPT(tbl.encrypted_col, 'passphrase', 'aad', 'AES-GCM'); BINARY; # dialect: snowflake DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col); BINARY; # dialect: snowflake DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col); BINARY; # dialect: snowflake DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col, 'AES-GCM'); BINARY; # dialect: snowflake DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col, 'AES-GCM', HEX_DECODE_BINARY('ff')); BINARY; # dialect: snowflake TRY_DECRYPT(tbl.encrypted_col, 'passphrase'); BINARY; # dialect: snowflake TRY_DECRYPT(tbl.encrypted_col, 'passphrase', 'aad'); BINARY; # dialect: snowflake TRY_DECRYPT(tbl.encrypted_col, 'passphrase', 'aad', 'AES-GCM'); BINARY; # dialect: snowflake TRY_DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col); BINARY; # dialect: snowflake 
TRY_DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col); BINARY; # dialect: snowflake TRY_DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col, 'AES-GCM'); BINARY; # dialect: snowflake TRY_DECRYPT_RAW(tbl.encrypted_col, tbl.key_col, tbl.iv_col, tbl.aad_col, 'AES-GCM', HEX_DECODE_BINARY('ff')); BINARY; # dialect: snowflake SEQ1(); INT; # dialect: snowflake SEQ1(1); INT; # dialect: snowflake SEQ2(); INT; # dialect: snowflake SEQ2(1); INT; # dialect: snowflake SEQ4(); INT; # dialect: snowflake SEQ4(1); INT; # dialect: snowflake SEQ8(); BIGINT; # dialect: snowflake SEQ8(1); BIGINT; -------------------------------------- -- T-SQL -------------------------------------- # dialect: tsql SYSDATETIMEOFFSET(); TIMESTAMPTZ; # dialect: tsql RADIANS(90); INT; # dialect: tsql SIN(tbl.int_col); FLOAT; # dialect: tsql SIN(tbl.float_col); FLOAT; # dialect: tsql COS(tbl.int_col); FLOAT; # dialect: tsql COS(tbl.float_col); FLOAT; # dialect: tsql TAN(tbl.int_col); FLOAT; # dialect: tsql TAN(tbl.float_col); FLOAT; # dialect: tsql COT(tbl.int_col); FLOAT; # dialect: tsql COT(tbl.float_col); FLOAT; # dialect: tsql ATN2(tbl.int_col, tbl.int_col); FLOAT; # dialect: tsql ATN2(tbl.int_col, tbl.float_col); FLOAT; # dialect: tsql ATN2(tbl.float_col, tbl.int_col); FLOAT; # dialect: tsql ATN2(tbl.float_col, tbl.float_col); FLOAT; # dialect: tsql ASIN(tbl.int_col); FLOAT; # dialect: tsql ASIN(tbl.float_col); FLOAT; # dialect: tsql ACOS(tbl.int_col); FLOAT; # dialect: tsql ACOS(tbl.float_col); FLOAT; # dialect: tsql ATAN(tbl.int_col); FLOAT; # dialect: tsql ATAN(tbl.float_col); FLOAT; # dialect: tsql CURRENT_TIMEZONE(); NVARCHAR; # dialect: tsql SOUNDEX(tbl.str_col); VARCHAR; # dialect: tsql STUFF(tbl.str_col, tbl.int_col, tbl.int_col, tbl.str_col); VARCHAR; # dialect: tsql DEGREES(tbl.int_col); INT; # dialect: tsql DEGREES(tbl.float_col); FLOAT; # dialect: tsql DEGREES(tbl.bigint_col); BIGINT; # dialect: tsql CURRENT_TIMESTAMP; DATETIME; 
-------------------------------------- -- MySQL -------------------------------------- # dialect: mysql DEGREES(tbl.double_col); DOUBLE; # dialect: mysql DEGREES(tbl.int_col); DOUBLE; # dialect: mysql LOCALTIME; DATETIME; # dialect: mysql ELT(1, 'a', 'b'); VARCHAR; # dialect: mysql DAYOFWEEK(tbl.date_col); INT; # dialect: mysql DAYOFMONTH(tbl.date_col); INT; # dialect: mysql DAYOFYEAR(tbl.date_col); INT; # dialect: mysql MONTH(tbl.date_col); INT; # dialect: mysql WEEK(tbl.date_col); INT; # dialect: mysql WEEK(tbl.date_col, int_col); INT; # dialect: mysql QUARTER(tbl.date_col); INT; # dialect: mysql HOUR(tbl.time_col); INT; # dialect: mysql SECOND(tbl.time_col); INT; # dialect: mysql SIN(tbl.int_col); DOUBLE; # dialect: mysql SIN(tbl.double_col); DOUBLE; # dialect: mysql COS(tbl.int_col); DOUBLE; # dialect: mysql COS(tbl.double_col); DOUBLE; # dialect: mysql TAN(tbl.int_col); DOUBLE; # dialect: mysql TAN(tbl.double_col); DOUBLE; # dialect: mysql COT(tbl.int_col); DOUBLE; # dialect: mysql COT(tbl.double_col); DOUBLE; # dialect: mysql ASIN(tbl.int_col); DOUBLE; # dialect: mysql ASIN(tbl.double_col); DOUBLE; # dialect: mysql ACOS(tbl.int_col); DOUBLE; # dialect: mysql ACOS(tbl.double_col); DOUBLE; # dialect: mysql ATAN(tbl.int_col); DOUBLE; # dialect: mysql ATAN(tbl.double_col); DOUBLE; # dialect: mysql ATAN(tbl.int_col, tbl.int_col); DOUBLE; # dialect: mysql ATAN(tbl.int_col, tbl.double_col); DOUBLE; # dialect: mysql ATAN(tbl.double_col, tbl.int_col); DOUBLE; # dialect: mysql ATAN(tbl.double_col, tbl.double_col); DOUBLE; # dialect: mysql ATAN2(tbl.int_col, tbl.int_col); DOUBLE; # dialect: mysql ATAN2(tbl.int_col, tbl.double_col); DOUBLE; # dialect: mysql ATAN2(tbl.double_col, tbl.int_col); DOUBLE; # dialect: mysql ATAN2(tbl.double_col, tbl.double_col); DOUBLE; # dialect: mysql VERSION(); VARCHAR; # dialect: mysql CURRENT_TIMESTAMP(); DATETIME; -------------------------------------- -- DuckDB -------------------------------------- # dialect: duckdb SHA1(tbl.str_col); 
VARCHAR; # dialect: duckdb SHA256(tbl.str_col); VARCHAR; # dialect: duckdb GET_BIT(tbl.str_col, tbl.int_col); INT; # dialect: duckdb FACTORIAL(tbl.int_col); HUGEINT; # dialect: duckdb SIN(tbl.int_col); DOUBLE; # dialect: duckdb SIN(tbl.double_col); DOUBLE; # dialect: duckdb ASIN(tbl.int_col); DOUBLE; # dialect: duckdb ASIN(tbl.double_col); DOUBLE; # dialect: duckdb COS(tbl.int_col); DOUBLE; # dialect: duckdb COS(tbl.double_col); DOUBLE; # dialect: duckdb ACOS(tbl.int_col); DOUBLE; # dialect: duckdb ACOS(tbl.double_col); DOUBLE; # dialect: duckdb COT(tbl.int_col); DOUBLE; # dialect: duckdb COT(tbl.double_col); DOUBLE; # dialect: duckdb TAN(tbl.int_col); DOUBLE; # dialect: duckdb TAN(tbl.double_col); DOUBLE; # dialect: duckdb ATAN(tbl.int_col); DOUBLE; # dialect: duckdb ATAN(tbl.double_col); DOUBLE; # dialect: duckdb ATAN2(tbl.int_col, tbl.int_col); DOUBLE; # dialect: duckdb ATAN2(tbl.int_col, tbl.double_col); DOUBLE; # dialect: duckdb ATAN2(tbl.double_col, tbl.int_col); DOUBLE; # dialect: duckdb ATAN2(tbl.double_col, tbl.double_col); DOUBLE; # dialect: duckdb ACOSH(tbl.int_col); DOUBLE; # dialect: duckdb ACOSH(tbl.double_col); DOUBLE; # dialect: duckdb ASINH(tbl.int_col); DOUBLE; # dialect: duckdb ASINH(tbl.double_col); DOUBLE; # dialect: duckdb ATANH(tbl.int_col); DOUBLE; # dialect: duckdb TANH(tbl.int_col); DOUBLE; # dialect: duckdb TANH(tbl.double_col); DOUBLE; # dialect: duckdb COSH(tbl.int_col); DOUBLE; # dialect: duckdb COSH(tbl.double_col); DOUBLE; # dialect: duckdb SINH(tbl.int_col); DOUBLE; # dialect: duckdb SINH(tbl.double_col); DOUBLE; # dialect: duckdb ATANH(tbl.double_col); DOUBLE; # dialect: duckdb ISINF(tbl.float_col); BOOLEAN; # dialect: duckdb REVERSE(tbl.str_col); VARCHAR; # dialect: duckdb RANDOM(); DOUBLE; # dialect: duckdb FORMAT('Benchmark "{}" took {} seconds', 'CSV', 42); VARCHAR; # dialect: duckdb QUARTER(tbl.date_col); BIGINT; # dialect: duckdb QUARTER(tbl.timestamp_col); BIGINT; # dialect: duckdb QUARTER(tbl.interval_col); BIGINT; # 
dialect: duckdb QUARTER(tbl.timestamp_tz_col); BIGINT; # dialect: duckdb MINUTE(tbl.date_col); BIGINT; # dialect: duckdb MONTH(tbl.date_col); BIGINT; # dialect: duckdb DAYOFWEEK(tbl.date_col); BIGINT; # dialect: duckdb DAYOFYEAR(tbl.date_col); BIGINT; # dialect: duckdb EPOCH(tbl.interval_col); DOUBLE; # dialect: duckdb DAYOFMONTH(tbl.date_col); BIGINT; # dialect: duckdb DAY(tbl.date_col); BIGINT; # dialect: duckdb HOUR(tbl.date_col); BIGINT; # dialect: duckdb SECOND(tbl.date_col); BIGINT; # dialect: duckdb TO_DAYS(tbl.int_col); INTERVAL; # dialect: duckdb ISODOW(tbl.date_col); BIGINT; # dialect: duckdb BIT_LENGTH(tbl.str_col); BIGINT; # dialect: duckdb MAKE_TIME(tbl.bigint_col, tbl.bigint_col, tbl.double_col); TIME; # dialect: duckdb LENGTH(tbl.str_col); BIGINT; # dialect: duckdb TIME_BUCKET(tbl.interval_col, tbl.date_col, tbl.interval_col); DATE; # dialect: duckdb TIME_BUCKET(tbl.interval_col, tbl.date_col); DATE; # dialect: duckdb TIME_BUCKET(tbl.interval_col, tbl.timestamp_col, tbl.interval_col); TIMESTAMP; # dialect: duckdb TIME_BUCKET(tbl.interval_col, tbl.timestamp_col); TIMESTAMP; # dialect: duckdb TRANSLATE(tbl.str_col, tbl.str_col, tbl.str_col); VARCHAR; # dialect: duckdb COUNTIF(tbl.int_col > tbl.int_col); HUGEINT; # dialect: duckdb DATE_DIFF('year', tbl.timestamp_col, tbl.timestamp_col); BIGINT; # dialect: duckdb EXTRACT('hour' FROM tbl.timestamp_col); BIGINT; # dialect: duckdb EXTRACT('month' FROM tbl.timestamp_col); BIGINT; -------------------------------------- -- Presto / Trino -------------------------------------- # dialect: presto, trino MD5(tbl.bin_col); VARBINARY; # dialect: presto, trino LEVENSHTEIN_DISTANCE(tbl.str_col, tbl.str_col); BIGINT; # dialect: presto, trino LENGTH(tbl.str_col); BIGINT; # dialect: presto, trino POSITION(tbl.str_col IN tbl.str_col); BIGINT; # dialect: presto, trino STRPOS(tbl.str_col, tbl.str_col); BIGINT; # dialect: presto, trino BITWISE_AND(tbl.bigint_col, tbl.bigint_col); BIGINT; # dialect: presto, trino 
BITWISE_NOT(tbl.bigint_col); BIGINT; # dialect: presto, trino BITWISE_OR(tbl.bigint_col, tbl.bigint_col); BIGINT; # dialect: presto, trino BITWISE_XOR(tbl.bigint_col, tbl.bigint_col); BIGINT; # dialect: presto, trino WIDTH_BUCKET(tbl.double_col, tbl.array_col); BIGINT; # dialect: trino ARRAY_FIRST(ARRAY['a', 'b'], x -> x = 'b'); VARCHAR; ================================================ FILE: tests/fixtures/optimizer/annotate_types.sql ================================================ 5; INT; -5; INT; ~5; INT; (5); INT; 5.3; DOUBLE; 'bla'; VARCHAR; true; bool; not true; bool; false; bool; x is null; bool; x is not null; bool; EXISTS(SELECT 1); bool; ALL(SELECT 1); bool; ANY(SELECT 1); bool; null; UNKNOWN; # dialect: spark null; NULL; # dialect: databricks null; NULL; null and false; bool; null + 1; int; CASE WHEN x THEN NULL ELSE 1 END; INT; CASE WHEN x THEN 1 ELSE NULL END; INT; IF(true, 1, null); INT; IF(true, null, 1); INT; STRUCT(1 AS col); STRUCT; # Note: ensure the struct is annotated as UNKNOWN when any of its arguments are UNKNOWN STRUCT(1, f2); UNKNOWN; STRUCT(1 AS col, 2.5 AS row); STRUCT; STRUCT(1); STRUCT; STRUCT(1 AS col, 2.5 AS row, struct(3.5 AS inner_col, 4 AS inner_row) AS nested_struct); STRUCT>; STRUCT(1 AS col, 2.5, ARRAY[1, 2, 3] AS nested_array, 'foo'); STRUCT, VARCHAR>; STRUCT(1, 2.5, 'bar'); STRUCT; STRUCT(1 AS "CaseSensitive"); STRUCT<"CaseSensitive" INT>; # dialect: duckdb STRUCT_PACK(a := 1, b := 2.5); STRUCT; # dialect: presto ROW(1, 2.5, 'foo'); STRUCT; # dialect: bigquery EXTRACT(date from x); DATE; # dialect: bigquery EXTRACT(time from x); TIME; # dialect: bigquery EXTRACT(day from x); INT; CASE WHEN x THEN CAST(y AS DECIMAL(18, 2)) ELSE NULL END; DECIMAL(18,2); CASE WHEN x THEN NULL ELSE CAST(y AS DECIMAL(18, 2)) END; DECIMAL(18,2); # dialect: bigquery CASE WHEN TRUE THEN '2010-01-01' ELSE DATE '2020-02-02' END; DATE; # dialect: bigquery CASE WHEN TRUE THEN '2010-01-01' WHEN FALSE THEN DATE '2020-02-02' ELSE '1990-01-01' END; DATE; # 
dialect: bigquery CASE WHEN TRUE THEN DATETIME '2020-02-02 00:00:00' ELSE '2010-01-01' END; DATETIME; # dialect: bigquery CASE WHEN TRUE THEN TIMESTAMP '2020-02-02 00:00:00' ELSE '2010-01-01' END; TIMESTAMP; # dialect: bigquery NULL; INT64; # dialect: bigquery ARRAY(SELECT 'foo' UNION ALL SELECT 'bar'); ARRAY; # dialect: bigquery ARRAY(SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3); ARRAY; # dialect: bigquery ARRAY(SELECT 1 UNION ALL SELECT 2.5); ARRAY; 1 + (SELECT 2.5 AS c); DOUBLE; # dialect: bigquery CASE WHEN TRUE THEN 2.5 ELSE CAST(3.5 AS BIGNUMERIC) END; BIGNUMERIC; # dialect: bigquery CASE WHEN TRUE THEN CAST(3.5 AS BIGNUMERIC) ELSE 2.5 END; BIGNUMERIC; # dialect: bigquery CASE WHEN TRUE THEN 3/10 ELSE CAST(3.5 AS BIGNUMERIC) END; FLOAT64; # dialect: bigquery CASE WHEN TRUE THEN CAST(3.5 AS BIGNUMERIC) ELSE 3/10 END; FLOAT64; # dialect: bigquery CASE WHEN TRUE THEN 2.4 ELSE 2.5 END; FLOAT64; # dialect: bigquery CASE WHEN x < y THEN 3/10 WHEN x > y THEN 2 ELSE CAST(3.5 AS BIGNUMERIC) END; FLOAT64; # dialect: bigquery CASE WHEN x < y THEN 2 WHEN x > y THEN 3/10 ELSE CAST(3.5 AS BIGNUMERIC) END; FLOAT64; # dialect: bigquery CASE WHEN x < y THEN CAST(3.5 AS BIGNUMERIC) WHEN x > y THEN 3/10 ELSE 2 END; FLOAT64; # dialect: snowflake BITSHIFTLEFT(255, 4); INT; # dialect: snowflake BITSHIFTRIGHT(1024, 2); INT; # dialect: snowflake BITSHIFTLEFT(X'FF', 4); BINARY; # dialect: snowflake BITSHIFTRIGHT(X'FF', 4); BINARY; # dialect: snowflake BITOR(BITSHIFTLEFT(5, 16), BITSHIFTLEFT(3, 8)); INT; # dialect: snowflake BITAND(BITSHIFTLEFT(255, 4), BITSHIFTLEFT(15, 2)); INT; # dialect: bigquery CAST(1 AS BIGNUMERIC) + 1.5; BIGNUMERIC; # dialect: bigquery 1.5 + CAST(1 AS BIGNUMERIC); BIGNUMERIC; # dialect: bigquery 1.5 + CAST(1 AS FLOAT64); FLOAT64; # dialect: bigquery CAST(1 AS FLOAT64) + 1.5; FLOAT64; # dialect: bigquery CAST(1 AS INT) + 1.5; FLOAT64; # dialect: bigquery 1.5 + CAST(1 AS INT); FLOAT64; # dialect: bigquery IF(1 = 1, CAST(1 AS BIGNUMERIC) * 1.5, CAST(2 AS 
BIGNUMERIC)); BIGNUMERIC; ================================================ FILE: tests/fixtures/optimizer/canonicalize.sql ================================================ SELECT w.d + w.e AS c FROM w AS w; SELECT CONCAT("w"."d", "w"."e") AS "c" FROM "w" AS "w"; SELECT CAST(w.d AS DATE) > w.e AS a FROM w AS w; SELECT CAST("w"."d" AS DATE) > CAST("w"."e" AS DATE) AS "a" FROM "w" AS "w"; SELECT CAST(1 AS VARCHAR) AS a FROM w AS w; SELECT CAST(1 AS VARCHAR) AS "a" FROM "w" AS "w"; SELECT CAST(1 + 3.2 AS DOUBLE) AS a FROM w AS w; SELECT 1 + 3.2 AS "a" FROM "w" AS "w"; SELECT '1' + 1 AS "col"; SELECT '1' + 1 AS "col"; SELECT '1' + '1' AS "col"; SELECT CONCAT('1', '1') AS "col"; SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY; SELECT CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY AS "_col_0"; SELECT CAST('2022-01-01' AS DATE) IS NULL AS "a"; SELECT CAST('2022-01-01' AS DATE) IS NULL AS "a"; -------------------------------------- -- Ensure boolean predicates -------------------------------------- SELECT a FROM x WHERE b; SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE "x"."b" <> 0; SELECT NOT b FROM x; SELECT NOT "x"."b" <> 0 AS "_col_0" FROM "x" AS "x"; SELECT a FROM x GROUP BY a HAVING SUM(b); SELECT "x"."a" AS "a" FROM "x" AS "x" GROUP BY "x"."a" HAVING SUM("x"."b") <> 0; SELECT a FROM x GROUP BY a HAVING SUM(b) AND TRUE; SELECT "x"."a" AS "a" FROM "x" AS "x" GROUP BY "x"."a" HAVING SUM("x"."b") <> 0 AND TRUE; SELECT a FROM x WHERE 1; SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE 1 <> 0; SELECT a FROM x WHERE COALESCE(0, 1); SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE COALESCE(0 <> 0, 1 <> 0); SELECT a FROM x WHERE CASE WHEN COALESCE(b, 1) THEN 1 ELSE 0 END; SELECT "x"."a" AS "a" FROM "x" AS "x" WHERE CASE WHEN COALESCE("x"."b" <> 0, 1 <> 0) THEN 1 ELSE 0 END <> 0; -------------------------------------- -- Replace date functions -------------------------------------- DATE('2023-01-01'); CAST('2023-01-01' AS DATE); -- Some dialects only allow dates DATE('2023-01-01 
00:00:00'); DATE('2023-01-01 00:00:00'); TIMESTAMP('2023-01-01'); CAST('2023-01-01' AS TIMESTAMP); TIMESTAMP('2023-01-01', '12:00:00'); TIMESTAMP('2023-01-01', '12:00:00'); -------------------------------------- -- Coerce date function args -------------------------------------- '2023-01-01' + INTERVAL '1' DAY; CAST('2023-01-01' AS DATE) + INTERVAL '1' DAY; '2023-01-01' + INTERVAL '1' HOUR; CAST('2023-01-01' AS DATETIME) + INTERVAL '1' HOUR; '2023-01-01 00:00:01' + INTERVAL '1' HOUR; CAST('2023-01-01 00:00:01' AS DATETIME) + INTERVAL '1' HOUR; CAST('2023-01-01' AS DATE) + INTERVAL '1' HOUR; CAST(CAST('2023-01-01' AS DATE) AS DATETIME) + INTERVAL '1' HOUR; SELECT t.d + INTERVAL '1' HOUR FROM temporal AS t; SELECT CAST("t"."d" AS DATETIME) + INTERVAL '1' HOUR AS "_col_0" FROM "temporal" AS "t"; DATE_ADD(CAST("x" AS DATE), 1, 'YEAR'); DATE_ADD(CAST("x" AS DATE), 1, 'YEAR'); DATE_ADD('2023-01-01', 1, 'YEAR'); DATE_ADD(CAST('2023-01-01' AS DATE), 1, 'YEAR'); DATE_ADD('2023-01-01 00:00:00', 1, 'DAY'); DATE_ADD(CAST('2023-01-01 00:00:00' AS DATETIME), 1, 'DAY'); SELECT DATE_ADD(t.d, 1, 'HOUR') FROM temporal AS t; SELECT DATE_ADD(CAST("t"."d" AS DATETIME), 1, 'HOUR') AS "_col_0" FROM "temporal" AS "t"; SELECT DATE_TRUNC('SECOND', t.d) FROM temporal AS t; SELECT DATE_TRUNC('SECOND', CAST("t"."d" AS DATETIME)) AS "_col_0" FROM "temporal" AS "t"; DATE_TRUNC('DAY', '2023-01-01'); DATE_TRUNC('DAY', CAST('2023-01-01' AS DATE)); DATEDIFF('2023-01-01', '2023-01-02', DAY); DATEDIFF(CAST('2023-01-01' AS DATETIME), CAST('2023-01-02' AS DATETIME), DAY); SELECT "t"."d" > '2023-01-01' AS "d" FROM "temporal" AS "t"; SELECT "t"."d" > CAST('2023-01-01' AS DATE) AS "d" FROM "temporal" AS "t"; SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t"; SELECT "t"."d" > CAST('2023-01-01' AS DATETIME) AS "d" FROM "temporal" AS "t"; SELECT "t"."t" > '2023-01-01 00:00:01' AS "t" FROM "temporal" AS "t"; SELECT "t"."t" > CAST('2023-01-01 00:00:01' AS DATETIME) AS "t" FROM 
"temporal" AS "t"; WITH "t" AS (SELECT CAST("ext"."created_at" AS TIMESTAMP) AS "created_at" FROM "ext" AS "ext") SELECT "t"."created_at" > '2024-10-01 12:05:02' AS "col" FROM "t" AS "t"; WITH "t" AS (SELECT CAST("ext"."created_at" AS TIMESTAMP) AS "created_at" FROM "ext" AS "ext") SELECT "t"."created_at" > CAST('2024-10-01 12:05:02' AS TIMESTAMP) AS "col" FROM "t" AS "t"; # dialect: mysql SELECT `t`.`d` < '2023-01-01 00:00:01' AS `col` FROM `temporal` AS `t`; SELECT CAST(`t`.`d` AS DATETIME) < CAST('2023-01-01 00:00:01' AS DATETIME) AS `col` FROM `temporal` AS `t`; # dialect: mysql SELECT CAST(`t`.`some_col` AS DATE) < CAST(`t`.`other_col` AS CHAR) AS `col` FROM `other_table` AS `t`; SELECT CAST(CAST(`t`.`some_col` AS DATE) AS DATETIME) < CAST(CAST(`t`.`other_col` AS CHAR) AS DATETIME) AS `col` FROM `other_table` AS `t`; -------------------------------------- -- Remove redundant casts -------------------------------------- CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x"; CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(8, 4)) AS "x"; CAST(CAST("foo" AS DECIMAL(4, 2)) AS DECIMAL(4, 2)) AS "x"; CAST("foo" AS DECIMAL(4, 2)) AS "x"; CAST(CAST('2023-01-01' AS DATE) AS DATE); CAST('2023-01-01' AS DATE); CAST(DATE_TRUNC('YEAR', CAST('2023-01-01' AS DATE)) AS DATE); DATE_TRUNC('YEAR', CAST('2023-01-01' AS DATE)); DATE(DATE_TRUNC('YEAR', CAST("x" AS DATE))); DATE_TRUNC('YEAR', CAST("x" AS DATE)); ================================================ FILE: tests/fixtures/optimizer/eliminate_ctes.sql ================================================ # title: CTE WITH q AS ( SELECT a FROM x ) SELECT a FROM x; SELECT a FROM x; # title: Nested CTE SELECT a FROM ( WITH q AS ( SELECT a FROM x ) SELECT a FROM x ); SELECT a FROM ( SELECT a FROM x ); # title: Chained CTE WITH q AS ( SELECT a FROM x ), r AS ( SELECT a FROM q ) SELECT a FROM x; SELECT a FROM x; # title: CTE reference in subquery where alias matches outer table name WITH q AS ( SELECT a FROM y ) SELECT a FROM x AS q 
WHERE a IN ( SELECT a FROM q ); WITH q AS ( SELECT a FROM y ) SELECT a FROM x AS q WHERE a IN ( SELECT a FROM q ); # title: CTE reference in subquery where alias matches outer table name and outer alias is also CTE WITH q AS ( SELECT a FROM y ), q2 AS ( SELECT a FROM y ) SELECT a FROM q2 AS q WHERE a IN ( SELECT a FROM q ); WITH q AS ( SELECT a FROM y ), q2 AS ( SELECT a FROM y ) SELECT a FROM q2 AS q WHERE a IN ( SELECT a FROM q ); # Title: Do not remove CTE if it is an RHS of a SEMI/ANTI join WITH t1 AS ( SELECT 1 AS foo ), t2 AS ( SELECT 1 AS foo ) SELECT * FROM t1 LEFT ANTI JOIN t2 ON t1.foo = t2.foo; WITH t1 AS ( SELECT 1 AS foo ), t2 AS ( SELECT 1 AS foo ) SELECT * FROM t1 LEFT ANTI JOIN t2 ON t1.foo = t2.foo ================================================ FILE: tests/fixtures/optimizer/eliminate_joins.sql ================================================ # title: Remove left join on distinct derived table SELECT x.a FROM x LEFT JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; SELECT x.a FROM x; # title: Remove left join on grouped derived table SELECT x.a FROM x LEFT JOIN ( SELECT y.b, SUM(y.c) FROM y GROUP BY y.b ) AS y ON x.b = y.b; SELECT x.a FROM x; # title: Remove left join on aggregate derived table SELECT x.a FROM x LEFT JOIN ( SELECT SUM(y.b) AS b FROM y ) AS y ON x.b = y.b; SELECT x.a FROM x; # title: Noop - not all distinct columns in condition SELECT x.a FROM x LEFT JOIN ( SELECT DISTINCT y.b, y.c FROM y ) AS y ON x.b = y.b; SELECT x.a FROM x LEFT JOIN ( SELECT DISTINCT y.b, y.c FROM y ) AS y ON x.b = y.b; # title: Noop - not all grouped columns in condition SELECT x.a FROM x LEFT JOIN ( SELECT y.b, y.c FROM y GROUP BY y.b, y.c ) AS y ON x.b = y.b; SELECT x.a FROM x LEFT JOIN ( SELECT y.b, y.c FROM y GROUP BY y.b, y.c ) AS y ON x.b = y.b; # title: Noop - not left join SELECT x.a FROM x JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; SELECT x.a FROM x JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; # title: Noop - unqualified 
columns SELECT a FROM x LEFT JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; SELECT a FROM x LEFT JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; # title: Noop - cross join SELECT a FROM x CROSS JOIN ( SELECT DISTINCT y.b FROM y ) AS y; SELECT a FROM x CROSS JOIN ( SELECT DISTINCT y.b FROM y ) AS y; # title: Noop - column is used SELECT x.a, y.b FROM x LEFT JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; SELECT x.a, y.b FROM x LEFT JOIN ( SELECT DISTINCT y.b FROM y ) AS y ON x.b = y.b; # title: Multiple group by columns SELECT x.a FROM x LEFT JOIN ( SELECT y.b AS b, y.c + 1 AS d, COUNT(1) FROM y GROUP BY y.b, y.c + 1 ) AS y ON x.b = y.b AND 1 = y.d; SELECT x.a FROM x; # title: Chained left joins SELECT x.a FROM x LEFT JOIN ( SELECT y.b AS b FROM y GROUP BY y.b ) AS y ON x.b = y.b LEFT JOIN ( SELECT y.b AS c FROM y GROUP BY y.b ) AS z ON y.b = z.c; SELECT x.a FROM x; # title: CTE WITH z AS ( SELECT DISTINCT y.b FROM y ) SELECT x.a FROM x LEFT JOIN z ON x.b = z.b; WITH z AS ( SELECT DISTINCT y.b FROM y ) SELECT x.a FROM x; # title: Noop - Not all grouped expressions are in outputs SELECT x.a FROM x LEFT JOIN ( SELECT y.b FROM y GROUP BY y.b, y.c ) AS y ON x.b = y.b; SELECT x.a FROM x LEFT JOIN ( SELECT y.b FROM y GROUP BY y.b, y.c ) AS y ON x.b = y.b; # title: Cross join on aggregate derived table SELECT x.a FROM x CROSS JOIN ( SELECT SUM(y.b) AS b FROM y ) AS y; SELECT x.a FROM x; # title: Cross join on derived table with LIMIT 1 SELECT x.a FROM x CROSS JOIN ( SELECT y.b AS b FROM y LIMIT 1 ) AS y; SELECT x.a FROM x; # title: Cross join on derived table with no FROM clause SELECT x.a FROM x CROSS JOIN ( SELECT 1 AS b, 2 AS c ) AS y; SELECT x.a FROM x; # title: Noop - cross join on non-aggregate subquery SELECT x.a FROM x CROSS JOIN ( SELECT y.b FROM y ) AS y; SELECT x.a FROM x CROSS JOIN ( SELECT y.b FROM y ) AS y; # title: Do not remove left anti join SELECT x.b FROM x LEFT ANTI JOIN ( SELECT 1 AS b ) AS sub ON x.b = sub.b; SELECT x.b FROM x 
LEFT ANTI JOIN ( SELECT 1 AS b ) AS sub ON x.b = sub.b; ================================================ FILE: tests/fixtures/optimizer/eliminate_subqueries.sql ================================================ -- No derived tables SELECT * FROM x; SELECT * FROM x; -- Unaliased derived tables SELECT a FROM (SELECT b FROM (SELECT c FROM x)); WITH cte AS (SELECT c FROM x), cte_2 AS (SELECT b FROM cte AS cte) SELECT a FROM cte_2 AS cte_2; -- Joined derived table inside nested derived table SELECT b FROM (SELECT b FROM (SELECT b FROM x JOIN (SELECT b FROM y) AS y ON x.b = y.b)); WITH y_2 AS (SELECT b FROM y), cte AS (SELECT b FROM x JOIN y_2 AS y ON x.b = y.b), cte_2 AS (SELECT b FROM cte AS cte) SELECT b FROM cte_2 AS cte_2; -- Aliased derived tables SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS z; WITH y AS (SELECT c FROM x), z AS (SELECT b FROM y AS y) SELECT a FROM z AS z; -- Existing CTEs WITH q AS (SELECT c FROM x) SELECT a FROM (SELECT b FROM q AS y) AS z; WITH q AS (SELECT c FROM x), z AS (SELECT b FROM q AS y) SELECT a FROM z AS z; -- Derived table inside CTE WITH x AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM x; WITH y AS (SELECT a FROM x), x AS (SELECT a FROM y AS y) SELECT a FROM x; -- Name conflicts with existing outer derived table SELECT a FROM (SELECT b FROM (SELECT c FROM x) AS y) AS y; WITH y AS (SELECT c FROM x), y_2 AS (SELECT b FROM y AS y) SELECT a FROM y_2 AS y; -- Name conflicts with outer join SELECT a, b FROM (SELECT c FROM (SELECT d FROM x) AS x) AS y JOIN x ON x.a = y.a; WITH x_2 AS (SELECT d FROM x), y AS (SELECT c FROM x_2 AS x) SELECT a, b FROM y AS y JOIN x ON x.a = y.a; -- Name conflicts with table name that is selected in another branch SELECT * FROM (SELECT * FROM (SELECT a FROM x) AS x) AS y JOIN (SELECT * FROM x) AS z ON x.a = y.a; WITH x_2 AS (SELECT a FROM x), y AS (SELECT * FROM x_2 AS x), z AS (SELECT * FROM x) SELECT * FROM y AS y JOIN z AS z ON x.a = y.a; -- Name conflicts with table alias SELECT a FROM 
(SELECT a FROM (SELECT a FROM x) AS y) AS z CROSS JOIN q AS y; WITH y AS (SELECT a FROM x), z AS (SELECT a FROM y AS y) SELECT a FROM z AS z CROSS JOIN q AS y; -- Name conflicts with existing CTE WITH y AS (SELECT a FROM (SELECT a FROM x) AS y) SELECT a FROM y; WITH y_2 AS (SELECT a FROM x), y AS (SELECT a FROM y_2 AS y) SELECT a FROM y; -- Union of selects with derived tables (SELECT a FROM (SELECT b FROM x)) UNION (SELECT a FROM (SELECT b FROM y)); WITH cte AS (SELECT b FROM x), cte_2 AS (SELECT b FROM y) (SELECT a FROM cte AS cte) UNION (SELECT a FROM cte_2 AS cte_2); -- Subquery SELECT a FROM x WHERE b = (SELECT y.c FROM y); SELECT a FROM x WHERE b = (SELECT y.c FROM y); -- Correlated subquery SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a); SELECT a FROM x WHERE b = (SELECT c FROM y WHERE y.a = x.a); -- Duplicate CTE SELECT a FROM (SELECT b FROM x) AS y CROSS JOIN (SELECT b FROM x) AS z; WITH y AS (SELECT b FROM x) SELECT a FROM y AS y CROSS JOIN y AS z; -- Doubly duplicate CTE SELECT * FROM (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS z JOIN (SELECT * FROM x JOIN (SELECT * FROM x) AS y) AS q; WITH y AS (SELECT * FROM x), z AS (SELECT * FROM x, y AS y) SELECT * FROM z AS z, z AS q; -- Another duplicate... 
SELECT x.id FROM (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS x JOIN (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) AS y ON x.id = y.id; WITH x_2 AS (SELECT * FROM x AS x JOIN y AS y ON x.id = y.id) SELECT x.id FROM x_2 AS x JOIN x_2 AS y ON x.id = y.id; -- Root subquery (SELECT * FROM (SELECT * FROM x)) LIMIT 1; (WITH cte AS (SELECT * FROM x) SELECT * FROM cte AS cte) LIMIT 1; -- Existing duplicate CTE WITH y AS (SELECT a FROM x) SELECT a FROM (SELECT a FROM x) AS y CROSS JOIN y AS z; WITH y AS (SELECT a FROM x) SELECT a FROM y AS y CROSS JOIN y AS z; -- Nested CTE WITH cte1 AS (SELECT a FROM x) SELECT a FROM (WITH cte2 AS (SELECT a FROM cte1) SELECT a FROM cte2); WITH cte1 AS (SELECT a FROM x), cte2 AS (SELECT a FROM cte1), cte AS (SELECT a FROM cte2 AS cte2) SELECT a FROM cte AS cte; -- Nested CTE inside CTE WITH cte1 AS (WITH cte2 AS (SELECT a FROM x) SELECT t.a FROM cte2 AS t) SELECT a FROM cte1; WITH cte2 AS (SELECT a FROM x), cte1 AS (SELECT t.a FROM cte2 AS t) SELECT a FROM cte1; -- Duplicate CTE nested in CTE WITH cte1 AS (SELECT a FROM x), cte2 AS (WITH cte3 AS (SELECT a FROM x) SELECT a FROM cte3) SELECT a FROM cte2; WITH cte1 AS (SELECT a FROM x), cte2 AS (SELECT a FROM cte1 AS cte3) SELECT a FROM cte2; -- Wrapped subquery joined with table SELECT * FROM ((SELECT c FROM t1) JOIN t2); WITH cte AS (SELECT c FROM t1) SELECT * FROM (cte AS cte, t2); -- Wrapped subquery with redundant parentheses SELECT * FROM (((SELECT * FROM tbl))); WITH cte AS (SELECT * FROM tbl) SELECT * FROM cte AS cte; ================================================ FILE: tests/fixtures/optimizer/isolate_table_selects.sql ================================================ SELECT * FROM x AS x, y AS y2; SELECT * FROM (SELECT * FROM x AS x) AS x, (SELECT * FROM y AS y2) AS y2; SELECT * FROM x AS x WHERE x = 1; SELECT * FROM x AS x WHERE x = 1; SELECT * FROM x AS x CROSS JOIN y AS y; SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN (SELECT * FROM y AS y) AS y; SELECT * FROM 
(SELECT 1) AS x CROSS JOIN y AS y; SELECT * FROM (SELECT 1) AS x CROSS JOIN (SELECT * FROM y AS y) AS y; SELECT * FROM x AS x JOIN (SELECT * FROM y) AS y; SELECT * FROM (SELECT * FROM x AS x) AS x, (SELECT * FROM y) AS y; WITH y AS (SELECT *) SELECT * FROM x AS x; WITH y AS (SELECT *) SELECT * FROM x AS x; WITH y AS (SELECT * FROM y AS y2 CROSS JOIN x AS z2) SELECT * FROM x AS x CROSS JOIN y as y; WITH y AS (SELECT * FROM (SELECT * FROM y AS y2) AS y2 CROSS JOIN (SELECT * FROM x AS z2) AS z2) SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN y AS y; SELECT * FROM x AS x CROSS JOIN xx AS y; SELECT * FROM (SELECT * FROM x AS x) AS x CROSS JOIN xx AS y; ================================================ FILE: tests/fixtures/optimizer/merge_subqueries.sql ================================================ # title: Simple SELECT a, b FROM (SELECT a, b FROM x); SELECT x.a AS a, x.b AS b FROM x AS x; # title: Wrap addition in a multiplication SELECT c * 2 AS d FROM (SELECT a + b AS c FROM x); SELECT (x.a + x.b) * 2 AS d FROM x AS x; # title: Wrap addition in an addition # note: The "simplify" rule will unwrap this SELECT c + d AS e FROM (SELECT a + b AS c, a AS d FROM x); SELECT (x.a + x.b) + x.a AS e FROM x AS x; # title: Wrap multiplication in an addition # note: The "simplify" rule will unwrap this WITH cte AS (SELECT a * b AS c, a AS d FROM x) SELECT c + d AS e FROM cte; SELECT (x.a * x.b) + x.a AS e FROM x AS x; # title: Don't wrap function SELECT 2 * foo AS bar FROM (SELECT CAST(b AS DOUBLE) AS foo FROM x); SELECT 2 * CAST(x.b AS DOUBLE) AS bar FROM x AS x; # title: Don't wrap a wrapped expression SELECT foo * 2 AS bar FROM (SELECT (1 + 2 + 3) AS foo FROM x); SELECT (1 + 2 + 3) * 2 AS bar FROM x AS x; # title: Inner table alias is merged SELECT a, b FROM (SELECT a, b FROM x AS q) AS r; SELECT q.a AS a, q.b AS b FROM x AS q; # title: Double nesting SELECT a, b FROM (SELECT a, b FROM (SELECT a, b FROM x)); SELECT x.a AS a, x.b AS b FROM x AS x; # title: WHERE clause is 
merged SELECT a, SUM(b) AS b FROM (SELECT a, b FROM x WHERE a > 1) GROUP BY a; SELECT x.a AS a, SUM(x.b) AS b FROM x AS x WHERE x.a > 1 GROUP BY x.a; # title: Outer query has join SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; # title: Leave tables isolated # leave_tables_isolated: true SELECT a, c FROM (SELECT a, b FROM x WHERE a > 1) AS x JOIN y ON x.b = y.b; SELECT x.a AS a, y.c AS c FROM (SELECT x.a AS a, x.b AS b FROM x AS x WHERE x.a > 1) AS x JOIN y AS y ON x.b = y.b; # title: Join on derived table SELECT a, c FROM x JOIN (SELECT b, c FROM y) AS y ON x.b = y.b; SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Inner query has a join SELECT a, c FROM (SELECT a, c FROM x JOIN y ON x.b = y.b); SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Inner query has conflicting name in outer query SELECT a, c FROM (SELECT q.a, q.b FROM x AS q) AS x JOIN y AS q ON x.b = q.b; SELECT q_2.a AS a, q.c AS c FROM x AS q_2 JOIN y AS q ON q_2.b = q.b; # title: Inner query has conflicting name in joined source SELECT x.a, q.c FROM (SELECT a, x.b FROM x JOIN y AS q ON x.b = q.b) AS x JOIN y AS q ON x.b = q.b; SELECT x.a AS a, q.c AS c FROM x AS x JOIN y AS q_2 ON x.b = q_2.b JOIN y AS q ON x.b = q.b; # title: Inner query has multiple conflicting names SELECT x.a, q.c, r.c FROM (SELECT q.a, r.b FROM x AS q JOIN y AS r ON q.b = r.b) AS x JOIN y AS q ON x.b = q.b JOIN y AS r ON x.b = r.b ORDER BY x.a, q.c, r.c; SELECT q_2.a AS a, q.c AS c, r.c AS c FROM x AS q_2 JOIN y AS r_2 ON q_2.b = r_2.b JOIN y AS q ON r_2.b = q.b JOIN y AS r ON r_2.b = r.b ORDER BY q_2.a, q.c, r.c; # title: Inner queries have conflicting names with each other SELECT r.b FROM (SELECT b FROM x AS x) AS q JOIN (SELECT b FROM x) AS r ON q.b = r.b; SELECT x_2.b AS b FROM x AS x JOIN x AS x_2 ON x.b = x_2.b; # title: WHERE clause in joined derived table is merged to 
ON clause SELECT x.a, y.c FROM x JOIN (SELECT b, c FROM y WHERE c > 1) AS y ON x.b = y.b ORDER BY x.a; SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b AND y.c > 1 ORDER BY x.a; # title: Comma JOIN in outer query SELECT x.a, y.c FROM (SELECT a FROM x) AS x, (SELECT c FROM y) AS y; SELECT x.a AS a, y.c AS c FROM x AS x, y AS y; # title: Comma JOIN in inner query SELECT x.a, x.c FROM (SELECT x.a, z.c FROM x, y AS z) AS x; SELECT x.a AS a, z.c AS c FROM x AS x, y AS z; # title: (Regression) Column in ORDER BY SELECT * FROM (SELECT * FROM (SELECT * FROM x)) ORDER BY a LIMIT 1; SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY x.a LIMIT 1; # title: CTE WITH x AS (SELECT a, b FROM main.x) SELECT a, b FROM x; SELECT x.a AS a, x.b AS b FROM main.x AS x; # title: CTE with outer table alias WITH y AS (SELECT a, b FROM x) SELECT a, b FROM y AS z; SELECT x.a AS a, x.b AS b FROM x AS x; # title: Nested CTE WITH x2 AS (SELECT a FROM main.x), x3 AS (SELECT a FROM x2) SELECT a FROM x3; SELECT x.a AS a FROM main.x AS x; # title: CTE WHERE clause is merged WITH x AS (SELECT a, b FROM main.x WHERE a > 1) SELECT a, SUM(b) AS b FROM x GROUP BY a; SELECT x.a AS a, SUM(x.b) AS b FROM main.x AS x WHERE x.a > 1 GROUP BY x.a; # title: CTE Outer query has join WITH x2 AS (SELECT a, b FROM x WHERE a > 1) SELECT a, c FROM x2 AS x JOIN y ON x.b = y.b; SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b WHERE x.a > 1; # title: CTE with inner table alias WITH y AS (SELECT a, b FROM x AS q) SELECT a, b FROM y AS z; SELECT q.a AS a, q.b AS b FROM x AS q; # title: Nested CTE SELECT * FROM (WITH x AS (SELECT a, b FROM main.x) SELECT a, b FROM x); SELECT x.a AS a, x.b AS b FROM main.x AS x; # title: Inner select is an expression SELECT a FROM (SELECT a FROM (SELECT COALESCE(a) AS a FROM x LEFT JOIN y ON x.a = y.b) AS x) AS x; SELECT COALESCE(x.a) AS a FROM x AS x LEFT JOIN y AS y ON x.a = y.b; # title: CTE select is an expression WITH x2 AS (SELECT COALESCE(a) AS a FROM x LEFT 
JOIN y ON x.a = y.b) SELECT a FROM (SELECT a FROM x2 AS x) AS x; SELECT COALESCE(x.a) AS a FROM x AS x LEFT JOIN y AS y ON x.a = y.b; # title: Full outer join SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x FULL OUTER JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x FULL OUTER JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; # title: Full outer join, no predicates SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x) AS x FULL OUTER JOIN (SELECT y.b AS b FROM y AS y) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM x AS x FULL OUTER JOIN y AS y ON x.b = y.b; # title: Left join SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x LEFT JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM x AS x LEFT JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b WHERE x.b = 1; # title: Left join, no predicates SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x) AS x LEFT JOIN (SELECT y.b AS b FROM y AS y) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM x AS x LEFT JOIN y AS y ON x.b = y.b; # title: Right join SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x RIGHT JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x RIGHT JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; # title: Right join, no predicates SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x) AS x RIGHT JOIN (SELECT y.b AS b FROM y AS y) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM x AS x RIGHT JOIN y AS y ON x.b = y.b; # title: Inner join SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x INNER JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y ON x.b = y.b; 
SELECT x.b AS b, y.b AS b2 FROM x AS x INNER JOIN y AS y ON x.b = y.b AND y.b = 2 WHERE x.b = 1; # title: Inner join, no predicates SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x) AS x INNER JOIN (SELECT y.b AS b FROM y AS y) AS y ON x.b = y.b; SELECT x.b AS b, y.b AS b2 FROM x AS x INNER JOIN y AS y ON x.b = y.b; # title: Cross join SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x WHERE x.b = 1) AS x CROSS JOIN (SELECT y.b AS b FROM y AS y WHERE y.b = 2) AS y; SELECT x.b AS b, y.b AS b2 FROM x AS x JOIN y AS y ON y.b = 2 WHERE x.b = 1; # title: Cross join, no predicates SELECT x.b AS b, y.b AS b2 FROM (SELECT x.b AS b FROM x AS x) AS x CROSS JOIN (SELECT y.b AS b FROM y AS y) AS y; SELECT x.b AS b, y.b AS b2 FROM x AS x CROSS JOIN y AS y; # title: Broadcast hint # dialect: spark WITH m AS (SELECT x.a, x.b FROM x), n AS (SELECT y.b, y.c FROM y), joined as (SELECT /*+ BROADCAST(k) */ m.a, k.c FROM m JOIN n AS k ON m.b = k.b) SELECT joined.a, joined.c FROM joined; SELECT /*+ BROADCAST(y) */ x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Broadcast hint multiple tables # dialect: spark WITH m AS (SELECT x.a, x.b FROM x), n AS (SELECT y.b, y.c FROM y), joined as (SELECT /*+ BROADCAST(m, n) */ m.a, n.c FROM m JOIN n ON m.b = n.b) SELECT joined.a, joined.c FROM joined; SELECT /*+ BROADCAST(x, y) */ x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Multiple Table Hints # dialect: spark WITH m AS (SELECT x.a, x.b FROM x), n AS (SELECT y.b, y.c FROM y), joined as (SELECT /*+ BROADCAST(m), MERGE(m, n) */ m.a, n.c FROM m JOIN n ON m.b = n.b) SELECT joined.a, joined.c FROM joined; SELECT /*+ BROADCAST(x), MERGE(x, y) */ x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Mix Table and Column Hints # dialect: spark WITH m AS (SELECT x.a, x.b FROM x), n AS (SELECT y.b, y.c FROM y), joined as (SELECT /*+ BROADCAST(m), MERGE(m, n) */ m.a, n.c FROM m JOIN n ON m.b = n.b) SELECT /*+ COALESCE(3) */ joined.a, 
joined.c FROM joined; SELECT /*+ COALESCE(3), BROADCAST(x), MERGE(x, y) */ x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Hint Subquery # dialect: spark SELECT subquery.a, subquery.c FROM ( SELECT /*+ BROADCAST(m), MERGE(m, n) */ m.a, n.c FROM (SELECT x.a, x.b FROM x) AS m JOIN (SELECT y.b, y.c FROM y) AS n ON m.b = n.b ) AS subquery; SELECT /*+ BROADCAST(x), MERGE(x, y) */ x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; # title: Subquery Test # dialect: spark SELECT /*+ BROADCAST(x) */ x.a, x.c FROM ( SELECT x.a, x.c FROM ( SELECT x.a, COUNT(1) AS c FROM x GROUP BY x.a ) AS x ) AS x; SELECT /*+ BROADCAST(x) */ x.a AS a, x.c AS c FROM (SELECT x.a AS a, COUNT(1) AS c FROM x AS x GROUP BY x.a) AS x; # title: Test preventing merge of window expressions where clause with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ORDER BY x.a, x.b, row_num ) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x ORDER BY x.a, x.b, row_num) SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 WHERE t1.row_num = 1; # title: Test preventing merge of window expressions join clause with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ) SELECT t1.a, t1.b FROM t1 JOIN y ON t1.a = y.c AND t1.row_num = 1; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 JOIN y AS y ON t1.a = y.c AND t1.row_num = 1; # title: Test preventing merge of window expressions agg function with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ) SELECT SUM(t1.row_num) as total_rows FROM t1; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT SUM(t1.row_num) AS total_rows FROM t1 AS t1; # title: 
Test prevent merging of window if in group by with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ) SELECT t1.row_num AS row_num, SUM(t1.a) AS total FROM t1 GROUP BY t1.row_num ORDER BY t1.row_num; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.row_num AS row_num, SUM(t1.a) AS total FROM t1 AS t1 GROUP BY t1.row_num ORDER BY row_num; # title: Test prevent merging of window if in order by with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ) SELECT t1.row_num AS row_num, t1.a AS a FROM t1 ORDER BY t1.row_num, t1.a; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t1.row_num AS row_num, t1.a AS a FROM t1 AS t1 ORDER BY t1.row_num, t1.a; # title: Test preventing merging of window nested under complex projection if in order by WITH t1 AS ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) - 1 AS row_num FROM x ) SELECT t1.row_num AS row_num, t1.a AS a FROM t1 ORDER BY t1.row_num, t1.a; WITH t1 AS (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) - 1 AS row_num FROM x AS x) SELECT t1.row_num AS row_num, t1.a AS a FROM t1 AS t1 ORDER BY t1.row_num, t1.a; # title: Test allow merging of window function with t1 as ( SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ORDER BY x.a, x.b, row_num ) SELECT t1.a, t1.b, t1.row_num FROM t1; SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x ORDER BY x.a, x.b, row_num; # title: Keep ORDER BY # execute: false WITH t AS (SELECT t1.x AS x, t1.y AS y, t2.a AS a, t2.b AS b FROM t1 AS t1(x, y) CROSS JOIN t2 AS t2(a, b) ORDER BY t2.a) SELECT t.x AS x, t.y AS y, t.a AS a, t.b AS b FROM t AS t; SELECT t1.x AS x, t1.y AS y, t2.a AS a, t2.b AS b FROM t1 AS t1(x, y) CROSS JOIN t2 AS 
t2(a, b) ORDER BY t2.a; # title: Do not merge window functions, inner table is aliased in outer query with t1 as ( SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x ) SELECT t2.row_num FROM t1 AS t2 WHERE t2.row_num = 2; WITH t1 AS (SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x AS x) SELECT t2.row_num AS row_num FROM t1 AS t2 WHERE t2.row_num = 2; # title: Values Test # dialect: spark WITH t1 AS ( SELECT a1.cola FROM VALUES (1) AS a1(cola) ), t2 AS ( SELECT a2.cola FROM VALUES (1) AS a2(cola) ) SELECT /*+ BROADCAST(t2) */ t1.cola, t2.cola, FROM t1 JOIN t2 ON t1.cola = t2.cola; SELECT /*+ BROADCAST(a2) */ a1.cola AS cola, a2.cola AS cola FROM VALUES (1) AS a1(cola) JOIN VALUES (1) AS a2(cola) ON a1.cola = a2.cola; # title: Nested subquery selects from same table as another subquery WITH i AS ( SELECT x.a AS a FROM x AS x ), j AS ( SELECT x.a, x.b FROM x AS x ), k AS ( SELECT j.a, j.b FROM j AS j ) SELECT i.a, k.b FROM i AS i LEFT JOIN k AS k ON i.a = k.a; SELECT x.a AS a, x_2.b AS b FROM x AS x LEFT JOIN x AS x_2 ON x.a = x_2.a; # title: Outer select joins on inner select join WITH i AS ( SELECT x.a AS a FROM y AS y JOIN x AS x ON y.b = x.b ) SELECT x.a AS a FROM x AS x LEFT JOIN i AS i ON x.a = i.a; WITH i AS (SELECT x.a AS a FROM y AS y JOIN x AS x ON y.b = x.b) SELECT x.a AS a FROM x AS x LEFT JOIN i AS i ON x.a = i.a; # title: Outer scope selects from wrapped table with a join (unknown schema) # execute: false WITH _q_0 AS (SELECT t1.c AS c FROM t1 AS t1) SELECT * FROM (_q_0 AS _q_0 CROSS JOIN t2 AS t2); WITH _q_0 AS (SELECT t1.c AS c FROM t1 AS t1) SELECT * FROM (_q_0 AS _q_0 CROSS JOIN t2 AS t2); # title: Outer scope selects single column from wrapped table with a join WITH _q_0 AS ( SELECT x.a AS a FROM x AS x ), y_2 AS ( SELECT y.b AS b FROM y AS y ) SELECT y.b AS b FROM ( _q_0 AS _q_0 JOIN y_2 AS y ON _q_0.a = y.b ); SELECT y.b AS b FROM (x AS x JOIN y AS y ON x.a = y.b); # title: merge cte into subquery 
with overlapping alias WITH q AS ( SELECT y.b AS a FROM y AS y ) SELECT q.a AS a FROM x AS q WHERE q.a IN ( SELECT q.a AS a FROM q AS q ); SELECT q.a AS a FROM x AS q WHERE q.a IN (SELECT y.b AS a FROM y AS y); # title: dont merge when inner query has ORDER BY and outer query is UNION WITH q AS ( SELECT x.a AS a FROM x ORDER BY x.a ) SELECT q.a AS a FROM q UNION ALL SELECT 1 AS a; WITH q AS (SELECT x.a AS a FROM x AS x ORDER BY x.a) SELECT q.a AS a FROM q AS q UNION ALL SELECT 1 AS a; # title: Consecutive inner - outer conflicting names WITH tbl AS (select 1 as id) SELECT id FROM ( SELECT OTBL.id FROM ( SELECT OTBL.id FROM ( SELECT OTBL.id FROM tbl AS OTBL LEFT OUTER JOIN tbl AS ITBL ON OTBL.id = ITBL.id ) AS OTBL LEFT OUTER JOIN tbl AS ITBL ON OTBL.id = ITBL.id ) AS OTBL LEFT OUTER JOIN tbl AS ITBL ON OTBL.id = ITBL.id ) AS ITBL; WITH tbl AS (SELECT 1 AS id) SELECT OTBL.id AS id FROM tbl AS OTBL LEFT OUTER JOIN tbl AS ITBL_2 ON OTBL.id = ITBL_2.id LEFT OUTER JOIN tbl AS ITBL_3 ON OTBL.id = ITBL_3.id LEFT OUTER JOIN tbl AS ITBL ON OTBL.id = ITBL.id; # title: Inner query contains subquery with an alias that conflicts with outer query WITH i AS ( SELECT a FROM ( SELECT 1 a ) AS conflict ), j AS ( SELECT 1 AS a ) SELECT i.a, conflict.a FROM i LEFT JOIN j AS conflict ON i.a = conflict.a; WITH j AS (SELECT 1 AS a) SELECT conflict_2.a AS a, conflict.a AS a FROM (SELECT 1 AS a) AS conflict_2 LEFT JOIN j AS conflict ON conflict_2.a = conflict.a; # title: column name is not lost with cte as ( select x.a * x.b as mult from x ) select cte.mult from cte; SELECT x.a * x.b AS mult FROM x AS x; # title: avoid merging subquery with JOIN WITH t0 AS ( SELECT 5 AS id ), t1 AS ( SELECT 1 AS id, 'US' AS cid ), t2 AS ( SELECT 1 AS id, 'US' AS cid ) SELECT t0.id, t3.cid AS cid FROM t0 INNER JOIN ( SELECT t1.id, t2.cid FROM t1 RIGHT JOIN t2 ON t1.cid = t2.cid ) AS t3 ON t0.id = t3.id; WITH t0 AS (SELECT 5 AS id), t1 AS (SELECT 1 AS id, 'US' AS cid), t2 AS (SELECT 1 AS id, 'US' AS cid) 
SELECT t0.id AS id, t3.cid AS cid FROM t0 AS t0 INNER JOIN (SELECT t1.id AS id, t2.cid AS cid FROM t1 AS t1 RIGHT JOIN t2 AS t2 ON t1.cid = t2.cid) AS t3 ON t0.id = t3.id; # title: Dont replace GROUP and ORDER BY if expression is literal WITH t1 AS (SELECT 1 AS col) SELECT a, SUM(b) AS b FROM (SELECT 6 AS a, col AS b FROM t1) AS t GROUP BY a ORDER BY a; WITH t1 AS (SELECT 1 AS col) SELECT 6 AS a, SUM(t1.col) AS b FROM t1 AS t1 GROUP BY a ORDER BY a; ================================================ FILE: tests/fixtures/optimizer/normalize.sql ================================================ (A OR B) AND (B OR C) AND (E OR F); (A OR B) AND (B OR C) AND (E OR F); (A AND B) OR (B AND C AND D); (A OR C) AND (A OR D) AND B; (A OR B) AND (A OR C) AND (A OR D) AND (B OR C) AND (B OR D) AND B; (A OR C) AND (A OR D) AND B; (A AND E) OR (B AND C) OR (D AND (E OR F)); (A OR B OR D) AND (A OR C OR D) AND (B OR D OR E) AND (B OR E OR F) AND (C OR D OR E) AND (C OR E OR F); (A AND B AND C AND D AND E AND F AND G) OR (H AND I AND J AND K AND L AND M AND N) OR (O AND P AND Q); (A AND B AND C AND D AND E AND F AND G) OR (H AND I AND J AND K AND L AND M AND N) OR (O AND P AND Q); NOT NOT NOT (A OR B); NOT A AND NOT B; A OR B; A OR B; A AND (B AND C); A AND B AND C; A OR (B AND C); (A OR B) AND (A OR C); (A AND B) OR C; (A OR C) AND (B OR C); A OR (B OR (C AND D)); (A OR B OR C) AND (A OR B OR D); A OR ((((B OR C) AND (B OR D)) OR C) AND (((B OR C) AND (B OR D)) OR D)); (A OR B OR C) AND (A OR B OR D); (A AND B) OR (C AND D); (A OR C) AND (A OR D) AND (B OR C) AND (B OR D); (A AND B) OR (C OR (D AND E)); (A OR C OR D) AND (A OR C OR E) AND (B OR C OR D) AND (B OR C OR E); SELECT * FROM x WHERE (A AND B) OR C; SELECT * FROM x WHERE (A OR C) AND (B OR C); dt2 between '2022-01-01 12:00:00' and '2022-12-31' and dt2 >= '2022-05-01 12:00:00' or dt2 = '2021-06-01 12:00:00'; (dt2 <= '2022-12-31' OR dt2 = '2021-06-01 12:00:00') AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-01-01 12:00:00') 
AND (dt2 = '2021-06-01 12:00:00' OR dt2 >= '2022-05-01 12:00:00'); ================================================ FILE: tests/fixtures/optimizer/normalize_identifiers.sql ================================================ foo; foo; # dialect: snowflake foo + "bar".baz; FOO + "bar".BAZ; SELECT a FROM x; SELECT a FROM x; # dialect: snowflake SELECT A FROM X; SELECT A FROM X; SELECT "A" FROM "X"; SELECT "A" FROM "X"; SELECT a AS A FROM x; SELECT a AS a FROM x; # dialect: snowflake SELECT A AS a FROM X; SELECT A AS A FROM X; SELECT * FROM x; SELECT * FROM x; SELECT A FROM x; SELECT a FROM x; # dialect: snowflake SELECT a FROM X; SELECT A FROM X; SELECT a FROM X; SELECT a FROM x; # dialect: snowflake SELECT A FROM x; SELECT A FROM X; SELECT A AS A FROM (SELECT a AS A FROM x); SELECT a AS a FROM (SELECT a AS a FROM x); SELECT a AS B FROM x ORDER BY B; SELECT a AS b FROM x ORDER BY b; SELECT A FROM x ORDER BY A; SELECT a FROM x ORDER BY a; SELECT A AS B FROM X GROUP BY A HAVING SUM(B) > 0; SELECT a AS b FROM x GROUP BY a HAVING SUM(b) > 0; SELECT A AS B, SUM(B) AS C FROM X GROUP BY A HAVING C > 0; SELECT a AS b, SUM(b) AS c FROM x GROUP BY a HAVING c > 0; SELECT A FROM X UNION SELECT A FROM X; SELECT a FROM x UNION SELECT a FROM x; SELECT A AS A FROM X UNION SELECT A AS A FROM X; SELECT a AS a FROM x UNION SELECT a AS a FROM x; (SELECT A AS A FROM X); (SELECT a AS a FROM x); # dialect: snowflake SELECT a /* sqlglot.meta case_sensitive */, b FROM table /* sqlglot.meta case_sensitive */; SELECT a /* sqlglot.meta case_sensitive */, B FROM table /* sqlglot.meta case_sensitive */; # dialect: redshift SELECT COALESCE(json_val.a /* sqlglot.meta case_sensitive */, json_val.A /* sqlglot.meta case_sensitive */) FROM tbl; SELECT COALESCE(json_val.a /* sqlglot.meta case_sensitive */, json_val.A /* sqlglot.meta case_sensitive */) FROM tbl; SELECT @X; SELECT @X; # dialect: bigquery,normalization_strategy=case_insensitive_uppercase SELECT `foo`, `BaR` FROM baz CROSS JOIN `bla` CROSS 
JOIN bloo; SELECT `FOO`, `BAR` FROM BAZ CROSS JOIN `BLA` CROSS JOIN BLOO; ================================================ FILE: tests/fixtures/optimizer/optimize_joins.sql ================================================ SELECT * FROM x JOIN y ON y.a = 1 JOIN z ON x.a = z.a AND y.a = z.a; SELECT * FROM x JOIN z ON x.a = z.a AND TRUE JOIN y ON y.a = 1 AND y.a = z.a; SELECT * FROM x JOIN y ON y.a = 1 JOIN z ON x.a = z.a; SELECT * FROM x JOIN y ON y.a = 1 JOIN z ON x.a = z.a; SELECT * FROM x CROSS JOIN y JOIN z ON x.a = z.a AND y.a = z.a; SELECT * FROM x JOIN z ON x.a = z.a AND TRUE JOIN y ON y.a = z.a; SELECT * FROM x LEFT JOIN y ON y.a = 1 JOIN z ON x.a = z.a AND y.a = z.a; SELECT * FROM x LEFT JOIN y ON y.a = 1 JOIN z ON x.a = z.a AND y.a = z.a; SELECT * FROM x INNER JOIN z ON x.id = z.id; SELECT * FROM x JOIN z ON x.id = z.id; SELECT * FROM x LEFT OUTER JOIN z; SELECT * FROM x LEFT JOIN z ON TRUE; SELECT * FROM x CROSS JOIN z; SELECT * FROM x CROSS JOIN z; SELECT * FROM x JOIN z; SELECT * FROM x CROSS JOIN z; SELECT * FROM x FULL JOIN z; SELECT * FROM x FULL JOIN z ON TRUE; SELECT * FROM x NATURAL JOIN z; SELECT * FROM x NATURAL JOIN z ON TRUE; SELECT * FROM x RIGHT JOIN z; SELECT * FROM x RIGHT JOIN z ON TRUE; SELECT * FROM x JOIN z USING (id); SELECT * FROM x JOIN z USING (id); SELECT * FROM x CROSS JOIN z ON TRUE; SELECT * FROM x CROSS JOIN z; SELECT * FROM x LEFT ANTI JOIN y ON x.a = y.a; SELECT * FROM x LEFT ANTI JOIN y ON x.a = y.a; SELECT * FROM x LEFT SEMI JOIN y ON x.a = y.a; SELECT * FROM x LEFT SEMI JOIN y ON x.a = y.a; ================================================ FILE: tests/fixtures/optimizer/optimizer.sql ================================================ # title: lateral # execute: false SELECT a, m FROM z LATERAL VIEW EXPLODE([1, 2]) q AS m; SELECT "z"."a" AS "a", "q"."m" AS "m" FROM "z" AS "z" LATERAL VIEW EXPLODE(ARRAY(1, 2)) q AS "m"; # title: unnest # execute: false SELECT x FROM UNNEST([1, 2]) AS q(x, y); SELECT "q"."x" AS "x" FROM 
UNNEST(ARRAY(1, 2)) AS "q"("x", "y"); # title: explode_outer # dialect: spark # execute: false CREATE OR REPLACE TEMPORARY VIEW latest_boo AS SELECT TRIM(split(points, ':')[0]) as points_type, TRIM(split(points, ':')[1]) as points_value FROM ( SELECT explode_outer(split(object_pointsText, ',')) as points FROM ( SELECT object_pointstext, FROM boo ) WHERE object_pointstext IS NOT NULL ); CREATE OR REPLACE TEMPORARY VIEW `latest_boo` AS WITH `_1` AS ( SELECT EXPLODE_OUTER(SPLIT(`boo`.`object_pointstext`, ',')) AS `points` FROM `boo` AS `boo` WHERE NOT `boo`.`object_pointstext` IS NULL ) SELECT TRIM(SPLIT(`_1`.`points`, ':')[0]) AS `points_type`, TRIM(SPLIT(`_1`.`points`, ':')[1]) AS `points_value` FROM `_1` AS `_1`; # title: Union in CTE WITH cte AS ( ( SELECT a FROM x ) UNION ALL ( SELECT b AS a FROM y ) ) SELECT * FROM cte; WITH "cte" AS ( ( SELECT "x"."a" AS "a" FROM "x" AS "x" ) UNION ALL ( SELECT "y"."b" AS "a" FROM "y" AS "y" ) ) SELECT "cte"."a" AS "a" FROM "cte" AS "cte"; # title: Chained CTEs WITH cte1 AS ( SELECT a FROM x ), cte2 AS ( SELECT a + 1 AS a FROM cte1 ) SELECT a FROM cte1 UNION ALL SELECT a FROM cte2; WITH "cte1" AS ( SELECT "x"."a" AS "a" FROM "x" AS "x" ) SELECT "cte1"."a" AS "a" FROM "cte1" AS "cte1" UNION ALL SELECT "cte1"."a" + 1 AS "a" FROM "cte1" AS "cte1"; # title: Correlated subquery SELECT a, SUM(b) AS sum_b FROM ( SELECT x.a, y.b FROM x, y WHERE (SELECT max(b) FROM y WHERE x.b = y.b) >= 0 AND x.b = y.b ) d WHERE (TRUE AND TRUE OR 'a' = 'b') AND a > 1 GROUP BY a; WITH "_u_0" AS ( SELECT MAX("y"."b") AS "_col_0", "y"."b" AS "_u_1" FROM "y" AS "y" GROUP BY "y"."b" ) SELECT "x"."a" AS "a", SUM("y"."b") AS "sum_b" FROM "x" AS "x" JOIN "y" AS "y" ON "x"."b" = "y"."b" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "x"."b" WHERE "_u_0"."_col_0" >= 0 AND "x"."a" > 1 GROUP BY "x"."a"; # title: Root subquery (SELECT a FROM x) LIMIT 1; ( SELECT "x"."a" AS "a" FROM "x" AS "x" ) LIMIT 1; # title: Root subquery is union (SELECT b FROM x UNION SELECT b 
FROM y ORDER BY b) LIMIT 1; ( SELECT "x"."b" AS "b" FROM "x" AS "x" UNION SELECT "y"."b" AS "b" FROM "y" AS "y" ORDER BY "b" ) LIMIT 1; # title: broadcast # dialect: spark SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b; SELECT /*+ BROADCAST(`y`) */ `x`.`b` AS `b` FROM `x` AS `x` JOIN `y` AS `y` ON `x`.`b` = `y`.`b`; # title: aggregate # execute: false SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x; SELECT AGGREGATE(ARRAY("x"."a", "x"."b"), 0, ("x", "acc") -> "x" + "acc" + "x"."a") AS "sum_agg" FROM "x" AS "x"; # title: values SELECT cola, colb FROM (VALUES (1, 'test'), (2, 'test2')) AS tab(cola, colb); SELECT "tab"."cola" AS "cola", "tab"."colb" AS "colb" FROM (VALUES (1, 'test'), (2, 'test2')) AS "tab"("cola", "colb"); # title: spark values # dialect: spark SELECT cola, colb FROM (VALUES (1, 'test'), (2, 'test2')) AS tab(cola, colb); SELECT `tab`.`cola` AS `cola`, `tab`.`colb` AS `colb` FROM VALUES (1, 'test'), (2, 'test2') AS `tab`(`cola`, `colb`); # title: complex CTE dependencies WITH m AS ( SELECT a, b FROM (VALUES (1, 2)) AS a1(a, b) ), n AS ( SELECT a, b FROM m WHERE m.a = 1 ), o AS ( SELECT a, b FROM m WHERE m.a = 2 ) SELECT n.a, n.b, o.b FROM n FULL OUTER JOIN o ON n.a = o.a CROSS JOIN n AS n2 WHERE o.b > 0 AND n.a = n2.a; WITH "m" AS ( SELECT "a1"."a" AS "a", "a1"."b" AS "b" FROM (VALUES (1, 2)) AS "a1"("a", "b") ), "n" AS ( SELECT "m"."a" AS "a", "m"."b" AS "b" FROM "m" AS "m" WHERE "m"."a" = 1 ), "o" AS ( SELECT "m"."a" AS "a", "m"."b" AS "b" FROM "m" AS "m" WHERE "m"."a" = 2 ) SELECT "n"."a" AS "a", "n"."b" AS "b", "o"."b" AS "b" FROM "n" AS "n" FULL JOIN "o" AS "o" ON "n"."a" = "o"."a" JOIN "n" AS "n2" ON "n"."a" = "n2"."a" WHERE "o"."b" > 0; # title: Broadcast hint # dialect: spark WITH m AS ( SELECT x.a, x.b FROM x ), n AS ( SELECT y.b, y.c FROM y ), joined as ( SELECT /*+ BROADCAST(n) */ m.a, n.c FROM m JOIN n ON m.b = n.b ) SELECT joined.a, joined.c FROM joined; SELECT /*+ BROADCAST(`y`) */ `x`.`a` AS 
`a`, `y`.`c` AS `c` FROM `x` AS `x` JOIN `y` AS `y` ON `x`.`b` = `y`.`b`; # title: Mix Table and Column Hints # dialect: spark WITH m AS ( SELECT x.a, x.b FROM x ), n AS ( SELECT y.b, y.c FROM y ), joined as ( SELECT /*+ BROADCAST(m), MERGE(m, n) */ m.a, n.c FROM m JOIN n ON m.b = n.b ) SELECT /*+ COALESCE(3) */ joined.a, joined.c FROM joined; SELECT /*+ COALESCE(3), BROADCAST(`x`), MERGE(`x`, `y`) */ `x`.`a` AS `a`, `y`.`c` AS `c` FROM `x` AS `x` JOIN `y` AS `y` ON `x`.`b` = `y`.`b`; WITH cte1 AS ( WITH cte2 AS ( SELECT a, b FROM x ) SELECT a1 FROM ( WITH cte3 AS (SELECT 1) SELECT a AS a1, b AS b1 FROM cte2 ) ) SELECT a1 FROM cte1; SELECT "x"."a" AS "a1" FROM "x" AS "x"; # title: recursive cte WITH RECURSIVE cte1 AS ( SELECT * FROM ( SELECT 1 AS a, 2 AS b ) base CROSS JOIN (SELECT 3 c) y UNION ALL SELECT * FROM cte1 WHERE a < 1 ) SELECT * FROM cte1; WITH RECURSIVE "base" AS ( SELECT 1 AS "a", 2 AS "b" ), "y" AS ( SELECT 3 AS "c" ), "cte1" AS ( SELECT "base"."a" AS "a", "base"."b" AS "b", "y"."c" AS "c" FROM "base" AS "base" CROSS JOIN "y" AS "y" UNION ALL SELECT "cte1"."a" AS "a", "cte1"."b" AS "b", "cte1"."c" AS "c" FROM "cte1" AS "cte1" WHERE "cte1"."a" < 1 ) SELECT "cte1"."a" AS "a", "cte1"."b" AS "b", "cte1"."c" AS "c" FROM "cte1" AS "cte1"; # title: right join should not push down to from SELECT x.a, y.b FROM x RIGHT JOIN y ON x.a = y.b WHERE x.b = 1; SELECT "x"."a" AS "a", "y"."b" AS "b" FROM "x" AS "x" RIGHT JOIN "y" AS "y" ON "x"."a" = "y"."b" WHERE "x"."b" = 1; # title: right join can push down to itself SELECT x.a, y.b FROM x RIGHT JOIN y ON x.a = y.b WHERE y.b = 1; WITH "y_2" AS ( SELECT "y"."b" AS "b" FROM "y" AS "y" WHERE "y"."b" = 1 ) SELECT "x"."a" AS "a", "y"."b" AS "b" FROM "x" AS "x" RIGHT JOIN "y_2" AS "y" ON "x"."a" = "y"."b"; # title: lateral column alias reference SELECT x.a + 1 AS c, c + 1 AS d FROM x; SELECT "x"."a" + 1 AS "c", "x"."a" + 2 AS "d" FROM "x" AS "x"; # title: column reference takes priority over lateral column alias reference 
SELECT x.a + 1 AS b, b + 1 AS c FROM x; SELECT "x"."a" + 1 AS "b", "x"."b" + 1 AS "c" FROM "x" AS "x"; # title: unqualified struct element is selected in the outer query # execute: false WITH "cte" AS ( SELECT FROM_JSON("value", 'STRUCT>>>') AS "struct" FROM "tbl" ) SELECT "struct"."f1"."f2"."f3"."f4" AS "f4" FROM "cte"; SELECT FROM_JSON("tbl"."value", 'STRUCT>>>')."f1"."f2"."f3"."f4" AS "f4" FROM "tbl" AS "tbl"; # title: qualified struct element is selected in the outer query # execute: false WITH "cte" AS ( SELECT FROM_JSON("value", 'STRUCT, STRUCT>') AS "struct" FROM "tbl" ) SELECT "cte"."struct"."f1"."f2" AS "f2", "cte"."struct"."f1"."f3" AS "f3" FROM "cte"; SELECT FROM_JSON("tbl"."value", 'STRUCT, STRUCT>')."f1"."f2" AS "f2", FROM_JSON("tbl"."value", 'STRUCT, STRUCT>')."f1"."f3" AS "f3" FROM "tbl" AS "tbl"; # title: left join doesnt push down predicate to join in merge subqueries # execute: false SELECT main_query.id, main_query.score FROM ( SELECT alias_1.id, score FROM ( SELECT company_table.score AS score, id FROM company_table ) AS alias_1 JOIN ( SELECT id FROM ( SELECT company_table_2.id, CASE WHEN unlocked.company_id IS NULL THEN 0 ELSE 1 END AS is_exported FROM company_table AS company_table_2 LEFT JOIN unlocked AS unlocked ON company_table_2.id = unlocked.company_id ) WHERE NOT id IS NULL AND is_exported = FALSE ) AS alias_2 ON ( alias_1.id = alias_2.id ) ) AS main_query; WITH "alias_2" AS ( SELECT "company_table_2"."id" AS "id" FROM "company_table" AS "company_table_2" LEFT JOIN "unlocked" AS "unlocked" ON "company_table_2"."id" = "unlocked"."company_id" WHERE CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE AND NOT "company_table_2"."id" IS NULL ) SELECT "company_table"."id" AS "id", "company_table"."score" AS "score" FROM "company_table" AS "company_table" JOIN "alias_2" AS "alias_2" ON "alias_2"."id" = "company_table"."id"; # title: db.table alias clash # execute: false select * from db1.tbl, db2.tbl; SELECT * FROM "db1"."tbl" AS 
"tbl" CROSS JOIN "db2"."tbl" AS "tbl_2"; # execute: false SELECT *, IFF( IFF( uploaded_at >= '2022-06-16', 'workday', 'bamboohr' ) = source_system, 1, 0 ) AS sort_order FROM unioned WHERE ( source_system = 'workday' AND '9999-01-01' >= '2022-06-16' ) OR ( source_system = 'bamboohr' AND '0001-01-01' < '2022-06-16' ) QUALIFY ROW_NUMBER() OVER ( PARTITION BY unique_filter_key ORDER BY sort_order DESC, 1 ) = 1; SELECT *, IFF( "unioned"."source_system" = IFF("unioned"."uploaded_at" >= '2022-06-16', 'workday', 'bamboohr'), 1, 0 ) AS "sort_order" FROM "unioned" AS "unioned" WHERE "unioned"."source_system" = 'bamboohr' OR "unioned"."source_system" = 'workday' QUALIFY ROW_NUMBER() OVER ( PARTITION BY "unioned"."unique_filter_key" ORDER BY "unioned"."sort_order" DESC, 1 ) = 1; # title: pivoted source with explicit selections # execute: false SELECT * FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT "_1"."a" AS "a", "_1"."x" AS "x", "_1"."y" AS "y", "_1"."z" AS "z" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" ) AS "_0" PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with explicit selections where one of them is excluded & selected at the same time # note: we need to respect the exclude when selecting * from pivoted source and not include the computed column twice # execute: false SELECT * EXCEPT (x), CAST(x AS TEXT) AS x FROM (SELECT a, b, c FROM sc.tb) PIVOT (SUM(c) FOR b IN ('x','y','z')); SELECT "_1"."a" AS "a", "_1"."y" AS "y", "_1"."z" AS "z", CAST("_1"."x" AS TEXT) AS "x" FROM ( SELECT "tb"."a" AS "a", "tb"."b" AS "b", "tb"."c" AS "c" FROM "sc"."tb" AS "tb" ) AS "_0" PIVOT(SUM("_0"."c") FOR "_0"."b" IN ('x', 'y', 'z')) AS "_1"; # title: pivoted source with implicit selections # execute: false SELECT * FROM (SELECT * FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT "_1"."g" AS "g", "_1"."x" AS "x", "_1"."y" AS "y" FROM ( SELECT "u"."f" AS "f", "u"."g" AS "g", 
"u"."h" AS "h" FROM "u" AS "u" ) AS "_0" PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "_1"; # title: selecting explicit qualified columns from pivoted source with explicit selections # execute: false SELECT piv.x, piv.y FROM (SELECT f, h FROM u) PIVOT (SUM(f) FOR h IN ('x', 'y')) AS piv; SELECT "piv"."x" AS "x", "piv"."y" AS "y" FROM ( SELECT "u"."f" AS "f", "u"."h" AS "h" FROM "u" AS "u" ) AS "_0" PIVOT(SUM("_0"."f") FOR "_0"."h" IN ('x', 'y')) AS "piv"; # title: selecting explicit unqualified columns from pivoted source with implicit selections # execute: false SELECT x, y FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT "_0"."x" AS "x", "_0"."y" AS "y" FROM "u" AS "u" PIVOT(SUM("u"."f") FOR "u"."h" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted CTE source, using alias for the aggregation and generating bigquery # execute: false # dialect: bigquery WITH u_cte(f, g, h) AS (SELECT * FROM u) SELECT * FROM u_cte PIVOT(SUM(f) AS sum FOR h IN ('x', 'y')); WITH `u_cte` AS ( SELECT `u`.`f` AS `f`, `u`.`g` AS `g`, `u`.`h` AS `h` FROM `u` AS `u` ) SELECT `_0`.`g` AS `g`, `_0`.`sum_x` AS `sum_x`, `_0`.`sum_y` AS `sum_y` FROM `u_cte` AS `u_cte` PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_0`; # title: selecting all columns from a pivoted source and generating snowflake # execute: false # dialect: snowflake SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT "_0"."G" AS "G", "_0"."'x'" AS "'x'", "_0"."'y'" AS "'y'" FROM "U" AS "U" PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted source and generating spark # note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) # execute: false # dialect: spark SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y')); SELECT `_0`.`g` AS `g`, `_0`.`x` AS `x`, `_0`.`y` AS `y` FROM ( SELECT * FROM `u` AS `u` PIVOT(SUM(`u`.`f`) FOR `h` IN ('x', 'y')) ) AS `_0`; # title: selecting all columns from a pivoted 
source, pivot has column aliases # execute: false # dialect: snowflake WITH source AS ( SELECT id, key, value, timestamp_1, timestamp_2 FROM DB_NAME.SCHEMA_NAME.TABLE_NAME ), enriched AS ( SELECT * FROM source PIVOT(MAX(value) FOR key IN ('a', 'b', 'c')) AS final (id, timestamp_1, timestamp_2, col_1, col_2, col_3) ) SELECT id, timestamp_1 FROM enriched; WITH "SOURCE" AS ( SELECT "TABLE_NAME"."ID" AS "ID", "TABLE_NAME"."KEY" AS "KEY", "TABLE_NAME"."VALUE" AS "VALUE", "TABLE_NAME"."TIMESTAMP_1" AS "TIMESTAMP_1", "TABLE_NAME"."TIMESTAMP_2" AS "TIMESTAMP_2" FROM "DB_NAME"."SCHEMA_NAME"."TABLE_NAME" AS "TABLE_NAME" ) SELECT "FINAL"."ID" AS "ID", "FINAL"."TIMESTAMP_1" AS "TIMESTAMP_1" FROM "SOURCE" AS "SOURCE" PIVOT(MAX("SOURCE"."VALUE") FOR "SOURCE"."KEY" IN ('a', 'b', 'c')) AS "FINAL"("ID", "TIMESTAMP_1", "TIMESTAMP_2", "COL_1", "COL_2", "COL_3"); # title: unpivoted table source with a single value column, unpivot columns can't be qualified # execute: false # dialect: snowflake SELECT * FROM m_sales AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid; SELECT "M_SALES"."EMPID" AS "EMPID", "M_SALES"."DEPT" AS "DEPT", "M_SALES"."MONTH" AS "MONTH", "M_SALES"."SALES" AS "SALES" FROM "M_SALES" AS "M_SALES"("EMPID", "DEPT", "JAN", "FEB") UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "M_SALES" ORDER BY "M_SALES"."EMPID"; # title: unpivoted table source, unpivot has column aliases # execute: false SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) AS unpiv(a, b, c, d); SELECT "unpiv"."a" AS "a", "unpiv"."b" AS "b", "unpiv"."c" AS "c", "unpiv"."d" AS "d" FROM ( SELECT "m_sales"."empid" AS "empid", "m_sales"."dept" AS "dept", "m_sales"."jan" AS "jan", "m_sales"."feb" AS "feb" FROM "m_sales" AS "m_sales" ) AS "m_sales" UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a", "b", "c", "d"); # title: unpivoted derived table source with a single value column 
# execute: false # dialect: snowflake SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid; SELECT "_0"."EMPID" AS "EMPID", "_0"."DEPT" AS "DEPT", "_0"."MONTH" AS "MONTH", "_0"."SALES" AS "SALES" FROM ( SELECT "M_SALES"."EMPID" AS "EMPID", "M_SALES"."DEPT" AS "DEPT", "M_SALES"."JAN" AS "JAN", "M_SALES"."FEB" AS "FEB" FROM "M_SALES" AS "M_SALES" ) AS "M_SALES" UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_0" ORDER BY "_0"."EMPID"; # title: unpivoted table source with a single value column, unpivot columns can be qualified # execute: false # dialect: bigquery # note: the named columns are not supported by BQ but we add them here to avoid defining a schema SELECT * FROM produce AS produce(product, q1, q2, q3, q4) UNPIVOT(sales FOR quarter IN (q1, q2, q3, q4)); SELECT `produce`.`product` AS `product`, `produce`.`quarter` AS `quarter`, `produce`.`sales` AS `sales` FROM `produce` AS `produce` UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`, `produce`.`q3`, `produce`.`q4`)) AS `produce`; # title: unpivoted table source with multiple value columns # execute: false # dialect: bigquery SELECT * FROM produce AS produce(product, q1, q2, q3, q4) UNPIVOT((first_half_sales, second_half_sales) FOR semesters IN ((Q1, Q2) AS 'semester_1', (Q3, Q4) AS 'semester_2')); SELECT `produce`.`product` AS `product`, `produce`.`semesters` AS `semesters`, `produce`.`first_half_sales` AS `first_half_sales`, `produce`.`second_half_sales` AS `second_half_sales` FROM `produce` AS `produce` UNPIVOT((`first_half_sales`, `second_half_sales`) FOR `semesters` IN ( (`produce`.`q1`, `produce`.`q2`) AS 'semester_1', (`produce`.`q3`, `produce`.`q4`) AS 'semester_2' ) ) AS `produce`; # title: quoting is preserved # dialect: snowflake with cte1("id", foo) as (select 1, 2) select "id" from cte1; WITH "CTE1" AS ( SELECT 1 AS "id" ) SELECT "CTE1"."id" AS "id" FROM "CTE1" AS "CTE1"; # title: ensures proper quoting 
happens after all optimizations # execute: false SELECT "foO".x FROM (SELECT 1 AS x) AS "foO"; WITH "foO" AS ( SELECT 1 AS "x" ) SELECT "foO"."x" AS "x" FROM "foO" AS "foO"; # title: lateral subquery # execute: false # dialect: postgres SELECT u.user_id, l.log_date FROM users u CROSS JOIN LATERAL ( SELECT l.log_date FROM logs l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date DESC NULLS LAST LIMIT 1 ) l; SELECT "u"."user_id" AS "user_id", "l"."log_date" AS "log_date" FROM "users" AS "u" CROSS JOIN LATERAL ( SELECT "l"."log_date" AS "log_date" FROM "logs" AS "l" WHERE "l"."log_date" <= 100 AND "l"."user_id" = "u"."user_id" ORDER BY "l"."log_date" DESC NULLS LAST LIMIT 1 ) AS "l"; # title: bigquery column identifiers are case-insensitive # execute: false # dialect: bigquery WITH cte AS ( SELECT refresh_date AS `reFREsh_date`, term AS `TeRm`, `rank` FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` ) SELECT refresh_date AS `Day`, term AS Top_Term, rank, FROM cte WHERE rank = 1 AND refresh_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 2 WEEK) GROUP BY `dAy`, `top_term`, rank ORDER BY `DaY` DESC; SELECT `top_terms`.`refresh_date` AS `day`, `top_terms`.`term` AS `top_term`, `top_terms`.`rank` AS `rank` FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `top_terms` WHERE `top_terms`.`rank` = 1 AND `top_terms`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL '2' WEEK) GROUP BY `day`, `top_term`, `rank` ORDER BY `day` DESC; # title: group by keys cannot be simplified SELECT a + 1 + 1 + 1 + 1 AS b, 2 + 1 AS c FROM x GROUP BY a + 1 + 1 HAVING a + 1 + 1 + 1 + 1 > 1; SELECT "x"."a" + 1 + 1 + 1 + 1 AS "b", 3 AS "c" FROM "x" AS "x" GROUP BY "x"."a" + 1 + 1 HAVING "x"."a" + 1 + 1 + 1 + 1 > 1; # title: replace alias with mult expression without wrapping it WITH cte AS (SELECT a * b AS c, a AS d, b as e FROM x) SELECT c + d - (c - e) AS f FROM cte; SELECT "x"."a" * "x"."b" + "x"."a" - ( "x"."a" * "x"."b" - "x"."b" ) AS "f" FROM "x" AS "x"; # title: wrapped 
table without alias # execute: false SELECT * FROM (tbl); SELECT * FROM ( "tbl" AS "tbl" ); # title: wrapped table with alias # execute: false SELECT * FROM (tbl AS tbl); SELECT * FROM ( "tbl" AS "tbl" ); # title: wrapped join of tables without alias SELECT a, c FROM (x LEFT JOIN y ON a = c); SELECT "x"."a" AS "a", "y"."c" AS "c" FROM ( "x" AS "x" LEFT JOIN "y" AS "y" ON "x"."a" = "y"."c" ); # title: wrapped join of tables with alias # execute: false SELECT a, c FROM (x LEFT JOIN y ON a = c) AS t; SELECT "x"."a" AS "a", "y"."c" AS "c" FROM "x" AS "x" LEFT JOIN "y" AS "y" ON "x"."a" = "y"."c"; # title: chained wrapped joins without aliases # execute: false SELECT * FROM ((a CROSS JOIN ((b CROSS JOIN c) CROSS JOIN (d CROSS JOIN e)))); SELECT * FROM ( ( "a" AS "a" CROSS JOIN ( ( "b" AS "b" CROSS JOIN "c" AS "c" ) CROSS JOIN ( "d" AS "d" CROSS JOIN "e" AS "e" ) ) ) ); # title: chained wrapped joins with aliases # execute: false SELECT * FROM ((a AS foo CROSS JOIN b AS bar) CROSS JOIN c AS baz); SELECT * FROM ( ( "a" AS "foo" CROSS JOIN "b" AS "bar" ) CROSS JOIN "c" AS "baz" ); # title: table joined with join construct SELECT x.a, y.b, z.c FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b; SELECT "x"."a" AS "a", "y"."b" AS "b", "z"."c" AS "c" FROM "x" AS "x" LEFT JOIN ( "y" AS "y" JOIN "z" AS "z" ON "y"."c" = "z"."c" ) ON "x"."b" = "y"."b"; # title: select * from table joined with join construct # execute: false SELECT * FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b; SELECT "y"."b" AS "b", "y"."c" AS "c", "z"."a" AS "a", "z"."c" AS "c", "x"."a" AS "a", "x"."b" AS "b" FROM "x" AS "x" LEFT JOIN ( "y" AS "y" JOIN "z" AS "z" ON "y"."c" = "z"."c" ) ON "x"."b" = "y"."b"; # title: select * from wrapped subquery # execute: false SELECT * FROM ((SELECT * FROM tbl)); WITH "_0" AS ( SELECT * FROM "tbl" AS "tbl" ) SELECT * FROM ( "_0" AS "_0" ); # title: select * from wrapped subquery joined to a table (known schema) SELECT * FROM ((SELECT * FROM x) INNER 
JOIN y ON a = c); SELECT "x"."a" AS "a", "x"."b" AS "b", "y"."b" AS "b", "y"."c" AS "c" FROM ( "x" AS "x" JOIN "y" AS "y" ON "x"."a" = "y"."c" ); # title: select * from wrapped subquery joined to a table (unknown schema) # execute: false SELECT * FROM ((SELECT c FROM t1) JOIN t2); WITH "_0" AS ( SELECT "t1"."c" AS "c" FROM "t1" AS "t1" ) SELECT * FROM ( "_0" AS "_0" CROSS JOIN "t2" AS "t2" ); # title: select specific columns from wrapped subquery joined to a table SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b); SELECT "y"."b" AS "b" FROM ( "x" AS "x" JOIN "y" AS "y" ON "x"."a" = "y"."b" ); # title: select * from wrapped join of subqueries (unknown schema) # execute: false SELECT * FROM ((SELECT * FROM t1) JOIN (SELECT * FROM t2)); WITH "_0" AS ( SELECT * FROM "t1" AS "t1" ), "_1" AS ( SELECT * FROM "t2" AS "t2" ) SELECT * FROM ( "_0" AS "_0" CROSS JOIN "_1" AS "_1" ); # title: select * from wrapped join of subqueries (known schema) SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c); SELECT "x"."a" AS "a", "x"."b" AS "b", "y"."b" AS "b", "y"."c" AS "c" FROM ( "x" AS "x" JOIN "y" AS "y" ON "x"."a" = "y"."c" ); # title: replace scalar subquery, wrap resulting column in a MAX SELECT a, SUM(c) / (SELECT SUM(c) FROM y) * 100 AS foo FROM y INNER JOIN x ON y.b = x.b GROUP BY a; WITH "_u_0" AS ( SELECT SUM("y"."c") AS "_col_0" FROM "y" AS "y" ) SELECT "x"."a" AS "a", SUM("y"."c") / MAX("_u_0"."_col_0") * 100 AS "foo" FROM "y" AS "y" CROSS JOIN "_u_0" AS "_u_0" JOIN "x" AS "x" ON "x"."b" = "y"."b" GROUP BY "x"."a"; # title: select * from a cte, which had one of its two columns aliased WITH cte(x, y) AS (SELECT 1, 2) SELECT * FROM cte AS cte(a); WITH "cte" AS ( SELECT 1 AS "x", 2 AS "y" ) SELECT "cte"."a" AS "a", "cte"."y" AS "y" FROM "cte" AS "cte"("a"); # title: select single column from a cte using its alias WITH cte(x) AS (SELECT 1) SELECT a FROM cte AS cte(a); WITH "cte" AS ( SELECT 1 AS "x" ) SELECT "cte"."a" AS "a" FROM "cte" AS 
"cte"("a"); # title: joined ctes with a "using" clause, one of which has had its column aliased WITH m(a) AS (SELECT 1), n(b) AS (SELECT 1) SELECT * FROM m JOIN n AS foo(a) USING (a); WITH "m" AS ( SELECT 1 AS "a" ), "n" AS ( SELECT 1 AS "b" ) SELECT COALESCE("m"."a", "foo"."a") AS "a" FROM "m" AS "m" JOIN "n" AS "foo"("a") ON "foo"."a" = "m"."a"; # title: reduction of string concatenation that uses CONCAT(..), || and + # execute: false SELECT CONCAT('a', 'b') || CONCAT(CONCAT('c', 'd'), CONCAT('e', 'f')) + ('g' || 'h' || 'i'); SELECT 'abcdefghi' AS "_col_0"; # title: complex query with derived tables and redundant parentheses # execute: false # dialect: snowflake SELECT ("SUBQUERY_0"."KEY") AS "SUBQUERY_1_COL_0" FROM ( SELECT * FROM ((( SELECT * FROM ( SELECT event_name AS key, insert_ts FROM ( SELECT insert_ts, event_name FROM sales WHERE insert_ts > '2023-08-07 21:03:35.590 -0700' ) ) ))) AS "SF_CONNECTOR_QUERY_ALIAS" ) AS "SUBQUERY_0"; SELECT "SALES"."EVENT_NAME" AS "SUBQUERY_1_COL_0" FROM "SALES" AS "SALES" WHERE "SALES"."INSERT_TS" > '2023-08-07 21:03:35.590 -0700'; # title: using join without select * # execute: false with alias1 as (select * from table1), alias2 as (select * from table2), alias3 as ( select cid, min(od) as m_od, count(odi) as c_od, from alias2 group by 1 ) select alias1.cid, alias3.m_od, coalesce(alias3.c_od, 0) as c_od, from alias1 left join alias3 using (cid); WITH "alias3" AS ( SELECT "table2"."cid" AS "cid", MIN("table2"."od") AS "m_od", COUNT("table2"."odi") AS "c_od" FROM "table2" AS "table2" GROUP BY "table2"."cid" ) SELECT "table1"."cid" AS "cid", "alias3"."m_od" AS "m_od", COALESCE("alias3"."c_od", 0) AS "c_od" FROM "table1" AS "table1" LEFT JOIN "alias3" AS "alias3" ON "alias3"."cid" = "table1"."cid"; # title: CTE with EXPLODE cannot be merged # dialect: spark # execute: false SELECT Name, FruitStruct.`$id`, FruitStruct.value FROM (SELECT Name, explode(Fruits) as FruitStruct FROM fruits_table); WITH `_0` AS ( SELECT 
`fruits_table`.`name` AS `name`, EXPLODE(`fruits_table`.`fruits`) AS `fruitstruct` FROM `fruits_table` AS `fruits_table` ) SELECT `_0`.`name` AS `name`, `_0`.`fruitstruct`.`$id` AS `$id`, `_0`.`fruitstruct`.`value` AS `value` FROM `_0` AS `_0`; # title: mysql is case-sensitive by default # dialect: mysql # execute: false WITH T AS (SELECT 1 AS CoL) SELECT * FROM `T`; WITH `T` AS ( SELECT 1 AS `CoL` ) SELECT `T`.`CoL` AS `CoL` FROM `T` AS `T`; # title: override mysql's settings so it normalizes to lowercase # dialect: mysql, normalization_strategy = lowercase # execute: false WITH T AS (SELECT 1 AS `CoL`) SELECT * FROM T; WITH `t` AS ( SELECT 1 AS `CoL` ) SELECT `t`.`CoL` AS `CoL` FROM `t` AS `t`; # title: top-level query is parenthesized # execute: false WITH x AS ( SELECT a FROM t ) ( SELECT * FROM x UNION ALL SELECT * FROM x LIMIT 10 ) LIMIT 10; WITH "x" AS ( SELECT "t"."a" AS "a" FROM "t" AS "t" ) ( SELECT "x"."a" AS "a" FROM "x" AS "x" UNION ALL SELECT "x"."a" AS "a" FROM "x" AS "x" LIMIT 10 ) LIMIT 10; # title: avoid producing DAG cycle when pushing down predicate to join # execute: false SELECT a.company, b.num FROM route AS a(num, company, pos, stop) JOIN route AS b(num, company, pos, stop) ON (a.num = b.num) JOIN stops AS c(id, name) ON (c.id = b.stop) JOIN stops AS d(id, name) ON (d.id = c.id) WHERE c.name = 'Craiglockhart' OR d.name = 'Tollcross'; SELECT "a"."company" AS "company", "b"."num" AS "num" FROM "route" AS "a"("num", "company", "pos", "stop") JOIN "route" AS "b"("num", "company", "pos", "stop") ON "a"."num" = "b"."num" JOIN "stops" AS "c"("id", "name") ON "b"."stop" = "c"."id" JOIN "stops" AS "d"("id", "name") ON "c"."id" = "d"."id" AND ( "c"."name" = 'Craiglockhart' OR "d"."name" = 'Tollcross' ); # title: avoid dag cycles with unnesting subqueries # execute: false # dialect: snowflake SELECT A.ACCOUNT_ID, A.NAME, C.EMAIL_DOMAIN FROM ACCOUNTS AS A LEFT JOIN CONTACTS AS C ON C.ACCOUNT_ID = A.ACCOUNT_ID AND C.EMAIL_DOMAIN IN ( SELECT D.DOMAIN FROM 
DOMAINS D WHERE TYPE = 'education' ); WITH "_u_0" AS ( SELECT "D"."DOMAIN" AS "DOMAIN" FROM "DOMAINS" AS "D" WHERE "D"."TYPE" = 'education' GROUP BY "D"."DOMAIN" ) SELECT "A"."ACCOUNT_ID" AS "ACCOUNT_ID", "A"."NAME" AS "NAME", "C"."EMAIL_DOMAIN" AS "EMAIL_DOMAIN" FROM "ACCOUNTS" AS "A" LEFT JOIN "CONTACTS" AS "C" ON "A"."ACCOUNT_ID" = "C"."ACCOUNT_ID" LEFT JOIN "_u_0" AS "_u_0" ON "C"."EMAIL_DOMAIN" = "_u_0"."DOMAIN" WHERE NOT "_u_0"."DOMAIN" IS NULL; # title: decorrelate subquery and transpile ArrayAny correctly when generating spark # execute: false # dialect: spark SELECT COUNT(DISTINCT cs1.cs_order_number) AS `order count`, SUM(cs1.cs_ext_ship_cost) AS `total shipping cost`, SUM(cs1.cs_net_profit) AS `total net profit` FROM catalog_sales cs1, date_dim, customer_address, call_center WHERE date_dim.d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) AND cs1.cs_ship_date_sk = date_dim.d_date_sk AND cs1.cs_ship_addr_sk = customer_address.ca_address_sk AND customer_address.ca_state = 'GA' AND cs1.cs_call_center_sk = call_center.cc_call_center_sk AND call_center.cc_county IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) AND EXISTS( SELECT * FROM catalog_sales cs2 WHERE cs1.cs_order_number = cs2.cs_order_number AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) AND NOT EXISTS( SELECT * FROM catalog_returns cr1 WHERE cs1.cs_order_number = cr1.cr_order_number ) ORDER BY COUNT(DISTINCT cs1.cs_order_number ) LIMIT 100; WITH `_u_0` AS ( SELECT `cs2`.`cs_order_number` AS `_u_1`, COLLECT_LIST(`cs2`.`cs_warehouse_sk`) AS `_u_2` FROM `catalog_sales` AS `cs2` GROUP BY `cs2`.`cs_order_number` ), `_u_3` AS ( SELECT `cr1`.`cr_order_number` AS `_u_4` FROM `catalog_returns` AS `cr1` GROUP BY `cr1`.`cr_order_number` ) SELECT COUNT(DISTINCT `cs1`.`cs_order_number`) AS `order count`, SUM(`cs1`.`cs_ext_ship_cost`) AS `total shipping cost`, SUM(`cs1`.`cs_net_profit`) AS `total net profit` FROM 
`catalog_sales` AS `cs1` JOIN `date_dim` AS `date_dim` ON `cs1`.`cs_ship_date_sk` = `date_dim`.`d_date_sk` AND `date_dim`.`d_date` <= ( CAST(CAST('2002-02-01' AS DATE) AS TIMESTAMP) + INTERVAL '60' DAYS ) AND `date_dim`.`d_date` >= '2002-02-01' JOIN `customer_address` AS `customer_address` ON `cs1`.`cs_ship_addr_sk` = `customer_address`.`ca_address_sk` AND `customer_address`.`ca_state` = 'GA' JOIN `call_center` AS `call_center` ON `call_center`.`cc_call_center_sk` = `cs1`.`cs_call_center_sk` AND `call_center`.`cc_county` IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) LEFT JOIN `_u_0` AS `_u_0` ON `_u_0`.`_u_1` = `cs1`.`cs_order_number` LEFT JOIN `_u_3` AS `_u_3` ON `_u_3`.`_u_4` = `cs1`.`cs_order_number` WHERE `_u_3`.`_u_4` IS NULL AND ( SIZE(`_u_0`.`_u_2`) = 0 OR SIZE(FILTER(`_u_0`.`_u_2`, `_x` -> `cs1`.`cs_warehouse_sk` <> `_x`)) <> 0 ) AND NOT `_u_0`.`_u_1` IS NULL ORDER BY COUNT(DISTINCT `cs1`.`cs_order_number`) LIMIT 100; # execute: false SELECT * FROM event WHERE priority = 'High' AND tagname IN ( SELECT tag_input AS tagname FROM cascade WHERE tag_input = 'XXX' OR tag_output = 'XXX' UNION SELECT tag_output AS tagname FROM cascade WHERE tag_input = 'XXX' OR tag_output = 'XXX' ); WITH "_u_0" AS ( SELECT "cascade"."tag_input" AS "tagname" FROM "cascade" AS "cascade" WHERE "cascade"."tag_input" = 'XXX' OR "cascade"."tag_output" = 'XXX' UNION SELECT "cascade"."tag_output" AS "tagname" FROM "cascade" AS "cascade" WHERE "cascade"."tag_input" = 'XXX' OR "cascade"."tag_output" = 'XXX' ), "_u_1" AS ( SELECT "cascade"."tag_input" AS "tagname" FROM "_u_0" AS "_u_0" GROUP BY "cascade"."tag_input" ) SELECT * FROM "event" AS "event" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."tagname" = "event"."tagname" WHERE "event"."priority" = 'High' AND NOT "_u_1"."tagname" IS NULL; # title: SELECT TRANSFORM ... 
Spark clause when schema is provided # execute: false # dialect: spark WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' AS (y STRING) FROM a); WITH `a` AS ( SELECT 'v' AS `x` ), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' AS ( `y` STRING ) FROM `a` AS `a` ) SELECT `_0`.`y` AS `y` FROM `_0` AS `_0`; # title: SELECT TRANSFORM ... Spark clause when schema is not provided # execute: false # dialect: spark WITH a AS (SELECT 'v' AS x) SELECT * FROM (SELECT TRANSFORM(x) USING 'cat' FROM a); WITH `a` AS ( SELECT 'v' AS `x` ), `_0` AS ( SELECT TRANSFORM(`a`.`x`) USING 'cat' FROM `a` AS `a` ) SELECT `_0`.`key` AS `key`, `_0`.`value` AS `value` FROM `_0` AS `_0`; # title: avoid reordering of non inner joins # execute: true WITH t1 AS ( SELECT NULL AS id1 ), t2 AS ( SELECT 1 AS id2 ), t3 AS ( SELECT 'info' AS info ) SELECT t1.id1 AS id1, t2.id2 AS id2, t3.info AS info FROM t1 RIGHT JOIN t2 AS t2 ON t1.id1 = t2.id2 RIGHT JOIN t3 ON TRUE; WITH "t1" AS ( SELECT NULL AS "id1" ), "t2" AS ( SELECT 1 AS "id2" ), "t3" AS ( SELECT 'info' AS "info" ) SELECT "t1"."id1" AS "id1", "t2"."id2" AS "id2", "t3"."info" AS "info" FROM "t1" AS "t1" RIGHT JOIN "t2" AS "t2" ON "t1"."id1" = "t2"."id2" CROSS JOIN "t3" AS "t3"; # title: subquery in GENERATE_SERIES stays untouched # execute: false WITH t3 AS (SELECT t1.c1::bigint AS ref1 FROM (SELECT MAX(x.a) as c1 FROM x) t1 JOIN GENERATE_SERIES((SELECT MAX(a) FROM x), 10, 1) AS t2(c1) on t2.c1 > t1.c1) SELECT * FROM t3; WITH "t1" AS ( SELECT MAX("x"."a") AS "c1" FROM "x" AS "x" ) SELECT CAST("t1"."c1" AS BIGINT) AS "ref1" FROM "t1" AS "t1" JOIN GENERATE_SERIES(( SELECT MAX("x"."a") AS "_col_0" FROM "x" AS "x" ), 10, 1) AS "t2"("c1") ON "t1"."c1" < "t2"."c1"; # title: empty table right join GENERATE_SERIES should have data SELECT t1.c1 FROM z AS z RIGHT JOIN GENERATE_SERIES((SELECT MIN(x.a) FROM x), 10, 1) AS t1(c1) ON t1.c1 > z.c; SELECT "t1"."c1" AS "c1" FROM "z" AS "z" RIGHT JOIN GENERATE_SERIES(( SELECT MIN("x"."a") AS 
"_col_0" FROM "x" AS "x" ), 10, 1) AS "t1"("c1") ON "t1"."c1" > "z"."c" ; ================================================ FILE: tests/fixtures/optimizer/pushdown_cte_alias_columns.sql ================================================ WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y; WITH y(c) AS (SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y; WITH y(c) AS (SELECT SUM(a) as d FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y; WITH y(c) AS (SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y; WITH x(c) AS (SELECT SUM(1) a HAVING c > 0 LIMIT 1) SELECT * FROM x; WITH x(c) AS (SELECT SUM(1) AS c HAVING c > 0 LIMIT 1) SELECT * FROM x; -- Invalid statement in Snowflake but checking more complex structures WITH x(c) AS ((SELECT 1 a) HAVING c > 0) SELECT * FROM x; WITH x(c) AS ((SELECT 1 AS a) HAVING c > 0) SELECT * FROM x; -- Invalid statement in Snowflake but checking more complex structures WITH x(c) AS ((SELECT SUM(1) a) HAVING c > 0 LIMIT 1) SELECT * FROM x; WITH x(c) AS ((SELECT SUM(1) AS a) HAVING c > 0 LIMIT 1) SELECT * FROM x; -- Invalid statement in Snowflake but checking that we don't fail WITH x(c) AS (SELECT SUM(a) FROM x HAVING c > 0 UNION ALL SELECT SUM(a) FROM y HAVING c > 0) SELECT * FROM x; WITH x(c) AS (SELECT SUM(a) FROM x HAVING c > 0 UNION ALL SELECT SUM(a) FROM y HAVING c > 0) SELECT * FROM x; ================================================ FILE: tests/fixtures/optimizer/pushdown_predicates.sql ================================================ SELECT x.a AS a FROM (SELECT x.a FROM x AS x) AS x JOIN y WHERE x.a = 1 AND x.b = 1 AND y.a = 1; SELECT x.a AS a FROM (SELECT x.a FROM x AS x WHERE x.a = 1 AND x.b = 1) AS x JOIN y ON y.a = 1 WHERE TRUE AND TRUE AND TRUE; WITH x AS (SELECT y.a FROM y) SELECT * FROM x WHERE x.a = 1; WITH x AS (SELECT y.a FROM y WHERE y.a = 1) SELECT * FROM x WHERE TRUE; SELECT x.a FROM (SELECT * FROM x) AS x CROSS JOIN y WHERE y.a = 1 OR (x.a = 
1 AND x.b = 1); SELECT x.a FROM (SELECT * FROM x) AS x CROSS JOIN y WHERE (x.a = 1 AND x.b = 1) OR y.a = 1; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.a; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON x.a = y.a WHERE TRUE; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y WHERE (x.a = y.a AND x.a = 1 AND x.b = 1) OR x.a = y.b; SELECT x.a FROM (SELECT * FROM x) AS x JOIN y ON (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b WHERE (x.a = 1 AND x.a = y.a AND x.b = 1) OR x.a = y.b; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x) AS x WHERE x.c = 1; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x WHERE x.b * 1 = 1) AS x WHERE TRUE; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x) AS x WHERE x.c = 1 or x.c = 2; SELECT x.a FROM (SELECT x.a AS a, x.b * 1 AS c FROM x WHERE x.b * 1 = 1 OR x.b * 1 = 2) AS x WHERE TRUE; SELECT x.a AS a FROM (SELECT x.a FROM x AS x) AS x JOIN y WHERE x.a = 1 AND x.b = 1 AND (x.c = 1 OR y.c = 1); SELECT x.a AS a FROM (SELECT x.a FROM x AS x WHERE x.a = 1 AND x.b = 1) AS x JOIN y ON x.c = 1 OR y.c = 1 WHERE TRUE AND TRUE AND (TRUE); SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y) AS y ON y.a = 1 AND x.a = y.a; SELECT x.a FROM x AS x JOIN (SELECT y.a FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE; SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y) AS y ON y.a = 1 WHERE x.a = 1 AND x.b = 1 AND y.a = x.a; SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE WHERE x.a = 1 AND TRUE AND x.b = 1; SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT * FROM y AS y) AS y WHERE x.a = 1 AND x.b = 1 AND y.a = x.a AND y.a = 1; SELECT x.a AS a FROM x AS x JOIN (SELECT * FROM y AS y WHERE y.a = 1) AS y ON x.a = y.a AND TRUE WHERE x.a = 1 AND TRUE AND x.b = 1 AND TRUE; with t1 as (SELECT x.a, x.b, ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) as row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1; WITH t1 AS (SELECT x.a, x.b, 
ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.a) AS row_num FROM x) SELECT t1.a, t1.b FROM t1 WHERE row_num = 1; WITH m AS (SELECT a, b FROM (VALUES (1, 2)) AS a1(a, b)), n AS (SELECT a, b FROM m WHERE m.a = 1), o AS (SELECT a, b FROM m WHERE m.a = 2) SELECT n.a, n.b, n.a, o.b FROM n FULL OUTER JOIN o ON n.a = o.a; WITH m AS (SELECT a, b FROM (VALUES (1, 2)) AS a1(a, b)), n AS (SELECT a, b FROM m WHERE m.a = 1), o AS (SELECT a, b FROM m WHERE m.a = 2) SELECT n.a, n.b, n.a, o.b FROM n FULL OUTER JOIN o ON n.a = o.a; -- Pushdown predicate to HAVING (CNF) SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt FROM x AS x) AS x WHERE x.cnt > 0; SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt FROM x AS x HAVING COUNT(1) > 0) AS x WHERE TRUE; -- Pushdown predicate to HAVING (DNF) SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt, COUNT(x.a) AS cnt_a, COUNT(x.b) AS cnt_b FROM x AS x) AS x WHERE (x.cnt_a > 0 AND x.cnt_b > 0) OR x.cnt > 0; SELECT x.cnt AS cnt FROM (SELECT COUNT(1) AS cnt, COUNT(x.a) AS cnt_a, COUNT(x.b) AS cnt_b FROM x AS x HAVING COUNT(1) > 0 OR (COUNT(x.a) > 0 AND COUNT(x.b) > 0)) AS x WHERE x.cnt > 0 OR (x.cnt_a > 0 AND x.cnt_b > 0); SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; SELECT x.a, u.val FROM x AS x JOIN UNNEST(ARRAY(0, 1)) AS u("val") ON u.val < x.a WHERE TRUE; # dialect: presto SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; # dialect: trino SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; # dialect: athena SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; SELECT x.a, u.val FROM x AS x CROSS JOIN UNNEST(ARRAY[0, 1]) AS u("val") WHERE x.a > u.val; # dialect: presto SELECT x.a, u.val FROM 
UNNEST(ARRAY[0, 1]) AS u("val") CROSS JOIN x AS x WHERE x.a > u.val; SELECT x.a, u.val FROM UNNEST(ARRAY[0, 1]) AS u("val") JOIN x AS x ON u.val < x.a WHERE TRUE; # dialect: trino SELECT x.a, u.val FROM UNNEST(ARRAY[0, 1]) AS u("val") CROSS JOIN x AS x WHERE x.a > u.val; SELECT x.a, u.val FROM UNNEST(ARRAY[0, 1]) AS u("val") JOIN x AS x ON u.val < x.a WHERE TRUE; # dialect: athena SELECT x.a, u.val FROM UNNEST(ARRAY[0, 1]) AS u("val") CROSS JOIN x AS x WHERE x.a > u.val; SELECT x.a, u.val FROM UNNEST(ARRAY[0, 1]) AS u("val") JOIN x AS x ON u.val < x.a WHERE TRUE; -- DNF: cross-table predicate is only pushed to the last eligible JOIN (not to an earlier JOIN that doesn't yet have all referenced tables in scope) SELECT a.id, b.val, c.name FROM t_a AS a INNER JOIN t_b AS b ON b.a_id = a.id INNER JOIN t_c AS c ON c.b_id = b.id WHERE (b.flag = 1 AND c.active = 1) OR (b.flag = 2 AND c.active = 0); SELECT a.id, b.val, c.name FROM t_a AS a INNER JOIN t_b AS b ON a.id = b.a_id INNER JOIN t_c AS c ON ((b.flag = 1 AND c.active = 1) OR (b.flag = 2 AND c.active = 0)) AND b.id = c.b_id WHERE (b.flag = 1 AND c.active = 1) OR (b.flag = 2 AND c.active = 0); -- DNF: single-table predicate is pushed to its own JOIN regardless of join order SELECT a.id, b.val FROM t_a AS a INNER JOIN t_b AS b ON b.a_id = a.id WHERE (b.flag = 1 AND b.active = 1) OR (b.flag = 2 AND b.active = 0); SELECT a.id, b.val FROM t_a AS a INNER JOIN t_b AS b ON ((b.active = 0 AND b.flag = 2) OR (b.active = 1 AND b.flag = 1)) AND a.id = b.a_id WHERE (b.active = 0 AND b.flag = 2) OR (b.active = 1 AND b.flag = 1); ================================================ FILE: tests/fixtures/optimizer/pushdown_projections.sql ================================================ SELECT a FROM (SELECT * FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT 1 FROM (SELECT * FROM x) WHERE b = 2; SELECT 1 AS "1" FROM (SELECT x.b AS b FROM x AS x) AS _0 WHERE _0.b = 2; SELECT a, b, a from x; SELECT x.a AS a, x.b AS 
b, x.a AS a FROM x AS x; SELECT (SELECT c FROM y WHERE q.b = y.b) FROM (SELECT * FROM x) AS q; SELECT (SELECT y.c AS c FROM y AS y WHERE q.b = y.b) AS _col_0 FROM (SELECT x.b AS b FROM x AS x) AS q; SELECT a FROM x JOIN (SELECT b, c FROM y) AS z ON x.b = z.b; SELECT x.a AS a FROM x AS x JOIN (SELECT y.b AS b FROM y AS y) AS z ON x.b = z.b; SELECT x1.a FROM (SELECT * FROM x) AS x1, (SELECT * FROM x) AS x2; SELECT x1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS x1, (SELECT 1 AS _ FROM x AS x) AS x2; SELECT a FROM (SELECT DISTINCT a, b FROM x); SELECT _0.a AS a FROM (SELECT DISTINCT x.a AS a, x.b AS b FROM x AS x) AS _0; SELECT a FROM (SELECT a, b FROM x UNION ALL SELECT a, b FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION ALL SELECT x.a AS a FROM x AS x) AS _0; WITH t1 AS (SELECT x.a AS a, x.b AS b FROM x UNION ALL SELECT z.b AS b, z.c AS c FROM z) SELECT a, b FROM t1; WITH t1 AS (SELECT x.a AS a, x.b AS b FROM x AS x UNION ALL SELECT z.b AS b, z.c AS c FROM z AS z) SELECT t1.a AS a, t1.b AS b FROM t1 AS t1; SELECT a FROM (SELECT a, b FROM x UNION SELECT a, b FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x UNION SELECT x.a AS a, x.b AS b FROM x AS x) AS _0; WITH y AS (SELECT * FROM x) SELECT a FROM y; WITH y AS (SELECT x.a AS a FROM x AS x) SELECT y.a AS a FROM y AS y; WITH z AS (SELECT * FROM x), q AS (SELECT b FROM z) SELECT b FROM q; WITH z AS (SELECT x.b AS b FROM x AS x), q AS (SELECT z.b AS b FROM z AS z) SELECT q.b AS b FROM q AS q; WITH z AS (SELECT * FROM x) SELECT a FROM z UNION SELECT a FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z UNION SELECT z.a AS a FROM z AS z; SELECT b FROM (SELECT a, SUM(b) AS b FROM x GROUP BY a); SELECT _0.b AS b FROM (SELECT SUM(x.b) AS b FROM x AS x GROUP BY x.a) AS _0; SELECT b FROM (SELECT a, SUM(b) AS b FROM x ORDER BY a); SELECT _0.b AS b FROM (SELECT x.a AS a, SUM(x.b) AS b FROM x AS x ORDER BY a) AS _0; SELECT x FROM (VALUES(1, 2)) AS q(x, y); SELECT q.x 
AS x FROM (VALUES (1, 2)) AS q(x, y); SELECT x FROM UNNEST([1, 2]) AS q(x, y); SELECT q.x AS x FROM UNNEST(ARRAY(1, 2)) AS q(x, y); WITH t1 AS (SELECT cola, colb FROM UNNEST([STRUCT(1 AS cola, 'test' AS colb)]) AS "q"("cola", "colb")) SELECT cola FROM t1; WITH t1 AS (SELECT "q".cola AS cola FROM UNNEST(ARRAY(STRUCT(1 AS cola, 'test' AS colb))) AS "q"("cola", "colb")) SELECT t1.cola AS cola FROM t1 AS t1; SELECT x FROM VALUES(1, 2) AS q(x, y); SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y); SELECT i.a FROM x AS i LEFT JOIN (SELECT a, b FROM (SELECT a, b FROM x)) AS j ON i.a = j.a; SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) AS j ON i.a = j.a; WITH cte AS (SELECT source.a AS a, ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC) AS index FROM source AS source QUALIFY index) SELECT cte.a AS a FROM cte; WITH cte AS (SELECT source.a AS a FROM source AS source QUALIFY ROW_NUMBER() OVER (PARTITION BY source.id, source.timestamp ORDER BY source.a DESC)) SELECT cte.a AS a FROM cte AS cte; WITH cte AS (SELECT 1 AS x, 2 AS y, 3 AS z) SELECT cte.a FROM cte AS cte(a); WITH cte AS (SELECT 1 AS x) SELECT cte.a AS a FROM cte AS cte(a); WITH cte(x, y, z) AS (SELECT 1, 2, 3) SELECT a, z FROM cte AS cte(a); WITH cte AS (SELECT 1 AS x, 3 AS z) SELECT cte.a AS a, cte.z AS z FROM cte AS cte(a); WITH cte(x, y, z) AS (SELECT 1, 2, 3) SELECT a, z FROM (SELECT * FROM cte AS cte(b)) AS cte(a); WITH cte AS (SELECT 1 AS x, 3 AS z) SELECT cte.a AS a, cte.z AS z FROM (SELECT cte.b AS a, cte.z AS z FROM cte AS cte(b)) AS cte; WITH y AS (SELECT a FROM x) SELECT 1 FROM y; WITH y AS (SELECT 1 AS _ FROM x AS x) SELECT 1 AS "1" FROM y AS y; WITH y AS (SELECT SUM(a) FROM x) SELECT 1 FROM y; WITH y AS (SELECT MAX(1) AS _ FROM x AS x) SELECT 1 AS "1" FROM y AS y; WITH y AS (SELECT a FROM x GROUP BY a) SELECT 1 FROM y; WITH y AS (SELECT 1 AS _ FROM x AS x GROUP BY x.a) SELECT 1 AS "1" FROM y AS y; WITH cte AS (SELECT 
col FROM t) SELECT IF(1 IN UNNEST(col), 1, 0) AS col FROM cte; WITH cte AS (SELECT t.col AS col FROM t AS t) SELECT CASE WHEN 1 IN (SELECT UNNEST(cte.col)) THEN 1 ELSE 0 END AS col FROM cte AS cte; -------------------------------------- -- Unknown Star Expansion -------------------------------------- SELECT a FROM (SELECT * FROM zz) WHERE b = 1; SELECT _0.a AS a FROM (SELECT zz.a AS a, zz.b AS b FROM zz AS zz) AS _0 WHERE _0.b = 1; SELECT a FROM (SELECT * FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); SELECT _0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _0; SELECT a FROM (SELECT a FROM aa UNION ALL SELECT * FROM bb UNION ALL SELECT * from cc); SELECT _0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa UNION ALL SELECT bb.a AS a FROM bb AS bb UNION ALL SELECT cc.a AS a FROM cc AS cc) AS _0; SELECT a FROM (SELECT * FROM aa CROSS JOIN bb); SELECT _0.a AS a FROM (SELECT a AS a FROM aa AS aa CROSS JOIN bb AS bb) AS _0; SELECT a FROM (SELECT aa.* FROM aa); SELECT _0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _0; SELECT a FROM (SELECT * FROM (SELECT * FROM aa)); SELECT _1.a AS a FROM (SELECT _0.a AS a FROM (SELECT aa.a AS a FROM aa AS aa) AS _0) AS _1; with cte1 as (SELECT cola, colb FROM tb UNION ALL SELECT colc, cold FROM tb2) SELECT cola FROM cte1; WITH cte1 AS (SELECT tb.cola AS cola FROM tb AS tb UNION ALL SELECT tb2.colc AS colc FROM tb2 AS tb2) SELECT cte1.cola AS cola FROM cte1 AS cte1; SELECT * FROM ((SELECT c FROM t1) JOIN t2); SELECT * FROM ((SELECT t1.c AS c FROM t1 AS t1) AS _0, t2 AS t2); SELECT a, d FROM (SELECT 1 a, 2 c, 3 d, 4 e UNION ALL BY NAME SELECT 6 c, 7 d, 8 a, 9 e); SELECT _0.a AS a, _0.d AS d FROM (SELECT 1 AS a, 3 AS d UNION ALL BY NAME SELECT 7 AS d, 8 AS a) AS _0; SELECT a, b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) (SELECT a, b, c FROM cte1)); SELECT _0.a AS a, _0.b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT 
cte1.a AS a, cte1.b AS b FROM cte1 AS cte1) AS _0; ================================================ FILE: tests/fixtures/optimizer/qualify_columns.sql ================================================ -------------------------------------- -- Qualify columns -------------------------------------- SELECT a FROM x; SELECT x.a AS a FROM x AS x; SELECT "a" FROM x; SELECT x."a" AS a FROM x AS x; # execute: false SELECT a FROM zz GROUP BY a ORDER BY a; SELECT zz.a AS a FROM zz AS zz GROUP BY zz.a ORDER BY a; # execute: false SELECT x, p FROM (SELECT x from xx) xx CROSS JOIN yy; SELECT xx.x AS x, yy.p AS p FROM (SELECT xx.x AS x FROM xx AS xx) AS xx CROSS JOIN yy AS yy; SELECT a FROM x AS z; SELECT z.a AS a FROM x AS z; SELECT a AS a FROM x; SELECT x.a AS a FROM x AS x; SELECT x.a FROM x; SELECT x.a AS a FROM x AS x; SELECT x.a AS a FROM x; SELECT x.a AS a FROM x AS x; SELECT a AS b FROM x; SELECT x.a AS b FROM x AS x; # execute: false SELECT 1, 2 + 3 FROM x; SELECT 1 AS "1", 2 + 3 AS _col_1 FROM x AS x; # execute: false SELECT a + b FROM x; SELECT x.a + x.b AS _col_0 FROM x AS x; SELECT l.a FROM x l WHERE a IN (select a FROM x ORDER by a); SELECT l.a AS a FROM x AS l WHERE l.a IN (SELECT x.a AS a FROM x AS x ORDER BY a); # execute: false SELECT a, SUM(b) FROM x WHERE a > 1 AND b > 1 GROUP BY a; SELECT x.a AS a, SUM(x.b) AS _col_1 FROM x AS x WHERE x.a > 1 AND x.b > 1 GROUP BY x.a; SELECT SUM(a) AS c FROM x HAVING SUM(a) > 3; SELECT SUM(x.a) AS c FROM x AS x HAVING SUM(x.a) > 3; SELECT SUM(a) AS a FROM x HAVING SUM(a) > 3; SELECT SUM(x.a) AS a FROM x AS x HAVING SUM(x.a) > 3; SELECT SUM(a) AS c FROM x HAVING c > 3; SELECT SUM(x.a) AS c FROM x AS x HAVING SUM(x.a) > 3; # execute: false SELECT SUM(a) AS a FROM x HAVING a > 3; SELECT SUM(x.a) AS a FROM x AS x HAVING SUM(x.a) > 3; SELECT SUM(a) AS c FROM x HAVING SUM(b) > 3; SELECT SUM(x.a) AS c FROM x AS x HAVING SUM(x.b) > 3; SELECT a AS j, b FROM x ORDER BY j; SELECT x.a AS j, x.b AS b FROM x AS x ORDER BY j; SELECT a AS j, 
b AS a FROM x ORDER BY 1; SELECT x.a AS j, x.b AS a FROM x AS x ORDER BY j; SELECT SUM(a) AS c, SUM(b) AS d FROM x ORDER BY 1, 2; SELECT SUM(x.a) AS c, SUM(x.b) AS d FROM x AS x ORDER BY c, d; # execute: false SELECT CAST(a AS INT) FROM x ORDER BY a; SELECT CAST(x.a AS INT) AS a FROM x AS x ORDER BY a; # execute: false SELECT SUM(a), SUM(b) AS c FROM x ORDER BY 1, 2; SELECT SUM(x.a) AS _col_0, SUM(x.b) AS c FROM x AS x ORDER BY _col_0, c; SELECT a AS j, b FROM x GROUP BY j, b; SELECT x.a AS j, x.b AS b FROM x AS x GROUP BY x.a, x.b; SELECT a, b FROM x GROUP BY 1, 2; SELECT x.a AS a, x.b AS b FROM x AS x GROUP BY x.a, x.b; SELECT a, b FROM x ORDER BY 1, 2; SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY a, b; SELECT DISTINCT a AS c, b AS d FROM x ORDER BY 1; SELECT DISTINCT x.a AS c, x.b AS d FROM x AS x ORDER BY c; SELECT 2 FROM x GROUP BY 1; SELECT 2 AS "2" FROM x AS x GROUP BY 1; SELECT 'a' AS a FROM x GROUP BY 1; SELECT 'a' AS a FROM x AS x GROUP BY 1; SELECT NULL AS a FROM x GROUP BY 1; SELECT NULL AS a FROM x AS x GROUP BY 1; SELECT TRUE AS a FROM x GROUP BY 1; SELECT TRUE AS a FROM x AS x GROUP BY 1; # execute: false # dialect: oracle SELECT t."col" FROM tbl t; SELECT T."col" AS "col" FROM TBL T; # execute: false # dialect: oracle WITH base AS (SELECT x.dummy AS COL_1 FROM dual x) SELECT b."COL_1" FROM base b; WITH BASE AS (SELECT X.DUMMY AS COL_1 FROM DUAL X) SELECT B."COL_1" AS COL_1 FROM BASE B; # execute: false -- this query seems to be invalid in postgres and duckdb but valid in bigquery SELECT 2 a FROM x GROUP BY 1 HAVING a > 1; SELECT 2 AS a FROM x AS x GROUP BY 1 HAVING a > 1; SELECT 2 d FROM x GROUP BY d HAVING d > 1; SELECT 2 AS d FROM x AS x GROUP BY 1 HAVING d > 1; SELECT 2 d FROM x GROUP BY 1 ORDER BY 1; SELECT 2 AS d FROM x AS x GROUP BY 1 ORDER BY d; # execute: false SELECT DATE(a), DATE(b) AS c FROM x GROUP BY 1, 2; SELECT DATE(x.a) AS _col_0, DATE(x.b) AS c FROM x AS x GROUP BY DATE(x.a), DATE(x.b); # execute: false SELECT (SELECT MIN(a) FROM 
UNNEST([1, 2])) AS f FROM x GROUP BY 1; SELECT (SELECT MIN(_0.a) AS _col_0 FROM UNNEST(ARRAY(1, 2)) AS _0) AS f FROM x AS x GROUP BY 1; # dialect: bigquery WITH x AS (select 'a' as a, 1 as b) SELECT x.a AS c, y.a as d, SUM(x.b) AS y, FROM x join x as y on x.a = y.a group by 1, 2; WITH x AS (SELECT 'a' AS a, 1 AS b) SELECT x.a AS c, y.a AS d, SUM(x.b) AS y FROM x AS x JOIN x AS y ON x.a = y.a GROUP BY x.a, 2; SELECT SUM(x.a) AS c FROM x JOIN y ON x.b = y.b GROUP BY c; SELECT SUM(x.a) AS c FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY y.c; SELECT COALESCE(x.a) AS d FROM x JOIN y ON x.b = y.b GROUP BY d; SELECT COALESCE(x.a) AS d FROM x AS x JOIN y AS y ON x.b = y.b GROUP BY COALESCE(x.a); SELECT a + 1 AS d FROM x WHERE d > 1; SELECT x.a + 1 AS d FROM x AS x WHERE (x.a + 1) > 1; # execute: false SELECT a + 1 AS d, d + 2 FROM x; SELECT x.a + 1 AS d, x.a + 1 + 2 AS _col_1 FROM x AS x; SELECT a AS a, b FROM x ORDER BY a; SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY a; SELECT a, b FROM x ORDER BY a; SELECT x.a AS a, x.b AS b FROM x AS x ORDER BY a; SELECT a FROM x ORDER BY b; SELECT x.a AS a FROM x AS x ORDER BY x.b; SELECT SUM(a) AS a FROM x ORDER BY SUM(a); SELECT SUM(x.a) AS a FROM x AS x ORDER BY SUM(x.a); # execute: false SELECT AGGREGATE(ARRAY(a, x.b), 0, (x, acc) -> x + acc + a) AS sum_agg FROM x; SELECT AGGREGATE(ARRAY(x.a, x.b), 0, (x, acc) -> x + acc + x.a) AS sum_agg FROM x AS x; # dialect: starrocks # execute: false SELECT DATE_TRUNC('week', a) AS a FROM x; SELECT DATE_TRUNC('WEEK', x.a) AS a FROM x AS x; # dialect: bigquery # execute: false SELECT DATE_TRUNC(a, MONTH) AS a FROM x; SELECT DATE_TRUNC(x.a, MONTH) AS a FROM x AS x; # execute: false SELECT x FROM READ_PARQUET('path.parquet', hive_partition=1); SELECT _0.x AS x FROM READ_PARQUET('path.parquet', hive_partition = 1) AS _0; # execute: false select * from (values (1, 2)); SELECT _0._col_0 AS _col_0, _0._col_1 AS _col_1 FROM (VALUES (1, 2)) AS _0(_col_0, _col_1); # execute: false select * from (values 
(1, 2)) x; SELECT x._col_0 AS _col_0, x._col_1 AS _col_1 FROM (VALUES (1, 2)) AS x(_col_0, _col_1); # execute: false SELECT SOME_UDF(data).* FROM t; SELECT SOME_UDF(t.data).* FROM t AS t; # execute: false SELECT p.* FROM p UNION ALL SELECT p2.* FROM p2; SELECT p.* FROM p AS p UNION ALL SELECT p2.* FROM p2 AS p2; # execute: false # allow_partial_qualification: true # validate_qualify_columns: false SELECT a + 1 AS i, missing_column FROM x; SELECT x.a + 1 AS i, missing_column AS missing_column FROM x AS x; # execute: false # dialect: clickhouse SELECT s, arr1, arr2 FROM arrays_test LEFT ARRAY JOIN arr1, arrays_test.arr2; SELECT arrays_test.s AS s, arrays_test.arr1 AS arr1, arrays_test.arr2 AS arr2 FROM arrays_test AS arrays_test LEFT ARRAY JOIN arrays_test.arr1, arrays_test.arr2; # execute: false # dialect: snowflake WITH employees AS ( SELECT * FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20) ) AS t(title, employee_ID, manager_ID) ) SELECT employee_ID, manager_ID, title, level FROM employees START WITH title = 'President' CONNECT BY manager_ID = PRIOR employee_id ORDER BY employee_ID NULLS LAST; WITH EMPLOYEES AS (SELECT T.TITLE AS TITLE, T.EMPLOYEE_ID AS EMPLOYEE_ID, T.MANAGER_ID AS MANAGER_ID FROM (VALUES ('President', 1, NULL), ('Vice President Engineering', 10, 1), ('Programmer', 100, 10), ('QA Engineer', 101, 10), ('Vice President HR', 20, 1), ('Health Insurance Analyst', 200, 20)) AS T(TITLE, EMPLOYEE_ID, MANAGER_ID)) SELECT EMPLOYEES.EMPLOYEE_ID AS EMPLOYEE_ID, EMPLOYEES.MANAGER_ID AS MANAGER_ID, EMPLOYEES.TITLE AS TITLE, LEVEL AS LEVEL FROM EMPLOYEES AS EMPLOYEES START WITH EMPLOYEES.TITLE = 'President' CONNECT BY EMPLOYEES.MANAGER_ID = PRIOR EMPLOYEES.EMPLOYEE_ID ORDER BY EMPLOYEE_ID; # execute: false # dialect: oracle WITH t1 AS ( SELECT 1 AS c1, 1 AS c2, 'Y' AS TOP_PARENT_INDICATOR, 1 AS id FROM DUAL ), t2 AS ( 
SELECT 1 AS c2, 2 AS id FROM DUAL ) SELECT t1.c1 FROM t1 LEFT JOIN t2 ON t1.c2 = t2.c2 WHERE (t1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1) START WITH (t1.id IS NOT NULL) CONNECT BY PRIOR t1.id = t2.id; WITH T1 AS (SELECT 1 AS C1, 1 AS C2, 'Y' AS TOP_PARENT_INDICATOR, 1 AS ID FROM DUAL DUAL), T2 AS (SELECT 1 AS C2, 2 AS ID FROM DUAL DUAL) SELECT T1.C1 AS C1 FROM T1 T1 LEFT JOIN T2 T2 ON T1.C2 = T2.C2 WHERE (T1.TOP_PARENT_INDICATOR = 'Y' OR LEVEL = 1) START WITH (NOT T1.ID IS NULL) CONNECT BY PRIOR T1.ID = T2.ID; # execute: false # dialect: postgres SELECT * FROM ROWS FROM (GENERATE_SERIES(1, 3), GENERATE_SERIES(10, 12)) AS t(a, b); SELECT t.a AS a, t.b AS b FROM ROWS FROM (GENERATE_SERIES(1, 3), GENERATE_SERIES(10, 12)) AS t(a, b); # execute: false # dialect: clickhouse SELECT generate_series FROM generate_series(0, 10) AS g; SELECT g.generate_series AS generate_series FROM generate_series(0, 10) AS g(generate_series); # execute: false # dialect: snowflake SELECT * FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ORDER BY empid; SELECT * FROM QUARTERLY_SALES AS QUARTERLY_SALES PIVOT(SUM(QUARTERLY_SALES.AMOUNT) FOR QUARTERLY_SALES.QUARTER IN (ANY ORDER BY QUARTER)) AS _0 ORDER BY _0.EMPID; # execute: false SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) AS x FROM t; SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY t.x) AS x FROM t AS t; # execute: false # dialect: bigquery WITH t AS (SELECT 1 AS c) SELECT TO_JSON_STRING(t) FROM t; WITH t AS (SELECT 1 AS c) SELECT TO_JSON_STRING(t) AS _col_0 FROM t AS t; # execute: false # dialect: bigquery SELECT DATE_TRUNC(col1, WEEK(MONDAY)), col2 FROM t; SELECT DATE_TRUNC(t.col1, WEEK(MONDAY)) AS _col_0, t.col2 AS col2 FROM t AS t; # execute: false SELECT first, second FROM (SELECT 'val' AS col, STACK(2, 1, 2, 3) AS (first, second)) AS tbl; SELECT tbl.first AS first, tbl.second AS second FROM (SELECT 'val' AS col, STACK(2, 1, 2, 3) AS (first, second)) AS tbl; # execute: false # dialect: postgres 
WITH t AS (SELECT 1 AS c) SELECT t FROM t; WITH t AS (SELECT 1 AS c) SELECT t AS _col_0 FROM t AS t; -------------------------------------- -- Derived tables -------------------------------------- SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y(a); SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT y.c AS c FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS y(c); SELECT y.c AS c FROM (SELECT x.a AS c, x.b AS b FROM x AS x) AS y; SELECT a FROM (SELECT a FROM x AS x) y; SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y; SELECT a FROM (SELECT a AS a FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT a FROM (SELECT a FROM (SELECT a FROM x)); SELECT _1.a AS a FROM (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) AS _1; SELECT x.a FROM x AS x JOIN (SELECT * FROM x) AS y ON x.a = y.a; SELECT x.a AS a FROM x AS x JOIN (SELECT x.a AS a, x.b AS b FROM x AS x) AS y ON x.a = y.a; SELECT a FROM x as t1 /* there is comment */; SELECT t1.a AS a FROM x AS t1 /* there is comment */; -------------------------------------- -- Joins -------------------------------------- SELECT a, c FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT a, c FROM x, y; SELECT x.a AS a, y.c AS c FROM x AS x, y AS y; -------------------------------------- -- Unions -------------------------------------- SELECT a FROM x UNION SELECT a FROM x ORDER BY a; SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x ORDER BY a; SELECT a FROM x UNION SELECT a FROM x UNION SELECT a FROM x ORDER BY a; SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x ORDER BY a; SELECT a FROM (SELECT a FROM x UNION SELECT a FROM x) ORDER BY a; SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x UNION SELECT x.a AS a FROM x AS x) AS _0 ORDER BY a; # title: nested 
subqueries in union ((select a from x where a < 1)) UNION ((select a from x where a > 2)); (SELECT x.a AS a FROM x AS x WHERE x.a < 1) UNION (SELECT x.a AS a FROM x AS x WHERE x.a > 2); # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); SELECT _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar INNER UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar, _0.baz AS baz FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL CORRESPONDING BY (foo, bar) SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 
AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz); SELECT _0.foo AS foo, _0.bar AS bar FROM (SELECT 1 AS foo, 2 AS bar FULL UNION ALL BY NAME ON (foo, bar) SELECT 3 AS bar, 4 AS baz) AS _0; # dialect: bigquery # execute: false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar); SELECT _0.bar AS bar FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) LEFT UNION ALL BY NAME ON (bar) SELECT 3 AS foo, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar); SELECT _0.foo AS foo, _0.qux AS qux FROM ((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) AS _0; # dialect: bigquery # execute: false SELECT * FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo); SELECT _0.foo AS foo FROM (((SELECT 1 AS foo, 2 AS bar LEFT UNION ALL BY NAME SELECT 3 AS bar, 4 AS baz) FULL UNION ALL BY NAME ON (foo, qux) SELECT 3 AS qux, 4 AS bar) INNER UNION ALL BY NAME ON (foo) SELECT 6 AS foo) AS _0; # Title: Nested set operations with modifiers # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL OUTER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), 
t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d, _0.e AS e, _0.f AS f FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL OUTER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + INNER) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 INNER UNION ALL BY NAME (SELECT * FROM t3 FULL OUTER UNION ALL BY NAME SELECT * FROM t4)))); WITH t1 AS (SELECT 1 AS a, 2 AS b), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS e, 3 AS f) SELECT _0.a AS a, _0.b AS b, _0.c AS c FROM ((SELECT t1.a AS a, t1.b AS b FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 INNER UNION ALL BY NAME (SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 FULL OUTER UNION ALL BY NAME SELECT t4.e AS e, t4.f AS f FROM t4 AS t4))) AS _0); # Title: Nested set operations with different modifiers (FULL + LEFT) # dialect: bigquery # execute: false WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT * FROM ((SELECT * FROM t1 FULL OUTER UNION ALL BY NAME (SELECT * FROM t2 FULL UNION ALL BY NAME (SELECT * FROM t3 LEFT UNION ALL BY NAME SELECT * FROM t4)))); WITH t1 AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d), t2 AS (SELECT 2 AS b, 3 AS c), t3 AS (SELECT 2 AS c, 3 AS d), t4 AS (SELECT 2 AS d, 3 AS e) SELECT _0.a AS a, _0.b AS b, _0.c AS c, _0.d AS d FROM ((SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d FROM t1 AS t1 FULL OUTER UNION ALL BY NAME (SELECT t2.b AS b, t2.c AS c FROM t2 AS t2 FULL UNION ALL BY NAME 
(SELECT t3.c AS c, t3.d AS d FROM t3 AS t3 LEFT UNION ALL BY NAME SELECT t4.d AS d, t4.e AS e FROM t4 AS t4))) AS _0); -------------------------------------- -- Subqueries -------------------------------------- SELECT a FROM x WHERE b IN (SELECT c FROM y); SELECT x.a AS a FROM x AS x WHERE x.b IN (SELECT y.c AS c FROM y AS y); # execute: false SELECT (SELECT c FROM y) FROM x; SELECT (SELECT y.c AS c FROM y AS y) AS _col_0 FROM x AS x; # execute: false WITH t(c) AS (SELECT 1) SELECT (SELECT c) FROM t; WITH t AS (SELECT 1 AS c) SELECT (SELECT t.c AS c) AS _col_0 FROM t AS t; # execute: false WITH t1(c1) AS (SELECT 1), t2(c2) AS (SELECT 2) SELECT (SELECT c1 FROM t2) FROM t1; WITH t1 AS (SELECT 1 AS c1), t2 AS (SELECT 2 AS c2) SELECT (SELECT t1.c1 AS c1 FROM t2 AS t2) AS _col_0 FROM t1 AS t1; SELECT a FROM (SELECT a FROM x) WHERE a IN (SELECT b FROM (SELECT b FROM y)); SELECT _1.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _1 WHERE _1.a IN (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0); # dialect: mysql # execute: false SELECT * FROM table_a as A WHERE A.col1 IN (SELECT MAX(B.col2) FROM table_b as B UNION ALL SELECT MAX(C.col2) FROM table_b as C); SELECT * FROM table_a AS `A` WHERE `A`.col1 IN (SELECT MAX(`B`.col2) AS _col_0 FROM table_b AS `B` UNION ALL SELECT MAX(`C`.col2) AS _col_0 FROM table_b AS `C`); # Title: Unnest deep subquery select * from x where b in ((((select b from y)))); SELECT x.a AS a, x.b AS b FROM x AS x WHERE x.b IN (SELECT y.b AS b FROM y AS y); -------------------------------------- -- Correlated subqueries -------------------------------------- SELECT a FROM x WHERE b IN (SELECT c FROM y WHERE y.b = x.a); SELECT x.a AS a FROM x AS x WHERE x.b IN (SELECT y.c AS c FROM y AS y WHERE y.b = x.a); SELECT a FROM x WHERE b IN (SELECT c FROM y WHERE y.b = a); SELECT x.a AS a FROM x AS x WHERE x.b IN (SELECT y.c AS c FROM y AS y WHERE y.b = x.a); SELECT a FROM x WHERE b IN (SELECT b FROM y AS x); SELECT x.a AS a FROM x AS x WHERE x.b IN 
(SELECT x.b AS b FROM y AS x); SELECT a FROM x AS i WHERE b IN (SELECT b FROM y AS j WHERE j.b IN (SELECT c FROM y AS k WHERE k.b = j.b)); SELECT i.a AS a FROM x AS i WHERE i.b IN (SELECT j.b AS b FROM y AS j WHERE j.b IN (SELECT k.c AS c FROM y AS k WHERE k.b = j.b)); # execute: false SELECT (SELECT n.a FROM n WHERE n.id = m.id) FROM m AS m; SELECT (SELECT n.a AS a FROM n AS n WHERE n.id = m.id) AS _col_0 FROM m AS m; -------------------------------------- -- Expand * -------------------------------------- SELECT * FROM x; SELECT x.a AS a, x.b AS b FROM x AS x; SELECT x.* FROM x; SELECT x.a AS a, x.b AS b FROM x AS x; SELECT * FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, x.b AS b, y.b AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT x.* FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; SELECT x.*, y.* FROM x JOIN y ON x.b = y.b; SELECT x.a AS a, x.b AS b, y.b AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT a FROM (SELECT * FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM x GROUP BY 1, 2; SELECT x.a AS a, x.b AS b FROM x AS x GROUP BY x.a, x.b; SELECT * FROM (SELECT * FROM x) AS s(a, b); SELECT s.a AS a, s.b AS b FROM (SELECT x.a AS a, x.b AS b FROM x AS x) AS s; # execute: false SELECT * FROM (SELECT * FROM t) AS s(a, b); SELECT s.a AS a, s.b AS b FROM (SELECT t.a AS a, t.b AS b FROM t AS t) AS s; # execute: false SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2) AS s(b); SELECT s.b AS b FROM (SELECT t1.b AS b FROM t1 AS t1 UNION ALL SELECT t2.b AS b FROM t2 AS t2) AS s; # dialect: bigquery # execute: false WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct("test" AS col1, Struct(3 AS col2) AS lvl2) AS lvl1) AS col), tbl2 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct("test" AS col1, Struct(3 AS col2) AS lvl2) AS lvl1) AS col) SELECT tbl1.col.*, 
tbl2.col.* FROM tbl1, tbl2; WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct('test' AS col1, Struct(3 AS col2) AS lvl2) AS lvl1) AS col), tbl2 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct('test' AS col1, Struct(3 AS col2) AS lvl2) AS lvl1) AS col) SELECT tbl1.col.col1 AS col1, tbl1.col.col2 AS col2, tbl1.col.lvl1 AS lvl1, tbl2.col.col1 AS col1, tbl2.col.col2 AS col2, tbl2.col.lvl1 AS lvl1 FROM tbl1 AS tbl1 CROSS JOIN tbl2 AS tbl2; # dialect: bigquery # execute: false WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct("test" AS col1, Struct(3 AS col2) AS lvl2) AS lvl1, 3 AS col3) AS col) SELECT tbl1.col.lvl1.* FROM tbl1; WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col2, Struct('test' AS col1, Struct(3 AS col2) AS lvl2) AS lvl1, 3 AS col3) AS col) SELECT tbl1.col.lvl1.col1 AS col1, tbl1.col.lvl1.lvl2 AS lvl2 FROM tbl1 AS tbl1; # dialect: bigquery # execute: false # title: Cannot expand struct star with unnamed fields WITH tbl1 AS (SELECT STRUCT(1 AS col1, Struct(5 AS col1)) AS col) SELECT tbl1.col.* FROM tbl1; WITH tbl1 AS (SELECT STRUCT(1 AS col1, Struct(5 AS col1)) AS col) SELECT tbl1.col.* FROM tbl1 AS tbl1; # dialect: bigquery # execute: false # title: Cannot expand struct star with ambiguous fields WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1; WITH tbl1 AS (SELECT STRUCT(1 AS col1, 2 AS col1) AS col) SELECT tbl1.col.* FROM tbl1 AS tbl1; # dialect: bigquery # execute: false # title: BigQuery - Expand struct literal WITH tbl1 AS (SELECT STRUCT(1 AS f0, 2 as f1) AS col) SELECT tbl1.col.* from tbl1; WITH tbl1 AS (SELECT STRUCT(1 AS f0, 2 AS f1) AS col) SELECT tbl1.col.f0 AS f0, tbl1.col.f1 AS f1 FROM tbl1 AS tbl1; # dialect: bigquery # execute: false # title: BigQuery - Expand top level nested struct SELECT one.* FROM structs; SELECT structs.one.a_1 AS a_1, structs.one.b_1 AS b_1 FROM structs AS structs; # dialect: risingwave # execute: false # title: RisingWave - Expand top level nested struct SELECT (one).* 
FROM structs; SELECT (structs.one).a_1 AS a_1, (structs.one).b_1 AS b_1 FROM structs AS structs; # dialect: risingwave # execute: false # title: RisingWave - Preserve struct field identifier quotes SELECT (quoted).* FROM structs; SELECT (structs.quoted)."foo bar" AS "foo bar" FROM structs AS structs; # dialect: bigquery # execute: false # title: BigQuery - Expand midlevel struct SELECT nested_0.nested_1.* FROM structs; SELECT structs.nested_0.nested_1.a_2 AS a_2, structs.nested_0.nested_1.nested_2 AS nested_2 FROM structs AS structs; # dialect: risingwave # execute: false # title: RisingWave - Expand midlevel struct SELECT ((nested_0).nested_1).* FROM structs; SELECT ((structs.nested_0).nested_1).a_2 AS a_2, ((structs.nested_0).nested_1).nested_2 AS nested_2 FROM structs AS structs; # title: CSV files are not scanned by default # execute: false SELECT * FROM READ_CSV('file.csv'); SELECT * FROM READ_CSV('file.csv') AS _0; # dialect: clickhouse # Title: Expand tuples in VALUES using the structure provided # execute: false SELECT * FROM VALUES ('person String, place String', ('Noah', 'Paris')); SELECT _0.person AS person, _0.place AS place FROM VALUES ('person String, place String', ('Noah', 'Paris')) AS _0(person, place); # dialect: clickhouse # Title: Expand tuples in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES ((1, 1), (2, 2)); SELECT _0.c1 AS c1, _0.c2 AS c2 FROM VALUES ((1, 1), (2, 2)) AS _0(c1, c2); # dialect: clickhouse # Title: Expand fields in VALUES using the default naming scheme in CH # execute: false SELECT * FROM VALUES (1, 2, 3); SELECT _0.c1 AS c1 FROM VALUES ((1), (2), (3)) AS _0(c1); # title: Expand PIVOT column combinations # dialect: duckdb WITH cities AS (SELECT * FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT * FROM cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')); 
WITH cities AS (SELECT t.country AS country, t.name AS name, t.year AS year, t.population AS population FROM (VALUES ('nl', 'amsterdam', 2000, 1005)) AS t(country, name, year, population)) SELECT _0.nl_2000_amsterdam_total AS nl_2000_amsterdam_total, _0.nl_2000_amsterdam_count AS nl_2000_amsterdam_count, _0.nl_2000_seattle_total AS nl_2000_seattle_total, _0.nl_2000_seattle_count AS nl_2000_seattle_count, _0.nl_2010_amsterdam_total AS nl_2010_amsterdam_total, _0.nl_2010_amsterdam_count AS nl_2010_amsterdam_count, _0.nl_2010_seattle_total AS nl_2010_seattle_total, _0.nl_2010_seattle_count AS nl_2010_seattle_count, _0.us_2000_amsterdam_total AS us_2000_amsterdam_total, _0.us_2000_amsterdam_count AS us_2000_amsterdam_count, _0.us_2000_seattle_total AS us_2000_seattle_total, _0.us_2000_seattle_count AS us_2000_seattle_count, _0.us_2010_amsterdam_total AS us_2010_amsterdam_total, _0.us_2010_amsterdam_count AS us_2010_amsterdam_count, _0.us_2010_seattle_total AS us_2010_seattle_total, _0.us_2010_seattle_count AS us_2010_seattle_count FROM cities AS cities PIVOT(SUM(population) AS total, COUNT(population) AS count FOR country IN ('nl', 'us') year IN (2000, 2010) name IN ('amsterdam', 'seattle')) AS _0; -------------------------------------- -- CTEs -------------------------------------- WITH z AS (SELECT x.a AS a FROM x) SELECT z.a AS a FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z; WITH z(a) AS (SELECT a FROM x) SELECT * FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z; WITH z AS (SELECT a FROM x) SELECT * FROM z as q; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT q.a AS a FROM z AS q; WITH z AS (SELECT a FROM x) SELECT * FROM z; WITH z AS (SELECT x.a AS a FROM x AS x) SELECT z.a AS a FROM z AS z; WITH z AS (SELECT a FROM x), q AS (SELECT * FROM z) SELECT * FROM q; WITH z AS (SELECT x.a AS a FROM x AS x), q AS (SELECT z.a AS a FROM z AS z) SELECT q.a AS a FROM q AS q; WITH z AS (SELECT * FROM x) SELECT * FROM z 
UNION SELECT * FROM z ORDER BY a, b; WITH z AS (SELECT x.a AS a, x.b AS b FROM x AS x) SELECT z.a AS a, z.b AS b FROM z AS z UNION SELECT z.a AS a, z.b AS b FROM z AS z ORDER BY a, b; WITH z AS (SELECT * FROM x), q AS (SELECT b FROM z) SELECT b FROM q; WITH z AS (SELECT x.a AS a, x.b AS b FROM x AS x), q AS (SELECT z.b AS b FROM z AS z) SELECT q.b AS b FROM q AS q; WITH z AS ((SELECT b FROM x UNION ALL SELECT b FROM y) ORDER BY b) SELECT * FROM z; WITH z AS ((SELECT x.b AS b FROM x AS x UNION ALL SELECT y.b AS b FROM y AS y) ORDER BY b) SELECT z.b AS b FROM z AS z; WITH cte(x) AS (SELECT 1) SELECT * FROM cte AS cte(a); WITH cte AS (SELECT 1 AS x) SELECT cte.a AS a FROM cte AS cte(a); WITH cte(x, y) AS (SELECT 1, 2) SELECT cte.* FROM cte AS cte(a); WITH cte AS (SELECT 1 AS x, 2 AS y) SELECT cte.a AS a, cte.y AS y FROM cte AS cte(a); -- Cannot pop table column aliases for recursive ctes (redshift). WITH RECURSIVE cte(x) AS (SELECT 1), cte2(y) AS (SELECT 2) SELECT * FROM cte, cte2; WITH RECURSIVE cte(x) AS (SELECT 1 AS x), cte2(y) AS (SELECT 2 AS y) SELECT cte.x AS x, cte2.y AS y FROM cte AS cte, cte2 AS cte2; # execute: false WITH player AS (SELECT player.name, player.asset.info FROM players) SELECT * FROM player; WITH player AS (SELECT players.player.name AS name, players.player.asset.info AS info FROM players AS players) SELECT player.name AS name, player.info AS info FROM player AS player; # execute: false WITH tesT AS (SELECT c1 FROM t1) SELECT c1 FROM test; WITH test AS (SELECT t1.c1 AS c1 FROM t1 AS t1) SELECT test.c1 AS c1 FROM test AS test; -------------------------------------- -- Except, Replace, Rename -------------------------------------- # execute: false SELECT * RENAME(a AS d) FROM x; SELECT x.a AS d, x.b AS b FROM x AS x; # execute: false SELECT * EXCEPT(b) RENAME(a AS d) FROM x; SELECT x.a AS d FROM x AS x; SELECT x.* EXCEPT(a), y.* FROM x, y; SELECT x.b AS b, y.b AS b, y.c AS c FROM x AS x, y AS y; SELECT * EXCEPT(a) FROM x; SELECT x.b AS b FROM x 
AS x; # execute: false SELECT * EXCEPT(x.a) FROM x AS x; SELECT x.b AS b FROM x AS x; # execute: false # note: this query would fail in the engine level because there are 0 selected columns SELECT * EXCEPT (a, b) FROM x; SELECT * EXCEPT (a, b) FROM x AS x; SELECT x.a, * EXCEPT (a) FROM x AS x LEFT JOIN x AS y USING (a); SELECT x.a AS a, x.b AS b, y.b AS b FROM x AS x LEFT JOIN x AS y ON x.a = y.a; SELECT COALESCE(CAST(t1.a AS VARCHAR), '') AS a, t2.* EXCEPT (a) FROM x AS t1, x AS t2; SELECT COALESCE(CAST(t1.a AS VARCHAR), '') AS a, t2.b AS b FROM x AS t1, x AS t2; # execute: false SELECT * REPLACE(2 AS a) FROM x; SELECT 2 AS a, x.b AS b FROM x AS x; # execute: false SELECT * EXCEPT (a, b) REPLACE (a AS a) FROM x; SELECT * EXCEPT (a, b) REPLACE (x.a AS a) FROM x AS x; # execute: false SELECT * REPLACE(COALESCE(b, a) AS a, a as b) FROM x; SELECT COALESCE(x.b, x.a) AS a, x.a AS b FROM x AS x; # execute: false SELECT * REPLACE(1 AS a) RENAME(b as alias_b) FROM x; SELECT 1 AS a, x.b AS alias_b FROM x AS x; # execute: false SELECT * EXCEPT(a) REPLACE(COALESCE(a, b) AS b) RENAME(b AS new_b) FROM x; SELECT COALESCE(x.a, x.b) AS new_b FROM x AS x; # execute: false SELECT * REPLACE(1 AS a, a AS b) RENAME(b AS new_b) FROM x; SELECT 1 AS a, x.a AS new_b FROM x AS x; -------------------------------------- -- Using -------------------------------------- SELECT x.b FROM x JOIN y USING (b); SELECT x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; # execute: false WITH cte AS (SELECT a.b.c.d.f.g FROM tbl1) SELECT g FROM (SELECT g FROM tbl2) tbl2 JOIN cte USING(g); WITH cte AS (SELECT tbl1.a.b.c.d.f.g AS g FROM tbl1 AS tbl1) SELECT COALESCE(tbl2.g, cte.g) AS g FROM (SELECT tbl2.g AS g FROM tbl2 AS tbl2) AS tbl2 JOIN cte AS cte ON tbl2.g = cte.g; SELECT x.b FROM x JOIN y USING (b) JOIN z USING (b); SELECT x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b; SELECT b FROM x AS x2 JOIN y AS y2 USING (b); SELECT COALESCE(x2.b, y2.b) AS b FROM x AS x2 JOIN y AS y2 ON x2.b 
= y2.b; SELECT b FROM x JOIN y USING (b) WHERE b = 1 and y.b = 2; SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b WHERE COALESCE(x.b, y.b) = 1 AND y.b = 2; SELECT b FROM x JOIN y USING (b) JOIN z USING (b); SELECT COALESCE(x.b, y.b, z.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b JOIN z AS z ON x.b = z.b; SELECT * FROM x JOIN y USING(b); SELECT x.a AS a, COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT x.* FROM x JOIN y USING(b); SELECT x.a AS a, COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b; SELECT * FROM x LEFT JOIN y USING(b); SELECT x.a AS a, COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x LEFT JOIN y AS y ON x.b = y.b; SELECT b FROM x JOIN y USING(b); SELECT COALESCE(x.b, y.b) AS b FROM x AS x JOIN y AS y ON x.b = y.b; SELECT b, c FROM x JOIN y USING(b); SELECT COALESCE(x.b, y.b) AS b, y.c AS c FROM x AS x JOIN y AS y ON x.b = y.b; SELECT b, c FROM y JOIN z USING(b, c); SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c; SELECT * FROM y JOIN z USING(b, c); SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c; SELECT * FROM y JOIN z USING(b, c) WHERE b = 2 AND c = 3; SELECT COALESCE(y.b, z.b) AS b, COALESCE(y.c, z.c) AS c FROM y AS y JOIN z AS z ON y.b = z.b AND y.c = z.c WHERE COALESCE(y.b, z.b) = 2 AND COALESCE(y.c, z.c) = 3; -- We can safely convert `b` to `x.b` in the following two queries, because the original queries -- would be invalid if `b` also existed in `t`'s schema (which we don't know), due to ambiguity. 
# execute: false SELECT b FROM x JOIN t USING(a); SELECT x.b AS b FROM x AS x JOIN t AS t ON x.a = t.a; # execute: false SELECT b FROM t JOIN x USING(a); SELECT x.b AS b FROM t AS t JOIN x AS x ON t.a = x.a; # execute: false SELECT a FROM t1 JOIN t2 USING(a); SELECT COALESCE(t1.a, t2.a) AS a FROM t1 AS t1 JOIN t2 AS t2 ON t1.a = t2.a; WITH m(a) AS (SELECT 1), n(b) AS (SELECT 1) SELECT * FROM m JOIN n AS foo(a) USING (a); WITH m AS (SELECT 1 AS a), n AS (SELECT 1 AS b) SELECT COALESCE(m.a, foo.a) AS a FROM m AS m JOIN n AS foo(a) ON m.a = foo.a; # title: coalesce the USING clause's columns (3 joins, 2 join columns) WITH t1 AS (SELECT 'x' AS id, DATE '2024-01-01' AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, DATE '2024-02-02' AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, DATE '2024-02-02' AS foo, 456 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING(id, foo) FULL OUTER JOIN t3 USING(id, foo); WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT COALESCE(t1.id, t2.id, t3.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo) AS foo, t1.value AS value, t2.value AS value, t3.value AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo; # title: coalesce the USING clause's columns (3 joins, 3 join columns) WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING (id, foo, value) FULL OUTER JOIN t3 USING (id, foo, value); WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 
AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value) SELECT COALESCE(t1.id, t2.id, t3.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo) AS foo, COALESCE(t1.value, t2.value, t3.value) AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo AND t1.value = t2.value FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo AND COALESCE(t1.value, t2.value) = t3.value; # title: coalesce the USING clause's columns (4 joins, 2 join columns) WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value), t4 AS (SELECT 'x' AS id, CAST('2024-03-03' AS DATE) AS foo, 789 AS value) SELECT * FROM t1 FULL OUTER JOIN t2 USING (id, foo) FULL OUTER JOIN t3 USING (id, foo) FULL OUTER JOIN t4 USING (id, foo); WITH t1 AS (SELECT 'x' AS id, CAST('2024-01-01' AS DATE) AS foo, 000 AS value), t2 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 123 AS value), t3 AS (SELECT 'x' AS id, CAST('2024-02-02' AS DATE) AS foo, 456 AS value), t4 AS (SELECT 'x' AS id, CAST('2024-03-03' AS DATE) AS foo, 789 AS value) SELECT COALESCE(t1.id, t2.id, t3.id, t4.id) AS id, COALESCE(t1.foo, t2.foo, t3.foo, t4.foo) AS foo, t1.value AS value, t2.value AS value, t3.value AS value, t4.value AS value FROM t1 AS t1 FULL OUTER JOIN t2 AS t2 ON t1.id = t2.id AND t1.foo = t2.foo FULL OUTER JOIN t3 AS t3 ON COALESCE(t1.id, t2.id) = t3.id AND COALESCE(t1.foo, t2.foo) = t3.foo FULL OUTER JOIN t4 AS t4 ON COALESCE(t1.id, t2.id, t3.id) = t4.id AND COALESCE(t1.foo, t2.foo, t3.foo) = t4.foo; # title: Name anonymous STRUCT fields if replacing USING columns WITH t1 AS (SELECT 1 AS id), t2 AS (SELECT 2 AS id) SELECT STRUCT(id) AS my_field FROM t1 JOIN t2 USING (id); WITH t1 AS (SELECT 1 AS id), t2 AS (SELECT 2 AS id) SELECT STRUCT(COALESCE(t1.id, t2.id) AS id) AS 
my_field FROM t1 AS t1 JOIN t2 AS t2 ON t1.id = t2.id; # title: Do not rename aliased STRUCT fields if replacing USING columns WITH t1 AS (SELECT 1 AS id), t2 AS (SELECT 2 AS id) SELECT STRUCT(id AS col) AS my_field FROM t1 JOIN t2 USING (id); WITH t1 AS (SELECT 1 AS id), t2 AS (SELECT 2 AS id) SELECT STRUCT(COALESCE(t1.id, t2.id) AS col) AS my_field FROM t1 AS t1 JOIN t2 AS t2 ON t1.id = t2.id; -------------------------------------- -- Hint with table reference -------------------------------------- # dialect: spark SELECT /*+ BROADCAST(y) */ x.b FROM x JOIN y ON x.b = y.b; SELECT /*+ BROADCAST(y) */ x.b AS b FROM x AS x JOIN y AS y ON x.b = y.b; -------------------------------------- -- UDTF -------------------------------------- # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; SELECT _0.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT c FROM xx LATERAL VIEW EXPLODE (a) AS c; SELECT _0.c AS c FROM xx AS xx LATERAL VIEW EXPLODE(xx.a) _0 AS c; # execute: false SELECT c FROM x LATERAL VIEW EXPLODE (a) t AS c; SELECT t.c AS c FROM x AS x LATERAL VIEW EXPLODE(x.a) t AS c; # execute: false SELECT aa FROM x, UNNEST(a) AS t(aa); SELECT t.aa AS aa FROM x AS x, UNNEST(x.a) AS t(aa); # dialect: bigquery # execute: false SELECT aa FROM x, UNNEST(a) AS aa; SELECT aa AS aa FROM x AS x CROSS JOIN UNNEST(x.a) AS aa; # dialect: bigquery # execute: false select * from unnest ([1, 2]) as x with offset; SELECT x AS x, offset AS offset FROM UNNEST([1, 2]) AS x WITH OFFSET AS offset; # dialect: bigquery # execute: false select * from unnest ([1, 2]) as x with offset as y; SELECT x AS x, y AS y FROM UNNEST([1, 2]) AS x WITH OFFSET AS y; # dialect: bigquery # execute: false select x, a, x.a from unnest([STRUCT(1 AS a)]) as x CROSS JOIN m; SELECT x AS x, a AS a, x.a AS a FROM UNNEST([STRUCT(1 AS a)]) AS x CROSS JOIN m AS m; # dialect: bigquery # execute: false WITH cte AS (SELECT [STRUCT(1 AS a)] AS x) select a, x, m.a from cte, UNNEST(x) AS m 
CROSS JOIN n; WITH cte AS (SELECT [STRUCT(1 AS a)] AS x) SELECT a AS a, cte.x AS x, m.a AS a FROM cte AS cte CROSS JOIN UNNEST(cte.x) AS m CROSS JOIN n AS n; # dialect: presto SELECT x.a, i.b FROM x CROSS JOIN UNNEST(SPLIT(CAST(b AS VARCHAR), ',')) AS i(b); SELECT x.a AS a, i.b AS b FROM x AS x CROSS JOIN UNNEST(SPLIT(CAST(x.b AS VARCHAR), ',')) AS i(b); # execute: false SELECT c FROM (SELECT 1 a) AS x LATERAL VIEW EXPLODE(a) AS c; SELECT _0.c AS c FROM (SELECT 1 AS a) AS x LATERAL VIEW EXPLODE(x.a) _0 AS c; # execute: false SELECT * FROM foo(bar) AS t(c1, c2, c3); SELECT t.c1 AS c1, t.c2 AS c2, t.c3 AS c3 FROM FOO(bar) AS t(c1, c2, c3); # execute: false SELECT c1, c3 FROM foo(bar) AS t(c1, c2, c3); SELECT t.c1 AS c1, t.c3 AS c3 FROM FOO(bar) AS t(c1, c2, c3); # dialect: redshift # execute: false SELECT c.f::VARCHAR(MAX) AS f, e AS e FROM a.b AS c, c.d AS e; SELECT CAST(c.f AS VARCHAR(MAX)) AS f, e AS e FROM a.b AS c, c.d AS e; # dialect: bigquery WITH cte AS (SELECT 1 AS col) SELECT * FROM cte LEFT JOIN UNNEST((SELECT ARRAY_AGG(DISTINCT x) AS agg FROM UNNEST([1]) AS x WHERE col = 1)); WITH cte AS (SELECT 1 AS col) SELECT * FROM cte AS cte LEFT JOIN UNNEST((SELECT ARRAY_AGG(DISTINCT x) AS agg FROM UNNEST([1]) AS x WHERE cte.col = 1)); # dialect: bigquery SELECT * FROM UNNEST(ARRAY>[("p10", 1, 0.0)]); SELECT percentile AS percentile, value AS value, score AS score FROM UNNEST(ARRAY>[('p10', 1, 0.0)]); # dialect: bigquery # execute: false WITH scores AS (SELECT * FROM UNNEST((SELECT ARRAY>[("p10", 1, 0.0)]))) SELECT percentile FROM scores; WITH scores AS (SELECT percentile AS percentile, value AS value, score AS score FROM UNNEST((SELECT ARRAY>[('p10', 1, 0.0)] AS _col_0))) SELECT scores.percentile AS percentile FROM scores AS scores; -------------------------------------- -- Window functions -------------------------------------- # title: ORDER BY in window function SELECT a + 1 AS a, ROW_NUMBER() OVER (PARTITION BY b ORDER BY a) AS row_num FROM x ORDER BY a, 
row_num; SELECT x.a + 1 AS a, ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a) AS row_num FROM x AS x ORDER BY a, row_num; # dialect: bigquery SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) AS row_num FROM x QUALIFY row_num = 1; SELECT ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) AS row_num FROM x AS x QUALIFY ROW_NUMBER() OVER (PARTITION BY x.a ORDER BY x.b) = 1; # dialect: bigquery SELECT x.b, x.a FROM x LEFT JOIN y ON x.b = y.b QUALIFY ROW_NUMBER() OVER(PARTITION BY x.b ORDER BY x.a DESC) = 1 ORDER BY x.b, x.a; SELECT x.b AS b, x.a AS a FROM x AS x LEFT JOIN y AS y ON x.b = y.b QUALIFY ROW_NUMBER() OVER (PARTITION BY x.b ORDER BY x.a DESC) = 1 ORDER BY x.b, x.a; SELECT * FROM x QUALIFY COUNT(a) OVER (PARTITION BY b) > 1; SELECT x.a AS a, x.b AS b FROM x AS x QUALIFY COUNT(x.a) OVER (PARTITION BY x.b) > 1; -------------------------------------- -- Expand laterals -------------------------------------- # execute: false SELECT 2 AS d, d + 1 FROM x WHERE d = 2 GROUP BY d; SELECT 2 AS d, 2 + 1 AS _col_1 FROM x AS x WHERE 2 = 2 GROUP BY 1; # title: expand alias reference SELECT x.a + 1 AS i, i + 1 AS j, j + 1 AS k FROM x; SELECT x.a + 1 AS i, x.a + 1 + 1 AS j, x.a + 1 + 1 + 1 AS k FROM x AS x; # title: noop - reference comes before alias # execute: false # validate_qualify_columns: false SELECT i + 1 AS j, x.a + 1 AS i FROM x; SELECT i + 1 AS j, x.a + 1 AS i FROM x AS x; # title: subquery SELECT * FROM ( SELECT x.a + 1 AS i, i + 1 AS j FROM x ); SELECT _0.i AS i, _0.j AS j FROM (SELECT x.a + 1 AS i, x.a + 1 + 1 AS j FROM x AS x) AS _0; # title: wrap expanded alias to ensure operator precedence isnt broken # execute: false SELECT x.a + x.b AS f, f * x.b FROM x; SELECT x.a + x.b AS f, (x.a + x.b) * x.b AS _col_1 FROM x AS x; # title: no need to wrap expanded alias # execute: false SELECT x.a + x.b AS f, f, f + 5 FROM x; SELECT x.a + x.b AS f, x.a + x.b AS _col_1, x.a + x.b + 5 AS _col_2 FROM x AS x; # title: expand double agg if window func SELECT a, SUM(b) 
AS c, SUM(c) OVER(PARTITION BY a) AS d from x group by 1 ORDER BY a; SELECT x.a AS a, SUM(x.b) AS c, SUM(SUM(x.b)) OVER (PARTITION BY x.a) AS d FROM x AS x GROUP BY x.a ORDER BY a; # title: we can't expand aliases corresponding to recursive CTE columns (CTE names output columns) # execute: false WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t; WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t; # title: we can't expand aliases corresponding to recursive CTE columns (CTE doesn't name output columns) # execute: false WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT c + 1 AS c FROM t WHERE c <= 10) SELECT c FROM t; WITH RECURSIVE t AS (SELECT 1 AS c UNION ALL SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 10) SELECT t.c AS c FROM t AS t; # title: expand DISTINCT ON ordinals / projection names SELECT DISTINCT ON (new_col, b + 1, 1) t1.a AS new_col FROM x AS t1 ORDER BY new_col; SELECT DISTINCT ON (new_col, t1.b + 1, new_col) t1.a AS new_col FROM x AS t1 ORDER BY new_col; # title: qualify columns for Aggregate Functions and DISTINCT SELECT COALESCE(COUNT(DISTINCT a)) AS a FROM x; SELECT COALESCE(COUNT(DISTINCT x.a)) AS a FROM x AS x; # title: Oracle does not support lateral alias expansion # dialect: oracle # execute: false SELECT a AS b, b AS a FROM c; SELECT C.A AS B, C.B AS A FROM C C; # title: enable aliases expansion for the base case of recursive CTE WITH RECURSIVE rec AS (SELECT id, parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE parent = 0 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec WHERE level < 3) SELECT * FROM rec; WITH RECURSIVE rec AS (SELECT t.id AS id, t.parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE t.parent_id = 0 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec AS rec WHERE 
rec.level < 3) SELECT rec.id AS id, rec.parent AS parent, rec.level AS level FROM rec AS rec; WITH RECURSIVE rec AS (SELECT id, parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE parent = 0 UNION ALL SELECT num, val AS x, 2 AS level FROM (SELECT 2 AS num, 1 AS val) AS s WHERE x = 1 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec WHERE rec.level < 3) SELECT * FROM rec ORDER BY rec.id; WITH RECURSIVE rec AS (SELECT t.id AS id, t.parent_id AS parent, 1 AS level FROM (SELECT 1 AS id, 0 AS parent_id) AS t WHERE t.parent_id = 0 UNION ALL SELECT s.num AS num, s.val AS x, 2 AS level FROM (SELECT 2 AS num, 1 AS val) AS s WHERE s.val = 1 UNION ALL SELECT rec.id + 10 AS id, rec.id AS parent, rec.level + 1 AS level FROM rec AS rec WHERE rec.level < 3) SELECT rec.id AS id, rec.parent AS parent, rec.level AS level FROM rec AS rec ORDER BY rec.id; WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT * FROM (SELECT c + 1 AS c FROM t WHERE c <= 3 UNION ALL SELECT c + 2 AS c FROM t WHERE c <= 3)) SELECT c FROM t ORDER BY c; WITH RECURSIVE t(c) AS (SELECT 1 AS c UNION ALL SELECT _0.c AS c FROM (SELECT t.c + 1 AS c FROM t AS t WHERE t.c <= 3 UNION ALL SELECT t.c + 2 AS c FROM t AS t WHERE t.c <= 3) AS _0) SELECT t.c AS c FROM t AS t ORDER BY c; -------------------------------------- -- Wrapped tables / join constructs -------------------------------------- # execute: false SELECT * FROM ((tbl)); SELECT * FROM ((tbl AS tbl)); SELECT a, c FROM (x LEFT JOIN y ON a = c); SELECT x.a AS a, y.c AS c FROM (x AS x LEFT JOIN y AS y ON x.a = y.c); # execute: false SELECT * FROM ((a CROSS JOIN ((b CROSS JOIN c) CROSS JOIN (d CROSS JOIN e)))); SELECT * FROM ((a AS a CROSS JOIN ((b AS b CROSS JOIN c AS c) CROSS JOIN (d AS d CROSS JOIN e AS e)))); # execute: false SELECT * FROM ((SELECT * FROM tbl)); SELECT * FROM ((SELECT * FROM tbl AS tbl) AS _0); # execute: false SELECT * FROM ((SELECT c FROM t1) CROSS JOIN t2); SELECT * FROM 
((SELECT t1.c AS c FROM t1 AS t1) AS _0 CROSS JOIN t2 AS t2); # execute: false SELECT * FROM ((SELECT * FROM x) INNER JOIN y ON a = c); SELECT y.b AS b, y.c AS c, _0.a AS a, _0.b AS b FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.c); SELECT x.a, y.b, z.c FROM x LEFT JOIN (y INNER JOIN z ON y.c = z.c) ON x.b = y.b; SELECT x.a AS a, y.b AS b, z.c AS c FROM x AS x LEFT JOIN (y AS y INNER JOIN z AS z ON y.c = z.c) ON x.b = y.b; SELECT * FROM ((SELECT * FROM x) INNER JOIN (SELECT * FROM y) ON a = c); SELECT _0.a AS a, _0.b AS b, _1.b AS b, _1.c AS c FROM ((SELECT x.a AS a, x.b AS b FROM x AS x) AS _0 INNER JOIN (SELECT y.b AS b, y.c AS c FROM y AS y) AS _1 ON _0.a = _1.c); SELECT b FROM ((SELECT a FROM x) INNER JOIN y ON a = b); SELECT y.b AS b FROM ((SELECT x.a AS a FROM x AS x) AS _0 INNER JOIN y AS y ON _0.a = y.b); SELECT a, c FROM x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y TABLESAMPLE SYSTEM (10 ROWS); SELECT x.a AS a, y.c AS c FROM x AS x TABLESAMPLE SYSTEM (10 ROWS) CROSS JOIN y AS y TABLESAMPLE SYSTEM (10 ROWS); SELECT x.a FROM x INNER JOIN y ON x.a = c INNER JOIN z ON x.a = z.c; SELECT x.a AS a FROM x AS x INNER JOIN y AS y ON x.a = y.c INNER JOIN z AS z ON x.a = z.c; -------------------------------------- -- Snowflake allows column alias to be used in almost all clauses -------------------------------------- # title: Snowflake column alias in JOIN # dialect: snowflake # execute: false SELECT x.a AS foo FROM x JOIN y ON foo = y.b; SELECT X.A AS FOO FROM X AS X JOIN Y AS Y ON X.A = Y.B; # title: Snowflake column alias in QUALIFY # dialect: snowflake # execute: false SELECT x.a AS foo FROM x QUALIFY foo = 1; SELECT X.A AS FOO FROM X AS X QUALIFY X.A = 1; # title: Snowflake column alias in GROUP BY # dialect: snowflake # execute: false SELECT x.a AS foo FROM x GROUP BY foo = 1; SELECT X.A AS FOO FROM X AS X GROUP BY X.A = 1; # title: Snowflake column alias in WHERE # dialect: snowflake # execute: false SELECT x.a AS foo FROM x 
WHERE foo = 1; SELECT X.A AS FOO FROM X AS X WHERE X.A = 1; -------------------------------------- -- SEMI / ANTI Joins -------------------------------------- # title: SEMI JOIN table is excluded from the scope SELECT * FROM x SEMI JOIN y USING (b); SELECT x.a AS a, x.b AS b FROM x AS x SEMI JOIN y AS y ON x.b = y.b; # title: ANTI JOIN table is excluded from the scope SELECT * FROM x ANTI JOIN y USING (b); SELECT x.a AS a, x.b AS b FROM x AS x ANTI JOIN y AS y ON x.b = y.b; # title: SEMI + normal joins reinclude the table on scope SELECT * FROM x SEMI JOIN y USING (b) JOIN y USING (b); SELECT x.a AS a, COALESCE(x.b, y_2.b) AS b, y_2.c AS c FROM x AS x SEMI JOIN y AS y ON x.b = y.b JOIN y AS y_2 ON x.b = y_2.b; # title: ANTI + normal joins reinclude the table on scope SELECT * FROM x ANTI JOIN y USING (b) JOIN y USING (b); SELECT x.a AS a, COALESCE(x.b, y_2.b) AS b, y_2.c AS c FROM x AS x ANTI JOIN y AS y ON x.b = y.b JOIN y AS y_2 ON x.b = y_2.b; ================================================ FILE: tests/fixtures/optimizer/qualify_columns__invalid.sql ================================================ SELECT z.a FROM x; SELECT z.* FROM x; SELECT x FROM x; SELECT x FROM VALUES (1, 2); SELECT a FROM x AS z JOIN y AS z; SELECT a FROM x JOIN (SELECT b FROM y WHERE y.b = x.c); SELECT a FROM x AS y JOIN (SELECT a FROM y) AS q ON y.a = q.a; SELECT q.a FROM (SELECT x.b FROM x) AS z JOIN (SELECT a FROM z) AS q ON z.b = q.a; SELECT b FROM x AS a CROSS JOIN y AS b CROSS JOIN y AS c; SELECT x.a FROM x JOIN y USING (a); SELECT a, SUM(b) FROM x GROUP BY 3; SELECT p FROM (SELECT x from xx) y CROSS JOIN yy CROSS JOIN zz SELECT a FROM (SELECT * FROM x CROSS JOIN y); SELECT x FROM tbl AS tbl(a); SELECT a JOIN b USING (a); SELECT x.a FROM x INNER JOIN y ON x.a = c INNER JOIN z ON x.a = c; SELECT b FROM x INNER JOIN y ON x.a = y.c INNER JOIN z ON x.a = z.c; ================================================ FILE: tests/fixtures/optimizer/qualify_columns__with_invisible.sql 
================================================ -------------------------------------- -- Qualify columns -------------------------------------- SELECT a FROM x; SELECT x.a AS a FROM x AS x; SELECT b FROM x; SELECT x.b AS b FROM x AS x; -------------------------------------- -- Derived tables -------------------------------------- SELECT x.a FROM x AS x CROSS JOIN (SELECT * FROM x); SELECT x.a AS a FROM x AS x CROSS JOIN (SELECT x.a AS a FROM x AS x) AS _0; SELECT x.b FROM x AS x CROSS JOIN (SELECT b FROM x); SELECT x.b AS b FROM x AS x CROSS JOIN (SELECT x.b AS b FROM x AS x) AS _0; -------------------------------------- -- Expand * -------------------------------------- SELECT * FROM x; SELECT x.a AS a FROM x AS x; SELECT * FROM y CROSS JOIN z ON y.b = z.b; SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.b = z.b; SELECT * FROM y CROSS JOIN z ON y.c = z.c; SELECT y.b AS b, z.b AS b FROM y AS y CROSS JOIN z AS z ON y.c = z.c; SELECT a FROM (SELECT * FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; SELECT * FROM (SELECT a FROM x); SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0; ================================================ FILE: tests/fixtures/optimizer/qualify_columns_ddl.sql ================================================ # title: Create with CTE WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM cte; WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT cte.b AS b FROM cte AS cte; # title: Create with CTE, query also has CTE WITH cte1 AS (SELECT b FROM y) CREATE TABLE s AS WITH cte2 AS (SELECT b FROM cte1) SELECT * FROM cte2; WITH cte1 AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS WITH cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) SELECT cte2.b AS b FROM cte2 AS cte2; # title: Create without CTE CREATE TABLE foo AS SELECT a FROM tbl; CREATE TABLE foo AS SELECT tbl.a AS a FROM tbl AS tbl; # title: Create with complex CTE with derived table WITH cte AS (SELECT a FROM (SELECT a FROM x)) 
CREATE TABLE s AS SELECT * FROM cte; WITH cte AS (SELECT _0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _0) CREATE TABLE s AS SELECT cte.a AS a FROM cte AS cte; # title: Create with multiple CTEs WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte2; WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte2.b AS b FROM cte2 AS cte2; # title: Create with multiple CTEs, selecting only from the first CTE (unnecessary code) WITH cte1 AS (SELECT b FROM y), cte2 AS (SELECT b FROM cte1) CREATE TABLE s AS SELECT * FROM cte1; WITH cte1 AS (SELECT y.b AS b FROM y AS y), cte2 AS (SELECT cte1.b AS b FROM cte1 AS cte1) CREATE TABLE s AS SELECT cte1.b AS b FROM cte1 AS cte1; # title: Create with multiple derived tables CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM y)); CREATE TABLE s AS SELECT _1.b AS b FROM (SELECT _0.b AS b FROM (SELECT y.b AS b FROM y AS y) AS _0) AS _1; # title: Create with a CTE and a derived table WITH cte AS (SELECT b FROM y) CREATE TABLE s AS SELECT * FROM (SELECT b FROM (SELECT b FROM cte)); WITH cte AS (SELECT y.b AS b FROM y AS y) CREATE TABLE s AS SELECT _1.b AS b FROM (SELECT _0.b AS b FROM (SELECT cte.b AS b FROM cte AS cte) AS _0) AS _1; # title: Insert with CTE # dialect: spark WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte; WITH cte AS (SELECT y.b AS b FROM y AS y) INSERT INTO s SELECT cte.b AS b FROM cte AS cte; # title: Insert without CTE INSERT INTO foo SELECT a FROM tbl; INSERT INTO foo SELECT tbl.a AS a FROM tbl AS tbl; ================================================ FILE: tests/fixtures/optimizer/qualify_tables.sql ================================================ # title: single table SELECT 1 FROM z; SELECT 1 FROM c.db.z AS z; # title: single table with db SELECT 1 FROM y.z; SELECT 1 FROM c.y.z AS z; # title: single table with db, catalog SELECT 1 FROM x.y.z; SELECT 1 FROM x.y.z AS z; # title: single 
table with db, catalog, alias SELECT 1 FROM x.y.z AS z; SELECT 1 FROM x.y.z AS z; # title: only information schema # dialect: bigquery SELECT * FROM information_schema.tables; SELECT * FROM c.db.`information_schema.tables` AS tables; # title: information schema with db # dialect: bigquery SELECT * FROM y.information_schema.tables; SELECT * FROM c.y.`information_schema.tables` AS tables; # title: information schema with db, catalog # dialect: bigquery SELECT * FROM x.y.information_schema.tables; SELECT * FROM x.y.`information_schema.tables` AS tables; # title: information schema with db, catalog, alias # dialect: bigquery SELECT * FROM x.y.information_schema.tables AS z; SELECT * FROM x.y.`information_schema.tables` AS z; # title: redshift unnest syntax, z.a should be a column, not a table # dialect: redshift SELECT 1 FROM y.z AS z, z.a; SELECT 1 FROM c.y.z AS z, z.a; # title: bigquery implicit unnest syntax, coordinates.position should be a column, not a table # dialect: bigquery SELECT results FROM Coordinates, coordinates.position AS results; SELECT results FROM c.db.Coordinates AS coordinates CROSS JOIN UNNEST(coordinates.position) AS results; # title: bigquery implicit unnest syntax, table is already qualified # dialect: bigquery SELECT results FROM db.coordinates, Coordinates.position AS results; SELECT results FROM c.db.coordinates AS coordinates CROSS JOIN UNNEST(Coordinates.position) AS results; # title: bigquery schema name clashes with CTE name - this is a join, not an implicit unnest # dialect: bigquery WITH Coordinates AS (SELECT [1, 2] AS position) SELECT results FROM Coordinates, `Coordinates.position` AS results; WITH Coordinates AS (SELECT [1, 2] AS position) SELECT results FROM Coordinates AS Coordinates CROSS JOIN `c.Coordinates.position` AS results; # title: single cte WITH a AS (SELECT 1 FROM z) SELECT 1 FROM a; WITH a AS (SELECT 1 FROM c.db.z AS z) SELECT 1 FROM a AS a; # title: two ctes that are self-joined WITH a AS (SELECT 1 FROM z) SELECT 1 
FROM a CROSS JOIN a; WITH a AS (SELECT 1 FROM c.db.z AS z) SELECT 1 FROM a AS a CROSS JOIN a AS a; # title: query that yields a single column as projection SELECT (SELECT y.c FROM y AS y) FROM x; SELECT (SELECT y.c FROM c.db.y AS y) FROM c.db.x AS x; # title: pivoted table SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')); SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS _0; # title: pivoted table, pivot has alias SELECT * FROM x PIVOT (SUM(a) FOR b IN ('a', 'b')) AS piv; SELECT * FROM c.db.x AS x PIVOT(SUM(a) FOR b IN ('a', 'b')) AS piv; # title: wrapped table without alias SELECT * FROM (tbl); SELECT * FROM (c.db.tbl AS tbl); # title: wrapped table with alias SELECT * FROM (tbl AS tbl); SELECT * FROM (c.db.tbl AS tbl); # title: wrapped table with alias using multiple (redundant) parentheses SELECT * FROM ((((tbl AS tbl)))); SELECT * FROM ((((c.db.tbl AS tbl)))); # title: wrapped join of tables without alias SELECT * FROM (t1 CROSS JOIN t2); SELECT * FROM (c.db.t1 AS t1 CROSS JOIN c.db.t2 AS t2); # title: wrapped join of tables with alias, expansion of join construct SELECT * FROM (t1 CROSS JOIN t2) AS t; SELECT * FROM (SELECT * FROM c.db.t1 AS t1 CROSS JOIN c.db.t2 AS t2) AS t; # title: chained wrapped joins without aliases (1) SELECT * FROM ((a CROSS JOIN b) CROSS JOIN c); SELECT * FROM ((c.db.a AS a CROSS JOIN c.db.b AS b) CROSS JOIN c.db.c AS c); # title: chained wrapped joins without aliases (2) SELECT * FROM (a CROSS JOIN (b CROSS JOIN c)); SELECT * FROM (c.db.a AS a CROSS JOIN (c.db.b AS b CROSS JOIN c.db.c AS c)); # title: chained wrapped joins without aliases (3) SELECT * FROM ((a CROSS JOIN ((b CROSS JOIN c) CROSS JOIN d))); SELECT * FROM ((c.db.a AS a CROSS JOIN ((c.db.b AS b CROSS JOIN c.db.c AS c) CROSS JOIN c.db.d AS d))); # title: chained wrapped joins without aliases (4) SELECT * FROM ((a CROSS JOIN ((b CROSS JOIN c) CROSS JOIN (d CROSS JOIN e)))); SELECT * FROM ((c.db.a AS a CROSS JOIN ((c.db.b AS b CROSS JOIN c.db.c AS c) CROSS 
JOIN (c.db.d AS d CROSS JOIN c.db.e AS e)))); # title: chained wrapped joins with aliases SELECT * FROM ((a AS foo CROSS JOIN b AS bar) CROSS JOIN c AS baz); SELECT * FROM ((c.db.a AS foo CROSS JOIN c.db.b AS bar) CROSS JOIN c.db.c AS baz); # title: wrapped join with subquery without alias SELECT * FROM (tbl1 CROSS JOIN (SELECT * FROM tbl2) AS t1); SELECT * FROM (c.db.tbl1 AS tbl1 CROSS JOIN (SELECT * FROM c.db.tbl2 AS tbl2) AS t1); # title: wrapped join with subquery with alias, parentheses cant be omitted because of alias SELECT * FROM (tbl1 CROSS JOIN (SELECT * FROM tbl2) AS t1) AS t2; SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 CROSS JOIN (SELECT * FROM c.db.tbl2 AS tbl2) AS t1) AS t2; # title: join construct as the right operand of a left join SELECT * FROM a LEFT JOIN (b INNER JOIN c ON c.id = b.id) ON b.id = a.id; SELECT * FROM c.db.a AS a LEFT JOIN (c.db.b AS b INNER JOIN c.db.c AS c ON c.id = b.id) ON b.id = a.id; # title: nested joins SELECT * FROM a LEFT JOIN b INNER JOIN c ON c.id = b.id ON b.id = a.id; SELECT * FROM c.db.a AS a LEFT JOIN c.db.b AS b INNER JOIN c.db.c AS c ON c.id = b.id ON b.id = a.id; # title: parentheses cant be omitted because alias shadows inner table names SELECT t.a FROM (tbl AS tbl) AS t; SELECT t.a FROM (SELECT * FROM c.db.tbl AS tbl) AS t; # title: wrapped aliased table with outer alias SELECT * FROM ((((tbl AS tbl)))) AS _0; SELECT * FROM (SELECT * FROM c.db.tbl AS tbl) AS _0; # title: join construct with three tables SELECT * FROM (tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3) AS _0; SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = id3) AS _0; # title: join construct with three tables and redundant set of parentheses SELECT * FROM ((tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2 JOIN tbl3 AS tbl3 ON id1 = id3)) AS _0; SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN c.db.tbl2 AS tbl2 ON id1 = id2 JOIN c.db.tbl3 AS tbl3 ON id1 = 
id3) AS _0; # title: join construct within join construct SELECT * FROM (tbl1 AS tbl1 JOIN (tbl2 AS tbl2 JOIN tbl3 AS tbl3 ON id2 = id3) AS _0 ON id1 = id3) AS _1; SELECT * FROM (SELECT * FROM c.db.tbl1 AS tbl1 JOIN (SELECT * FROM c.db.tbl2 AS tbl2 JOIN c.db.tbl3 AS tbl3 ON id2 = id3) AS _0 ON id1 = id3) AS _1; # title: wrapped subquery without alias SELECT * FROM ((SELECT * FROM t)); SELECT * FROM ((SELECT * FROM c.db.t AS t) AS _0); # title: wrapped subquery without alias joined with a table SELECT * FROM ((SELECT * FROM t1) INNER JOIN t2 ON a = b); SELECT * FROM ((SELECT * FROM c.db.t1 AS t1) AS _0 INNER JOIN c.db.t2 AS t2 ON a = b); # title: lateral unnest with alias SELECT x FROM t, LATERAL UNNEST(t.xs) AS x; SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS x; # title: lateral unnest without alias SELECT x FROM t, LATERAL UNNEST(t.xs); SELECT x FROM c.db.t AS t, LATERAL UNNEST(t.xs) AS _0; # title: table with ordinality SELECT * FROM t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; SELECT * FROM c.db.t AS t CROSS JOIN JSON_ARRAY_ELEMENTS(t.response) WITH ORDINALITY AS kv_json; # title: alter table ALTER TABLE t ADD PRIMARY KEY (id) NOT ENFORCED; ALTER TABLE c.db.t ADD PRIMARY KEY (id) NOT ENFORCED; # title: create statement with cte CREATE TABLE t1 AS (WITH cte AS (SELECT x FROM t2) SELECT * FROM cte); CREATE TABLE c.db.t1 AS (WITH cte AS (SELECT x FROM c.db.t2 AS t2) SELECT * FROM cte AS cte); # title: delete statement DELETE FROM t1 WHERE NOT c IN (SELECT c FROM t2); DELETE FROM c.db.t1 WHERE NOT c IN (SELECT c FROM c.db.t2 AS t2); # title: insert statement with cte # dialect: spark WITH cte AS (SELECT b FROM y) INSERT INTO s SELECT * FROM cte; WITH cte AS (SELECT b FROM c.db.y AS y) INSERT INTO c.db.s SELECT * FROM cte AS cte; # title: qualify wrapped query (SELECT x FROM t); (SELECT x FROM c.db.t AS t); # title: replace columns with db/catalog refs SELECT db1.a.id, db2.a.id FROM db1.a JOIN db2.a ON db1.a.id = db2.a.id; SELECT 
a.id, a_2.id FROM c.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id; SELECT cat.db1.a.id, db2.a.id FROM cat.db1.a JOIN db2.a ON cat.db1.a.id = db2.a.id; SELECT a.id, a_2.id FROM cat.db1.a AS a JOIN c.db2.a AS a_2 ON a.id = a_2.id; COPY INTO (SELECT * FROM x) TO 'data' WITH (FORMAT 'CSV'); COPY INTO (SELECT * FROM c.db.x AS x) TO 'data' WITH (FORMAT 'CSV'); # title: tablesample SELECT 1 FROM x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN y TABLESAMPLE SYSTEM (10 PERCENT); SELECT 1 FROM c.db.x AS x TABLESAMPLE SYSTEM (10 PERCENT) CROSS JOIN c.db.y AS y TABLESAMPLE SYSTEM (10 PERCENT); WITH cte_tbl AS (SELECT 1 AS col2) UPDATE y SET col1 = (SELECT * FROM x) WHERE EXISTS(SELECT 1 FROM cte_tbl); WITH cte_tbl AS (SELECT 1 AS col2) UPDATE c.db.y SET col1 = (SELECT * FROM c.db.x AS x) WHERE EXISTS(SELECT 1 FROM cte_tbl AS cte_tbl); # title: avoid qualifying CTE with UPDATE WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE t SET name = cte.name FROM cte WHERE cte.c = 1; WITH cte AS (SELECT 1 AS c, 'name' AS name) UPDATE c.db.t SET name = cte.name FROM cte WHERE cte.c = 1; # title: avoid qualifying CTE with DELETE WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE t FROM t AS t INNER JOIN cte ON t.id = cte.c; WITH cte AS (SELECT 1 AS c, 'name' AS name) DELETE c.db.t FROM c.db.t AS t INNER JOIN cte ON t.id = cte.c; # title: canonicalize single table alias # canonicalize_table_aliases: true SELECT * FROM t; SELECT * FROM c.db.t AS _0; # title: canonicalize join table aliases # canonicalize_table_aliases: true SELECT * FROM t1 JOIN t2 ON t1.id = t2.id; SELECT * FROM c.db.t1 AS _0 JOIN c.db.t2 AS _1 ON _0.id = _1.id; # title: canonicalize join with different databases # canonicalize_table_aliases: true SELECT * FROM db1.users JOIN db2.users ON db1.users.id = db2.users.id; SELECT * FROM c.db1.users AS _0 JOIN c.db2.users AS _1 ON _0.id = _1.id; # title: canonicalize CTE alias # canonicalize_table_aliases: true WITH cte AS (SELECT * FROM t) SELECT * FROM cte; WITH cte AS (SELECT * 
FROM c.db.t AS _0) SELECT * FROM cte AS _1; # title: canonicalize subquery alias # canonicalize_table_aliases: true SELECT * FROM (SELECT * FROM t); SELECT * FROM (SELECT * FROM c.db.t AS _0) AS _1; # title: canonicalize multiple tables with subquery # canonicalize_table_aliases: true SELECT * FROM t1, (SELECT * FROM t2) AS sub, t3; SELECT * FROM c.db.t1 AS _2, (SELECT * FROM c.db.t2 AS _0) AS _1, c.db.t3 AS _3; # title: canonicalize CTE with PIVOT # canonicalize_table_aliases: true WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y')); WITH cte AS (SELECT * FROM c.db.t AS _0) SELECT * FROM cte AS _1 PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _2; # title: canonicalize sources that reference external columns # canonicalize_table_aliases: true SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a LIMIT 10); SELECT * FROM c.db.x AS _1 WHERE _1.a = (SELECT SUM(_0.c) AS c FROM c.db.y AS _0 WHERE _0.a = _1.a LIMIT 10); # title: canonicalize sources that have colliding aliases # canonicalize_table_aliases: true SELECT t.foo FROM t AS t, (SELECT t.bar FROM t AS t); SELECT _2.foo FROM c.db.t AS _2, (SELECT _0.bar FROM c.db.t AS _0) AS _1; # title: Qualify GENERATE_SERIES with its default column generate_series # dialect: postgres SELECT generate_series FROM GENERATE_SERIES(1,2); SELECT generate_series FROM GENERATE_SERIES(1, 2) AS _0(generate_series); # title: Qualify GENERATE_SERIES with alias by wrapping it # dialect: postgres SELECT g FROM GENERATE_SERIES(1,2) AS g; SELECT g FROM GENERATE_SERIES(1, 2) AS _0(g); # title: Qualify GENERATE_SERIES with alias on table and columns # dialect: postgres SELECT g FROM GENERATE_SERIES(1,2) AS t(g); SELECT g FROM GENERATE_SERIES(1, 2) AS t(g); # title: Qualify GENERATE_SERIES with explicit column and canonicalize_table_aliases # dialect: postgres # canonicalize_table_aliases: true SELECT g FROM GENERATE_SERIES(1,2) AS t(g); SELECT g FROM GENERATE_SERIES(1, 2) AS _0(g); 
================================================ FILE: tests/fixtures/optimizer/quote_identifiers.sql ================================================ SELECT a FROM x; SELECT "a" FROM "x"; SELECT "a" FROM "x"; SELECT "a" FROM "x"; SELECT x.a AS a FROM db.x; SELECT "x"."a" AS "a" FROM "db"."x"; SELECT @x; SELECT @x; # dialect: snowflake SELECT * FROM DUAL; SELECT * FROM DUAL; # dialect: snowflake SELECT * FROM "DUAL"; SELECT * FROM "DUAL"; # dialect: snowflake SELECT * FROM "dual"; SELECT * FROM "dual"; # dialect: snowflake SELECT dual FROM t; SELECT "dual" FROM "t"; # dialect: snowflake SELECT * FROM t AS dual; SELECT * FROM "t" AS "dual"; # dialect: bigquery SELECT `p.d.udf`(data).* FROM `p.d.t`; SELECT `p.d.udf`(`data`).* FROM `p.d.t`; ================================================ FILE: tests/fixtures/optimizer/simplify.sql ================================================ -------------------------------------- -- Conditions -------------------------------------- x AND x; x AND TRUE; y OR y; y AND TRUE; x AND NOT x; NOT x AND x; x OR NOT x; NOT x OR x; 1 AND TRUE; TRUE; TRUE AND TRUE; TRUE; 1 AND TRUE AND 1 AND 1; TRUE; TRUE AND FALSE; FALSE; FALSE AND FALSE; FALSE; FALSE AND TRUE AND TRUE; FALSE; x > y OR FALSE; x > y; FALSE OR x = y; x = y; 1 = 1; TRUE; 1.0 = 1; TRUE; CAST('2023-01-01' AS DATE) = CAST('2023-01-01' AS DATE); TRUE; 'x' = 'y'; FALSE; 'x' = 'x'; TRUE; STRUCT(NULL AS a); STRUCT(NULL AS a); NULL AND TRUE; NULL AND TRUE; NULL AND FALSE; FALSE; NULL AND NULL; NULL AND TRUE; NULL OR TRUE; TRUE; NULL OR NULL; NULL AND TRUE; FALSE OR NULL; NULL AND TRUE; NOT TRUE; FALSE; NOT FALSE; TRUE; NOT NULL; NULL AND TRUE; NULL = NULL; NULL = NULL; SELECT (EXISTS(SELECT 1 WHERE FALSE)) AND NULL; SELECT EXISTS(SELECT 1 WHERE FALSE) AND NULL; SELECT NULL AND (EXISTS(SELECT 1 WHERE FALSE)); SELECT EXISTS(SELECT 1 WHERE FALSE) AND NULL; 1 AND 0; FALSE; 0 AND 1; FALSE; 0 OR 1; TRUE; 0 OR NULL; NULL AND TRUE; NULL OR 0; NULL AND TRUE; 0 AND NULL; FALSE; NULL AND 0; 
FALSE; -- Can't optimize this because different engines do different things -- mysql converts to 0 and 1 but tsql does true and false NULL <=> NULL; NULL IS NOT DISTINCT FROM NULL; a IS NOT DISTINCT FROM a; a IS NOT DISTINCT FROM a; NULL IS DISTINCT FROM NULL; NULL IS DISTINCT FROM NULL; NOT (NOT TRUE); TRUE; a AND (b OR b); a AND b; a AND (b AND b); a AND b; -- bigquery doesn't allow unparenthesis comparisons (x is not null) != (y is null); (NOT x IS NULL) <> (y IS NULL); # dialect: mysql A XOR A; A XOR A; # dialect: mysql SELECT DISTINCT GREATEST(EXISTS(SELECT 1 WHERE FALSE), (EXISTS(SELECT 1 WHERE FALSE)) XOR ((0.08) IN ((t1.c0) XOR (t1.c0)))) AS ref0 FROM (SELECT NULL AS c0 UNION ALL SELECT 1 AS c0) AS t1, (SELECT 0.01 AS c1) AS t0; SELECT DISTINCT GREATEST(EXISTS(SELECT 1 WHERE FALSE), 0.08 IN (t1.c0 XOR t1.c0) XOR EXISTS(SELECT 1 WHERE FALSE)) AS ref0 FROM (SELECT NULL AS c0 UNION ALL SELECT 1 AS c0) AS t1, (SELECT 0.01 AS c1) AS t0; TRUE AND TRUE OR TRUE AND FALSE; TRUE; COALESCE(x, y) <> ALL (SELECT z FROM w); COALESCE(x, y) <> ALL (SELECT z FROM w); SELECT NOT (2 <> ALL (SELECT 2 UNION ALL SELECT 3)); SELECT 2 = ANY(SELECT 2 UNION ALL SELECT 3); SELECT t_bool.a AND TRUE FROM t_bool; SELECT t_bool.a FROM t_bool; SELECT TRUE AND t_bool.a FROM t_bool; SELECT t_bool.a FROM t_bool; SELECT t_bool.a OR FALSE FROM t_bool; SELECT t_bool.a FROM t_bool; SELECT FALSE OR t_bool.a FROM t_bool; SELECT t_bool.a FROM t_bool; -------------------------------------- -- Absorption -------------------------------------- (A OR B) AND (C OR NOT A); (A OR B) AND (C OR NOT A); A AND (A OR B); A AND TRUE; A AND D AND E AND (B OR A); A AND D AND E; D AND A AND E AND (B OR A); A AND D AND E; (A OR B) AND A; A AND TRUE; C AND D AND (A OR B) AND E AND F AND A; A AND C AND D AND E AND F; A OR (A AND B); A AND TRUE; (A AND B) OR A; A AND TRUE; A AND (NOT A OR B); A AND B; (NOT A OR B) AND A; A AND B; A OR (NOT A AND B); A OR B; A OR ((((NOT A AND B)))); A OR B; (A OR C) AND ((A OR C) OR 
B); A OR C; (A OR C) AND (A OR B OR C); A OR C; A AND (B AND C) AND (D AND E); A AND B AND C AND D AND E; A AND (A OR B) AND (A OR B OR C); A AND TRUE; (A OR B) AND (A OR C) AND (A OR B OR C); (A OR B) AND (A OR C); -------------------------------------- -- Elimination -------------------------------------- (A AND B) OR (A AND NOT B); A AND TRUE; (A AND B) OR (NOT A AND B); B AND TRUE; (A AND NOT B) OR (A AND B); A AND TRUE; (NOT A AND B) OR (A AND B); B AND TRUE; (A OR B) AND (A OR NOT B); A AND TRUE; (A OR B) AND (NOT A OR B); B AND TRUE; (A OR NOT B) AND (A OR B); A AND TRUE; (NOT A OR B) AND (A OR B); B AND TRUE; (NOT A OR NOT B) AND (NOT A OR B); NOT A; (NOT A OR NOT B) AND (NOT A OR NOT NOT B); NOT A; E OR (A AND B) OR C OR D OR (A AND NOT B); A OR C OR D OR E; (A AND B) OR (A AND NOT B) OR (A AND NOT B); A AND TRUE; (A AND B) OR (A AND B) OR (A AND NOT B); A AND TRUE; (A AND B) OR (A AND NOT B) OR (A AND B) OR (A AND NOT B); A AND TRUE; SELECT t_bool.a OR t_bool.a FROM t_bool; SELECT t_bool.a FROM t_bool; SELECT t_bool.a AND t_bool.a FROM t_bool; SELECT t_bool.a FROM t_bool; SELECT SUM(t.x OR t.x) FROM t; SELECT SUM(t.x AND TRUE) FROM t; SELECT SUM(t.x AND t.x) FROM t; SELECT SUM(t.x AND TRUE) FROM t; -------------------------------------- -- Associativity -------------------------------------- (A AND B) AND C; A AND B AND C; A AND (B AND C); A AND B AND C; (A OR B) OR C; A OR B OR C; A OR (B OR C); A OR B OR C; ((A AND B) AND C) AND D; A AND B AND C AND D; (((((A) AND B)) AND C)) AND D; A AND B AND C AND D; (x + 1) + 2; x + 3; x + (1 + 2); x + 3; (x * 2) * 4 + (1 + 3) + 5; x * 8 + 9; (x - 1) - 2; (x - 1) - 2; x - (3 - 2); x - 1; -------------------------------------- -- Comparison and Pruning -------------------------------------- A AND D AND B AND E AND F AND G AND E AND A; A AND B AND D AND E AND F AND G; A OR D OR B OR E OR F OR G OR E OR A; A OR B OR D OR E OR F OR G; # dialect: mysql A XOR D XOR B XOR E XOR F XOR G XOR C; A XOR B XOR C XOR D XOR E XOR 
F XOR G; A AND NOT B AND C AND B; A AND B AND C AND NOT B; (a AND b AND c AND d) AND (d AND c AND b AND a); a AND b AND c AND d; (c AND (a AND b)) AND ((b AND a) AND c); a AND b AND c; (A AND B AND C) OR (C AND B AND A); A AND B AND C; -------------------------------------- -- Where removal -------------------------------------- SELECT x WHERE TRUE; SELECT x; SELECT x FROM y JOIN z ON TRUE; SELECT x FROM y CROSS JOIN z; SELECT x FROM y RIGHT JOIN z ON TRUE; SELECT x FROM y CROSS JOIN z; SELECT x FROM y LEFT JOIN z ON TRUE; SELECT x FROM y LEFT JOIN z ON TRUE; SELECT x FROM y FULL OUTER JOIN z ON TRUE; SELECT x FROM y FULL OUTER JOIN z ON TRUE; SELECT x FROM y JOIN z USING (x); SELECT x FROM y JOIN z USING (x); -------------------------------------- -- Parenthesis removal -------------------------------------- (TRUE); TRUE; (FALSE); FALSE; ((TRUE)); TRUE; (FALSE OR TRUE); TRUE; TRUE OR (((FALSE) OR (TRUE)) OR FALSE); TRUE; (NOT FALSE) AND (NOT TRUE); FALSE; ((NOT FALSE) AND (x = x)) AND (TRUE OR 1 <> 3); x = x; ((NOT FALSE) AND (x = x)) AND (FALSE OR 1 <> 2); x = x; (('a' = 'a') AND TRUE and NOT FALSE); TRUE; (x = y) and z; x = y AND z; x * (1 - y); x * (1 - y); (((x % 20) = 0) = TRUE); ((x % 20) = 0) = TRUE; ANY(t.value); ANY(t.value); SELECT (ARRAY_AGG(foo))[1]; SELECT (ARRAY_AGG(foo))[1]; SELECT -(x.a > x.b) FROM x; SELECT -(x.a > x.b) FROM x; SELECT (-((x.a) IS NULL)) FROM x; SELECT -(x.a IS NULL) FROM x; SELECT * FROM A WHERE a - (b < c) < 0 AND a + (b > c) >= 0; SELECT * FROM A WHERE a + (b > c) >= 0 AND a - (b < c) < 0; -------------------------------------- -- Literals -------------------------------------- 1 + 1; 2; 0.06 + 0.01; 0.07; 0.06 + 1; 1.06; 1.2E+1 + 15E-3; 12.015; 1.2E1 + 15E-3; 12.015; 1 - 2; -1; -1 + 3; 2; 1 - 2 - 4; -5; -(-1); 1; - -+1; 1; +-1; -1; ++1; 1; 0.06 - 0.01; 0.05; 3 * 4; 12; 3.0 * 9; 27.0; 0.03 * 0.73; 0.0219; 1 / 3; 1 / 3; 1 / 3.0; 0.3333333333333333333333333333; 20.0 / 6; 3.333333333333333333333333333; 10 / 5; 10 / 5; (1.0 * 3) * 4 
- 2 * (5 / 2); 12.0 - 2 * (5 / 2); a * 0.5 / 10 / (2.0 + 3); a * 0.5 / 10 / 5.0; a * 0.5 - 10 - (2.0 + 3); a * 0.5 - 10 - 5.0; x * (10 - 5); x * 5; 6 - 2 + 4 * 2 + a; 12 + a; a + 1 + 1 + 2; a + 4; a + (1 + 1) + (10); a + 12; a + (1 * 1) + (1 - (1 * 1)); a + 1; a + (b * c) + (d - (e * f)); a + b * c + (d - e * f); 5 + 4 * 3; 17; 1 < 2; TRUE; 2 <= 2; TRUE; 2 >= 2; TRUE; 2 > 1; TRUE; 2 > 2.5; FALSE; 3 > 2.5; TRUE; 1 > NULL; 1 > NULL; 1 <= NULL; 1 <= NULL; 1 IS NULL; FALSE; NULL IS NULL; TRUE; NULL IS NOT NULL; FALSE; 1 IS NOT NULL; TRUE; date '1998-12-01' - interval x day; CAST('1998-12-01' AS DATE) - INTERVAL x DAY; date '1998-12-01' - interval '90' day; CAST('1998-09-02' AS DATE); date '1998-12-01' + interval '1' week; CAST('1998-12-08' AS DATE); interval '1' year + date '1998-01-01'; CAST('1999-01-01' AS DATE); interval '1' year + date '1998-01-01' + 3 * 7 * 4; CAST('1999-01-01' AS DATE) + 84; date '1998-12-01' - interval '90' foo; CAST('1998-12-01' AS DATE) - INTERVAL '90' FOO; date '1998-12-01' + interval '90' foo; CAST('1998-12-01' AS DATE) + INTERVAL '90' FOO; CAST(x AS DATE) + interval '1' week; CAST(x AS DATE) + INTERVAL '1' WEEK; CAST('2008-11-11' AS DATETIME) + INTERVAL '5' MONTH; CAST('2009-04-11 00:00:00' AS DATETIME); datetime '1998-12-01' - interval '90' day; CAST('1998-09-02 00:00:00' AS DATETIME); CAST(x AS DATETIME) + interval '1' WEEK; CAST(x AS DATETIME) + INTERVAL '1' WEEK; # dialect: bigquery CAST('2023-01-01' AS TIMESTAMP) + INTERVAL 1 DAY; CAST('2023-01-02 00:00:00' AS TIMESTAMP); # dialect: bigquery INTERVAL 1 DAY + CAST('2023-01-01' AS TIMESTAMP); CAST('2023-01-02 00:00:00' AS TIMESTAMP); # dialect: bigquery CAST('2023-01-02' AS TIMESTAMP) - INTERVAL 1 DAY; CAST('2023-01-01 00:00:00' AS TIMESTAMP); TS_OR_DS_TO_DATE('1998-12-01 00:00:01') - interval '90' day; CAST('1998-09-02' AS DATE); DATE_ADD(CAST('2023-01-02' AS DATE), -2, 'MONTH'); CAST('2022-11-02' AS DATE); DATE_SUB(CAST('2023-01-02' AS DATE), 1 + 1, 'DAY'); CAST('2022-12-31' AS DATE); 
DATE_ADD(CAST('2023-01-02' AS DATETIME), -2, 'HOUR'); CAST('2023-01-01 22:00:00' AS DATETIME); DATETIME_ADD(CAST('2023-01-02' AS DATETIME), -2, 'HOUR'); CAST('2023-01-01 22:00:00' AS DATETIME); DATETIME_SUB(CAST('2023-01-02' AS DATETIME), 1 + 1, 'HOUR'); CAST('2023-01-01 22:00:00' AS DATETIME); DATE_ADD(x, 1, 'MONTH'); DATE_ADD(x, 1, 'MONTH'); DATE_ADD(x, 1); DATE_ADD(x, 1, 'DAY'); SELECT 1 WHERE 'foo'; SELECT 1 WHERE 'foo'; SELECT 1 WHERE NOT 'foo'; SELECT 1 WHERE NOT 'foo'; -------------------------------------- -- Comparisons -------------------------------------- x < 0 OR x > 1; x < 0 OR x > 1; x < 0 OR x > 0; x < 0 OR x > 0; x < 1 OR x > 0; x < 1 OR x > 0; x < 1 OR x >= 0; x < 1 OR x >= 0; x <= 1 OR x > 0; x <= 1 OR x > 0; x <= 1 OR x >= 0; x <= 1 OR x >= 0; x <= 1 AND x <= 0; x <= 0; x <= 1 AND x > 0; x <= 1 AND x > 0; x <= 1 OR x > 0; x <= 1 OR x > 0; x <= 0 OR x < 0; x <= 0; x >= 0 OR x > 0; x >= 0; x >= 0 OR x > 1; x >= 0; x <= 0 OR x >= 0; x <= 0 OR x >= 0; x <= 0 AND x >= 0; x <= 0 AND x >= 0; x < 1 AND x < 2; x < 1; x < 1 OR x < 2; x < 2; x < 2 AND x < 1; x < 1; x < 2 OR x < 1; x < 2; x < 1 AND x < 1; x < 1; x < 1 OR x < 1; x < 1; x <= 1 AND x < 1; x < 1; x <= 1 OR x < 1; x <= 1; x < 1 AND x <= 1; x < 1; x < 1 OR x <= 1; x <= 1; x > 1 AND x > 2; x > 2; x > 1 OR x > 2; x > 1; x > 2 AND x > 1; x > 2; x > 2 OR x > 1; x > 1; x > 1 AND x > 1; x > 1; x > 1 OR x > 1; x > 1; x >= 1 AND x > 1; x > 1; x >= 1 OR x > 1; x >= 1; x > 1 AND x >= 1; x > 1; x > 1 OR x >= 1; x >= 1; x > 1 AND x >= 2; x >= 2; x > 1 OR x >= 2; x > 1; x > 1 AND x >= 2 AND x > 3 AND x > 0; x > 3; (x > 1 AND x >= 2 AND x > 3 AND x > 0) OR x > 0; x > 0; x > 1 AND x < 2 AND x > 3; FALSE; x > 1 AND x < 1; FALSE; x < 2 AND x > 1; x < 2 AND x > 1; x = 1 AND x < 1; FALSE; x = 1 AND x < 1.1; x = 1; x = 1 AND x <= 1; x = 1; x = 1 AND x <= 0.9; FALSE; x = 1 AND x > 0.9; x = 1; x = 1 AND x > 1; FALSE; x = 1 AND x >= 1; x = 1; x = 1 AND x >= 2; FALSE; x = 1 AND x <> 2; x = 1; x <> 1 AND x = 1; FALSE; x 
BETWEEN 0 AND 5 AND x > 3; x <= 5 AND x > 3; x > 3 AND 5 > x AND x BETWEEN 0 AND 10; x < 5 AND x > 3; x > 3 AND 5 < x AND x BETWEEN 9 AND 10; x <= 10 AND x >= 9; NOT x BETWEEN 0 AND 1; x < 0 OR x > 1; 1 < x AND 3 < x; x > 3; 'a' < 'b'; TRUE; x = 2018 OR x <> 2018; x <> 2018 OR x = 2018; t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y; t0.x = t1.x AND t0.y < t1.y AND t0.y <= t1.y; 1 < x; x > 1; 1 <= x; x >= 1; 1 > x; x < 1; 1 >= x; x <= 1; 1 = x; x = 1; 1 <> x; x <> 1; NOT 1 < x; x <= 1; NOT 1 <= x; x < 1; NOT 1 > x; x >= 1; NOT 1 >= x; x > 1; NOT 1 = x; x <> 1; NOT 1 <> x; x = 1; x > CAST('2024-01-01' AS DATE) OR x > CAST('2023-12-31' AS DATE); x > CAST('2023-12-31' AS DATE); CAST(x AS DATE) > CAST('2024-01-01' AS DATE) OR CAST(x AS DATE) > CAST('2023-12-31' AS DATE); CAST(x AS DATE) > CAST('2023-12-31' AS DATE); FUN() > 0 OR FUN() > 1; FUN() > 0; RAND() > 0 OR RAND() > 1; RAND() > 0 OR RAND() > 1; CAST(1 AS UINT) >= 0; TRUE; CAST(-1 AS TINYINT) <= 0; TRUE; CAST(1 AS INT) = CAST(1 AS UINT); TRUE; CASE WHEN CAST(1 AS TINYINT) = 1 THEN FALSE ELSE TRUE END; FALSE; CAST(1 AS INT) + 1; CAST(1 AS INT) + 1; CAST(CAST(CAST(-1 AS INT) AS INT) AS INT) = -1; TRUE; CAST(-1 AS UINT) <= 0; CAST(-1 AS UINT) <= 0; CAST(-129 AS TINYINT) <= 0; CAST(-129 AS TINYINT) <= 0; CAST(256 AS UINT) >= 0; CAST(256 AS UINT) >= 0; CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1; CAST(CAST(CAST(-1 AS INT) AS UINT) AS INT) = 1; CAST(x AS TINYINT) = 1; CAST(x AS TINYINT) = 1; CAST(CAST(1 AS INT) AS BOOLEAN) = 1; CAST(CAST(1 AS INT) AS BOOLEAN) = 1; CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1; CAST(CAST(CAST(1 AS INT) AS BOOLEAN) AS INT) = 1; x > CAST('2023-01-01' AS DATE) AND x < CAST('2023-01-01' AS DATETIME); FALSE; -------------------------------------- -- COALESCE -------------------------------------- COALESCE(x); x; COALESCE(x, 1) = 2; NOT x IS NULL AND x = 2; # dialect: redshift COALESCE(x, 1) = 2; COALESCE(x, 1) = 2; 2 = COALESCE(x, 1); NOT x IS NULL AND x = 2; COALESCE(x, 1, 1) = 1 + 1; 
NOT x IS NULL AND x = 2; COALESCE(x, 1, 2) = 2; NOT x IS NULL AND x = 2; COALESCE(x, 3) <= 2; NOT x IS NULL AND x <= 2; COALESCE(x, 1) <> 2; x <> 2 OR x IS NULL; COALESCE(x, 1) <= 2; x <= 2 OR x IS NULL; COALESCE(x, 1) = 1; x = 1 OR x IS NULL; COALESCE(x, 1) IS NULL; FALSE; COALESCE(ROW() OVER (), 1) = 1; ROW() OVER () = 1 OR ROW() OVER () IS NULL; a AND b AND COALESCE(ROW() OVER (), 1) = 1; (ROW() OVER () = 1 OR ROW() OVER () IS NULL) AND a AND b; COALESCE(1, 2); 1; COALESCE(CAST(CAST('2023-01-01' AS TIMESTAMP) AS DATE), x); CAST(CAST('2023-01-01' AS TIMESTAMP) AS DATE); COALESCE(CAST(NULL AS DATE), x); COALESCE(CAST(NULL AS DATE), x); NOT COALESCE(x, 1) = 2 AND y = 3; (x <> 2 OR x IS NULL) AND y = 3; -------------------------------------- -- CONCAT -------------------------------------- CONCAT(x, y); CONCAT(x, y); CONCAT_WS(sep, x, y); CONCAT_WS(sep, x, y); CONCAT(x); CONCAT(x); CONCAT('a', 'b', 'c'); 'abc'; CONCAT('a', NULL); CONCAT('a', NULL); CONCAT_WS('-', 'a', 'b', 'c'); 'a-b-c'; CONCAT('a', x, y, 'b', 'c'); CONCAT('a', x, y, 'bc'); CONCAT_WS('-', 'a', x, y, 'b', 'c'); CONCAT_WS('-', 'a', x, y, 'b-c'); 'a' || 'b'; 'ab'; CONCAT_WS('-', 'a'); 'a'; CONCAT_WS('-', x, y); CONCAT_WS('-', x, y); CONCAT_WS('', x, y); CONCAT_WS('', x, y); CONCAT_WS('-', x); CONCAT_WS('-', x); CONCAT_WS(sep, 'a', 'b'); CONCAT_WS(sep, 'a', 'b'); 'a' || 'b' || x; 'ab' || x; CONCAT(a, b) IN (SELECT * FROM foo WHERE cond); CONCAT(a, b) IN (SELECT * FROM foo WHERE cond); -------------------------------------- -- DATE_TRUNC -------------------------------------- DATE_TRUNC('week', CAST('2023-12-15' AS DATE)); CAST('2023-12-11' AS DATE); DATE_TRUNC('week', CAST('2023-12-16' AS DATE)); CAST('2023-12-11' AS DATE); # dialect: bigquery DATE_TRUNC(CAST('2023-12-15' AS DATE), WEEK); CAST('2023-12-10' AS DATE); # dialect: bigquery DATE_TRUNC(CAST('2023-10-01' AS TIMESTAMP), QUARTER); CAST('2023-10-01 00:00:00' AS TIMESTAMP); # dialect: bigquery DATE_TRUNC(CAST('2023-12-16' AS DATE), WEEK); 
CAST('2023-12-10' AS DATE); DATE_TRUNC('year', x) = CAST('2021-01-01' AS DATE); x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); # dialect: bigquery DATE_TRUNC(x, year) = CAST('2021-01-01' AS TIMESTAMP); x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); DATE_TRUNC('quarter', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-04-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); # dialect: bigquery DATE_TRUNC(x, quarter) = CAST('2021-01-01' AS TIMESTAMP); x < CAST('2021-04-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); DATE_TRUNC('month', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-02-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); # dialect: bigquery DATE_TRUNC(x, month) = CAST('2021-01-01' AS TIMESTAMP); x < CAST('2021-02-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); DATE_TRUNC('week', x) = CAST('2021-01-04' AS DATE); x < CAST('2021-01-11' AS DATE) AND x >= CAST('2021-01-04' AS DATE); DATE_TRUNC('day', x) = CAST('2021-01-01' AS DATE); x < CAST('2021-01-02' AS DATE) AND x >= CAST('2021-01-01' AS DATE); # dialect: bigquery DATE_TRUNC(x, DAY) = CAST('2021-01-01' AS TIMESTAMP); x < CAST('2021-01-02 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); CAST('2021-01-01' AS DATE) = DATE_TRUNC('year', x); x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); # dialect: bigquery CAST('2021-01-01' AS TIMESTAMP) = DATE_TRUNC(x, year); x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP); -- Always false, except for nulls DATE_TRUNC('quarter', x) = CAST('2021-01-02' AS DATE); DATE_TRUNC('QUARTER', x) = CAST('2021-01-02' AS DATE); DATE_TRUNC('year', x) <> CAST('2021-01-01' AS DATE); FALSE; -- Always true, except for nulls DATE_TRUNC('year', x) <> CAST('2021-01-02' AS DATE); DATE_TRUNC('YEAR', x) <> CAST('2021-01-02' AS DATE); DATE_TRUNC('year', x) <= CAST('2021-01-01' AS 
DATE); x < CAST('2022-01-01' AS DATE); # dialect: bigquery DATE_TRUNC(x, year) <= CAST('2021-01-01' AS TIMESTAMP); x < CAST('2022-01-01 00:00:00' AS TIMESTAMP); DATE_TRUNC('year', x) <= CAST('2021-01-02' AS DATE); x < CAST('2022-01-01' AS DATE); CAST('2021-01-01' AS DATE) >= DATE_TRUNC('year', x); x < CAST('2022-01-01' AS DATE); # dialect: bigquery CAST('2021-01-01' AS TIMESTAMP) >= DATE_TRUNC(x, year); x < CAST('2022-01-01 00:00:00' AS TIMESTAMP); DATE_TRUNC('year', x) < CAST('2021-01-01' AS DATE); x < CAST('2021-01-01' AS DATE); DATE_TRUNC('year', x) < CAST('2021-01-02' AS DATE); x < CAST('2022-01-01' AS DATE); DATE_TRUNC('year', x) >= CAST('2021-01-01' AS DATE); x >= CAST('2021-01-01' AS DATE); DATE_TRUNC('year', x) >= CAST('2021-01-02' AS DATE); x >= CAST('2022-01-01' AS DATE); DATE_TRUNC('year', x) > CAST('2021-01-01' AS DATE); x >= CAST('2022-01-01' AS DATE); DATE_TRUNC('year', x) > CAST('2021-01-02' AS DATE); x >= CAST('2022-01-01' AS DATE); DATE_TRUNC('year', x) > TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE('2021-01-02')); x >= CAST('2022-01-01' AS DATE); DATE_TRUNC('year', x) > TS_OR_DS_TO_DATE(TS_OR_DS_TO_DATE('2021-01-02', '%Y')); DATE_TRUNC('YEAR', x) > CAST(STR_TO_TIME('2021-01-02', '%Y') AS DATE); -- right is not a date DATE_TRUNC('year', x) <> '2021-01-02'; DATE_TRUNC('YEAR', x) <> '2021-01-02'; DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2023-01-01' AS DATE)); (x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE)) OR (x < CAST('2024-01-01' AS DATE) AND x >= CAST('2023-01-01' AS DATE)); # dialect: bigquery DATE_TRUNC(x, year) IN (CAST('2021-01-01' AS TIMESTAMP), CAST('2023-01-01' AS TIMESTAMP)); (x < CAST('2022-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2021-01-01 00:00:00' AS TIMESTAMP)) OR (x < CAST('2024-01-01 00:00:00' AS TIMESTAMP) AND x >= CAST('2023-01-01 00:00:00' AS TIMESTAMP)); -- merge ranges DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2022-01-01' AS DATE)); x < CAST('2023-01-01' AS DATE) AND x >= 
CAST('2021-01-01' AS DATE); -- one of the values will always be false DATE_TRUNC('year', x) IN (CAST('2021-01-01' AS DATE), CAST('2022-01-02' AS DATE)); x < CAST('2022-01-01' AS DATE) AND x >= CAST('2021-01-01' AS DATE); TIMESTAMP_TRUNC(x, YEAR) = CAST('2021-01-01' AS DATETIME); x < CAST('2022-01-01 00:00:00' AS DATETIME) AND x >= CAST('2021-01-01 00:00:00' AS DATETIME); -- right side is not a date literal DATE_TRUNC('day', x) = CAST(y AS DATE); CAST(y AS DATE) = DATE_TRUNC('DAY', x); -- nested cast DATE_TRUNC('day', x) = CAST(CAST('2021-01-01 01:02:03' AS DATETIME) AS DATE); x < CAST('2021-01-02' AS DATE) AND x >= CAST('2021-01-01' AS DATE); TIMESTAMP_TRUNC(x, YEAR) = CAST(CAST('2021-01-01 01:02:03' AS DATE) AS DATETIME); x < CAST('2022-01-01 00:00:00' AS DATETIME) AND x >= CAST('2021-01-01 00:00:00' AS DATETIME); DATE_TRUNC('day', CAST(x AS DATE)) <= CAST('2021-01-01 01:02:03' AS TIMESTAMP); CAST(x AS DATE) < CAST('2021-01-02 01:02:03' AS TIMESTAMP); -------------------------------------- -- EQUALITY -------------------------------------- x + 1 = 3; x = 2; 1 + x = 3; x = 2; 3 = x + 1; x = 2; x - 1 = 3; x = 4; x + 1 > 3; x > 2; x + 1 >= 3; x >= 2; x + 1 <= 3; x <= 2; x + 1 <= 3; x <= 2; x + 1 <> 3; x <> 2; 1 + x + 1 = 3 + 1; x = 2; x - INTERVAL 1 DAY = CAST('2021-01-01' AS DATE); x = CAST('2021-01-02' AS DATE); x - INTERVAL 1 DAY = TS_OR_DS_TO_DATE('2021-01-01 00:00:01'); x = CAST('2021-01-02' AS DATE); x - INTERVAL 1 HOUR > CAST('2021-01-01' AS DATETIME); x > CAST('2021-01-01 01:00:00' AS DATETIME); DATETIME_ADD(x, 1, HOUR) < CAST('2021-01-01' AS DATETIME); x < CAST('2020-12-31 23:00:00' AS DATETIME); DATETIME_SUB(x, 1, DAY) >= CAST('2021-01-01' AS DATETIME); x >= CAST('2021-01-02 00:00:00' AS DATETIME); DATE_ADD(x, 1, DAY) <= CAST('2021-01-01' AS DATE); x <= CAST('2020-12-31' AS DATE); DATE_SUB(x, 1, DAY) <> CAST('2021-01-01' AS DATE); x <> CAST('2021-01-02' AS DATE); DATE_ADD(DATE_ADD(DATE_TRUNC('week', DATE_SUB(x, 1, DAY)), 1, DAY), 1, YEAR) < 
CAST('2021-01-08' AS DATE); x < CAST('2020-01-14' AS DATE); x - INTERVAL '1' day = CAST(y AS DATE); CAST(y AS DATE) = x - INTERVAL '1' DAY; -------------------------------------- -- Constant Propagation -------------------------------------- x = 5 AND y = x; x = 5 AND y = 5; 5 = x AND y = x; x = 5 AND y = 5; x = 5 OR y = x; x = 5 OR x = y; (x = 5 AND y = x) OR y = 1; (x = 5 AND y = 5) OR y = 1; t.x = 5 AND y = x; t.x = 5 AND x = y; t.x = 'a' AND y = CONCAT_WS('-', t.x, 'b'); t.x = 'a' AND y = 'a-b'; x = 5 AND y = x AND y + 1 < 5; FALSE; x = 5 AND x = 6; FALSE; x = 5 AND (y = x OR z = 1); x = 5 AND (x = y OR z = 1); x = 5 AND x + 3 = 8; x = 5; x = 5 AND (SELECT x FROM t WHERE y = 1); (SELECT x FROM t WHERE y = 1) AND x = 5; x = 1 AND y > 0 AND (SELECT z = 5 FROM t WHERE y = 1); (SELECT z = 5 FROM t WHERE y = 1) AND x = 1 AND y > 0; x = 1 AND x = y AND (SELECT z FROM t WHERE a AND (b OR c)); (SELECT z FROM t WHERE a AND (b OR c)) AND x = 1 AND y = 1; t1.a = 39 AND t2.b = t1.a AND t3.c = t2.b; t1.a = 39 AND t2.b = 39 AND t3.c = 39; x = 1 AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; x = 1; x = 1 AND IF(x = 5, FALSE, TRUE); x = 1; x = 1 AND CASE x WHEN 5 THEN FALSE ELSE TRUE END; x = 1; x = y AND CASE WHEN x = 5 THEN FALSE ELSE TRUE END; CASE WHEN x = 5 THEN FALSE ELSE TRUE END AND x = y; x = 1 AND CASE WHEN y = 5 THEN x = z END; CASE WHEN y = 5 THEN z = 1 END AND x = 1; -------------------------------------- -- Simplify Conditionals -------------------------------------- IF(TRUE, x, y); x; IF(FALSE, x, y); y; IF(FALSE, x); NULL; IF(NULL, x, y); y; IF(cond, x, y); CASE WHEN cond THEN x ELSE y END; CASE WHEN TRUE THEN x ELSE y END; x; CASE WHEN FALSE THEN x ELSE y END; y; CASE WHEN FALSE THEN x WHEN FALSE THEN y WHEN TRUE THEN z END; z; CASE NULL WHEN NULL THEN x ELSE y END; y; CASE 4 WHEN 1 THEN x WHEN 2 THEN y WHEN 3 THEN z ELSE w END; w; CASE 4 WHEN 1 THEN x WHEN 2 THEN y WHEN 3 THEN z WHEN 4 THEN w END; w; CASE WHEN value = 1 THEN x ELSE y END; CASE WHEN value = 1 
THEN x ELSE y END; CASE WHEN FALSE THEN x END; NULL; CASE 1 WHEN 1 + 1 THEN x END; NULL; CASE WHEN cond THEN x ELSE y END; CASE WHEN cond THEN x ELSE y END; CASE WHEN cond THEN x END; CASE WHEN cond THEN x END; CASE x WHEN y THEN z ELSE w END; CASE WHEN x = y THEN z ELSE w END; CASE x WHEN y THEN z END; CASE WHEN x = y THEN z END; CASE x1 + x2 WHEN x3 THEN x4 WHEN x5 + x6 THEN x7 ELSE x8 END; CASE WHEN x3 = (x1 + x2) THEN x4 WHEN (x1 + x2) = (x5 + x6) THEN x7 ELSE x8 END; -------------------------------------- -- Simplify STARTSWITH -------------------------------------- STARTS_WITH('foo', 'f'); TRUE; STARTS_WITH('foo', 'g'); FALSE; STARTS_WITH('', 'f'); FALSE; STARTS_WITH('', ''); TRUE; STARTS_WITH('foo', ''); TRUE; STARTS_WITH(NULL, y); STARTS_WITH(NULL, y); STARTS_WITH(x, y); STARTS_WITH(x, y); STARTS_WITH('x', y); STARTS_WITH('x', y); STARTS_WITH(x, 'y'); STARTS_WITH(x, 'y'); -------------------------------------- -- Simplify NOT -------------------------------------- SELECT NOT(NOT(a)) FROM x; SELECT NOT NOT a FROM x; SELECT NOT(NOT(NOT(NOT t_bool.a))) FROM t_bool; SELECT t_bool.a FROM t_bool; # dialect: mysql SELECT NOT(NOT(NOT(NOT t_bool.a))) FROM t_bool; SELECT NOT NOT NOT NOT t_bool.a FROM t_bool; # dialect: sqlite SELECT NOT(NOT(NOT(NOT t_bool.a))) FROM t_bool; SELECT NOT NOT NOT NOT t_bool.a FROM t_bool; # dialect: mysql WITH t0 AS (SELECT 1 AS a, 'foo' AS p) SELECT NOT(NOT(CASE WHEN t0.a > 1 THEN t0.a ELSE t0.p END)) AS res FROM t0; WITH t0 AS (SELECT 1 AS a, 'foo' AS p) SELECT NOT NOT CASE WHEN t0.a > 1 THEN t0.a ELSE t0.p END AS res FROM t0; # dialect: sqlite WITH t0 AS (SELECT 1 AS a, 'foo' AS p) SELECT NOT (NOT(CASE WHEN t0.a > 1 THEN t0.a ELSE t0.p END)) AS res FROM t0; WITH t0 AS (SELECT 1 AS a, 'foo' AS p) SELECT NOT NOT CASE WHEN t0.a > 1 THEN t0.a ELSE t0.p END AS res FROM t0; -------------------------------------- -- Simplify complements -------------------------------------- TRUE OR NOT TRUE; TRUE; TRUE AND NOT TRUE; FALSE; 'a' OR NOT 'a'; 
TRUE; 'a' AND NOT 'a'; FALSE; 100 OR NOT 100; TRUE; 100 AND NOT 100; FALSE; NULL OR NOT NULL; NULL AND TRUE; NULL AND NOT NULL; NULL AND TRUE; NULL OR (NULL AND TRUE); NULL AND TRUE; SELECT IF(NULL = NULL, 1, 100); SELECT 100; # dialect: snowflake SELECT * FROM o ASOF JOIN e MATCH_CONDITION (o.observed_date >= e.metric_date) ON o.id = e.id; SELECT * FROM o ASOF JOIN e MATCH_CONDITION (o.observed_date >= e.metric_date) ON e.id = o.id; ================================================ FILE: tests/fixtures/optimizer/tpc-ds/tpc-ds.sql ================================================ -------------------------------------- -- TPC-DS 1 -------------------------------------- # execute: true WITH customer_total_return AS (SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS ctr_store_sk, Sum(sr_return_amt) AS ctr_total_return FROM store_returns, date_dim WHERE sr_returned_date_sk = d_date_sk AND d_year = 2001 GROUP BY sr_customer_sk, sr_store_sk) SELECT c_customer_id FROM customer_total_return ctr1, store, customer WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2 FROM customer_total_return ctr2 WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) AND s_store_sk = ctr1.ctr_store_sk AND s_state = 'TN' AND ctr1.ctr_customer_sk = c_customer_sk ORDER BY c_customer_id LIMIT 100; WITH "customer_total_return" AS ( SELECT "store_returns"."sr_customer_sk" AS "ctr_customer_sk", "store_returns"."sr_store_sk" AS "ctr_store_sk", SUM("store_returns"."sr_return_amt") AS "ctr_total_return" FROM "store_returns" AS "store_returns" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_returns"."sr_returned_date_sk" AND "date_dim"."d_year" = 2001 GROUP BY "store_returns"."sr_customer_sk", "store_returns"."sr_store_sk" ), "_u_0" AS ( SELECT AVG("ctr2"."ctr_total_return") * 1.2 AS "_col_0", "ctr2"."ctr_store_sk" AS "_u_1" FROM "customer_total_return" AS "ctr2" GROUP BY "ctr2"."ctr_store_sk" ) SELECT "customer"."c_customer_id" AS "c_customer_id" FROM "customer_total_return" 
AS "ctr1" JOIN "store" AS "store" ON "ctr1"."ctr_store_sk" = "store"."s_store_sk" AND "store"."s_state" = 'TN' JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "ctr1"."ctr_store_sk" WHERE "_u_0"."_col_0" < "ctr1"."ctr_total_return" ORDER BY "c_customer_id" LIMIT 100; -------------------------------------- -- TPC-DS 2 -------------------------------------- # execute: true WITH wscs AS (SELECT sold_date_sk, sales_price FROM (SELECT ws_sold_date_sk sold_date_sk, ws_ext_sales_price sales_price FROM web_sales) UNION ALL (SELECT cs_sold_date_sk sold_date_sk, cs_ext_sales_price sales_price FROM catalog_sales)), wswscs AS (SELECT d_week_seq, Sum(CASE WHEN ( d_day_name = 'Sunday' ) THEN sales_price ELSE NULL END) sun_sales, Sum(CASE WHEN ( d_day_name = 'Monday' ) THEN sales_price ELSE NULL END) mon_sales, Sum(CASE WHEN ( d_day_name = 'Tuesday' ) THEN sales_price ELSE NULL END) tue_sales, Sum(CASE WHEN ( d_day_name = 'Wednesday' ) THEN sales_price ELSE NULL END) wed_sales, Sum(CASE WHEN ( d_day_name = 'Thursday' ) THEN sales_price ELSE NULL END) thu_sales, Sum(CASE WHEN ( d_day_name = 'Friday' ) THEN sales_price ELSE NULL END) fri_sales, Sum(CASE WHEN ( d_day_name = 'Saturday' ) THEN sales_price ELSE NULL END) sat_sales FROM wscs, date_dim WHERE d_date_sk = sold_date_sk GROUP BY d_week_seq) SELECT d_week_seq1, Round(sun_sales1 / sun_sales2, 2) AS "_col_1", Round(mon_sales1 / mon_sales2, 2) AS "_col_2", Round(tue_sales1 / tue_sales2, 2) AS "_col_3", Round(wed_sales1 / wed_sales2, 2) AS "_col_4", Round(thu_sales1 / thu_sales2, 2) AS "_col_5", Round(fri_sales1 / fri_sales2, 2) AS "_col_6", Round(sat_sales1 / sat_sales2, 2) AS "_col_7" FROM (SELECT wswscs.d_week_seq d_week_seq1, sun_sales sun_sales1, mon_sales mon_sales1, tue_sales tue_sales1, wed_sales wed_sales1, thu_sales thu_sales1, fri_sales fri_sales1, sat_sales sat_sales1 FROM wswscs, date_dim WHERE date_dim.d_week_seq = wswscs.d_week_seq 
AND d_year = 1998) y, (SELECT wswscs.d_week_seq d_week_seq2, sun_sales sun_sales2, mon_sales mon_sales2, tue_sales tue_sales2, wed_sales wed_sales2, thu_sales thu_sales2, fri_sales fri_sales2, sat_sales sat_sales2 FROM wswscs, date_dim WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 1998 + 1) z WHERE d_week_seq1 = d_week_seq2 - 53 ORDER BY d_week_seq1; WITH "wscs" AS ( SELECT "web_sales"."ws_sold_date_sk" AS "sold_date_sk", "web_sales"."ws_ext_sales_price" AS "sales_price" FROM "web_sales" AS "web_sales" UNION ALL ( SELECT "catalog_sales"."cs_sold_date_sk" AS "sold_date_sk", "catalog_sales"."cs_ext_sales_price" AS "sales_price" FROM "catalog_sales" AS "catalog_sales" ) ), "wswscs" AS ( SELECT "date_dim"."d_week_seq" AS "d_week_seq", SUM( CASE WHEN "date_dim"."d_day_name" = 'Sunday' THEN "wscs"."sales_price" ELSE NULL END ) AS "sun_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Monday' THEN "wscs"."sales_price" ELSE NULL END ) AS "mon_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Tuesday' THEN "wscs"."sales_price" ELSE NULL END ) AS "tue_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Wednesday' THEN "wscs"."sales_price" ELSE NULL END ) AS "wed_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Thursday' THEN "wscs"."sales_price" ELSE NULL END ) AS "thu_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Friday' THEN "wscs"."sales_price" ELSE NULL END ) AS "fri_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Saturday' THEN "wscs"."sales_price" ELSE NULL END ) AS "sat_sales" FROM "wscs" AS "wscs" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "wscs"."sold_date_sk" GROUP BY "date_dim"."d_week_seq" ), "z" AS ( SELECT "wswscs"."d_week_seq" AS "d_week_seq2", "wswscs"."sun_sales" AS "sun_sales2", "wswscs"."mon_sales" AS "mon_sales2", "wswscs"."tue_sales" AS "tue_sales2", "wswscs"."wed_sales" AS "wed_sales2", "wswscs"."thu_sales" AS "thu_sales2", "wswscs"."fri_sales" AS "fri_sales2", "wswscs"."sat_sales" AS "sat_sales2" FROM "wswscs" AS 
"wswscs" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_week_seq" = "wswscs"."d_week_seq" AND "date_dim"."d_year" = 1999 ) SELECT "wswscs"."d_week_seq" AS "d_week_seq1", ROUND("wswscs"."sun_sales" / "z"."sun_sales2", 2) AS "_col_1", ROUND("wswscs"."mon_sales" / "z"."mon_sales2", 2) AS "_col_2", ROUND("wswscs"."tue_sales" / "z"."tue_sales2", 2) AS "_col_3", ROUND("wswscs"."wed_sales" / "z"."wed_sales2", 2) AS "_col_4", ROUND("wswscs"."thu_sales" / "z"."thu_sales2", 2) AS "_col_5", ROUND("wswscs"."fri_sales" / "z"."fri_sales2", 2) AS "_col_6", ROUND("wswscs"."sat_sales" / "z"."sat_sales2", 2) AS "_col_7" FROM "wswscs" AS "wswscs" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_week_seq" = "wswscs"."d_week_seq" AND "date_dim"."d_year" = 1998 JOIN "z" AS "z" ON "wswscs"."d_week_seq" = "z"."d_week_seq2" - 53 ORDER BY "d_week_seq1"; -------------------------------------- -- TPC-DS 3 -------------------------------------- # execute: true SELECT dt.d_year, item.i_brand_id brand_id, item.i_brand brand, Sum(ss_ext_discount_amt) sum_agg FROM date_dim dt, store_sales, item WHERE dt.d_date_sk = store_sales.ss_sold_date_sk AND store_sales.ss_item_sk = item.i_item_sk AND item.i_manufact_id = 427 AND dt.d_moy = 11 GROUP BY dt.d_year, item.i_brand, item.i_brand_id ORDER BY dt.d_year, sum_agg DESC, brand_id LIMIT 100; SELECT "dt"."d_year" AS "d_year", "item"."i_brand_id" AS "brand_id", "item"."i_brand" AS "brand", SUM("store_sales"."ss_ext_discount_amt") AS "sum_agg" FROM "date_dim" AS "dt" JOIN "store_sales" AS "store_sales" ON "dt"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manufact_id" = 427 WHERE "dt"."d_moy" = 11 GROUP BY "dt"."d_year", "item"."i_brand", "item"."i_brand_id" ORDER BY "d_year", "sum_agg" DESC, "brand_id" LIMIT 100; -------------------------------------- -- TPC-DS 4 -------------------------------------- # execute: true WITH year_total AS (SELECT c_customer_id customer_id, 
c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag , c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, Sum(( ( ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt ) + ss_ext_sales_price ) / 2) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag, c_birth_country customer_birth_country , c_login customer_login, c_email_address customer_email_address , d_year dyear , Sum(( ( ( cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt ) + cs_ext_sales_price ) / 2 )) year_total, 'c' sale_type FROM customer, catalog_sales, date_dim WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag, c_birth_country customer_birth_country , c_login customer_login, c_email_address customer_email_address , d_year dyear , Sum(( ( ( ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt ) + ws_ext_sales_price ) / 2 )) year_total, 'w' sale_type FROM customer, web_sales, date_dim WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year) SELECT t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, 
t_s_secyear.customer_preferred_cust_flag FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id AND t_s_firstyear.customer_id = t_c_secyear.customer_id AND t_s_firstyear.customer_id = t_c_firstyear.customer_id AND t_s_firstyear.customer_id = t_w_firstyear.customer_id AND t_s_firstyear.customer_id = t_w_secyear.customer_id AND t_s_firstyear.sale_type = 's' AND t_c_firstyear.sale_type = 'c' AND t_w_firstyear.sale_type = 'w' AND t_s_secyear.sale_type = 's' AND t_c_secyear.sale_type = 'c' AND t_w_secyear.sale_type = 'w' AND t_s_firstyear.dyear = 2001 AND t_s_secyear.dyear = 2001 + 1 AND t_c_firstyear.dyear = 2001 AND t_c_secyear.dyear = 2001 + 1 AND t_w_firstyear.dyear = 2001 AND t_w_secyear.dyear = 2001 + 1 AND t_s_firstyear.year_total > 0 AND t_c_firstyear.year_total > 0 AND t_w_firstyear.year_total > 0 AND CASE WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total ELSE NULL END > CASE WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total ELSE NULL END AND CASE WHEN t_c_firstyear.year_total > 0 THEN t_c_secyear.year_total / t_c_firstyear.year_total ELSE NULL END > CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE NULL END ORDER BY t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_preferred_cust_flag LIMIT 100; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_customer_id" AS "c_customer_id", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name", "customer"."c_preferred_cust_flag" AS "c_preferred_cust_flag", "customer"."c_birth_country" AS "c_birth_country", "customer"."c_login" AS "c_login", "customer"."c_email_address" AS "c_email_address" FROM "customer" AS 
"customer" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" ), "year_total" AS ( SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "customer"."c_preferred_cust_flag" AS "customer_preferred_cust_flag", "date_dim"."d_year" AS "dyear", SUM( ( ( "store_sales"."ss_ext_list_price" - "store_sales"."ss_ext_wholesale_cost" - "store_sales"."ss_ext_discount_amt" ) + "store_sales"."ss_ext_sales_price" ) / 2 ) AS "year_total", 's' AS "sale_type" FROM "customer_2" AS "customer" JOIN "store_sales" AS "store_sales" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "customer"."c_preferred_cust_flag", "customer"."c_birth_country", "customer"."c_login", "customer"."c_email_address", "date_dim"."d_year" UNION ALL SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "customer"."c_preferred_cust_flag" AS "customer_preferred_cust_flag", "date_dim"."d_year" AS "dyear", SUM( ( ( ( "catalog_sales"."cs_ext_list_price" - "catalog_sales"."cs_ext_wholesale_cost" - "catalog_sales"."cs_ext_discount_amt" ) + "catalog_sales"."cs_ext_sales_price" ) / 2 ) ) AS "year_total", 'c' AS "sale_type" FROM "customer_2" AS "customer" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "customer"."c_preferred_cust_flag", "customer"."c_birth_country", "customer"."c_login", "customer"."c_email_address", 
"date_dim"."d_year" UNION ALL SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "customer"."c_preferred_cust_flag" AS "customer_preferred_cust_flag", "date_dim"."d_year" AS "dyear", SUM( ( ( ( "web_sales"."ws_ext_list_price" - "web_sales"."ws_ext_wholesale_cost" - "web_sales"."ws_ext_discount_amt" ) + "web_sales"."ws_ext_sales_price" ) / 2 ) ) AS "year_total", 'w' AS "sale_type" FROM "customer_2" AS "customer" JOIN "web_sales" AS "web_sales" ON "customer"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "customer"."c_preferred_cust_flag", "customer"."c_birth_country", "customer"."c_login", "customer"."c_email_address", "date_dim"."d_year" ) SELECT "t_s_secyear"."customer_id" AS "customer_id", "t_s_secyear"."customer_first_name" AS "customer_first_name", "t_s_secyear"."customer_last_name" AS "customer_last_name", "t_s_secyear"."customer_preferred_cust_flag" AS "customer_preferred_cust_flag" FROM "year_total" AS "t_s_firstyear" JOIN "year_total" AS "t_c_firstyear" ON "t_c_firstyear"."customer_id" = "t_s_firstyear"."customer_id" AND "t_c_firstyear"."dyear" = 2001 AND "t_c_firstyear"."sale_type" = 'c' AND "t_c_firstyear"."year_total" > 0 JOIN "year_total" AS "t_s_secyear" ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id" AND "t_s_secyear"."dyear" = 2002 AND "t_s_secyear"."sale_type" = 's' JOIN "year_total" AS "t_w_firstyear" ON "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id" AND "t_w_firstyear"."dyear" = 2001 AND "t_w_firstyear"."sale_type" = 'w' AND "t_w_firstyear"."year_total" > 0 JOIN "year_total" AS "t_c_secyear" ON "t_c_secyear"."customer_id" = "t_s_firstyear"."customer_id" AND "t_c_secyear"."dyear" = 2002 AND "t_c_secyear"."sale_type" = 'c' AND CASE WHEN 
"t_c_firstyear"."year_total" > 0 THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total" ELSE NULL END > CASE WHEN "t_s_firstyear"."year_total" > 0 THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total" ELSE NULL END JOIN "year_total" AS "t_w_secyear" ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id" AND "t_w_secyear"."dyear" = 2002 AND "t_w_secyear"."sale_type" = 'w' AND CASE WHEN "t_c_firstyear"."year_total" > 0 THEN "t_c_secyear"."year_total" / "t_c_firstyear"."year_total" ELSE NULL END > CASE WHEN "t_w_firstyear"."year_total" > 0 THEN "t_w_secyear"."year_total" / "t_w_firstyear"."year_total" ELSE NULL END WHERE "t_s_firstyear"."dyear" = 2001 AND "t_s_firstyear"."sale_type" = 's' AND "t_s_firstyear"."year_total" > 0 ORDER BY "t_s_secyear"."customer_id", "t_s_secyear"."customer_first_name", "t_s_secyear"."customer_last_name", "t_s_secyear"."customer_preferred_cust_flag" LIMIT 100; -------------------------------------- -- TPC-DS 5 -------------------------------------- WITH ssr AS ( SELECT s_store_id, Sum(sales_price) AS sales, Sum(profit) AS profit, Sum(return_amt) AS returns1, Sum(net_loss) AS profit_loss FROM ( SELECT ss_store_sk AS store_sk, ss_sold_date_sk AS date_sk, ss_ext_sales_price AS sales_price, ss_net_profit AS profit, Cast(0 AS DECIMAL(7,2)) AS return_amt, Cast(0 AS DECIMAL(7,2)) AS net_loss FROM store_sales UNION ALL SELECT sr_store_sk AS store_sk, sr_returned_date_sk AS date_sk, Cast(0 AS DECIMAL(7,2)) AS sales_price, Cast(0 AS DECIMAL(7,2)) AS profit, sr_return_amt AS return_amt, sr_net_loss AS net_loss FROM store_returns ) salesreturns, date_dim, store WHERE date_sk = d_date_sk AND d_date BETWEEN Cast('2002-08-22' AS DATE) AND ( Cast('2002-08-22' AS DATE) + INTERVAL '14' day) AND store_sk = s_store_sk GROUP BY s_store_id) , csr AS ( SELECT cp_catalog_page_id, sum(sales_price) AS sales, sum(profit) AS profit, sum(return_amt) AS returns1, sum(net_loss) AS profit_loss FROM ( SELECT cs_catalog_page_sk AS page_sk, 
cs_sold_date_sk AS date_sk, cs_ext_sales_price AS sales_price, cs_net_profit AS profit, cast(0 AS decimal(7,2)) AS return_amt, cast(0 AS decimal(7,2)) AS net_loss FROM catalog_sales UNION ALL SELECT cr_catalog_page_sk AS page_sk, cr_returned_date_sk AS date_sk, cast(0 AS decimal(7,2)) AS sales_price, cast(0 AS decimal(7,2)) AS profit, cr_return_amount AS return_amt, cr_net_loss AS net_loss FROM catalog_returns ) salesreturns, date_dim, catalog_page WHERE date_sk = d_date_sk AND d_date BETWEEN cast('2002-08-22' AS date) AND ( cast('2002-08-22' AS date) + INTERVAL '14' day) AND page_sk = cp_catalog_page_sk GROUP BY cp_catalog_page_id) , wsr AS ( SELECT web_site_id, sum(sales_price) AS sales, sum(profit) AS profit, sum(return_amt) AS returns1, sum(net_loss) AS profit_loss FROM ( SELECT ws_web_site_sk AS wsr_web_site_sk, ws_sold_date_sk AS date_sk, ws_ext_sales_price AS sales_price, ws_net_profit AS profit, cast(0 AS decimal(7,2)) AS return_amt, cast(0 AS decimal(7,2)) AS net_loss FROM web_sales UNION ALL SELECT ws_web_site_sk AS wsr_web_site_sk, wr_returned_date_sk AS date_sk, cast(0 AS decimal(7,2)) AS sales_price, cast(0 AS decimal(7,2)) AS profit, wr_return_amt AS return_amt, wr_net_loss AS net_loss FROM web_returns LEFT OUTER JOIN web_sales ON ( wr_item_sk = ws_item_sk AND wr_order_number = ws_order_number) ) salesreturns, date_dim, web_site WHERE date_sk = d_date_sk AND d_date BETWEEN cast('2002-08-22' AS date) AND ( cast('2002-08-22' AS date) + INTERVAL '14' day) AND wsr_web_site_sk = web_site_sk GROUP BY web_site_id) SELECT channel , id , sum(sales) AS sales , sum(returns1) AS returns1 , sum(profit) AS profit FROM ( SELECT 'store channel' AS channel , 'store' || s_store_id AS id , sales , returns1 , (profit - profit_loss) AS profit FROM ssr UNION ALL SELECT 'catalog channel' AS channel , 'catalog_page' || cp_catalog_page_id AS id , sales , returns1 , (profit - profit_loss) AS profit FROM csr UNION ALL SELECT 'web channel' AS channel , 'web_site' || web_site_id 
AS id , sales , returns1 , (profit - profit_loss) AS profit FROM wsr ) x GROUP BY rollup (channel, id) ORDER BY channel , id LIMIT 100; WITH "salesreturns" AS ( SELECT "store_sales"."ss_store_sk" AS "store_sk", "store_sales"."ss_sold_date_sk" AS "date_sk", "store_sales"."ss_ext_sales_price" AS "sales_price", "store_sales"."ss_net_profit" AS "profit", CAST(0 AS DECIMAL(7, 2)) AS "return_amt", CAST(0 AS DECIMAL(7, 2)) AS "net_loss" FROM "store_sales" AS "store_sales" UNION ALL SELECT "store_returns"."sr_store_sk" AS "store_sk", "store_returns"."sr_returned_date_sk" AS "date_sk", CAST(0 AS DECIMAL(7, 2)) AS "sales_price", CAST(0 AS DECIMAL(7, 2)) AS "profit", "store_returns"."sr_return_amt" AS "return_amt", "store_returns"."sr_net_loss" AS "net_loss" FROM "store_returns" AS "store_returns" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE CAST("date_dim"."d_date" AS DATE) <= CAST('2002-09-05' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-08-22' AS DATE) ), "ssr" AS ( SELECT "store"."s_store_id" AS "s_store_id", SUM("salesreturns"."sales_price") AS "sales", SUM("salesreturns"."profit") AS "profit", SUM("salesreturns"."return_amt") AS "returns1", SUM("salesreturns"."net_loss") AS "profit_loss" FROM "salesreturns" AS "salesreturns" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "salesreturns"."date_sk" JOIN "store" AS "store" ON "salesreturns"."store_sk" = "store"."s_store_sk" GROUP BY "store"."s_store_id" ), "salesreturns_2" AS ( SELECT "catalog_sales"."cs_catalog_page_sk" AS "page_sk", "catalog_sales"."cs_sold_date_sk" AS "date_sk", "catalog_sales"."cs_ext_sales_price" AS "sales_price", "catalog_sales"."cs_net_profit" AS "profit", CAST(0 AS DECIMAL(7, 2)) AS "return_amt", CAST(0 AS DECIMAL(7, 2)) AS "net_loss" FROM "catalog_sales" AS "catalog_sales" UNION ALL SELECT "catalog_returns"."cr_catalog_page_sk" AS "page_sk", "catalog_returns"."cr_returned_date_sk" 
AS "date_sk", CAST(0 AS DECIMAL(7, 2)) AS "sales_price", CAST(0 AS DECIMAL(7, 2)) AS "profit", "catalog_returns"."cr_return_amount" AS "return_amt", "catalog_returns"."cr_net_loss" AS "net_loss" FROM "catalog_returns" AS "catalog_returns" ), "csr" AS ( SELECT "catalog_page"."cp_catalog_page_id" AS "cp_catalog_page_id", SUM("salesreturns"."sales_price") AS "sales", SUM("salesreturns"."profit") AS "profit", SUM("salesreturns"."return_amt") AS "returns1", SUM("salesreturns"."net_loss") AS "profit_loss" FROM "salesreturns_2" AS "salesreturns" JOIN "catalog_page" AS "catalog_page" ON "catalog_page"."cp_catalog_page_sk" = "salesreturns"."page_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "salesreturns"."date_sk" GROUP BY "catalog_page"."cp_catalog_page_id" ), "salesreturns_3" AS ( SELECT "web_sales"."ws_web_site_sk" AS "wsr_web_site_sk", "web_sales"."ws_sold_date_sk" AS "date_sk", "web_sales"."ws_ext_sales_price" AS "sales_price", "web_sales"."ws_net_profit" AS "profit", CAST(0 AS DECIMAL(7, 2)) AS "return_amt", CAST(0 AS DECIMAL(7, 2)) AS "net_loss" FROM "web_sales" AS "web_sales" UNION ALL SELECT "web_sales"."ws_web_site_sk" AS "wsr_web_site_sk", "web_returns"."wr_returned_date_sk" AS "date_sk", CAST(0 AS DECIMAL(7, 2)) AS "sales_price", CAST(0 AS DECIMAL(7, 2)) AS "profit", "web_returns"."wr_return_amt" AS "return_amt", "web_returns"."wr_net_loss" AS "net_loss" FROM "web_returns" AS "web_returns" LEFT JOIN "web_sales" AS "web_sales" ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk" AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number" ), "wsr" AS ( SELECT "web_site"."web_site_id" AS "web_site_id", SUM("salesreturns"."sales_price") AS "sales", SUM("salesreturns"."profit") AS "profit", SUM("salesreturns"."return_amt") AS "returns1", SUM("salesreturns"."net_loss") AS "profit_loss" FROM "salesreturns_3" AS "salesreturns" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "salesreturns"."date_sk" JOIN "web_site" AS "web_site" 
ON "salesreturns"."wsr_web_site_sk" = "web_site"."web_site_sk" GROUP BY "web_site"."web_site_id" ), "x" AS ( SELECT 'store channel' AS "channel", 'store' || "ssr"."s_store_id" AS "id", "ssr"."sales" AS "sales", "ssr"."returns1" AS "returns1", "ssr"."profit" - "ssr"."profit_loss" AS "profit" FROM "ssr" AS "ssr" UNION ALL SELECT 'catalog channel' AS "channel", 'catalog_page' || "csr"."cp_catalog_page_id" AS "id", "csr"."sales" AS "sales", "csr"."returns1" AS "returns1", "csr"."profit" - "csr"."profit_loss" AS "profit" FROM "csr" AS "csr" UNION ALL SELECT 'web channel' AS "channel", 'web_site' || "wsr"."web_site_id" AS "id", "wsr"."sales" AS "sales", "wsr"."returns1" AS "returns1", "wsr"."profit" - "wsr"."profit_loss" AS "profit" FROM "wsr" AS "wsr" ) SELECT "x"."channel" AS "channel", "x"."id" AS "id", SUM("x"."sales") AS "sales", SUM("x"."returns1") AS "returns1", SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY ROLLUP ( "x"."channel", "x"."id" ) ORDER BY "channel", "id" LIMIT 100; -------------------------------------- -- TPC-DS 6 -------------------------------------- # execute: true SELECT a.ca_state state, Count(*) cnt FROM customer_address a, customer c, store_sales s, date_dim d, item i WHERE a.ca_address_sk = c.c_current_addr_sk AND c.c_customer_sk = s.ss_customer_sk AND s.ss_sold_date_sk = d.d_date_sk AND s.ss_item_sk = i.i_item_sk AND d.d_month_seq = (SELECT DISTINCT ( d_month_seq ) FROM date_dim WHERE d_year = 1998 AND d_moy = 7) AND i.i_current_price > 1.2 * (SELECT Avg(j.i_current_price) FROM item j WHERE j.i_category = i.i_category) GROUP BY a.ca_state HAVING Count(*) >= 10 ORDER BY cnt LIMIT 100; WITH "_u_0" AS ( SELECT DISTINCT "date_dim"."d_month_seq" AS "d_month_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 7 AND "date_dim"."d_year" = 1998 ), "_u_1" AS ( SELECT AVG("j"."i_current_price") AS "_col_0", "j"."i_category" AS "_u_2" FROM "item" AS "j" GROUP BY "j"."i_category" ) SELECT "a"."ca_state" AS "state", COUNT(*) AS "cnt" 
FROM "customer_address" AS "a" JOIN "customer" AS "c" ON "a"."ca_address_sk" = "c"."c_current_addr_sk" JOIN "store_sales" AS "s" ON "c"."c_customer_sk" = "s"."ss_customer_sk" JOIN "date_dim" AS "d" ON "d"."d_date_sk" = "s"."ss_sold_date_sk" JOIN "item" AS "i" ON "i"."i_item_sk" = "s"."ss_item_sk" JOIN "_u_0" AS "_u_0" ON "_u_0"."d_month_seq" = "d"."d_month_seq" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."_u_2" = "i"."i_category" WHERE "i"."i_current_price" > 1.2 * "_u_1"."_col_0" GROUP BY "a"."ca_state" HAVING COUNT(*) >= 10 ORDER BY "cnt" LIMIT 100; -------------------------------------- -- TPC-DS 7 -------------------------------------- # execute: true SELECT i_item_id, Avg(ss_quantity) agg1, Avg(ss_list_price) agg2, Avg(ss_coupon_amt) agg3, Avg(ss_sales_price) agg4 FROM store_sales, customer_demographics, date_dim, item, promotion WHERE ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk AND ss_cdemo_sk = cd_demo_sk AND ss_promo_sk = p_promo_sk AND cd_gender = 'F' AND cd_marital_status = 'W' AND cd_education_status = '2 yr Degree' AND ( p_channel_email = 'N' OR p_channel_event = 'N' ) AND d_year = 1998 GROUP BY i_item_id ORDER BY i_item_id LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", AVG("store_sales"."ss_quantity") AS "agg1", AVG("store_sales"."ss_list_price") AS "agg2", AVG("store_sales"."ss_coupon_amt") AS "agg3", AVG("store_sales"."ss_sales_price") AS "agg4" FROM "store_sales" AS "store_sales" JOIN "customer_demographics" AS "customer_demographics" ON "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = '2 yr Degree' AND "customer_demographics"."cd_gender" = 'F' AND "customer_demographics"."cd_marital_status" = 'W' JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" = 1998 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "promotion" AS "promotion" ON ( "promotion"."p_channel_email" = 'N' OR 
"promotion"."p_channel_event" = 'N' ) AND "promotion"."p_promo_sk" = "store_sales"."ss_promo_sk" GROUP BY "item"."i_item_id" ORDER BY "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 8 -------------------------------------- SELECT s_store_name, Sum(ss_net_profit) FROM store_sales, date_dim, store, (SELECT ca_zip FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip FROM customer_address WHERE SUBSTRING(ca_zip, 1, 5) IN ( '67436', '26121', '38443', '63157', '68856', '19485', '86425', '26741', '70991', '60899', '63573', '47556', '56193', '93314', '87827', '62017', '85067', '95390', '48091', '10261', '81845', '41790', '42853', '24675', '12840', '60065', '84430', '57451', '24021', '91735', '75335', '71935', '34482', '56943', '70695', '52147', '56251', '28411', '86653', '23005', '22478', '29031', '34398', '15365', '42460', '33337', '59433', '73943', '72477', '74081', '74430', '64605', '39006', '11226', '49057', '97308', '42663', '18187', '19768', '43454', '32147', '76637', '51975', '11181', '45630', '33129', '45995', '64386', '55522', '26697', '20963', '35154', '64587', '49752', '66386', '30586', '59286', '13177', '66646', '84195', '74316', '36853', '32927', '12469', '11904', '36269', '17724', '55346', '12595', '53988', '65439', '28015', '63268', '73590', '29216', '82575', '69267', '13805', '91678', '79460', '94152', '14961', '15419', '48277', '62588', '55493', '28360', '14152', '55225', '18007', '53705', '56573', '80245', '71769', '57348', '36845', '13039', '17270', '22363', '83474', '25294', '43269', '77666', '15488', '99146', '64441', '43338', '38736', '62754', '48556', '86057', '23090', '38114', '66061', '18910', '84385', '23600', '19975', '27883', '65719', '19933', '32085', '49731', '40473', '27190', '46192', '23949', '44738', '12436', '64794', '68741', '15333', '24282', '49085', '31844', '71156', '48441', '17100', '98207', '44982', '20277', '71496', '96299', '37583', '22206', '89174', '30589', '61924', '53079', '10976', '13104', '42794', '54772', 
'15809', '56434', '39975', '13874', '30753', '77598', '78229', '59478', '12345', '55547', '57422', '42600', '79444', '29074', '29752', '21676', '32096', '43044', '39383', '37296', '36295', '63077', '16572', '31275', '18701', '40197', '48242', '27219', '49865', '84175', '30446', '25165', '13807', '72142', '70499', '70464', '71429', '18111', '70857', '29545', '36425', '52706', '36194', '42963', '75068', '47921', '74763', '90990', '89456', '62073', '88397', '73963', '75885', '62657', '12530', '81146', '57434', '25099', '41429', '98441', '48713', '52552', '31667', '14072', '13903', '44709', '85429', '58017', '38295', '44875', '73541', '30091', '12707', '23762', '62258', '33247', '78722', '77431', '14510', '35656', '72428', '92082', '35267', '43759', '24354', '90952', '11512', '21242', '22579', '56114', '32339', '52282', '41791', '24484', '95020', '28408', '99710', '11899', '43344', '72915', '27644', '62708', '74479', '17177', '32619', '12351', '91339', '31169', '57081', '53522', '16712', '34419', '71779', '44187', '46206', '96099', '61910', '53664', '12295', '31837', '33096', '10813', '63048', '31732', '79118', '73084', '72783', '84952', '46965', '77956', '39815', '32311', '75329', '48156', '30826', '49661', '13736', '92076', '74865', '88149', '92397', '52777', '68453', '32012', '21222', '52721', '24626', '18210', '42177', '91791', '75251', '82075', '44372', '45542', '20609', '60115', '17362', '22750', '90434', '31852', '54071', '33762', '14705', '40718', '56433', '30996', '40657', '49056', '23585', '66455', '41021', '74736', '72151', '37007', '21729', '60177', '84558', '59027', '93855', '60022', '86443', '19541', '86886', '30532', '39062', '48532', '34713', '52077', '22564', '64638', '15273', '31677', '36138', '62367', '60261', '80213', '42818', '25113', '72378', '69802', '69096', '55443', '28820', '13848', '78258', '37490', '30556', '77380', '28447', '44550', '26791', '70609', '82182', '33306', '43224', '22322', '86959', '68519', '14308', '46501', '81131', '34056', 
'61991', '19896', '87804', '65774', '92564' ) INTERSECT SELECT ca_zip FROM (SELECT SUBSTRING(ca_zip, 1, 5) ca_zip, Count(*) cnt FROM customer_address, customer WHERE ca_address_sk = c_current_addr_sk AND c_preferred_cust_flag = 'Y' GROUP BY ca_zip HAVING Count(*) > 10)A1)A2) V1 WHERE ss_store_sk = s_store_sk AND ss_sold_date_sk = d_date_sk AND d_qoy = 2 AND d_year = 2000 AND ( SUBSTRING(s_zip, 1, 2) = SUBSTRING(V1.ca_zip, 1, 2) ) GROUP BY s_store_name ORDER BY s_store_name LIMIT 100; WITH "a1" AS ( SELECT SUBSTRING("customer_address"."ca_zip", 1, 5) AS "ca_zip" FROM "customer_address" AS "customer_address" JOIN "customer" AS "customer" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" AND "customer"."c_preferred_cust_flag" = 'Y' GROUP BY "customer_address"."ca_zip" HAVING COUNT(*) > 10 ), "a2" AS ( SELECT SUBSTRING("customer_address"."ca_zip", 1, 5) AS "ca_zip" FROM "customer_address" AS "customer_address" WHERE SUBSTRING("customer_address"."ca_zip", 1, 5) IN ( '67436', '26121', '38443', '63157', '68856', '19485', '86425', '26741', '70991', '60899', '63573', '47556', '56193', '93314', '87827', '62017', '85067', '95390', '48091', '10261', '81845', '41790', '42853', '24675', '12840', '60065', '84430', '57451', '24021', '91735', '75335', '71935', '34482', '56943', '70695', '52147', '56251', '28411', '86653', '23005', '22478', '29031', '34398', '15365', '42460', '33337', '59433', '73943', '72477', '74081', '74430', '64605', '39006', '11226', '49057', '97308', '42663', '18187', '19768', '43454', '32147', '76637', '51975', '11181', '45630', '33129', '45995', '64386', '55522', '26697', '20963', '35154', '64587', '49752', '66386', '30586', '59286', '13177', '66646', '84195', '74316', '36853', '32927', '12469', '11904', '36269', '17724', '55346', '12595', '53988', '65439', '28015', '63268', '73590', '29216', '82575', '69267', '13805', '91678', '79460', '94152', '14961', '15419', '48277', '62588', '55493', '28360', '14152', '55225', '18007', '53705', 
'56573', '80245', '71769', '57348', '36845', '13039', '17270', '22363', '83474', '25294', '43269', '77666', '15488', '99146', '64441', '43338', '38736', '62754', '48556', '86057', '23090', '38114', '66061', '18910', '84385', '23600', '19975', '27883', '65719', '19933', '32085', '49731', '40473', '27190', '46192', '23949', '44738', '12436', '64794', '68741', '15333', '24282', '49085', '31844', '71156', '48441', '17100', '98207', '44982', '20277', '71496', '96299', '37583', '22206', '89174', '30589', '61924', '53079', '10976', '13104', '42794', '54772', '15809', '56434', '39975', '13874', '30753', '77598', '78229', '59478', '12345', '55547', '57422', '42600', '79444', '29074', '29752', '21676', '32096', '43044', '39383', '37296', '36295', '63077', '16572', '31275', '18701', '40197', '48242', '27219', '49865', '84175', '30446', '25165', '13807', '72142', '70499', '70464', '71429', '18111', '70857', '29545', '36425', '52706', '36194', '42963', '75068', '47921', '74763', '90990', '89456', '62073', '88397', '73963', '75885', '62657', '12530', '81146', '57434', '25099', '41429', '98441', '48713', '52552', '31667', '14072', '13903', '44709', '85429', '58017', '38295', '44875', '73541', '30091', '12707', '23762', '62258', '33247', '78722', '77431', '14510', '35656', '72428', '92082', '35267', '43759', '24354', '90952', '11512', '21242', '22579', '56114', '32339', '52282', '41791', '24484', '95020', '28408', '99710', '11899', '43344', '72915', '27644', '62708', '74479', '17177', '32619', '12351', '91339', '31169', '57081', '53522', '16712', '34419', '71779', '44187', '46206', '96099', '61910', '53664', '12295', '31837', '33096', '10813', '63048', '31732', '79118', '73084', '72783', '84952', '46965', '77956', '39815', '32311', '75329', '48156', '30826', '49661', '13736', '92076', '74865', '88149', '92397', '52777', '68453', '32012', '21222', '52721', '24626', '18210', '42177', '91791', '75251', '82075', '44372', '45542', '20609', '60115', '17362', '22750', '90434', '31852', 
'54071', '33762', '14705', '40718', '56433', '30996', '40657', '49056', '23585', '66455', '41021', '74736', '72151', '37007', '21729', '60177', '84558', '59027', '93855', '60022', '86443', '19541', '86886', '30532', '39062', '48532', '34713', '52077', '22564', '64638', '15273', '31677', '36138', '62367', '60261', '80213', '42818', '25113', '72378', '69802', '69096', '55443', '28820', '13848', '78258', '37490', '30556', '77380', '28447', '44550', '26791', '70609', '82182', '33306', '43224', '22322', '86959', '68519', '14308', '46501', '81131', '34056', '61991', '19896', '87804', '65774', '92564' ) INTERSECT SELECT "a1"."ca_zip" AS "ca_zip" FROM "a1" AS "a1" ) SELECT "store"."s_store_name" AS "s_store_name", SUM("store_sales"."ss_net_profit") AS "_col_1" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_qoy" = 2 AND "date_dim"."d_year" = 2000 JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "a2" AS "a2" ON SUBSTRING("a2"."ca_zip", 1, 2) = SUBSTRING("store"."s_zip", 1, 2) GROUP BY "store"."s_store_name" ORDER BY "s_store_name" LIMIT 100; -------------------------------------- -- TPC-DS 9 -------------------------------------- # execute: true SELECT CASE WHEN (SELECT Count(*) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) > 3672 THEN (SELECT Avg(ss_ext_list_price) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) ELSE (SELECT Avg(ss_net_profit) FROM store_sales WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, CASE WHEN (SELECT Count(*) FROM store_sales WHERE ss_quantity BETWEEN 21 AND 40) > 3392 THEN (SELECT Avg(ss_ext_list_price) FROM store_sales WHERE ss_quantity BETWEEN 21 AND 40) ELSE (SELECT Avg(ss_net_profit) FROM store_sales WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, CASE WHEN (SELECT Count(*) FROM store_sales WHERE ss_quantity BETWEEN 41 AND 60) > 32784 THEN (SELECT Avg(ss_ext_list_price) FROM store_sales WHERE 
ss_quantity BETWEEN 41 AND 60) ELSE (SELECT Avg(ss_net_profit) FROM store_sales WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, CASE WHEN (SELECT Count(*) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) > 26032 THEN (SELECT Avg(ss_ext_list_price) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) ELSE (SELECT Avg(ss_net_profit) FROM store_sales WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, CASE WHEN (SELECT Count(*) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) > 23982 THEN (SELECT Avg(ss_ext_list_price) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) ELSE (SELECT Avg(ss_net_profit) FROM store_sales WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 FROM reason WHERE r_reason_sk = 1; WITH "_u_0" AS ( SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 20 AND "store_sales"."ss_quantity" >= 1 ), "_u_1" AS ( SELECT AVG("store_sales"."ss_ext_list_price") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 20 AND "store_sales"."ss_quantity" >= 1 ), "_u_10" AS ( SELECT AVG("store_sales"."ss_ext_list_price") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 80 AND "store_sales"."ss_quantity" >= 61 ), "_u_11" AS ( SELECT AVG("store_sales"."ss_net_profit") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 80 AND "store_sales"."ss_quantity" >= 61 ), "_u_12" AS ( SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 100 AND "store_sales"."ss_quantity" >= 81 ), "_u_13" AS ( SELECT AVG("store_sales"."ss_ext_list_price") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 100 AND "store_sales"."ss_quantity" >= 81 ), "_u_14" AS ( SELECT AVG("store_sales"."ss_net_profit") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 100 AND "store_sales"."ss_quantity" >= 81 ), "_u_2" AS ( SELECT 
AVG("store_sales"."ss_net_profit") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 20 AND "store_sales"."ss_quantity" >= 1 ), "_u_3" AS ( SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 40 AND "store_sales"."ss_quantity" >= 21 ), "_u_4" AS ( SELECT AVG("store_sales"."ss_ext_list_price") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 40 AND "store_sales"."ss_quantity" >= 21 ), "_u_5" AS ( SELECT AVG("store_sales"."ss_net_profit") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 40 AND "store_sales"."ss_quantity" >= 21 ), "_u_6" AS ( SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 60 AND "store_sales"."ss_quantity" >= 41 ), "_u_7" AS ( SELECT AVG("store_sales"."ss_ext_list_price") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 60 AND "store_sales"."ss_quantity" >= 41 ), "_u_8" AS ( SELECT AVG("store_sales"."ss_net_profit") AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 60 AND "store_sales"."ss_quantity" >= 41 ), "_u_9" AS ( SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_quantity" <= 80 AND "store_sales"."ss_quantity" >= 61 ) SELECT CASE WHEN "_u_0"."_col_0" > 3672 THEN "_u_1"."_col_0" ELSE "_u_2"."_col_0" END AS "bucket1", CASE WHEN "_u_3"."_col_0" > 3392 THEN "_u_4"."_col_0" ELSE "_u_5"."_col_0" END AS "bucket2", CASE WHEN "_u_6"."_col_0" > 32784 THEN "_u_7"."_col_0" ELSE "_u_8"."_col_0" END AS "bucket3", CASE WHEN "_u_9"."_col_0" > 26032 THEN "_u_10"."_col_0" ELSE "_u_11"."_col_0" END AS "bucket4", CASE WHEN "_u_12"."_col_0" > 23982 THEN "_u_13"."_col_0" ELSE "_u_14"."_col_0" END AS "bucket5" FROM "reason" AS "reason" CROSS JOIN "_u_0" AS "_u_0" CROSS JOIN "_u_1" AS "_u_1" CROSS JOIN "_u_10" AS "_u_10" CROSS JOIN "_u_11" AS "_u_11" CROSS 
JOIN "_u_12" AS "_u_12" CROSS JOIN "_u_13" AS "_u_13" CROSS JOIN "_u_14" AS "_u_14" CROSS JOIN "_u_2" AS "_u_2" CROSS JOIN "_u_3" AS "_u_3" CROSS JOIN "_u_4" AS "_u_4" CROSS JOIN "_u_5" AS "_u_5" CROSS JOIN "_u_6" AS "_u_6" CROSS JOIN "_u_7" AS "_u_7" CROSS JOIN "_u_8" AS "_u_8" CROSS JOIN "_u_9" AS "_u_9" WHERE "reason"."r_reason_sk" = 1; -------------------------------------- -- TPC-DS 10 -------------------------------------- # execute: true SELECT cd_gender, cd_marital_status, cd_education_status, Count(*) cnt1, cd_purchase_estimate, Count(*) cnt2, cd_credit_rating, Count(*) cnt3, cd_dep_count, Count(*) cnt4, cd_dep_employed_count, Count(*) cnt5, cd_dep_college_count, Count(*) cnt6 FROM customer c, customer_address ca, customer_demographics WHERE c.c_current_addr_sk = ca.ca_address_sk AND ca_county IN ( 'Lycoming County', 'Sheridan County', 'Kandiyohi County', 'Pike County', 'Greene County' ) AND cd_demo_sk = c.c_current_cdemo_sk AND EXISTS (SELECT * FROM store_sales, date_dim WHERE c.c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 4 AND 4 + 3) AND ( EXISTS (SELECT * FROM web_sales, date_dim WHERE c.c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 4 AND 4 + 3) OR EXISTS (SELECT * FROM catalog_sales, date_dim WHERE c.c_customer_sk = cs_ship_customer_sk AND cs_sold_date_sk = d_date_sk AND d_year = 2002 AND d_moy BETWEEN 4 AND 4 + 3) ) GROUP BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count ORDER BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date_id" AS "d_date_id", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq", "date_dim"."d_week_seq" 
AS "d_week_seq", "date_dim"."d_quarter_seq" AS "d_quarter_seq", "date_dim"."d_year" AS "d_year", "date_dim"."d_dow" AS "d_dow", "date_dim"."d_moy" AS "d_moy", "date_dim"."d_dom" AS "d_dom", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_fy_year" AS "d_fy_year", "date_dim"."d_fy_quarter_seq" AS "d_fy_quarter_seq", "date_dim"."d_fy_week_seq" AS "d_fy_week_seq", "date_dim"."d_day_name" AS "d_day_name", "date_dim"."d_quarter_name" AS "d_quarter_name", "date_dim"."d_holiday" AS "d_holiday", "date_dim"."d_weekend" AS "d_weekend", "date_dim"."d_following_holiday" AS "d_following_holiday", "date_dim"."d_first_dom" AS "d_first_dom", "date_dim"."d_last_dom" AS "d_last_dom", "date_dim"."d_same_day_ly" AS "d_same_day_ly", "date_dim"."d_same_day_lq" AS "d_same_day_lq", "date_dim"."d_current_day" AS "d_current_day", "date_dim"."d_current_week" AS "d_current_week", "date_dim"."d_current_month" AS "d_current_month", "date_dim"."d_current_quarter" AS "d_current_quarter", "date_dim"."d_current_year" AS "d_current_year" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" <= 7 AND "date_dim"."d_moy" >= 4 AND "date_dim"."d_year" = 2002 ), "_u_0" AS ( SELECT "store_sales"."ss_customer_sk" AS "_u_1" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_customer_sk" ), "_u_2" AS ( SELECT "web_sales"."ws_bill_customer_sk" AS "_u_3" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "web_sales"."ws_bill_customer_sk" ), "_u_4" AS ( SELECT "catalog_sales"."cs_ship_customer_sk" AS "_u_5" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_ship_customer_sk" ) SELECT "customer_demographics"."cd_gender" AS "cd_gender", "customer_demographics"."cd_marital_status" AS "cd_marital_status", 
"customer_demographics"."cd_education_status" AS "cd_education_status", COUNT(*) AS "cnt1", "customer_demographics"."cd_purchase_estimate" AS "cd_purchase_estimate", COUNT(*) AS "cnt2", "customer_demographics"."cd_credit_rating" AS "cd_credit_rating", COUNT(*) AS "cnt3", "customer_demographics"."cd_dep_count" AS "cd_dep_count", COUNT(*) AS "cnt4", "customer_demographics"."cd_dep_employed_count" AS "cd_dep_employed_count", COUNT(*) AS "cnt5", "customer_demographics"."cd_dep_college_count" AS "cd_dep_college_count", COUNT(*) AS "cnt6" FROM "customer" AS "c" JOIN "customer_address" AS "ca" ON "c"."c_current_addr_sk" = "ca"."ca_address_sk" AND "ca"."ca_county" IN ( 'Lycoming County', 'Sheridan County', 'Kandiyohi County', 'Pike County', 'Greene County' ) JOIN "customer_demographics" AS "customer_demographics" ON "c"."c_current_cdemo_sk" = "customer_demographics"."cd_demo_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "c"."c_customer_sk" LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."_u_3" = "c"."c_customer_sk" LEFT JOIN "_u_4" AS "_u_4" ON "_u_4"."_u_5" = "c"."c_customer_sk" WHERE NOT "_u_0"."_u_1" IS NULL AND ( NOT "_u_2"."_u_3" IS NULL OR NOT "_u_4"."_u_5" IS NULL ) GROUP BY "customer_demographics"."cd_gender", "customer_demographics"."cd_marital_status", "customer_demographics"."cd_education_status", "customer_demographics"."cd_purchase_estimate", "customer_demographics"."cd_credit_rating", "customer_demographics"."cd_dep_count", "customer_demographics"."cd_dep_employed_count", "customer_demographics"."cd_dep_college_count" ORDER BY "cd_gender", "cd_marital_status", "cd_education_status", "cd_purchase_estimate", "cd_credit_rating", "cd_dep_count", "cd_dep_employed_count", "cd_dep_college_count" LIMIT 100; -------------------------------------- -- TPC-DS 11 -------------------------------------- # execute: true WITH year_total AS (SELECT c_customer_id customer_id, c_first_name customer_first_name , c_last_name customer_last_name, c_preferred_cust_flag 
customer_preferred_cust_flag , c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, Sum(ss_ext_list_price - ss_ext_discount_amt) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name , c_last_name customer_last_name, c_preferred_cust_flag customer_preferred_cust_flag , c_birth_country customer_birth_country, c_login customer_login, c_email_address customer_email_address, d_year dyear, Sum(ws_ext_list_price - ws_ext_discount_amt) year_total, 'w' sale_type FROM customer, web_sales, date_dim WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk GROUP BY c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, c_login, c_email_address, d_year) SELECT t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_birth_country FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_w_firstyear, year_total t_w_secyear WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id AND t_s_firstyear.customer_id = t_w_secyear.customer_id AND t_s_firstyear.customer_id = t_w_firstyear.customer_id AND t_s_firstyear.sale_type = 's' AND t_w_firstyear.sale_type = 'w' AND t_s_secyear.sale_type = 's' AND t_w_secyear.sale_type = 'w' AND t_s_firstyear.dyear = 2001 AND t_s_secyear.dyear = 2001 + 1 AND t_w_firstyear.dyear = 2001 AND t_w_secyear.dyear = 2001 + 1 AND t_s_firstyear.year_total > 0 AND t_w_firstyear.year_total > 0 AND CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE 0.0 END > CASE WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total ELSE 0.0 END ORDER BY 
t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name, t_s_secyear.customer_birth_country LIMIT 100; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_customer_id" AS "c_customer_id", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name", "customer"."c_preferred_cust_flag" AS "c_preferred_cust_flag", "customer"."c_birth_country" AS "c_birth_country", "customer"."c_login" AS "c_login", "customer"."c_email_address" AS "c_email_address" FROM "customer" AS "customer" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" ), "year_total" AS ( SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "customer"."c_birth_country" AS "customer_birth_country", "date_dim"."d_year" AS "dyear", SUM("store_sales"."ss_ext_list_price" - "store_sales"."ss_ext_discount_amt") AS "year_total", 's' AS "sale_type" FROM "customer_2" AS "customer" JOIN "store_sales" AS "store_sales" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "customer"."c_preferred_cust_flag", "customer"."c_birth_country", "customer"."c_login", "customer"."c_email_address", "date_dim"."d_year" UNION ALL SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "customer"."c_birth_country" AS "customer_birth_country", "date_dim"."d_year" AS "dyear", SUM("web_sales"."ws_ext_list_price" - "web_sales"."ws_ext_discount_amt") AS "year_total", 'w' AS "sale_type" FROM "customer_2" AS "customer" JOIN "web_sales" AS "web_sales" ON "customer"."c_customer_sk" = 
"web_sales"."ws_bill_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "customer"."c_preferred_cust_flag", "customer"."c_birth_country", "customer"."c_login", "customer"."c_email_address", "date_dim"."d_year" ) SELECT "t_s_secyear"."customer_id" AS "customer_id", "t_s_secyear"."customer_first_name" AS "customer_first_name", "t_s_secyear"."customer_last_name" AS "customer_last_name", "t_s_secyear"."customer_birth_country" AS "customer_birth_country" FROM "year_total" AS "t_s_firstyear" JOIN "year_total" AS "t_s_secyear" ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id" AND "t_s_secyear"."dyear" = 2002 AND "t_s_secyear"."sale_type" = 's' JOIN "year_total" AS "t_w_firstyear" ON "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id" AND "t_w_firstyear"."dyear" = 2001 AND "t_w_firstyear"."sale_type" = 'w' AND "t_w_firstyear"."year_total" > 0 JOIN "year_total" AS "t_w_secyear" ON "t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id" AND "t_w_secyear"."dyear" = 2002 AND "t_w_secyear"."sale_type" = 'w' AND CASE WHEN "t_s_firstyear"."year_total" > 0 THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total" ELSE 0.0 END < CASE WHEN "t_w_firstyear"."year_total" > 0 THEN "t_w_secyear"."year_total" / "t_w_firstyear"."year_total" ELSE 0.0 END WHERE "t_s_firstyear"."dyear" = 2001 AND "t_s_firstyear"."sale_type" = 's' AND "t_s_firstyear"."year_total" > 0 ORDER BY "t_s_secyear"."customer_id", "t_s_secyear"."customer_first_name", "t_s_secyear"."customer_last_name", "t_s_secyear"."customer_birth_country" LIMIT 100; -------------------------------------- -- TPC-DS 12 -------------------------------------- SELECT i_item_id , i_item_desc , i_category , i_class , i_current_price , Sum(ws_ext_sales_price) AS itemrevenue , Sum(ws_ext_sales_price)*100/Sum(Sum(ws_ext_sales_price)) OVER (partition BY i_class) AS 
revenueratio FROM web_sales , item , date_dim WHERE ws_item_sk = i_item_sk AND i_category IN ('Home', 'Men', 'Women') AND ws_sold_date_sk = d_date_sk AND d_date BETWEEN Cast('2000-05-11' AS DATE) AND ( Cast('2000-05-11' AS DATE) + INTERVAL '30' day) GROUP BY i_item_id , i_item_desc , i_category , i_class , i_current_price ORDER BY i_category , i_class , i_item_id , i_item_desc , revenueratio LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", "item"."i_current_price" AS "i_current_price", SUM("web_sales"."ws_ext_sales_price") AS "itemrevenue", SUM("web_sales"."ws_ext_sales_price") * 100 / SUM(SUM("web_sales"."ws_ext_sales_price")) OVER (PARTITION BY "item"."i_class") AS "revenueratio" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-10' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-11' AS DATE) JOIN "item" AS "item" ON "item"."i_category" IN ('Home', 'Men', 'Women') AND "item"."i_item_sk" = "web_sales"."ws_item_sk" GROUP BY "item"."i_item_id", "item"."i_item_desc", "item"."i_category", "item"."i_class", "item"."i_current_price" ORDER BY "i_category", "i_class", "i_item_id", "i_item_desc", "revenueratio" LIMIT 100; -------------------------------------- -- TPC-DS 13 -------------------------------------- SELECT Avg(ss_quantity), Avg(ss_ext_sales_price), Avg(ss_ext_wholesale_cost), Sum(ss_ext_wholesale_cost) FROM store_sales, store, customer_demographics, household_demographics, customer_address, date_dim WHERE s_store_sk = ss_store_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2001 AND ( ( ss_hdemo_sk = hd_demo_sk AND cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'U' AND cd_education_status = 'Advanced Degree' AND ss_sales_price BETWEEN 100.00 AND 150.00 AND hd_dep_count = 3 ) OR ( ss_hdemo_sk = hd_demo_sk AND 
cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'M' AND cd_education_status = 'Primary' AND ss_sales_price BETWEEN 50.00 AND 100.00 AND hd_dep_count = 1 ) OR ( ss_hdemo_sk = hd_demo_sk AND cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'D' AND cd_education_status = 'Secondary' AND ss_sales_price BETWEEN 150.00 AND 200.00 AND hd_dep_count = 1 ) ) AND ( ( ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'AZ', 'NE', 'IA' ) AND ss_net_profit BETWEEN 100 AND 200 ) OR ( ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'MS', 'CA', 'NV' ) AND ss_net_profit BETWEEN 150 AND 300 ) OR ( ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'GA', 'TX', 'NJ' ) AND ss_net_profit BETWEEN 50 AND 250 ) ); SELECT AVG("store_sales"."ss_quantity") AS "_col_0", AVG("store_sales"."ss_ext_sales_price") AS "_col_1", AVG("store_sales"."ss_ext_wholesale_cost") AS "_col_2", SUM("store_sales"."ss_ext_wholesale_cost") AS "_col_3" FROM "store_sales" AS "store_sales" CROSS JOIN "household_demographics" AS "household_demographics" JOIN "customer_address" AS "customer_address" ON ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('AZ', 'NE', 'IA') AND "store_sales"."ss_net_profit" <= 200 AND "store_sales"."ss_net_profit" >= 100 ) OR ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('GA', 'TX', 'NJ') AND "store_sales"."ss_net_profit" <= 250 AND "store_sales"."ss_net_profit" >= 50 ) OR ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('MS', 'CA', 'NV') AND "store_sales"."ss_net_profit" <= 300 AND "store_sales"."ss_net_profit" >= 150 ) JOIN "customer_demographics" AS "customer_demographics" ON ( 
"customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'Advanced Degree' AND "customer_demographics"."cd_marital_status" = 'U' AND "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_dep_count" = 3 AND "store_sales"."ss_sales_price" <= 150.00 AND "store_sales"."ss_sales_price" >= 100.00 ) OR ( "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'Primary' AND "customer_demographics"."cd_marital_status" = 'M' AND "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_dep_count" = 1 AND "store_sales"."ss_sales_price" <= 100.00 AND "store_sales"."ss_sales_price" >= 50.00 ) OR ( "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'Secondary' AND "customer_demographics"."cd_marital_status" = 'D' AND "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_dep_count" = 1 AND "store_sales"."ss_sales_price" <= 200.00 AND "store_sales"."ss_sales_price" >= 150.00 ) JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" = 2001 JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk"; -------------------------------------- -- TPC-DS 14 -------------------------------------- WITH cross_items AS (SELECT i_item_sk ss_item_sk FROM item, (SELECT iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id FROM store_sales, item iss, date_dim d1 WHERE ss_item_sk = iss.i_item_sk AND ss_sold_date_sk = d1.d_date_sk AND d1.d_year BETWEEN 1999 AND 1999 + 2 INTERSECT SELECT ics.i_brand_id, ics.i_class_id, ics.i_category_id FROM catalog_sales, item ics, date_dim d2 WHERE cs_item_sk = ics.i_item_sk AND cs_sold_date_sk = d2.d_date_sk AND d2.d_year BETWEEN 1999 AND 
1999 + 2 INTERSECT SELECT iws.i_brand_id, iws.i_class_id, iws.i_category_id FROM web_sales, item iws, date_dim d3 WHERE ws_item_sk = iws.i_item_sk AND ws_sold_date_sk = d3.d_date_sk AND d3.d_year BETWEEN 1999 AND 1999 + 2) WHERE i_brand_id = brand_id AND i_class_id = class_id AND i_category_id = category_id), avg_sales AS (SELECT Avg(quantity * list_price) average_sales FROM (SELECT ss_quantity quantity, ss_list_price list_price FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 UNION ALL SELECT cs_quantity quantity, cs_list_price list_price FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 UNION ALL SELECT ws_quantity quantity, ws_list_price list_price FROM web_sales, date_dim WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) SELECT channel, i_brand_id, i_class_id, i_category_id, Sum(sales), Sum(number_sales) FROM (SELECT 'store' channel, i_brand_id, i_class_id, i_category_id, Sum(ss_quantity * ss_list_price) sales, Count(*) number_sales FROM store_sales, item, date_dim WHERE ss_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING Sum(ss_quantity * ss_list_price) > (SELECT average_sales FROM avg_sales) UNION ALL SELECT 'catalog' channel, i_brand_id, i_class_id, i_category_id, Sum(cs_quantity * cs_list_price) sales, Count(*) number_sales FROM catalog_sales, item, date_dim WHERE cs_item_sk IN (SELECT ss_item_sk FROM cross_items) AND cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING Sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) UNION ALL SELECT 'web' channel, i_brand_id, i_class_id, i_category_id, Sum(ws_quantity * ws_list_price) sales, Count(*) number_sales FROM web_sales, item, 
date_dim WHERE ws_item_sk IN (SELECT ss_item_sk FROM cross_items) AND ws_item_sk = i_item_sk AND ws_sold_date_sk = d_date_sk AND d_year = 1999 + 2 AND d_moy = 11 GROUP BY i_brand_id, i_class_id, i_category_id HAVING Sum(ws_quantity * ws_list_price) > (SELECT average_sales FROM avg_sales)) y GROUP BY rollup ( channel, i_brand_id, i_class_id, i_category_id ) ORDER BY channel, i_brand_id, i_class_id, i_category_id LIMIT 100; WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id" FROM "item" AS "item" ), "_0" AS ( SELECT "iss"."i_brand_id" AS "brand_id", "iss"."i_class_id" AS "class_id", "iss"."i_category_id" AS "category_id" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_year" <= 2001 AND "d1"."d_year" >= 1999 JOIN "item" AS "iss" ON "iss"."i_item_sk" = "store_sales"."ss_item_sk" INTERSECT SELECT "ics"."i_brand_id" AS "i_brand_id", "ics"."i_class_id" AS "i_class_id", "ics"."i_category_id" AS "i_category_id" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim" AS "d2" ON "catalog_sales"."cs_sold_date_sk" = "d2"."d_date_sk" AND "d2"."d_year" <= 2001 AND "d2"."d_year" >= 1999 JOIN "item" AS "ics" ON "catalog_sales"."cs_item_sk" = "ics"."i_item_sk" INTERSECT SELECT "iws"."i_brand_id" AS "i_brand_id", "iws"."i_class_id" AS "i_class_id", "iws"."i_category_id" AS "i_category_id" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "d3" ON "d3"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "d3"."d_year" <= 2001 AND "d3"."d_year" >= 1999 JOIN "item" AS "iws" ON "iws"."i_item_sk" = "web_sales"."ws_item_sk" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_year" <= 2001 AND "date_dim"."d_year" >= 1999 ), "x" AS ( SELECT "store_sales"."ss_quantity" AS "quantity", 
"store_sales"."ss_list_price" AS "list_price" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" UNION ALL SELECT "catalog_sales"."cs_quantity" AS "quantity", "catalog_sales"."cs_list_price" AS "list_price" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" UNION ALL SELECT "web_sales"."ws_quantity" AS "quantity", "web_sales"."ws_list_price" AS "list_price" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" ), "avg_sales" AS ( SELECT AVG("x"."quantity" * "x"."list_price") AS "average_sales" FROM "x" AS "x" ), "date_dim_3" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 11 AND "date_dim"."d_year" = 2001 ), "_u_0" AS ( SELECT "item"."i_item_sk" AS "ss_item_sk" FROM "item_2" AS "item" JOIN "_0" AS "_0" ON "_0"."brand_id" = "item"."i_brand_id" AND "_0"."category_id" = "item"."i_category_id" AND "_0"."class_id" = "item"."i_class_id" GROUP BY "item"."i_item_sk" ), "_u_1" AS ( SELECT "avg_sales"."average_sales" AS "average_sales" FROM "avg_sales" AS "avg_sales" ), "y" AS ( SELECT 'store' AS "channel", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", SUM("store_sales"."ss_quantity" * "store_sales"."ss_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "store_sales" AS "store_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim_3" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."ss_item_sk" = "store_sales"."ss_item_sk" CROSS JOIN "_u_1" AS "_u_1" WHERE NOT "_u_0"."ss_item_sk" IS NULL GROUP BY "item"."i_brand_id", "item"."i_class_id", 
"item"."i_category_id" HAVING MAX("_u_1"."average_sales") < SUM("store_sales"."ss_quantity" * "store_sales"."ss_list_price") UNION ALL SELECT 'catalog' AS "channel", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", SUM("catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "catalog_sales" AS "catalog_sales" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "date_dim_3" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_0" AS "_u_2" ON "_u_2"."ss_item_sk" = "catalog_sales"."cs_item_sk" CROSS JOIN "_u_1" AS "_u_3" WHERE NOT "_u_2"."ss_item_sk" IS NULL GROUP BY "item"."i_brand_id", "item"."i_class_id", "item"."i_category_id" HAVING MAX("_u_3"."average_sales") < SUM("catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price") UNION ALL SELECT 'web' AS "channel", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", SUM("web_sales"."ws_quantity" * "web_sales"."ws_list_price") AS "sales", COUNT(*) AS "number_sales" FROM "web_sales" AS "web_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" JOIN "date_dim_3" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" LEFT JOIN "_u_0" AS "_u_4" ON "_u_4"."ss_item_sk" = "web_sales"."ws_item_sk" CROSS JOIN "_u_1" AS "_u_5" WHERE NOT "_u_4"."ss_item_sk" IS NULL GROUP BY "item"."i_brand_id", "item"."i_class_id", "item"."i_category_id" HAVING MAX("_u_5"."average_sales") < SUM("web_sales"."ws_quantity" * "web_sales"."ws_list_price") ) SELECT "y"."channel" AS "channel", "y"."i_brand_id" AS "i_brand_id", "y"."i_class_id" AS "i_class_id", "y"."i_category_id" AS "i_category_id", SUM("y"."sales") AS "_col_4", SUM("y"."number_sales") AS "_col_5" FROM "y" AS "y" GROUP BY ROLLUP ( "y"."channel", "y"."i_brand_id", "y"."i_class_id", 
"y"."i_category_id" ) ORDER BY "channel", "i_brand_id", "i_class_id", "i_category_id" LIMIT 100; -------------------------------------- -- TPC-DS 15 -------------------------------------- # execute: true SELECT ca_zip, Sum(cs_sales_price) AS "_col_1" FROM catalog_sales, customer, customer_address, date_dim WHERE cs_bill_customer_sk = c_customer_sk AND c_current_addr_sk = ca_address_sk AND ( SUBSTRING(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792' ) OR ca_state IN ( 'CA', 'WA', 'GA' ) OR cs_sales_price > 500 ) AND cs_sold_date_sk = d_date_sk AND d_qoy = 1 AND d_year = 1998 GROUP BY ca_zip ORDER BY ca_zip LIMIT 100; SELECT "customer_address"."ca_zip" AS "ca_zip", SUM("catalog_sales"."cs_sales_price") AS "_col_1" FROM "catalog_sales" AS "catalog_sales" JOIN "customer" AS "customer" ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_qoy" = 1 AND "date_dim"."d_year" = 1998 JOIN "customer_address" AS "customer_address" ON ( "catalog_sales"."cs_sales_price" > 500 OR "customer_address"."ca_state" IN ('CA', 'WA', 'GA') OR SUBSTRING("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') ) AND "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" GROUP BY "customer_address"."ca_zip" ORDER BY "ca_zip" LIMIT 100; -------------------------------------- -- TPC-DS 16 -------------------------------------- SELECT Count(DISTINCT cs_order_number) AS "order count" , Sum(cs_ext_ship_cost) AS "total shipping cost" , Sum(cs_net_profit) AS "total net profit" FROM catalog_sales cs1 , date_dim , customer_address , call_center WHERE d_date BETWEEN '2002-3-01' AND ( Cast('2002-3-01' AS DATE) + INTERVAL '60' day) AND cs1.cs_ship_date_sk = d_date_sk AND cs1.cs_ship_addr_sk = ca_address_sk AND ca_state = 'IA' AND cs1.cs_call_center_sk = 
cc_call_center_sk AND cc_county IN ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) AND EXISTS ( SELECT * FROM catalog_sales cs2 WHERE cs1.cs_order_number = cs2.cs_order_number AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) AND NOT EXISTS ( SELECT * FROM catalog_returns cr1 WHERE cs1.cs_order_number = cr1.cr_order_number) ORDER BY count(DISTINCT cs_order_number) LIMIT 100; WITH "_u_0" AS ( SELECT "cs2"."cs_order_number" AS "_u_1", ARRAY_AGG("cs2"."cs_warehouse_sk") AS "_u_2" FROM "catalog_sales" AS "cs2" GROUP BY "cs2"."cs_order_number" ), "_u_3" AS ( SELECT "cr1"."cr_order_number" AS "_u_4" FROM "catalog_returns" AS "cr1" GROUP BY "cr1"."cr_order_number" ) SELECT COUNT(DISTINCT "cs1"."cs_order_number") AS "order count", SUM("cs1"."cs_ext_ship_cost") AS "total shipping cost", SUM("cs1"."cs_net_profit") AS "total net profit" FROM "catalog_sales" AS "cs1" JOIN "date_dim" AS "date_dim" ON "cs1"."cs_ship_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_date" >= '2002-3-01' AND ( CAST('2002-3-01' AS DATE) + INTERVAL '60' DAY ) >= CAST("date_dim"."d_date" AS DATE) JOIN "customer_address" AS "customer_address" ON "cs1"."cs_ship_addr_sk" = "customer_address"."ca_address_sk" AND "customer_address"."ca_state" = 'IA' JOIN "call_center" AS "call_center" ON "call_center"."cc_call_center_sk" = "cs1"."cs_call_center_sk" AND "call_center"."cc_county" IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "cs1"."cs_order_number" LEFT JOIN "_u_3" AS "_u_3" ON "_u_3"."_u_4" = "cs1"."cs_order_number" WHERE "_u_3"."_u_4" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "cs1"."cs_warehouse_sk" <> "_x") AND NOT "_u_0"."_u_1" IS NULL ORDER BY COUNT(DISTINCT "cs1"."cs_order_number") LIMIT 100; -------------------------------------- -- TPC-DS 17 -------------------------------------- # execute: true SELECT i_item_id, i_item_desc, 
s_state, Count(ss_quantity) AS store_sales_quantitycount, Avg(ss_quantity) AS store_sales_quantityave, Stddev_samp(ss_quantity) AS store_sales_quantitystdev, Stddev_samp(ss_quantity) / Avg(ss_quantity) AS store_sales_quantitycov, Count(sr_return_quantity) AS store_returns_quantitycount, Avg(sr_return_quantity) AS store_returns_quantityave, Stddev_samp(sr_return_quantity) AS store_returns_quantitystdev, Stddev_samp(sr_return_quantity) / Avg(sr_return_quantity) AS store_returns_quantitycov, Count(cs_quantity) AS catalog_sales_quantitycount, Avg(cs_quantity) AS catalog_sales_quantityave, Stddev_samp(cs_quantity) / Avg(cs_quantity) AS catalog_sales_quantitystdev, Stddev_samp(cs_quantity) / Avg(cs_quantity) AS catalog_sales_quantitycov FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item WHERE d1.d_quarter_name = '1999Q1' AND d1.d_date_sk = ss_sold_date_sk AND i_item_sk = ss_item_sk AND s_store_sk = ss_store_sk AND ss_customer_sk = sr_customer_sk AND ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number AND sr_returned_date_sk = d2.d_date_sk AND d2.d_quarter_name IN ( '1999Q1', '1999Q2', '1999Q3' ) AND sr_customer_sk = cs_bill_customer_sk AND sr_item_sk = cs_item_sk AND cs_sold_date_sk = d3.d_date_sk AND d3.d_quarter_name IN ( '1999Q1', '1999Q2', '1999Q3' ) GROUP BY i_item_id, i_item_desc, s_state ORDER BY i_item_id, i_item_desc, s_state LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "store"."s_state" AS "s_state", COUNT("store_sales"."ss_quantity") AS "store_sales_quantitycount", AVG("store_sales"."ss_quantity") AS "store_sales_quantityave", STDDEV_SAMP("store_sales"."ss_quantity") AS "store_sales_quantitystdev", STDDEV_SAMP("store_sales"."ss_quantity") / AVG("store_sales"."ss_quantity") AS "store_sales_quantitycov", COUNT("store_returns"."sr_return_quantity") AS "store_returns_quantitycount", AVG("store_returns"."sr_return_quantity") AS "store_returns_quantityave", 
STDDEV_SAMP("store_returns"."sr_return_quantity") AS "store_returns_quantitystdev", STDDEV_SAMP("store_returns"."sr_return_quantity") / AVG("store_returns"."sr_return_quantity") AS "store_returns_quantitycov", COUNT("catalog_sales"."cs_quantity") AS "catalog_sales_quantitycount", AVG("catalog_sales"."cs_quantity") AS "catalog_sales_quantityave", STDDEV_SAMP("catalog_sales"."cs_quantity") / AVG("catalog_sales"."cs_quantity") AS "catalog_sales_quantitystdev", STDDEV_SAMP("catalog_sales"."cs_quantity") / AVG("catalog_sales"."cs_quantity") AS "catalog_sales_quantitycov" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_quarter_name" = '1999Q1' JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_customer_sk" = "store_sales"."ss_customer_sk" AND "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_bill_customer_sk" = "store_returns"."sr_customer_sk" AND "catalog_sales"."cs_item_sk" = "store_returns"."sr_item_sk" JOIN "date_dim" AS "d2" ON "d2"."d_date_sk" = "store_returns"."sr_returned_date_sk" AND "d2"."d_quarter_name" IN ('1999Q1', '1999Q2', '1999Q3') JOIN "date_dim" AS "d3" ON "catalog_sales"."cs_sold_date_sk" = "d3"."d_date_sk" AND "d3"."d_quarter_name" IN ('1999Q1', '1999Q2', '1999Q3') GROUP BY "item"."i_item_id", "item"."i_item_desc", "store"."s_state" ORDER BY "i_item_id", "i_item_desc", "s_state" LIMIT 100; -------------------------------------- -- TPC-DS 18 -------------------------------------- SELECT i_item_id, ca_country, ca_state, ca_county, Avg(Cast(cs_quantity AS NUMERIC(12, 2))) agg1, Avg(Cast(cs_list_price AS NUMERIC(12, 2))) agg2, Avg(Cast(cs_coupon_amt AS NUMERIC(12, 2))) agg3, 
Avg(Cast(cs_sales_price AS NUMERIC(12, 2))) agg4, Avg(Cast(cs_net_profit AS NUMERIC(12, 2))) agg5, Avg(Cast(c_birth_year AS NUMERIC(12, 2))) agg6, Avg(Cast(cd1.cd_dep_count AS NUMERIC(12, 2))) agg7 FROM catalog_sales, customer_demographics cd1, customer_demographics cd2, customer, customer_address, date_dim, item WHERE cs_sold_date_sk = d_date_sk AND cs_item_sk = i_item_sk AND cs_bill_cdemo_sk = cd1.cd_demo_sk AND cs_bill_customer_sk = c_customer_sk AND cd1.cd_gender = 'F' AND cd1.cd_education_status = 'Secondary' AND c_current_cdemo_sk = cd2.cd_demo_sk AND c_current_addr_sk = ca_address_sk AND c_birth_month IN ( 8, 4, 2, 5, 11, 9 ) AND d_year = 2001 AND ca_state IN ( 'KS', 'IA', 'AL', 'UT', 'VA', 'NC', 'TX' ) GROUP BY rollup ( i_item_id, ca_country, ca_state, ca_county ) ORDER BY ca_country, ca_state, ca_county, i_item_id LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "customer_address"."ca_country" AS "ca_country", "customer_address"."ca_state" AS "ca_state", "customer_address"."ca_county" AS "ca_county", AVG(CAST("catalog_sales"."cs_quantity" AS DECIMAL(12, 2))) AS "agg1", AVG(CAST("catalog_sales"."cs_list_price" AS DECIMAL(12, 2))) AS "agg2", AVG(CAST("catalog_sales"."cs_coupon_amt" AS DECIMAL(12, 2))) AS "agg3", AVG(CAST("catalog_sales"."cs_sales_price" AS DECIMAL(12, 2))) AS "agg4", AVG(CAST("catalog_sales"."cs_net_profit" AS DECIMAL(12, 2))) AS "agg5", AVG(CAST("customer"."c_birth_year" AS DECIMAL(12, 2))) AS "agg6", AVG(CAST("cd1"."cd_dep_count" AS DECIMAL(12, 2))) AS "agg7" FROM "catalog_sales" AS "catalog_sales" JOIN "customer_demographics" AS "cd1" ON "catalog_sales"."cs_bill_cdemo_sk" = "cd1"."cd_demo_sk" AND "cd1"."cd_education_status" = 'Secondary' AND "cd1"."cd_gender" = 'F' JOIN "customer" AS "customer" ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk" AND "customer"."c_birth_month" IN (8, 4, 2, 5, 11, 9) JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_year" = 
2001 JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "customer_demographics" AS "cd2" ON "cd2"."cd_demo_sk" = "customer"."c_current_cdemo_sk" JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" AND "customer_address"."ca_state" IN ('KS', 'IA', 'AL', 'UT', 'VA', 'NC', 'TX') GROUP BY ROLLUP ( "item"."i_item_id", "customer_address"."ca_country", "customer_address"."ca_state", "customer_address"."ca_county" ) ORDER BY "ca_country", "ca_state", "ca_county", "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 19 -------------------------------------- # execute: true SELECT i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, Sum(ss_ext_sales_price) ext_price FROM date_dim, store_sales, item, customer, customer_address, store WHERE d_date_sk = ss_sold_date_sk AND ss_item_sk = i_item_sk AND i_manager_id = 38 AND d_moy = 12 AND d_year = 1998 AND ss_customer_sk = c_customer_sk AND c_current_addr_sk = ca_address_sk AND SUBSTRING(ca_zip, 1, 5) <> SUBSTRING(s_zip, 1, 5) AND ss_store_sk = s_store_sk GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact ORDER BY ext_price DESC, i_brand, i_brand_id, i_manufact_id, i_manufact LIMIT 100; SELECT "item"."i_brand_id" AS "brand_id", "item"."i_brand" AS "brand", "item"."i_manufact_id" AS "i_manufact_id", "item"."i_manufact" AS "i_manufact", SUM("store_sales"."ss_ext_sales_price") AS "ext_price" FROM "date_dim" AS "date_dim" JOIN "store_sales" AS "store_sales" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manager_id" = 38 JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" AND 
SUBSTRING("customer_address"."ca_zip", 1, 5) <> SUBSTRING("store"."s_zip", 1, 5) WHERE "date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1998 GROUP BY "item"."i_brand", "item"."i_brand_id", "item"."i_manufact_id", "item"."i_manufact" ORDER BY "ext_price" DESC, "brand", "brand_id", "i_manufact_id", "i_manufact" LIMIT 100; -------------------------------------- -- TPC-DS 20 -------------------------------------- SELECT i_item_id , i_item_desc , i_category , i_class , i_current_price , Sum(cs_ext_sales_price) AS itemrevenue , Sum(cs_ext_sales_price)*100/Sum(Sum(cs_ext_sales_price)) OVER (partition BY i_class) AS revenueratio FROM catalog_sales , item , date_dim WHERE cs_item_sk = i_item_sk AND i_category IN ('Children', 'Women', 'Electronics') AND cs_sold_date_sk = d_date_sk AND d_date BETWEEN Cast('2001-02-03' AS DATE) AND ( Cast('2001-02-03' AS DATE) + INTERVAL '30' day) GROUP BY i_item_id , i_item_desc , i_category , i_class , i_current_price ORDER BY i_category , i_class , i_item_id , i_item_desc , revenueratio LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", "item"."i_current_price" AS "i_current_price", SUM("catalog_sales"."cs_ext_sales_price") AS "itemrevenue", SUM("catalog_sales"."cs_ext_sales_price") * 100 / SUM(SUM("catalog_sales"."cs_ext_sales_price")) OVER (PARTITION BY "item"."i_class") AS "revenueratio" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-03-05' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-02-03' AS DATE) JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_category" IN ('Children', 'Women', 'Electronics') GROUP BY "item"."i_item_id", "item"."i_item_desc", "item"."i_category", "item"."i_class", "item"."i_current_price" ORDER BY "i_category", 
"i_class", "i_item_id", "i_item_desc", "revenueratio" LIMIT 100; -------------------------------------- -- TPC-DS 21 -------------------------------------- SELECT * FROM ( SELECT w_warehouse_name , i_item_id , Sum( CASE WHEN ( Cast(d_date AS DATE) < Cast ('2000-05-13' AS DATE)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_before , Sum( CASE WHEN ( Cast(d_date AS DATE) >= Cast ('2000-05-13' AS DATE)) THEN inv_quantity_on_hand ELSE 0 END) AS inv_after FROM inventory , warehouse , item , date_dim WHERE i_current_price BETWEEN 0.99 AND 1.49 AND i_item_sk = inv_item_sk AND inv_warehouse_sk = w_warehouse_sk AND inv_date_sk = d_date_sk AND d_date BETWEEN (Cast ('2000-05-13' AS DATE) - INTERVAL '30' day) AND ( cast ('2000-05-13' AS date) + INTERVAL '30' day) GROUP BY w_warehouse_name, i_item_id) x WHERE ( CASE WHEN inv_before > 0 THEN inv_after / inv_before ELSE NULL END) BETWEEN 2.0/3.0 AND 3.0/2.0 ORDER BY w_warehouse_name , i_item_id LIMIT 100; WITH "x" AS ( SELECT "warehouse"."w_warehouse_name" AS "w_warehouse_name", "item"."i_item_id" AS "i_item_id", SUM( CASE WHEN CAST("date_dim"."d_date" AS DATE) < CAST('2000-05-13' AS DATE) THEN "inventory"."inv_quantity_on_hand" ELSE 0 END ) AS "inv_before", SUM( CASE WHEN CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-13' AS DATE) THEN "inventory"."inv_quantity_on_hand" ELSE 0 END ) AS "inv_after" FROM "inventory" AS "inventory" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-12' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-04-13' AS DATE) JOIN "item" AS "item" ON "inventory"."inv_item_sk" = "item"."i_item_sk" AND "item"."i_current_price" <= 1.49 AND "item"."i_current_price" >= 0.99 JOIN "warehouse" AS "warehouse" ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY "warehouse"."w_warehouse_name", "item"."i_item_id" ) SELECT "x"."w_warehouse_name" AS "w_warehouse_name", "x"."i_item_id" AS 
"i_item_id", "x"."inv_before" AS "inv_before", "x"."inv_after" AS "inv_after" FROM "x" AS "x" WHERE CASE WHEN "x"."inv_before" > 0 THEN "x"."inv_after" / "x"."inv_before" ELSE NULL END <= 1.5 AND CASE WHEN "x"."inv_before" > 0 THEN "x"."inv_after" / "x"."inv_before" ELSE NULL END >= 0.6666666666666666666666666667 ORDER BY "x"."w_warehouse_name", "x"."i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 22 -------------------------------------- SELECT i_product_name, i_brand, i_class, i_category, Avg(inv_quantity_on_hand) qoh FROM inventory, date_dim, item, warehouse WHERE inv_date_sk = d_date_sk AND inv_item_sk = i_item_sk AND inv_warehouse_sk = w_warehouse_sk AND d_month_seq BETWEEN 1205 AND 1205 + 11 GROUP BY rollup( i_product_name, i_brand, i_class, i_category ) ORDER BY qoh, i_product_name, i_brand, i_class, i_category LIMIT 100; SELECT "item"."i_product_name" AS "i_product_name", "item"."i_brand" AS "i_brand", "item"."i_class" AS "i_class", "item"."i_category" AS "i_category", AVG("inventory"."inv_quantity_on_hand") AS "qoh" FROM "inventory" AS "inventory" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" AND "date_dim"."d_month_seq" <= 1216 AND "date_dim"."d_month_seq" >= 1205 JOIN "item" AS "item" ON "inventory"."inv_item_sk" = "item"."i_item_sk" JOIN "warehouse" AS "warehouse" ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY ROLLUP ( "item"."i_product_name", "item"."i_brand", "item"."i_class", "item"."i_category" ) ORDER BY "qoh", "i_product_name", "i_brand", "i_class", "i_category" LIMIT 100; -------------------------------------- -- TPC-DS 23 -------------------------------------- # execute: true WITH frequent_ss_items AS (SELECT SUBSTRING(i_item_desc, 1, 30) itemdesc, i_item_sk item_sk, d_date solddate, Count(*) cnt FROM store_sales, date_dim, item WHERE ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk AND d_year IN ( 1998, 1998 + 1, 1998 + 2, 1998 + 3 ) GROUP BY 
SUBSTRING(i_item_desc, 1, 30), i_item_sk, d_date HAVING Count(*) > 4), max_store_sales AS (SELECT Max(csales) tpcds_cmax FROM (SELECT c_customer_sk, Sum(ss_quantity * ss_sales_price) csales FROM store_sales, customer, date_dim WHERE ss_customer_sk = c_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year IN ( 1998, 1998 + 1, 1998 + 2, 1998 + 3 ) GROUP BY c_customer_sk)), best_ss_customer AS (SELECT c_customer_sk, Sum(ss_quantity * ss_sales_price) ssales FROM store_sales, customer WHERE ss_customer_sk = c_customer_sk GROUP BY c_customer_sk HAVING Sum(ss_quantity * ss_sales_price) > ( 95 / 100.0 ) * (SELECT * FROM max_store_sales)) SELECT Sum(sales) AS "_col_0" FROM (SELECT cs_quantity * cs_list_price sales FROM catalog_sales, date_dim WHERE d_year = 1998 AND d_moy = 6 AND cs_sold_date_sk = d_date_sk AND cs_item_sk IN (SELECT item_sk FROM frequent_ss_items) AND cs_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer) UNION ALL SELECT ws_quantity * ws_list_price sales FROM web_sales, date_dim WHERE d_year = 1998 AND d_moy = 6 AND ws_sold_date_sk = d_date_sk AND ws_item_sk IN (SELECT item_sk FROM frequent_ss_items) AND ws_bill_customer_sk IN (SELECT c_customer_sk FROM best_ss_customer)) LIMIT 100; WITH "frequent_ss_items" AS ( SELECT "item"."i_item_sk" AS "item_sk" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" IN (1998, 1999, 2000, 2001) JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" GROUP BY SUBSTRING("item"."i_item_desc", 1, 30), "item"."i_item_sk", "date_dim"."d_date" HAVING COUNT(*) > 4 ), "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" FROM "customer" AS "customer" ), "_0" AS ( SELECT SUM("store_sales"."ss_quantity" * "store_sales"."ss_sales_price") AS "csales" FROM "store_sales" AS "store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN 
"date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" IN (1998, 1999, 2000, 2001) GROUP BY "customer"."c_customer_sk" ), "max_store_sales" AS ( SELECT MAX("_0"."csales") AS "tpcds_cmax" FROM "_0" AS "_0" ), "best_ss_customer" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk" FROM "store_sales" AS "store_sales" CROSS JOIN "max_store_sales" AS "max_store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" GROUP BY "customer"."c_customer_sk" HAVING 0.95 * MAX("max_store_sales"."tpcds_cmax") < SUM("store_sales"."ss_quantity" * "store_sales"."ss_sales_price") ), "date_dim_4" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 6 AND "date_dim"."d_year" = 1998 ), "_u_1" AS ( SELECT "frequent_ss_items"."item_sk" AS "item_sk" FROM "frequent_ss_items" AS "frequent_ss_items" GROUP BY "frequent_ss_items"."item_sk" ), "_u_2" AS ( SELECT "best_ss_customer"."c_customer_sk" AS "c_customer_sk" FROM "best_ss_customer" AS "best_ss_customer" GROUP BY "best_ss_customer"."c_customer_sk" ), "_1" AS ( SELECT "catalog_sales"."cs_quantity" * "catalog_sales"."cs_list_price" AS "sales" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_4" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."item_sk" = "catalog_sales"."cs_item_sk" LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."c_customer_sk" = "catalog_sales"."cs_bill_customer_sk" WHERE NOT "_u_1"."item_sk" IS NULL AND NOT "_u_2"."c_customer_sk" IS NULL UNION ALL SELECT "web_sales"."ws_quantity" * "web_sales"."ws_list_price" AS "sales" FROM "web_sales" AS "web_sales" JOIN "date_dim_4" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" LEFT JOIN "_u_1" AS "_u_3" ON "_u_3"."item_sk" = "web_sales"."ws_item_sk" LEFT JOIN "_u_2" AS "_u_4" ON 
"_u_4"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" WHERE NOT "_u_3"."item_sk" IS NULL AND NOT "_u_4"."c_customer_sk" IS NULL ) SELECT SUM("_1"."sales") AS "_col_0" FROM "_1" AS "_1" LIMIT 100; -------------------------------------- -- TPC-DS 24 -------------------------------------- # execute: true WITH ssales AS (SELECT c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size, Sum(ss_net_profit) netpaid FROM store_sales, store_returns, store, item, customer, customer_address WHERE ss_ticket_number = sr_ticket_number AND ss_item_sk = sr_item_sk AND ss_customer_sk = c_customer_sk AND ss_item_sk = i_item_sk AND ss_store_sk = s_store_sk AND c_birth_country = Upper(ca_country) AND s_zip = ca_zip AND s_market_id = 6 GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, i_current_price, i_manager_id, i_units, i_size) SELECT c_last_name, c_first_name, s_store_name, Sum(netpaid) paid FROM ssales WHERE i_color = 'papaya' GROUP BY c_last_name, c_first_name, s_store_name HAVING Sum(netpaid) > (SELECT 0.05 * Avg(netpaid) FROM ssales); WITH "ssales" AS ( SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "store"."s_store_name" AS "s_store_name", "item"."i_color" AS "i_color", SUM("store_sales"."ss_net_profit") AS "netpaid" FROM "store_sales" AS "store_sales" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_market_id" = 6 AND "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "customer_address" AS "customer_address" ON "customer"."c_birth_country" = UPPER("customer_address"."ca_country") AND 
"customer_address"."ca_zip" = "store"."s_zip" GROUP BY "customer"."c_last_name", "customer"."c_first_name", "store"."s_store_name", "customer_address"."ca_state", "store"."s_state", "item"."i_color", "item"."i_current_price", "item"."i_manager_id", "item"."i_units", "item"."i_size" ), "_u_0" AS ( SELECT 0.05 * AVG("ssales"."netpaid") AS "_col_0" FROM "ssales" AS "ssales" ) SELECT "ssales"."c_last_name" AS "c_last_name", "ssales"."c_first_name" AS "c_first_name", "ssales"."s_store_name" AS "s_store_name", SUM("ssales"."netpaid") AS "paid" FROM "ssales" AS "ssales" CROSS JOIN "_u_0" AS "_u_0" WHERE "ssales"."i_color" = 'papaya' GROUP BY "ssales"."c_last_name", "ssales"."c_first_name", "ssales"."s_store_name" HAVING MAX("_u_0"."_col_0") < SUM("ssales"."netpaid"); -------------------------------------- -- TPC-DS 25 -------------------------------------- # execute: true SELECT i_item_id, i_item_desc, s_store_id, s_store_name, Max(ss_net_profit) AS store_sales_profit, Max(sr_net_loss) AS store_returns_loss, Max(cs_net_profit) AS catalog_sales_profit FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item WHERE d1.d_moy = 4 AND d1.d_year = 2001 AND d1.d_date_sk = ss_sold_date_sk AND i_item_sk = ss_item_sk AND s_store_sk = ss_store_sk AND ss_customer_sk = sr_customer_sk AND ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number AND sr_returned_date_sk = d2.d_date_sk AND d2.d_moy BETWEEN 4 AND 10 AND d2.d_year = 2001 AND sr_customer_sk = cs_bill_customer_sk AND sr_item_sk = cs_item_sk AND cs_sold_date_sk = d3.d_date_sk AND d3.d_moy BETWEEN 4 AND 10 AND d3.d_year = 2001 GROUP BY i_item_id, i_item_desc, s_store_id, s_store_name ORDER BY i_item_id, i_item_desc, s_store_id, s_store_name LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "store"."s_store_id" AS "s_store_id", "store"."s_store_name" AS "s_store_name", MAX("store_sales"."ss_net_profit") AS "store_sales_profit", 
MAX("store_returns"."sr_net_loss") AS "store_returns_loss", MAX("catalog_sales"."cs_net_profit") AS "catalog_sales_profit" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_moy" = 4 AND "d1"."d_year" = 2001 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_customer_sk" = "store_sales"."ss_customer_sk" AND "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_bill_customer_sk" = "store_returns"."sr_customer_sk" AND "catalog_sales"."cs_item_sk" = "store_returns"."sr_item_sk" JOIN "date_dim" AS "d2" ON "d2"."d_date_sk" = "store_returns"."sr_returned_date_sk" AND "d2"."d_moy" <= 10 AND "d2"."d_moy" >= 4 AND "d2"."d_year" = 2001 JOIN "date_dim" AS "d3" ON "catalog_sales"."cs_sold_date_sk" = "d3"."d_date_sk" AND "d3"."d_moy" <= 10 AND "d3"."d_moy" >= 4 AND "d3"."d_year" = 2001 GROUP BY "item"."i_item_id", "item"."i_item_desc", "store"."s_store_id", "store"."s_store_name" ORDER BY "i_item_id", "i_item_desc", "s_store_id", "s_store_name" LIMIT 100; -------------------------------------- -- TPC-DS 26 -------------------------------------- # execute: true SELECT i_item_id, Avg(cs_quantity) agg1, Avg(cs_list_price) agg2, Avg(cs_coupon_amt) agg3, Avg(cs_sales_price) agg4 FROM catalog_sales, customer_demographics, date_dim, item, promotion WHERE cs_sold_date_sk = d_date_sk AND cs_item_sk = i_item_sk AND cs_bill_cdemo_sk = cd_demo_sk AND cs_promo_sk = p_promo_sk AND cd_gender = 'F' AND cd_marital_status = 'W' AND cd_education_status = 'Secondary' AND ( p_channel_email = 'N' OR p_channel_event = 'N' ) AND d_year = 2000 GROUP BY i_item_id ORDER BY i_item_id LIMIT 100; SELECT "item"."i_item_id" AS 
"i_item_id", AVG("catalog_sales"."cs_quantity") AS "agg1", AVG("catalog_sales"."cs_list_price") AS "agg2", AVG("catalog_sales"."cs_coupon_amt") AS "agg3", AVG("catalog_sales"."cs_sales_price") AS "agg4" FROM "catalog_sales" AS "catalog_sales" JOIN "customer_demographics" AS "customer_demographics" ON "catalog_sales"."cs_bill_cdemo_sk" = "customer_demographics"."cd_demo_sk" AND "customer_demographics"."cd_education_status" = 'Secondary' AND "customer_demographics"."cd_gender" = 'F' AND "customer_demographics"."cd_marital_status" = 'W' JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_year" = 2000 JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "promotion" AS "promotion" ON "catalog_sales"."cs_promo_sk" = "promotion"."p_promo_sk" AND ( "promotion"."p_channel_email" = 'N' OR "promotion"."p_channel_event" = 'N' ) GROUP BY "item"."i_item_id" ORDER BY "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 27 -------------------------------------- SELECT i_item_id, s_state, Grouping(s_state) g_state, Avg(ss_quantity) agg1, Avg(ss_list_price) agg2, Avg(ss_coupon_amt) agg3, Avg(ss_sales_price) agg4 FROM store_sales, customer_demographics, date_dim, store, item WHERE ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk AND ss_store_sk = s_store_sk AND ss_cdemo_sk = cd_demo_sk AND cd_gender = 'M' AND cd_marital_status = 'D' AND cd_education_status = 'College' AND d_year = 2000 AND s_state IN ( 'TN', 'TN', 'TN', 'TN', 'TN', 'TN' ) GROUP BY rollup ( i_item_id, s_state ) ORDER BY i_item_id, s_state LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "store"."s_state" AS "s_state", GROUPING("store"."s_state") AS "g_state", AVG("store_sales"."ss_quantity") AS "agg1", AVG("store_sales"."ss_list_price") AS "agg2", AVG("store_sales"."ss_coupon_amt") AS "agg3", AVG("store_sales"."ss_sales_price") AS "agg4" FROM "store_sales" AS "store_sales" JOIN "customer_demographics" AS 
"customer_demographics" ON "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'College' AND "customer_demographics"."cd_gender" = 'M' AND "customer_demographics"."cd_marital_status" = 'D' JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" = 2000 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY ROLLUP ( "item"."i_item_id", "store"."s_state" ) ORDER BY "i_item_id", "s_state" LIMIT 100; -------------------------------------- -- TPC-DS 28 -------------------------------------- SELECT * FROM (SELECT Avg(ss_list_price) b1_lp, Count(ss_list_price) b1_cnt, Count(DISTINCT ss_list_price) b1_cntd FROM store_sales WHERE ss_quantity BETWEEN 0 AND 5 AND ( ss_list_price BETWEEN 18 AND 18 + 10 OR ss_coupon_amt BETWEEN 1939 AND 1939 + 1000 OR ss_wholesale_cost BETWEEN 34 AND 34 + 20 )) B1, (SELECT Avg(ss_list_price) b2_lp, Count(ss_list_price) b2_cnt, Count(DISTINCT ss_list_price) b2_cntd FROM store_sales WHERE ss_quantity BETWEEN 6 AND 10 AND ( ss_list_price BETWEEN 1 AND 1 + 10 OR ss_coupon_amt BETWEEN 35 AND 35 + 1000 OR ss_wholesale_cost BETWEEN 50 AND 50 + 20 )) B2, (SELECT Avg(ss_list_price) b3_lp, Count(ss_list_price) b3_cnt, Count(DISTINCT ss_list_price) b3_cntd FROM store_sales WHERE ss_quantity BETWEEN 11 AND 15 AND ( ss_list_price BETWEEN 91 AND 91 + 10 OR ss_coupon_amt BETWEEN 1412 AND 1412 + 1000 OR ss_wholesale_cost BETWEEN 17 AND 17 + 20 )) B3, (SELECT Avg(ss_list_price) b4_lp, Count(ss_list_price) b4_cnt, Count(DISTINCT ss_list_price) b4_cntd FROM store_sales WHERE ss_quantity BETWEEN 16 AND 20 AND ( ss_list_price BETWEEN 9 AND 9 + 10 OR ss_coupon_amt BETWEEN 5270 AND 5270 + 1000 OR ss_wholesale_cost BETWEEN 29 AND 29 + 20 )) B4, (SELECT Avg(ss_list_price) b5_lp, 
Count(ss_list_price) b5_cnt, Count(DISTINCT ss_list_price) b5_cntd FROM store_sales WHERE ss_quantity BETWEEN 21 AND 25 AND ( ss_list_price BETWEEN 45 AND 45 + 10 OR ss_coupon_amt BETWEEN 826 AND 826 + 1000 OR ss_wholesale_cost BETWEEN 5 AND 5 + 20 )) B5, (SELECT Avg(ss_list_price) b6_lp, Count(ss_list_price) b6_cnt, Count(DISTINCT ss_list_price) b6_cntd FROM store_sales WHERE ss_quantity BETWEEN 26 AND 30 AND ( ss_list_price BETWEEN 174 AND 174 + 10 OR ss_coupon_amt BETWEEN 5548 AND 5548 + 1000 OR ss_wholesale_cost BETWEEN 42 AND 42 + 20 )) B6 LIMIT 100; WITH "b1" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b1_lp", COUNT("store_sales"."ss_list_price") AS "b1_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b1_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 2939 AND "store_sales"."ss_coupon_amt" >= 1939 OR "store_sales"."ss_list_price" <= 28 AND "store_sales"."ss_list_price" >= 18 OR "store_sales"."ss_wholesale_cost" <= 54 AND "store_sales"."ss_wholesale_cost" >= 34 ) AND "store_sales"."ss_quantity" <= 5 AND "store_sales"."ss_quantity" >= 0 ), "b2" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b2_lp", COUNT("store_sales"."ss_list_price") AS "b2_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b2_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 1035 AND "store_sales"."ss_coupon_amt" >= 35 OR "store_sales"."ss_list_price" <= 11 AND "store_sales"."ss_list_price" >= 1 OR "store_sales"."ss_wholesale_cost" <= 70 AND "store_sales"."ss_wholesale_cost" >= 50 ) AND "store_sales"."ss_quantity" <= 10 AND "store_sales"."ss_quantity" >= 6 ), "b3" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b3_lp", COUNT("store_sales"."ss_list_price") AS "b3_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b3_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 2412 AND "store_sales"."ss_coupon_amt" >= 1412 OR "store_sales"."ss_list_price" <= 101 AND 
"store_sales"."ss_list_price" >= 91 OR "store_sales"."ss_wholesale_cost" <= 37 AND "store_sales"."ss_wholesale_cost" >= 17 ) AND "store_sales"."ss_quantity" <= 15 AND "store_sales"."ss_quantity" >= 11 ), "b4" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b4_lp", COUNT("store_sales"."ss_list_price") AS "b4_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b4_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 6270 AND "store_sales"."ss_coupon_amt" >= 5270 OR "store_sales"."ss_list_price" <= 19 AND "store_sales"."ss_list_price" >= 9 OR "store_sales"."ss_wholesale_cost" <= 49 AND "store_sales"."ss_wholesale_cost" >= 29 ) AND "store_sales"."ss_quantity" <= 20 AND "store_sales"."ss_quantity" >= 16 ), "b5" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b5_lp", COUNT("store_sales"."ss_list_price") AS "b5_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b5_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 1826 AND "store_sales"."ss_coupon_amt" >= 826 OR "store_sales"."ss_list_price" <= 55 AND "store_sales"."ss_list_price" >= 45 OR "store_sales"."ss_wholesale_cost" <= 25 AND "store_sales"."ss_wholesale_cost" >= 5 ) AND "store_sales"."ss_quantity" <= 25 AND "store_sales"."ss_quantity" >= 21 ), "b6" AS ( SELECT AVG("store_sales"."ss_list_price") AS "b6_lp", COUNT("store_sales"."ss_list_price") AS "b6_cnt", COUNT(DISTINCT "store_sales"."ss_list_price") AS "b6_cntd" FROM "store_sales" AS "store_sales" WHERE ( "store_sales"."ss_coupon_amt" <= 6548 AND "store_sales"."ss_coupon_amt" >= 5548 OR "store_sales"."ss_list_price" <= 184 AND "store_sales"."ss_list_price" >= 174 OR "store_sales"."ss_wholesale_cost" <= 62 AND "store_sales"."ss_wholesale_cost" >= 42 ) AND "store_sales"."ss_quantity" <= 30 AND "store_sales"."ss_quantity" >= 26 ) SELECT "b1"."b1_lp" AS "b1_lp", "b1"."b1_cnt" AS "b1_cnt", "b1"."b1_cntd" AS "b1_cntd", "b2"."b2_lp" AS "b2_lp", "b2"."b2_cnt" AS "b2_cnt", "b2"."b2_cntd" AS 
"b2_cntd", "b3"."b3_lp" AS "b3_lp", "b3"."b3_cnt" AS "b3_cnt", "b3"."b3_cntd" AS "b3_cntd", "b4"."b4_lp" AS "b4_lp", "b4"."b4_cnt" AS "b4_cnt", "b4"."b4_cntd" AS "b4_cntd", "b5"."b5_lp" AS "b5_lp", "b5"."b5_cnt" AS "b5_cnt", "b5"."b5_cntd" AS "b5_cntd", "b6"."b6_lp" AS "b6_lp", "b6"."b6_cnt" AS "b6_cnt", "b6"."b6_cntd" AS "b6_cntd" FROM "b1" AS "b1" CROSS JOIN "b2" AS "b2" CROSS JOIN "b3" AS "b3" CROSS JOIN "b4" AS "b4" CROSS JOIN "b5" AS "b5" CROSS JOIN "b6" AS "b6" LIMIT 100; -------------------------------------- -- TPC-DS 29 -------------------------------------- # execute: true SELECT i_item_id, i_item_desc, s_store_id, s_store_name, Avg(ss_quantity) AS store_sales_quantity, Avg(sr_return_quantity) AS store_returns_quantity, Avg(cs_quantity) AS catalog_sales_quantity FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item WHERE d1.d_moy = 4 AND d1.d_year = 1998 AND d1.d_date_sk = ss_sold_date_sk AND i_item_sk = ss_item_sk AND s_store_sk = ss_store_sk AND ss_customer_sk = sr_customer_sk AND ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number AND sr_returned_date_sk = d2.d_date_sk AND d2.d_moy BETWEEN 4 AND 4 + 3 AND d2.d_year = 1998 AND sr_customer_sk = cs_bill_customer_sk AND sr_item_sk = cs_item_sk AND cs_sold_date_sk = d3.d_date_sk AND d3.d_year IN ( 1998, 1998 + 1, 1998 + 2 ) GROUP BY i_item_id, i_item_desc, s_store_id, s_store_name ORDER BY i_item_id, i_item_desc, s_store_id, s_store_name LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "store"."s_store_id" AS "s_store_id", "store"."s_store_name" AS "s_store_name", AVG("store_sales"."ss_quantity") AS "store_sales_quantity", AVG("store_returns"."sr_return_quantity") AS "store_returns_quantity", AVG("catalog_sales"."cs_quantity") AS "catalog_sales_quantity" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_moy" = 4 AND "d1"."d_year" = 
1998 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_customer_sk" = "store_sales"."ss_customer_sk" AND "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_bill_customer_sk" = "store_returns"."sr_customer_sk" AND "catalog_sales"."cs_item_sk" = "store_returns"."sr_item_sk" JOIN "date_dim" AS "d2" ON "d2"."d_date_sk" = "store_returns"."sr_returned_date_sk" AND "d2"."d_moy" <= 7 AND "d2"."d_moy" >= 4 AND "d2"."d_year" = 1998 JOIN "date_dim" AS "d3" ON "catalog_sales"."cs_sold_date_sk" = "d3"."d_date_sk" AND "d3"."d_year" IN (1998, 1999, 2000) GROUP BY "item"."i_item_id", "item"."i_item_desc", "store"."s_store_id", "store"."s_store_name" ORDER BY "i_item_id", "i_item_desc", "s_store_id", "s_store_name" LIMIT 100; -------------------------------------- -- TPC-DS 30 -------------------------------------- WITH customer_total_return AS (SELECT wr_returning_customer_sk AS ctr_customer_sk, ca_state AS ctr_state, Sum(wr_return_amt) AS ctr_total_return FROM web_returns, date_dim, customer_address WHERE wr_returned_date_sk = d_date_sk AND d_year = 2000 AND wr_returning_addr_sk = ca_address_sk GROUP BY wr_returning_customer_sk, ca_state) SELECT c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return FROM customer_total_return ctr1, customer_address, customer WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2 FROM customer_total_return ctr2 WHERE ctr1.ctr_state = ctr2.ctr_state) AND ca_address_sk = c_current_addr_sk AND ca_state = 'IN' AND ctr1.ctr_customer_sk = c_customer_sk ORDER BY c_customer_id, c_salutation, 
c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return LIMIT 100; WITH "customer_total_return" AS ( SELECT "web_returns"."wr_returning_customer_sk" AS "ctr_customer_sk", "customer_address"."ca_state" AS "ctr_state", SUM("web_returns"."wr_return_amt") AS "ctr_total_return" FROM "web_returns" AS "web_returns" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "web_returns"."wr_returning_addr_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_returns"."wr_returned_date_sk" AND "date_dim"."d_year" = 2000 GROUP BY "web_returns"."wr_returning_customer_sk", "customer_address"."ca_state" ), "_u_0" AS ( SELECT AVG("ctr2"."ctr_total_return") * 1.2 AS "_col_0", "ctr2"."ctr_state" AS "_u_1" FROM "customer_total_return" AS "ctr2" GROUP BY "ctr2"."ctr_state" ) SELECT "customer"."c_customer_id" AS "c_customer_id", "customer"."c_salutation" AS "c_salutation", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name", "customer"."c_preferred_cust_flag" AS "c_preferred_cust_flag", "customer"."c_birth_day" AS "c_birth_day", "customer"."c_birth_month" AS "c_birth_month", "customer"."c_birth_year" AS "c_birth_year", "customer"."c_birth_country" AS "c_birth_country", "customer"."c_login" AS "c_login", "customer"."c_email_address" AS "c_email_address", "customer"."c_last_review_date" AS "c_last_review_date", "ctr1"."ctr_total_return" AS "ctr_total_return" FROM "customer_total_return" AS "ctr1" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_state" = 'IN' JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" AND "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "ctr1"."ctr_state" WHERE "_u_0"."_col_0" < "ctr1"."ctr_total_return" ORDER BY "c_customer_id", "c_salutation", 
"c_first_name", "c_last_name", "c_preferred_cust_flag", "c_birth_day", "c_birth_month", "c_birth_year", "c_birth_country", "c_login", "c_email_address", "c_last_review_date", "ctr_total_return" LIMIT 100; -------------------------------------- -- TPC-DS 31 -------------------------------------- # execute: true WITH ss AS (SELECT ca_county, d_qoy, d_year, Sum(ss_ext_sales_price) AS store_sales FROM store_sales, date_dim, customer_address WHERE ss_sold_date_sk = d_date_sk AND ss_addr_sk = ca_address_sk GROUP BY ca_county, d_qoy, d_year), ws AS (SELECT ca_county, d_qoy, d_year, Sum(ws_ext_sales_price) AS web_sales FROM web_sales, date_dim, customer_address WHERE ws_sold_date_sk = d_date_sk AND ws_bill_addr_sk = ca_address_sk GROUP BY ca_county, d_qoy, d_year) SELECT ss1.ca_county, ss1.d_year, ws2.web_sales / ws1.web_sales web_q1_q2_increase, ss2.store_sales / ss1.store_sales store_q1_q2_increase, ws3.web_sales / ws2.web_sales web_q2_q3_increase, ss3.store_sales / ss2.store_sales store_q2_q3_increase FROM ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 WHERE ss1.d_qoy = 1 AND ss1.d_year = 2001 AND ss1.ca_county = ss2.ca_county AND ss2.d_qoy = 2 AND ss2.d_year = 2001 AND ss2.ca_county = ss3.ca_county AND ss3.d_qoy = 3 AND ss3.d_year = 2001 AND ss1.ca_county = ws1.ca_county AND ws1.d_qoy = 1 AND ws1.d_year = 2001 AND ws1.ca_county = ws2.ca_county AND ws2.d_qoy = 2 AND ws2.d_year = 2001 AND ws1.ca_county = ws3.ca_county AND ws3.d_qoy = 3 AND ws3.d_year = 2001 AND CASE WHEN ws1.web_sales > 0 THEN ws2.web_sales / ws1.web_sales ELSE NULL END > CASE WHEN ss1.store_sales > 0 THEN ss2.store_sales / ss1.store_sales ELSE NULL END AND CASE WHEN ws2.web_sales > 0 THEN ws3.web_sales / ws2.web_sales ELSE NULL END > CASE WHEN ss2.store_sales > 0 THEN ss3.store_sales / ss2.store_sales ELSE NULL END ORDER BY ss1.d_year; WITH "customer_address_2" AS ( SELECT "customer_address"."ca_address_sk" AS "ca_address_sk", "customer_address"."ca_county" AS "ca_county" FROM "customer_address" AS 
"customer_address" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy" FROM "date_dim" AS "date_dim" ), "ss" AS ( SELECT "customer_address"."ca_county" AS "ca_county", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_year" AS "d_year", SUM("store_sales"."ss_ext_sales_price") AS "store_sales" FROM "store_sales" AS "store_sales" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "customer_address"."ca_county", "date_dim"."d_qoy", "date_dim"."d_year" ), "ws" AS ( SELECT "customer_address"."ca_county" AS "ca_county", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_year" AS "d_year", SUM("web_sales"."ws_ext_sales_price") AS "web_sales" FROM "web_sales" AS "web_sales" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "web_sales"."ws_bill_addr_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "customer_address"."ca_county", "date_dim"."d_qoy", "date_dim"."d_year" ) SELECT "ss1"."ca_county" AS "ca_county", "ss1"."d_year" AS "d_year", "ws2"."web_sales" / "ws1"."web_sales" AS "web_q1_q2_increase", "ss2"."store_sales" / "ss1"."store_sales" AS "store_q1_q2_increase", "ws3"."web_sales" / "ws2"."web_sales" AS "web_q2_q3_increase", "ss3"."store_sales" / "ss2"."store_sales" AS "store_q2_q3_increase" FROM "ss" AS "ss1" JOIN "ss" AS "ss2" ON "ss1"."ca_county" = "ss2"."ca_county" AND "ss2"."d_qoy" = 2 AND "ss2"."d_year" = 2001 JOIN "ws" AS "ws1" ON "ss1"."ca_county" = "ws1"."ca_county" AND "ws1"."d_qoy" = 1 AND "ws1"."d_year" = 2001 JOIN "ss" AS "ss3" ON "ss2"."ca_county" = "ss3"."ca_county" AND "ss3"."d_qoy" = 3 AND "ss3"."d_year" = 2001 JOIN "ws" AS "ws2" ON "ws1"."ca_county" = "ws2"."ca_county" AND "ws2"."d_qoy" = 2 AND "ws2"."d_year" = 2001 AND CASE WHEN 
"ss1"."store_sales" > 0 THEN "ss2"."store_sales" / "ss1"."store_sales" ELSE NULL END < CASE WHEN "ws1"."web_sales" > 0 THEN "ws2"."web_sales" / "ws1"."web_sales" ELSE NULL END JOIN "ws" AS "ws3" ON "ws1"."ca_county" = "ws3"."ca_county" AND "ws3"."d_qoy" = 3 AND "ws3"."d_year" = 2001 AND CASE WHEN "ss2"."store_sales" > 0 THEN "ss3"."store_sales" / "ss2"."store_sales" ELSE NULL END < CASE WHEN "ws2"."web_sales" > 0 THEN "ws3"."web_sales" / "ws2"."web_sales" ELSE NULL END WHERE "ss1"."d_qoy" = 1 AND "ss1"."d_year" = 2001 ORDER BY "ss1"."d_year"; -------------------------------------- -- TPC-DS 32 -------------------------------------- SELECT Sum(cs_ext_discount_amt) AS "excess discount amount" FROM catalog_sales , item , date_dim WHERE i_manufact_id = 610 AND i_item_sk = cs_item_sk AND d_date BETWEEN '2001-03-04' AND ( Cast('2001-03-04' AS DATE) + INTERVAL '90' day) AND d_date_sk = cs_sold_date_sk AND cs_ext_discount_amt > ( SELECT 1.3 * avg(cs_ext_discount_amt) FROM catalog_sales , date_dim WHERE cs_item_sk = i_item_sk AND d_date BETWEEN '2001-03-04' AND ( cast('2001-03-04' AS date) + INTERVAL '90' day) AND d_date_sk = cs_sold_date_sk ) LIMIT 100; WITH "catalog_sales_2" AS ( SELECT "catalog_sales"."cs_sold_date_sk" AS "cs_sold_date_sk", "catalog_sales"."cs_item_sk" AS "cs_item_sk", "catalog_sales"."cs_ext_discount_amt" AS "cs_ext_discount_amt" FROM "catalog_sales" AS "catalog_sales" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" >= '2001-03-04' AND CAST("date_dim"."d_date" AS DATE) <= CAST('2001-06-02' AS DATE) ), "_u_0" AS ( SELECT 1.3 * AVG("catalog_sales"."cs_ext_discount_amt") AS "_col_0", "catalog_sales"."cs_item_sk" AS "_u_1" FROM "catalog_sales_2" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_item_sk" ) SELECT SUM("catalog_sales"."cs_ext_discount_amt") AS 
"excess discount amount" FROM "catalog_sales_2" AS "catalog_sales" JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_manufact_id" = 610 JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "item"."i_item_sk" WHERE "_u_0"."_col_0" < "catalog_sales"."cs_ext_discount_amt" LIMIT 100; -------------------------------------- -- TPC-DS 33 -------------------------------------- # execute: true WITH ss AS (SELECT i_manufact_id, Sum(ss_ext_sales_price) total_sales FROM store_sales, date_dim, customer_address, item WHERE i_manufact_id IN (SELECT i_manufact_id FROM item WHERE i_category IN ( 'Books' )) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 3 AND ss_addr_sk = ca_address_sk AND ca_gmt_offset = -5 GROUP BY i_manufact_id), cs AS (SELECT i_manufact_id, Sum(cs_ext_sales_price) total_sales FROM catalog_sales, date_dim, customer_address, item WHERE i_manufact_id IN (SELECT i_manufact_id FROM item WHERE i_category IN ( 'Books' )) AND cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 3 AND cs_bill_addr_sk = ca_address_sk AND ca_gmt_offset = -5 GROUP BY i_manufact_id), ws AS (SELECT i_manufact_id, Sum(ws_ext_sales_price) total_sales FROM web_sales, date_dim, customer_address, item WHERE i_manufact_id IN (SELECT i_manufact_id FROM item WHERE i_category IN ( 'Books' )) AND ws_item_sk = i_item_sk AND ws_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 3 AND ws_bill_addr_sk = ca_address_sk AND ca_gmt_offset = -5 GROUP BY i_manufact_id) SELECT i_manufact_id, Sum(total_sales) total_sales FROM (SELECT * FROM ss UNION ALL SELECT * FROM cs UNION ALL SELECT * FROM ws) tmp1 GROUP BY i_manufact_id ORDER BY total_sales LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS 
"date_dim" WHERE "date_dim"."d_moy" = 3 AND "date_dim"."d_year" = 1999 ), "customer_address_2" AS ( SELECT "customer_address"."ca_address_sk" AS "ca_address_sk", "customer_address"."ca_gmt_offset" AS "ca_gmt_offset" FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -5 ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_manufact_id" AS "i_manufact_id" FROM "item" AS "item" ), "_u_0" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id" FROM "item" AS "item" WHERE "item"."i_category" IN ('Books') GROUP BY "item"."i_manufact_id" ), "ss" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."i_manufact_id" = "item"."i_manufact_id" WHERE NOT "_u_0"."i_manufact_id" IS NULL GROUP BY "item"."i_manufact_id" ), "cs" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" LEFT JOIN "_u_0" AS "_u_1" ON "_u_1"."i_manufact_id" = "item"."i_manufact_id" WHERE NOT "_u_1"."i_manufact_id" IS NULL GROUP BY "item"."i_manufact_id" ), "ws" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = 
"web_sales"."ws_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "web_sales"."ws_bill_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" LEFT JOIN "_u_0" AS "_u_2" ON "_u_2"."i_manufact_id" = "item"."i_manufact_id" WHERE NOT "_u_2"."i_manufact_id" IS NULL GROUP BY "item"."i_manufact_id" ), "tmp1" AS ( SELECT "ss"."i_manufact_id" AS "i_manufact_id", "ss"."total_sales" AS "total_sales" FROM "ss" AS "ss" UNION ALL SELECT "cs"."i_manufact_id" AS "i_manufact_id", "cs"."total_sales" AS "total_sales" FROM "cs" AS "cs" UNION ALL SELECT "ws"."i_manufact_id" AS "i_manufact_id", "ws"."total_sales" AS "total_sales" FROM "ws" AS "ws" ) SELECT "tmp1"."i_manufact_id" AS "i_manufact_id", SUM("tmp1"."total_sales") AS "total_sales" FROM "tmp1" AS "tmp1" GROUP BY "tmp1"."i_manufact_id" ORDER BY "total_sales" LIMIT 100; -------------------------------------- -- TPC-DS 34 -------------------------------------- # execute: true SELECT c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, ss_ticket_number, cnt FROM (SELECT ss_ticket_number, ss_customer_sk, Count(*) cnt FROM store_sales, date_dim, store, household_demographics WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND ( date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28 ) AND ( household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential = 'unknown' ) AND household_demographics.hd_vehicle_count > 0 AND ( CASE WHEN household_demographics.hd_vehicle_count > 0 THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count ELSE NULL END ) > 1.2 AND date_dim.d_year IN ( 1999, 1999 + 1, 1999 + 2 ) AND store.s_county IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' , 'Williamson County', 'Williamson County', 'Williamson 
County', 'Williamson County' ) GROUP BY ss_ticket_number, ss_customer_sk) dn, customer WHERE ss_customer_sk = c_customer_sk AND cnt BETWEEN 15 AND 20 ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC; WITH "dn" AS ( SELECT "store_sales"."ss_ticket_number" AS "ss_ticket_number", "store_sales"."ss_customer_sk" AS "ss_customer_sk", COUNT(*) AS "cnt" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" IN (1999, 2000, 2001) AND ( ( "date_dim"."d_dom" <= 28 AND "date_dim"."d_dom" >= 25 ) OR ( "date_dim"."d_dom" <= 3 AND "date_dim"."d_dom" >= 1 ) ) JOIN "household_demographics" AS "household_demographics" ON ( "household_demographics"."hd_buy_potential" = '>10000' OR "household_demographics"."hd_buy_potential" = 'unknown' ) AND "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_vehicle_count" > 0 AND CASE WHEN "household_demographics"."hd_vehicle_count" > 0 THEN "household_demographics"."hd_dep_count" / "household_demographics"."hd_vehicle_count" ELSE NULL END > 1.2 JOIN "store" AS "store" ON "store"."s_county" IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store_sales"."ss_ticket_number", "store_sales"."ss_customer_sk" ) SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "customer"."c_salutation" AS "c_salutation", "customer"."c_preferred_cust_flag" AS "c_preferred_cust_flag", "dn"."ss_ticket_number" AS "ss_ticket_number", "dn"."cnt" AS "cnt" FROM "dn" AS "dn" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "dn"."ss_customer_sk" WHERE "dn"."cnt" <= 20 AND "dn"."cnt" >= 15 ORDER BY "c_last_name", "c_first_name", "c_salutation", "c_preferred_cust_flag" 
DESC; -------------------------------------- -- TPC-DS 35 -------------------------------------- # execute: true SELECT ca_state, cd_gender, cd_marital_status, cd_dep_count, Count(*) cnt1, Stddev_samp(cd_dep_count) AS "_col_5", Avg(cd_dep_count) AS "_col_6", Max(cd_dep_count) AS "_col_7", cd_dep_employed_count, Count(*) cnt2, Stddev_samp(cd_dep_employed_count) AS "_col_10", Avg(cd_dep_employed_count) AS "_col_11", Max(cd_dep_employed_count) AS "_col_12", cd_dep_college_count, Count(*) cnt3, Stddev_samp(cd_dep_college_count) AS "_col_15", Avg(cd_dep_college_count) AS "_col_16", Max(cd_dep_college_count) AS "_col_17" FROM customer c, customer_address ca, customer_demographics WHERE c.c_current_addr_sk = ca.ca_address_sk AND cd_demo_sk = c.c_current_cdemo_sk AND EXISTS (SELECT * FROM store_sales, date_dim WHERE c.c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2001 AND d_qoy < 4) AND ( EXISTS (SELECT * FROM web_sales, date_dim WHERE c.c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year = 2001 AND d_qoy < 4) OR EXISTS (SELECT * FROM catalog_sales, date_dim WHERE c.c_customer_sk = cs_ship_customer_sk AND cs_sold_date_sk = d_date_sk AND d_year = 2001 AND d_qoy < 4) ) GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, cd_dep_employed_count, cd_dep_college_count ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, cd_dep_employed_count, cd_dep_college_count LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date_id" AS "d_date_id", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq", "date_dim"."d_week_seq" AS "d_week_seq", "date_dim"."d_quarter_seq" AS "d_quarter_seq", "date_dim"."d_year" AS "d_year", "date_dim"."d_dow" AS "d_dow", "date_dim"."d_moy" AS "d_moy", "date_dim"."d_dom" AS "d_dom", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_fy_year" AS "d_fy_year", "date_dim"."d_fy_quarter_seq" AS "d_fy_quarter_seq", 
"date_dim"."d_fy_week_seq" AS "d_fy_week_seq", "date_dim"."d_day_name" AS "d_day_name", "date_dim"."d_quarter_name" AS "d_quarter_name", "date_dim"."d_holiday" AS "d_holiday", "date_dim"."d_weekend" AS "d_weekend", "date_dim"."d_following_holiday" AS "d_following_holiday", "date_dim"."d_first_dom" AS "d_first_dom", "date_dim"."d_last_dom" AS "d_last_dom", "date_dim"."d_same_day_ly" AS "d_same_day_ly", "date_dim"."d_same_day_lq" AS "d_same_day_lq", "date_dim"."d_current_day" AS "d_current_day", "date_dim"."d_current_week" AS "d_current_week", "date_dim"."d_current_month" AS "d_current_month", "date_dim"."d_current_quarter" AS "d_current_quarter", "date_dim"."d_current_year" AS "d_current_year" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_qoy" < 4 AND "date_dim"."d_year" = 2001 ), "_u_0" AS ( SELECT "store_sales"."ss_customer_sk" AS "_u_1" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_customer_sk" ), "_u_2" AS ( SELECT "web_sales"."ws_bill_customer_sk" AS "_u_3" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "web_sales"."ws_bill_customer_sk" ), "_u_4" AS ( SELECT "catalog_sales"."cs_ship_customer_sk" AS "_u_5" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_ship_customer_sk" ) SELECT "ca"."ca_state" AS "ca_state", "customer_demographics"."cd_gender" AS "cd_gender", "customer_demographics"."cd_marital_status" AS "cd_marital_status", "customer_demographics"."cd_dep_count" AS "cd_dep_count", COUNT(*) AS "cnt1", STDDEV_SAMP("customer_demographics"."cd_dep_count") AS "_col_5", AVG("customer_demographics"."cd_dep_count") AS "_col_6", MAX("customer_demographics"."cd_dep_count") AS "_col_7", "customer_demographics"."cd_dep_employed_count" AS "cd_dep_employed_count", 
COUNT(*) AS "cnt2", STDDEV_SAMP("customer_demographics"."cd_dep_employed_count") AS "_col_10", AVG("customer_demographics"."cd_dep_employed_count") AS "_col_11", MAX("customer_demographics"."cd_dep_employed_count") AS "_col_12", "customer_demographics"."cd_dep_college_count" AS "cd_dep_college_count", COUNT(*) AS "cnt3", STDDEV_SAMP("customer_demographics"."cd_dep_college_count") AS "_col_15", AVG("customer_demographics"."cd_dep_college_count") AS "_col_16", MAX("customer_demographics"."cd_dep_college_count") AS "_col_17" FROM "customer" AS "c" JOIN "customer_address" AS "ca" ON "c"."c_current_addr_sk" = "ca"."ca_address_sk" JOIN "customer_demographics" AS "customer_demographics" ON "c"."c_current_cdemo_sk" = "customer_demographics"."cd_demo_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "c"."c_customer_sk" LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."_u_3" = "c"."c_customer_sk" LEFT JOIN "_u_4" AS "_u_4" ON "_u_4"."_u_5" = "c"."c_customer_sk" WHERE NOT "_u_0"."_u_1" IS NULL AND ( NOT "_u_2"."_u_3" IS NULL OR NOT "_u_4"."_u_5" IS NULL ) GROUP BY "ca"."ca_state", "customer_demographics"."cd_gender", "customer_demographics"."cd_marital_status", "customer_demographics"."cd_dep_count", "customer_demographics"."cd_dep_employed_count", "customer_demographics"."cd_dep_college_count" ORDER BY "ca_state", "cd_gender", "cd_marital_status", "cd_dep_count", "cd_dep_employed_count", "cd_dep_college_count" LIMIT 100; -------------------------------------- -- TPC-DS 36 -------------------------------------- SELECT Sum(ss_net_profit) / Sum(ss_ext_sales_price) AS gross_margin, i_category, i_class, Grouping(i_category) + Grouping(i_class) AS lochierarchy, Rank() OVER ( partition BY Grouping(i_category)+Grouping(i_class), CASE WHEN Grouping( i_class) = 0 THEN i_category END ORDER BY Sum(ss_net_profit)/Sum(ss_ext_sales_price) ASC) AS rank_within_parent FROM store_sales, date_dim d1, item, store WHERE d1.d_year = 2000 AND d1.d_date_sk = ss_sold_date_sk AND i_item_sk = ss_item_sk AND 
s_store_sk = ss_store_sk AND s_state IN ( 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN' ) GROUP BY rollup( i_category, i_class ) ORDER BY lochierarchy DESC, CASE WHEN lochierarchy = 0 THEN i_category END, rank_within_parent LIMIT 100; SELECT SUM("store_sales"."ss_net_profit") / SUM("store_sales"."ss_ext_sales_price") AS "gross_margin", "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", GROUPING("item"."i_category") + GROUPING("item"."i_class") AS "lochierarchy", RANK() OVER ( PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END ORDER BY SUM("store_sales"."ss_net_profit") / SUM("store_sales"."ss_ext_sales_price") ) AS "rank_within_parent" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_year" = 2000 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_state" IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY ROLLUP ( "item"."i_category", "item"."i_class" ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "i_category" END, "rank_within_parent" LIMIT 100; -------------------------------------- -- TPC-DS 37 -------------------------------------- SELECT i_item_id , i_item_desc , i_current_price FROM item, inventory, date_dim, catalog_sales WHERE i_current_price BETWEEN 20 AND 20 + 30 AND inv_item_sk = i_item_sk AND d_date_sk=inv_date_sk AND d_date BETWEEN Cast('1999-03-06' AS DATE) AND ( Cast('1999-03-06' AS DATE) + INTERVAL '60' day) AND i_manufact_id IN (843,815,850,840) AND inv_quantity_on_hand BETWEEN 100 AND 500 AND cs_item_sk = i_item_sk GROUP BY i_item_id, i_item_desc, i_current_price ORDER BY i_item_id LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "item"."i_current_price" AS "i_current_price" FROM 
"item" AS "item" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "inventory" AS "inventory" ON "inventory"."inv_item_sk" = "item"."i_item_sk" AND "inventory"."inv_quantity_on_hand" <= 500 AND "inventory"."inv_quantity_on_hand" >= 100 JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('1999-05-05' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('1999-03-06' AS DATE) WHERE "item"."i_current_price" <= 50 AND "item"."i_current_price" >= 20 AND "item"."i_manufact_id" IN (843, 815, 850, 840) GROUP BY "item"."i_item_id", "item"."i_item_desc", "item"."i_current_price" ORDER BY "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 38 -------------------------------------- # execute: true SELECT Count(*) AS "_col_0" FROM (SELECT DISTINCT c_last_name, c_first_name, d_date FROM store_sales, date_dim, customer WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1188 AND 1188 + 11 INTERSECT SELECT DISTINCT c_last_name, c_first_name, d_date FROM catalog_sales, date_dim, customer WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1188 AND 1188 + 11 INTERSECT SELECT DISTINCT c_last_name, c_first_name, d_date FROM web_sales, date_dim, customer WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk AND web_sales.ws_bill_customer_sk = customer.c_customer_sk AND d_month_seq BETWEEN 1188 AND 1188 + 11) hot_cust LIMIT 100; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name" FROM "customer" AS "customer" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq" FROM 
"date_dim" AS "date_dim" WHERE "date_dim"."d_month_seq" <= 1199 AND "date_dim"."d_month_seq" >= 1188 ), "hot_cust" AS ( SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "store_sales" AS "store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" INTERSECT SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "catalog_sales" AS "catalog_sales" JOIN "customer_2" AS "customer" ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" INTERSECT SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "web_sales" AS "web_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" ) SELECT COUNT(*) AS "_col_0" FROM "hot_cust" AS "hot_cust" LIMIT 100; -------------------------------------- -- TPC-DS 39 -------------------------------------- WITH inv AS (SELECT w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy, stdev, mean, CASE mean WHEN 0 THEN NULL ELSE stdev / mean END cov FROM (SELECT w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy, Stddev_samp(inv_quantity_on_hand) stdev, Avg(inv_quantity_on_hand) mean FROM inventory, item, warehouse, date_dim WHERE inv_item_sk = i_item_sk AND inv_warehouse_sk = w_warehouse_sk AND inv_date_sk = d_date_sk AND d_year = 2002 GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo WHERE CASE mean WHEN 0 THEN 0 ELSE stdev / mean END > 1) SELECT inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, 
inv1.mean, inv1.cov, inv2.w_warehouse_sk, inv2.i_item_sk, inv2.d_moy, inv2.mean, inv2.cov FROM inv inv1, inv inv2 WHERE inv1.i_item_sk = inv2.i_item_sk AND inv1.w_warehouse_sk = inv2.w_warehouse_sk AND inv1.d_moy = 1 AND inv2.d_moy = 1 + 1 ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov, inv2.d_moy, inv2.mean, inv2.cov; WITH "foo" AS ( SELECT "warehouse"."w_warehouse_sk" AS "w_warehouse_sk", "item"."i_item_sk" AS "i_item_sk", "date_dim"."d_moy" AS "d_moy", STDDEV_SAMP("inventory"."inv_quantity_on_hand") AS "stdev", AVG("inventory"."inv_quantity_on_hand") AS "mean" FROM "inventory" AS "inventory" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" AND "date_dim"."d_year" = 2002 JOIN "item" AS "item" ON "inventory"."inv_item_sk" = "item"."i_item_sk" JOIN "warehouse" AS "warehouse" ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY "warehouse"."w_warehouse_name", "warehouse"."w_warehouse_sk", "item"."i_item_sk", "date_dim"."d_moy" ), "inv" AS ( SELECT "foo"."w_warehouse_sk" AS "w_warehouse_sk", "foo"."i_item_sk" AS "i_item_sk", "foo"."d_moy" AS "d_moy", "foo"."mean" AS "mean", CASE WHEN "foo"."mean" = 0 THEN NULL ELSE "foo"."stdev" / "foo"."mean" END AS "cov" FROM "foo" AS "foo" WHERE CASE WHEN "foo"."mean" = 0 THEN 0 ELSE "foo"."stdev" / "foo"."mean" END > 1 ) SELECT "inv1"."w_warehouse_sk" AS "w_warehouse_sk", "inv1"."i_item_sk" AS "i_item_sk", "inv1"."d_moy" AS "d_moy", "inv1"."mean" AS "mean", "inv1"."cov" AS "cov", "inv2"."w_warehouse_sk" AS "w_warehouse_sk", "inv2"."i_item_sk" AS "i_item_sk", "inv2"."d_moy" AS "d_moy", "inv2"."mean" AS "mean", "inv2"."cov" AS "cov" FROM "inv" AS "inv1" JOIN "inv" AS "inv2" ON "inv1"."i_item_sk" = "inv2"."i_item_sk" AND "inv1"."w_warehouse_sk" = "inv2"."w_warehouse_sk" AND "inv2"."d_moy" = 2 WHERE "inv1"."d_moy" = 1 ORDER BY "inv1"."w_warehouse_sk", "inv1"."i_item_sk", "inv1"."d_moy", "inv1"."mean", "inv1"."cov", "inv2"."d_moy", "inv2"."mean", 
"inv2"."cov"; -------------------------------------- -- TPC-DS 40 -------------------------------------- SELECT w_state , i_item_id , Sum( CASE WHEN ( Cast(d_date AS DATE) < Cast ('2002-06-01' AS DATE)) THEN cs_sales_price - COALESCE(cr_refunded_cash,0) ELSE 0 END) AS sales_before , Sum( CASE WHEN ( Cast(d_date AS DATE) >= Cast ('2002-06-01' AS DATE)) THEN cs_sales_price - COALESCE(cr_refunded_cash,0) ELSE 0 END) AS sales_after FROM catalog_sales LEFT OUTER JOIN catalog_returns ON ( cs_order_number = cr_order_number AND cs_item_sk = cr_item_sk) , warehouse , item , date_dim WHERE i_current_price BETWEEN 0.99 AND 1.49 AND i_item_sk = cs_item_sk AND cs_warehouse_sk = w_warehouse_sk AND cs_sold_date_sk = d_date_sk AND d_date BETWEEN (Cast ('2002-06-01' AS DATE) - INTERVAL '30' day) AND ( cast ('2002-06-01' AS date) + INTERVAL '30' day) GROUP BY w_state, i_item_id ORDER BY w_state, i_item_id LIMIT 100; SELECT "warehouse"."w_state" AS "w_state", "item"."i_item_id" AS "i_item_id", SUM( CASE WHEN CAST("date_dim"."d_date" AS DATE) < CAST('2002-06-01' AS DATE) THEN "catalog_sales"."cs_sales_price" - COALESCE("catalog_returns"."cr_refunded_cash", 0) ELSE 0 END ) AS "sales_before", SUM( CASE WHEN CAST("date_dim"."d_date" AS DATE) >= CAST('2002-06-01' AS DATE) THEN "catalog_sales"."cs_sales_price" - COALESCE("catalog_returns"."cr_refunded_cash", 0) ELSE 0 END ) AS "sales_after" FROM "catalog_sales" AS "catalog_sales" LEFT JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" JOIN "warehouse" AS "warehouse" ON "catalog_sales"."cs_warehouse_sk" = "warehouse"."w_warehouse_sk" JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" AND "item"."i_current_price" <= 1.49 AND "item"."i_current_price" >= 0.99 JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND CAST("date_dim"."d_date" AS DATE) 
<= CAST('2002-07-01' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2002-05-02' AS DATE) GROUP BY "warehouse"."w_state", "item"."i_item_id" ORDER BY "w_state", "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 41 -------------------------------------- SELECT Distinct(i_product_name) FROM item i1 WHERE i_manufact_id BETWEEN 765 AND 765 + 40 AND (SELECT Count(*) AS item_cnt FROM item WHERE ( i_manufact = i1.i_manufact AND ( ( i_category = 'Women' AND ( i_color = 'dim' OR i_color = 'green' ) AND ( i_units = 'Gross' OR i_units = 'Dozen' ) AND ( i_size = 'economy' OR i_size = 'petite' ) ) OR ( i_category = 'Women' AND ( i_color = 'navajo' OR i_color = 'aquamarine' ) AND ( i_units = 'Case' OR i_units = 'Unknown' ) AND ( i_size = 'large' OR i_size = 'N/A' ) ) OR ( i_category = 'Men' AND ( i_color = 'indian' OR i_color = 'dark' ) AND ( i_units = 'Oz' OR i_units = 'Lb' ) AND ( i_size = 'extra large' OR i_size = 'small' ) ) OR ( i_category = 'Men' AND ( i_color = 'peach' OR i_color = 'purple' ) AND ( i_units = 'Tbl' OR i_units = 'Bunch' ) AND ( i_size = 'economy' OR i_size = 'petite' ) ) ) ) OR ( i_manufact = i1.i_manufact AND ( ( i_category = 'Women' AND ( i_color = 'orchid' OR i_color = 'peru' ) AND ( i_units = 'Carton' OR i_units = 'Cup' ) AND ( i_size = 'economy' OR i_size = 'petite' ) ) OR ( i_category = 'Women' AND ( i_color = 'violet' OR i_color = 'papaya' ) AND ( i_units = 'Ounce' OR i_units = 'Box' ) AND ( i_size = 'large' OR i_size = 'N/A' ) ) OR ( i_category = 'Men' AND ( i_color = 'drab' OR i_color = 'grey' ) AND ( i_units = 'Each' OR i_units = 'N/A' ) AND ( i_size = 'extra large' OR i_size = 'small' ) ) OR ( i_category = 'Men' AND ( i_color = 'chocolate' OR i_color = 'antique' ) AND ( i_units = 'Dram' OR i_units = 'Gram' ) AND ( i_size = 'economy' OR i_size = 'petite' ) ) ) )) > 0 ORDER BY i_product_name LIMIT 100; SELECT DISTINCT "i1"."i_product_name" AS "i_product_name" FROM "item" AS "i1" WHERE "i1"."i_manufact_id" <= 805 AND 
"i1"."i_manufact_id" >= 765 AND ( SELECT COUNT(*) AS "item_cnt" FROM "item" AS "item" WHERE ( "i1"."i_manufact" = "item"."i_manufact" AND ( ( "item"."i_category" = 'Men' AND ( "item"."i_color" = 'antique' OR "item"."i_color" = 'chocolate' ) AND ( "item"."i_size" = 'economy' OR "item"."i_size" = 'petite' ) AND ( "item"."i_units" = 'Dram' OR "item"."i_units" = 'Gram' ) ) OR ( "item"."i_category" = 'Men' AND ( "item"."i_color" = 'drab' OR "item"."i_color" = 'grey' ) AND ( "item"."i_size" = 'extra large' OR "item"."i_size" = 'small' ) AND ( "item"."i_units" = 'Each' OR "item"."i_units" = 'N/A' ) ) OR ( "item"."i_category" = 'Women' AND ( "item"."i_color" = 'orchid' OR "item"."i_color" = 'peru' ) AND ( "item"."i_size" = 'economy' OR "item"."i_size" = 'petite' ) AND ( "item"."i_units" = 'Carton' OR "item"."i_units" = 'Cup' ) ) OR ( "item"."i_category" = 'Women' AND ( "item"."i_color" = 'papaya' OR "item"."i_color" = 'violet' ) AND ( "item"."i_size" = 'N/A' OR "item"."i_size" = 'large' ) AND ( "item"."i_units" = 'Box' OR "item"."i_units" = 'Ounce' ) ) ) ) OR ( "i1"."i_manufact" = "item"."i_manufact" AND ( ( "item"."i_category" = 'Men' AND ( "item"."i_color" = 'dark' OR "item"."i_color" = 'indian' ) AND ( "item"."i_size" = 'extra large' OR "item"."i_size" = 'small' ) AND ( "item"."i_units" = 'Lb' OR "item"."i_units" = 'Oz' ) ) OR ( "item"."i_category" = 'Men' AND ( "item"."i_color" = 'peach' OR "item"."i_color" = 'purple' ) AND ( "item"."i_size" = 'economy' OR "item"."i_size" = 'petite' ) AND ( "item"."i_units" = 'Bunch' OR "item"."i_units" = 'Tbl' ) ) OR ( "item"."i_category" = 'Women' AND ( "item"."i_color" = 'aquamarine' OR "item"."i_color" = 'navajo' ) AND ( "item"."i_size" = 'N/A' OR "item"."i_size" = 'large' ) AND ( "item"."i_units" = 'Case' OR "item"."i_units" = 'Unknown' ) ) OR ( "item"."i_category" = 'Women' AND ( "item"."i_color" = 'dim' OR "item"."i_color" = 'green' ) AND ( "item"."i_size" = 'economy' OR "item"."i_size" = 'petite' ) AND ( "item"."i_units" = 
'Dozen' OR "item"."i_units" = 'Gross' ) ) ) ) ) > 0 ORDER BY "i1"."i_product_name" LIMIT 100; -------------------------------------- -- TPC-DS 42 -------------------------------------- # execute: true SELECT dt.d_year, item.i_category_id, item.i_category, Sum(ss_ext_sales_price) AS "_col_3" FROM date_dim dt, store_sales, item WHERE dt.d_date_sk = store_sales.ss_sold_date_sk AND store_sales.ss_item_sk = item.i_item_sk AND item.i_manager_id = 1 AND dt.d_moy = 12 AND dt.d_year = 2000 GROUP BY dt.d_year, item.i_category_id, item.i_category ORDER BY Sum(ss_ext_sales_price) DESC, dt.d_year, item.i_category_id, item.i_category LIMIT 100; SELECT "dt"."d_year" AS "d_year", "item"."i_category_id" AS "i_category_id", "item"."i_category" AS "i_category", SUM("store_sales"."ss_ext_sales_price") AS "_col_3" FROM "date_dim" AS "dt" JOIN "store_sales" AS "store_sales" ON "dt"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manager_id" = 1 WHERE "dt"."d_moy" = 12 AND "dt"."d_year" = 2000 GROUP BY "dt"."d_year", "item"."i_category_id", "item"."i_category" ORDER BY "_col_3" DESC, "d_year", "i_category_id", "i_category" LIMIT 100; -------------------------------------- -- TPC-DS 43 -------------------------------------- # execute: true SELECT s_store_name, s_store_id, Sum(CASE WHEN ( d_day_name = 'Sunday' ) THEN ss_sales_price ELSE NULL END) sun_sales, Sum(CASE WHEN ( d_day_name = 'Monday' ) THEN ss_sales_price ELSE NULL END) mon_sales, Sum(CASE WHEN ( d_day_name = 'Tuesday' ) THEN ss_sales_price ELSE NULL END) tue_sales, Sum(CASE WHEN ( d_day_name = 'Wednesday' ) THEN ss_sales_price ELSE NULL END) wed_sales, Sum(CASE WHEN ( d_day_name = 'Thursday' ) THEN ss_sales_price ELSE NULL END) thu_sales, Sum(CASE WHEN ( d_day_name = 'Friday' ) THEN ss_sales_price ELSE NULL END) fri_sales, Sum(CASE WHEN ( d_day_name = 'Saturday' ) THEN ss_sales_price ELSE NULL END) sat_sales FROM date_dim, store_sales, store 
WHERE d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk AND s_gmt_offset = -5 AND d_year = 2002 GROUP BY s_store_name, s_store_id ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, thu_sales, fri_sales, sat_sales LIMIT 100; SELECT "store"."s_store_name" AS "s_store_name", "store"."s_store_id" AS "s_store_id", SUM( CASE WHEN "date_dim"."d_day_name" = 'Sunday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "sun_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Monday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "mon_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Tuesday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "tue_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Wednesday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "wed_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Thursday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "thu_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Friday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "fri_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Saturday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "sat_sales" FROM "date_dim" AS "date_dim" JOIN "store_sales" AS "store_sales" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" ON "store"."s_gmt_offset" = -5 AND "store"."s_store_sk" = "store_sales"."ss_store_sk" WHERE "date_dim"."d_year" = 2002 GROUP BY "store"."s_store_name", "store"."s_store_id" ORDER BY "s_store_name", "s_store_id", "sun_sales", "mon_sales", "tue_sales", "wed_sales", "thu_sales", "fri_sales", "sat_sales" LIMIT 100; -------------------------------------- -- TPC-DS 44 -------------------------------------- SELECT asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing FROM (SELECT * FROM (SELECT item_sk, Rank() OVER ( ORDER BY rank_col ASC) rnk FROM (SELECT ss_item_sk item_sk, Avg(ss_net_profit) rank_col FROM store_sales ss1 WHERE 
ss_store_sk = 4 GROUP BY ss_item_sk HAVING Avg(ss_net_profit) > 0.9 * (SELECT Avg(ss_net_profit) rank_col FROM store_sales WHERE ss_store_sk = 4 AND ss_cdemo_sk IS NULL GROUP BY ss_store_sk))V1) V11 WHERE rnk < 11) asceding, (SELECT * FROM (SELECT item_sk, Rank() OVER ( ORDER BY rank_col DESC) rnk FROM (SELECT ss_item_sk item_sk, Avg(ss_net_profit) rank_col FROM store_sales ss1 WHERE ss_store_sk = 4 GROUP BY ss_item_sk HAVING Avg(ss_net_profit) > 0.9 * (SELECT Avg(ss_net_profit) rank_col FROM store_sales WHERE ss_store_sk = 4 AND ss_cdemo_sk IS NULL GROUP BY ss_store_sk))V2) V21 WHERE rnk < 11) descending, item i1, item i2 WHERE asceding.rnk = descending.rnk AND i1.i_item_sk = asceding.item_sk AND i2.i_item_sk = descending.item_sk ORDER BY asceding.rnk LIMIT 100; WITH "_u_0" AS ( SELECT AVG("store_sales"."ss_net_profit") AS "rank_col" FROM "store_sales" AS "store_sales" WHERE "store_sales"."ss_cdemo_sk" IS NULL AND "store_sales"."ss_store_sk" = 4 GROUP BY "store_sales"."ss_store_sk" ), "v1" AS ( SELECT "ss1"."ss_item_sk" AS "item_sk", AVG("ss1"."ss_net_profit") AS "rank_col" FROM "store_sales" AS "ss1" CROSS JOIN "_u_0" AS "_u_0" WHERE "ss1"."ss_store_sk" = 4 GROUP BY "ss1"."ss_item_sk" HAVING 0.9 * MAX("_u_0"."rank_col") < AVG("ss1"."ss_net_profit") ), "v11" AS ( SELECT "v1"."item_sk" AS "item_sk", RANK() OVER (ORDER BY "v1"."rank_col") AS "rnk" FROM "v1" AS "v1" ), "v2" AS ( SELECT "ss1"."ss_item_sk" AS "item_sk", AVG("ss1"."ss_net_profit") AS "rank_col" FROM "store_sales" AS "ss1" CROSS JOIN "_u_0" AS "_u_1" WHERE "ss1"."ss_store_sk" = 4 GROUP BY "ss1"."ss_item_sk" HAVING 0.9 * MAX("_u_1"."rank_col") < AVG("ss1"."ss_net_profit") ), "v21" AS ( SELECT "v2"."item_sk" AS "item_sk", RANK() OVER (ORDER BY "v2"."rank_col" DESC) AS "rnk" FROM "v2" AS "v2" ) SELECT "v11"."rnk" AS "rnk", "i1"."i_product_name" AS "best_performing", "i2"."i_product_name" AS "worst_performing" FROM "v11" AS "v11" JOIN "v21" AS "v21" ON "v11"."rnk" = "v21"."rnk" AND "v21"."rnk" < 11 JOIN 
"item" AS "i1" ON "i1"."i_item_sk" = "v11"."item_sk" JOIN "item" AS "i2" ON "i2"."i_item_sk" = "v21"."item_sk" WHERE "v11"."rnk" < 11 ORDER BY "v11"."rnk" LIMIT 100; -------------------------------------- -- TPC-DS 45 -------------------------------------- # execute: true SELECT ca_zip, ca_state, Sum(ws_sales_price) AS "_col_2" FROM web_sales, customer, customer_address, date_dim, item WHERE ws_bill_customer_sk = c_customer_sk AND c_current_addr_sk = ca_address_sk AND ws_item_sk = i_item_sk AND ( SUBSTRING(ca_zip, 1, 5) IN ( '85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792' ) OR i_item_id IN (SELECT i_item_id FROM item WHERE i_item_sk IN ( 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 )) ) AND ws_sold_date_sk = d_date_sk AND d_qoy = 1 AND d_year = 2000 GROUP BY ca_zip, ca_state ORDER BY ca_zip, ca_state LIMIT 100; WITH "_u_0" AS ( SELECT "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" WHERE "item"."i_item_sk" IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) GROUP BY "item"."i_item_id" ) SELECT "customer_address"."ca_zip" AS "ca_zip", "customer_address"."ca_state" AS "ca_state", SUM("web_sales"."ws_sales_price") AS "_col_2" FROM "web_sales" AS "web_sales" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "date_dim"."d_qoy" = 1 AND "date_dim"."d_year" = 2000 JOIN "item" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL OR SUBSTRING("customer_address"."ca_zip", 1, 5) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') GROUP BY "customer_address"."ca_zip", "customer_address"."ca_state" ORDER BY "ca_zip", "ca_state" LIMIT 100; 
-------------------------------------- -- TPC-DS 46 -------------------------------------- # execute: true SELECT c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, amt, profit FROM (SELECT ss_ticket_number, ss_customer_sk, ca_city bought_city, Sum(ss_coupon_amt) amt, Sum(ss_net_profit) profit FROM store_sales, date_dim, store, household_demographics, customer_address WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND store_sales.ss_addr_sk = customer_address.ca_address_sk AND ( household_demographics.hd_dep_count = 6 OR household_demographics.hd_vehicle_count = 0 ) AND date_dim.d_dow IN ( 6, 0 ) AND date_dim.d_year IN ( 2000, 2000 + 1, 2000 + 2 ) AND store.s_city IN ( 'Midway', 'Fairview', 'Fairview', 'Fairview', 'Fairview' ) GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, customer_address current_addr WHERE ss_customer_sk = c_customer_sk AND customer.c_current_addr_sk = current_addr.ca_address_sk AND current_addr.ca_city <> bought_city ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number LIMIT 100; WITH "dn" AS ( SELECT "store_sales"."ss_ticket_number" AS "ss_ticket_number", "store_sales"."ss_customer_sk" AS "ss_customer_sk", "customer_address"."ca_city" AS "bought_city", SUM("store_sales"."ss_coupon_amt") AS "amt", SUM("store_sales"."ss_net_profit") AS "profit" FROM "store_sales" AS "store_sales" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_dow" IN (6, 0) AND "date_dim"."d_year" IN (2000, 2001, 2002) JOIN "household_demographics" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND ( "household_demographics"."hd_dep_count" = 6 OR 
"household_demographics"."hd_vehicle_count" = 0 ) JOIN "store" AS "store" ON "store"."s_city" IN ('Midway', 'Fairview', 'Fairview', 'Fairview', 'Fairview') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store_sales"."ss_ticket_number", "store_sales"."ss_customer_sk", "store_sales"."ss_addr_sk", "customer_address"."ca_city" ) SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "current_addr"."ca_city" AS "ca_city", "dn"."bought_city" AS "bought_city", "dn"."ss_ticket_number" AS "ss_ticket_number", "dn"."amt" AS "amt", "dn"."profit" AS "profit" FROM "dn" AS "dn" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "dn"."ss_customer_sk" JOIN "customer_address" AS "current_addr" ON "current_addr"."ca_address_sk" = "customer"."c_current_addr_sk" AND "current_addr"."ca_city" <> "dn"."bought_city" ORDER BY "c_last_name", "c_first_name", "ca_city", "bought_city", "ss_ticket_number" LIMIT 100; -------------------------------------- -- TPC-DS 47 -------------------------------------- WITH v1 AS (SELECT i_category, i_brand, s_store_name, s_company_name, d_year, d_moy, Sum(ss_sales_price) sum_sales, Avg(Sum(ss_sales_price)) OVER ( partition BY i_category, i_brand, s_store_name, s_company_name, d_year) avg_monthly_sales, Rank() OVER ( partition BY i_category, i_brand, s_store_name, s_company_name ORDER BY d_year, d_moy) rn FROM item, store_sales, date_dim, store WHERE ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND ( d_year = 1999 OR ( d_year = 1999 - 1 AND d_moy = 12 ) OR ( d_year = 1999 + 1 AND d_moy = 1 ) ) GROUP BY i_category, i_brand, s_store_name, s_company_name, d_year, d_moy), v2 AS (SELECT v1.i_category, v1.d_year, v1.d_moy, v1.avg_monthly_sales, v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum FROM v1, v1 v1_lag, v1 v1_lead WHERE v1.i_category = v1_lag.i_category AND v1.i_category = v1_lead.i_category AND v1.i_brand = v1_lag.i_brand AND v1.i_brand = 
v1_lead.i_brand AND v1.s_store_name = v1_lag.s_store_name AND v1.s_store_name = v1_lead.s_store_name AND v1.s_company_name = v1_lag.s_company_name AND v1.s_company_name = v1_lead.s_company_name AND v1.rn = v1_lag.rn + 1 AND v1.rn = v1_lead.rn - 1) SELECT * FROM v2 WHERE d_year = 1999 AND avg_monthly_sales > 0 AND CASE WHEN avg_monthly_sales > 0 THEN Abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1 ORDER BY sum_sales - avg_monthly_sales, 3 LIMIT 100; WITH "v1" AS ( SELECT "item"."i_category" AS "i_category", "item"."i_brand" AS "i_brand", "store"."s_store_name" AS "s_store_name", "store"."s_company_name" AS "s_company_name", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy", SUM("store_sales"."ss_sales_price") AS "sum_sales", AVG(SUM("store_sales"."ss_sales_price")) OVER ( PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name", "date_dim"."d_year" ) AS "avg_monthly_sales", RANK() OVER ( PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name" ORDER BY "date_dim"."d_year", "date_dim"."d_moy" ) AS "rn" FROM "item" AS "item" JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND ( "date_dim"."d_moy" = 1 OR "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 1999 ) AND ( "date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999 ) AND ( "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 ) AND ( "date_dim"."d_year" = 1998 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 ) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name", "date_dim"."d_year", "date_dim"."d_moy" ) SELECT "v1"."i_category" AS "i_category", "v1"."d_year" AS "d_year", 
"v1"."d_moy" AS "d_moy", "v1"."avg_monthly_sales" AS "avg_monthly_sales", "v1"."sum_sales" AS "sum_sales", "v1_lag"."sum_sales" AS "psum", "v1_lead"."sum_sales" AS "nsum" FROM "v1" AS "v1" JOIN "v1" AS "v1_lag" ON "v1"."i_brand" = "v1_lag"."i_brand" AND "v1"."i_category" = "v1_lag"."i_category" AND "v1"."rn" = "v1_lag"."rn" + 1 AND "v1"."s_company_name" = "v1_lag"."s_company_name" AND "v1"."s_store_name" = "v1_lag"."s_store_name" JOIN "v1" AS "v1_lead" ON "v1"."i_brand" = "v1_lead"."i_brand" AND "v1"."i_category" = "v1_lead"."i_category" AND "v1"."rn" = "v1_lead"."rn" - 1 AND "v1"."s_company_name" = "v1_lead"."s_company_name" AND "v1"."s_store_name" = "v1_lead"."s_store_name" WHERE "v1"."avg_monthly_sales" > 0 AND "v1"."d_year" = 1999 AND CASE WHEN "v1"."avg_monthly_sales" > 0 THEN ABS("v1"."sum_sales" - "v1"."avg_monthly_sales") / "v1"."avg_monthly_sales" ELSE NULL END > 0.1 ORDER BY "v1"."sum_sales" - "v1"."avg_monthly_sales", "d_moy" LIMIT 100; -------------------------------------- -- TPC-DS 48 -------------------------------------- # execute: true SELECT Sum (ss_quantity) AS "_col_0" FROM store_sales, store, customer_demographics, customer_address, date_dim WHERE s_store_sk = ss_store_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1999 AND ( ( cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'W' AND cd_education_status = 'Secondary' AND ss_sales_price BETWEEN 100.00 AND 150.00 ) OR ( cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'M' AND cd_education_status = 'Advanced Degree' AND ss_sales_price BETWEEN 50.00 AND 100.00 ) OR ( cd_demo_sk = ss_cdemo_sk AND cd_marital_status = 'D' AND cd_education_status = '2 yr Degree' AND ss_sales_price BETWEEN 150.00 AND 200.00 ) ) AND ( ( ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'TX', 'NE', 'MO' ) AND ss_net_profit BETWEEN 0 AND 2000 ) OR ( ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'CO', 'TN', 'ND' ) AND ss_net_profit BETWEEN 150 AND 3000 ) OR ( 
ss_addr_sk = ca_address_sk AND ca_country = 'United States' AND ca_state IN ( 'OK', 'PA', 'CA' ) AND ss_net_profit BETWEEN 50 AND 25000 ) ); SELECT SUM("store_sales"."ss_quantity") AS "_col_0" FROM "store_sales" AS "store_sales" JOIN "customer_address" AS "customer_address" ON ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('CO', 'TN', 'ND') AND "store_sales"."ss_net_profit" <= 3000 AND "store_sales"."ss_net_profit" >= 150 ) OR ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('OK', 'PA', 'CA') AND "store_sales"."ss_net_profit" <= 25000 AND "store_sales"."ss_net_profit" >= 50 ) OR ( "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" AND "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('TX', 'NE', 'MO') AND "store_sales"."ss_net_profit" <= 2000 AND "store_sales"."ss_net_profit" >= 0 ) JOIN "customer_demographics" AS "customer_demographics" ON ( "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = '2 yr Degree' AND "customer_demographics"."cd_marital_status" = 'D' AND "store_sales"."ss_sales_price" <= 200.00 AND "store_sales"."ss_sales_price" >= 150.00 ) OR ( "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'Advanced Degree' AND "customer_demographics"."cd_marital_status" = 'M' AND "store_sales"."ss_sales_price" <= 100.00 AND "store_sales"."ss_sales_price" >= 50.00 ) OR ( "customer_demographics"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" AND "customer_demographics"."cd_education_status" = 'Secondary' AND "customer_demographics"."cd_marital_status" = 'W' AND "store_sales"."ss_sales_price" <= 150.00 AND "store_sales"."ss_sales_price" >= 100.00 ) JOIN "date_dim" AS 
"date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" = 1999 JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk"; -------------------------------------- -- TPC-DS 49 -------------------------------------- SELECT 'web' AS channel, web.item, web.return_ratio, web.return_rank, web.currency_rank FROM (SELECT item, return_ratio, currency_ratio, Rank() OVER ( ORDER BY return_ratio) AS return_rank, Rank() OVER ( ORDER BY currency_ratio) AS currency_rank FROM (SELECT ws.ws_item_sk AS item, ( Cast(Sum(COALESCE(wr.wr_return_quantity, 0)) AS DEC(15, 4)) / Cast( Sum(COALESCE(ws.ws_quantity, 0)) AS DEC(15, 4)) ) AS return_ratio, ( Cast(Sum(COALESCE(wr.wr_return_amt, 0)) AS DEC(15, 4)) / Cast( Sum( COALESCE(ws.ws_net_paid, 0)) AS DEC(15, 4)) ) AS currency_ratio FROM web_sales ws LEFT OUTER JOIN web_returns wr ON ( ws.ws_order_number = wr.wr_order_number AND ws.ws_item_sk = wr.wr_item_sk ), date_dim WHERE wr.wr_return_amt > 10000 AND ws.ws_net_profit > 1 AND ws.ws_net_paid > 0 AND ws.ws_quantity > 0 AND ws_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 12 GROUP BY ws.ws_item_sk) in_web) web WHERE ( web.return_rank <= 10 OR web.currency_rank <= 10 ) UNION SELECT 'catalog' AS channel, catalog.item, catalog.return_ratio, catalog.return_rank, catalog.currency_rank FROM (SELECT item, return_ratio, currency_ratio, Rank() OVER ( ORDER BY return_ratio) AS return_rank, Rank() OVER ( ORDER BY currency_ratio) AS currency_rank FROM (SELECT cs.cs_item_sk AS item, ( Cast(Sum(COALESCE(cr.cr_return_quantity, 0)) AS DEC(15, 4)) / Cast( Sum(COALESCE(cs.cs_quantity, 0)) AS DEC(15, 4)) ) AS return_ratio, ( Cast(Sum(COALESCE(cr.cr_return_amount, 0)) AS DEC(15, 4 )) / Cast(Sum( COALESCE(cs.cs_net_paid, 0)) AS DEC( 15, 4)) ) AS currency_ratio FROM catalog_sales cs LEFT OUTER JOIN catalog_returns cr ON ( cs.cs_order_number = cr.cr_order_number AND cs.cs_item_sk = cr.cr_item_sk ), date_dim WHERE cr.cr_return_amount > 10000 AND 
cs.cs_net_profit > 1 AND cs.cs_net_paid > 0 AND cs.cs_quantity > 0 AND cs_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 12 GROUP BY cs.cs_item_sk) in_cat) catalog WHERE ( catalog.return_rank <= 10 OR catalog.currency_rank <= 10 ) UNION SELECT 'store' AS channel, store.item, store.return_ratio, store.return_rank, store.currency_rank FROM (SELECT item, return_ratio, currency_ratio, Rank() OVER ( ORDER BY return_ratio) AS return_rank, Rank() OVER ( ORDER BY currency_ratio) AS currency_rank FROM (SELECT sts.ss_item_sk AS item, ( Cast(Sum(COALESCE(sr.sr_return_quantity, 0)) AS DEC(15, 4)) / Cast( Sum(COALESCE(sts.ss_quantity, 0)) AS DEC(15, 4)) ) AS return_ratio, ( Cast(Sum(COALESCE(sr.sr_return_amt, 0)) AS DEC(15, 4)) / Cast( Sum( COALESCE(sts.ss_net_paid, 0)) AS DEC(15, 4)) ) AS currency_ratio FROM store_sales sts LEFT OUTER JOIN store_returns sr ON ( sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk ), date_dim WHERE sr.sr_return_amt > 10000 AND sts.ss_net_profit > 1 AND sts.ss_net_paid > 0 AND sts.ss_quantity > 0 AND ss_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 12 GROUP BY sts.ss_item_sk) in_store) store WHERE ( store.return_rank <= 10 OR store.currency_rank <= 10 ) ORDER BY 1, 4, 5 LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1999 ), "in_web" AS ( SELECT "ws"."ws_item_sk" AS "item", CAST(SUM(COALESCE("wr"."wr_return_quantity", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("ws"."ws_quantity", 0)) AS DECIMAL(15, 4)) AS "return_ratio", CAST(SUM(COALESCE("wr"."wr_return_amt", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("ws"."ws_net_paid", 0)) AS DECIMAL(15, 4)) AS "currency_ratio" FROM "web_sales" AS "ws" LEFT JOIN "web_returns" AS "wr" ON "wr"."wr_item_sk" = "ws"."ws_item_sk" AND "wr"."wr_order_number" = "ws"."ws_order_number" JOIN 
"date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "ws"."ws_sold_date_sk" WHERE "wr"."wr_return_amt" > 10000 AND "ws"."ws_net_paid" > 0 AND "ws"."ws_net_profit" > 1 AND "ws"."ws_quantity" > 0 GROUP BY "ws"."ws_item_sk" ), "web" AS ( SELECT "in_web"."item" AS "item", "in_web"."return_ratio" AS "return_ratio", RANK() OVER (ORDER BY "in_web"."return_ratio") AS "return_rank", RANK() OVER (ORDER BY "in_web"."currency_ratio") AS "currency_rank" FROM "in_web" AS "in_web" ), "in_cat" AS ( SELECT "cs"."cs_item_sk" AS "item", CAST(SUM(COALESCE("cr"."cr_return_quantity", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("cs"."cs_quantity", 0)) AS DECIMAL(15, 4)) AS "return_ratio", CAST(SUM(COALESCE("cr"."cr_return_amount", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("cs"."cs_net_paid", 0)) AS DECIMAL(15, 4)) AS "currency_ratio" FROM "catalog_sales" AS "cs" LEFT JOIN "catalog_returns" AS "cr" ON "cr"."cr_item_sk" = "cs"."cs_item_sk" AND "cr"."cr_order_number" = "cs"."cs_order_number" JOIN "date_dim_2" AS "date_dim" ON "cs"."cs_sold_date_sk" = "date_dim"."d_date_sk" WHERE "cr"."cr_return_amount" > 10000 AND "cs"."cs_net_paid" > 0 AND "cs"."cs_net_profit" > 1 AND "cs"."cs_quantity" > 0 GROUP BY "cs"."cs_item_sk" ), "catalog" AS ( SELECT "in_cat"."item" AS "item", "in_cat"."return_ratio" AS "return_ratio", RANK() OVER (ORDER BY "in_cat"."return_ratio") AS "return_rank", RANK() OVER (ORDER BY "in_cat"."currency_ratio") AS "currency_rank" FROM "in_cat" AS "in_cat" ), "in_store" AS ( SELECT "sts"."ss_item_sk" AS "item", CAST(SUM(COALESCE("sr"."sr_return_quantity", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("sts"."ss_quantity", 0)) AS DECIMAL(15, 4)) AS "return_ratio", CAST(SUM(COALESCE("sr"."sr_return_amt", 0)) AS DECIMAL(15, 4)) / CAST(SUM(COALESCE("sts"."ss_net_paid", 0)) AS DECIMAL(15, 4)) AS "currency_ratio" FROM "store_sales" AS "sts" LEFT JOIN "store_returns" AS "sr" ON "sr"."sr_item_sk" = "sts"."ss_item_sk" AND "sr"."sr_ticket_number" = "sts"."ss_ticket_number" JOIN "date_dim_2" 
AS "date_dim" ON "date_dim"."d_date_sk" = "sts"."ss_sold_date_sk" WHERE "sr"."sr_return_amt" > 10000 AND "sts"."ss_net_paid" > 0 AND "sts"."ss_net_profit" > 1 AND "sts"."ss_quantity" > 0 GROUP BY "sts"."ss_item_sk" ), "store" AS ( SELECT "in_store"."item" AS "item", "in_store"."return_ratio" AS "return_ratio", RANK() OVER (ORDER BY "in_store"."return_ratio") AS "return_rank", RANK() OVER (ORDER BY "in_store"."currency_ratio") AS "currency_rank" FROM "in_store" AS "in_store" ) SELECT 'web' AS "channel", "web"."item" AS "item", "web"."return_ratio" AS "return_ratio", "web"."return_rank" AS "return_rank", "web"."currency_rank" AS "currency_rank" FROM "web" AS "web" WHERE "web"."currency_rank" <= 10 OR "web"."return_rank" <= 10 UNION SELECT 'catalog' AS "channel", "catalog"."item" AS "item", "catalog"."return_ratio" AS "return_ratio", "catalog"."return_rank" AS "return_rank", "catalog"."currency_rank" AS "currency_rank" FROM "catalog" AS "catalog" WHERE "catalog"."currency_rank" <= 10 OR "catalog"."return_rank" <= 10 UNION SELECT 'store' AS "channel", "store"."item" AS "item", "store"."return_ratio" AS "return_ratio", "store"."return_rank" AS "return_rank", "store"."currency_rank" AS "currency_rank" FROM "store" AS "store" WHERE "store"."currency_rank" <= 10 OR "store"."return_rank" <= 10 ORDER BY "channel", "return_rank", "currency_rank" LIMIT 100; -------------------------------------- -- TPC-DS 50 -------------------------------------- # execute: true SELECT s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip, Sum(CASE WHEN ( sr_returned_date_sk - ss_sold_date_sk <= 30 ) THEN 1 ELSE 0 END) AS "30 days", Sum(CASE WHEN ( sr_returned_date_sk - ss_sold_date_sk > 30 ) AND ( sr_returned_date_sk - ss_sold_date_sk <= 60 ) THEN 1 ELSE 0 END) AS "31-60 days", Sum(CASE WHEN ( sr_returned_date_sk - ss_sold_date_sk > 60 ) AND ( sr_returned_date_sk - ss_sold_date_sk <= 90 ) THEN 1 ELSE 0 END) AS "61-90 days", 
Sum(CASE WHEN ( sr_returned_date_sk - ss_sold_date_sk > 90 ) AND ( sr_returned_date_sk - ss_sold_date_sk <= 120 ) THEN 1 ELSE 0 END) AS "91-120 days", Sum(CASE WHEN ( sr_returned_date_sk - ss_sold_date_sk > 120 ) THEN 1 ELSE 0 END) AS ">120 days" FROM store_sales, store_returns, store, date_dim d1, date_dim d2 WHERE d2.d_year = 2002 AND d2.d_moy = 9 AND ss_ticket_number = sr_ticket_number AND ss_item_sk = sr_item_sk AND ss_sold_date_sk = d1.d_date_sk AND sr_returned_date_sk = d2.d_date_sk AND ss_customer_sk = sr_customer_sk AND ss_store_sk = s_store_sk GROUP BY s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip ORDER BY s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip LIMIT 100; SELECT "store"."s_store_name" AS "s_store_name", "store"."s_company_id" AS "s_company_id", "store"."s_street_number" AS "s_street_number", "store"."s_street_name" AS "s_street_name", "store"."s_street_type" AS "s_street_type", "store"."s_suite_number" AS "s_suite_number", "store"."s_city" AS "s_city", "store"."s_county" AS "s_county", "store"."s_state" AS "s_state", "store"."s_zip" AS "s_zip", SUM( CASE WHEN "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" <= 30 THEN 1 ELSE 0 END ) AS "30 days", SUM( CASE WHEN "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" <= 60 AND "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" > 30 THEN 1 ELSE 0 END ) AS "31-60 days", SUM( CASE WHEN "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" <= 90 AND "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" > 60 THEN 1 ELSE 0 END ) AS "61-90 days", SUM( CASE WHEN "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" <= 120 AND "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" > 90 THEN 1 ELSE 0 END ) AS "91-120 days", SUM( 
CASE WHEN "store_returns"."sr_returned_date_sk" - "store_sales"."ss_sold_date_sk" > 120 THEN 1 ELSE 0 END ) AS ">120 days" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_customer_sk" = "store_sales"."ss_customer_sk" AND "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "date_dim" AS "d2" ON "d2"."d_date_sk" = "store_returns"."sr_returned_date_sk" AND "d2"."d_moy" = 9 AND "d2"."d_year" = 2002 GROUP BY "store"."s_store_name", "store"."s_company_id", "store"."s_street_number", "store"."s_street_name", "store"."s_street_type", "store"."s_suite_number", "store"."s_city", "store"."s_county", "store"."s_state", "store"."s_zip" ORDER BY "s_store_name", "s_company_id", "s_street_number", "s_street_name", "s_street_type", "s_suite_number", "s_city", "s_county", "s_state", "s_zip" LIMIT 100; -------------------------------------- -- TPC-DS 51 -------------------------------------- WITH web_v1 AS ( SELECT ws_item_sk item_sk, d_date, sum(Sum(ws_sales_price)) OVER (partition BY ws_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) cume_sales FROM web_sales , date_dim WHERE ws_sold_date_sk=d_date_sk AND d_month_seq BETWEEN 1192 AND 1192+11 AND ws_item_sk IS NOT NULL GROUP BY ws_item_sk, d_date), store_v1 AS ( SELECT ss_item_sk item_sk, d_date, sum(sum(ss_sales_price)) OVER (partition BY ss_item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) cume_sales FROM store_sales , date_dim WHERE ss_sold_date_sk=d_date_sk AND d_month_seq BETWEEN 1192 AND 1192+11 AND ss_item_sk IS NOT NULL GROUP BY ss_item_sk, d_date) SELECT * FROM ( SELECT item_sk , d_date , web_sales , store_sales , max(web_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN 
UNBOUNDED PRECEDING AND CURRENT row) web_cumulative , max(store_sales) OVER (partition BY item_sk ORDER BY d_date rows BETWEEN UNBOUNDED PRECEDING AND CURRENT row) store_cumulative FROM ( SELECT CASE WHEN web.item_sk IS NOT NULL THEN web.item_sk ELSE store.item_sk END item_sk , CASE WHEN web.d_date IS NOT NULL THEN web.d_date ELSE store.d_date END d_date , web.cume_sales web_sales , store.cume_sales store_sales FROM web_v1 web FULL OUTER JOIN store_v1 store ON ( web.item_sk = store.item_sk AND web.d_date = store.d_date) )x )y WHERE web_cumulative > store_cumulative ORDER BY item_sk , d_date LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_month_seq" <= 1203 AND "date_dim"."d_month_seq" >= 1192 ), "web_v1" AS ( SELECT "web_sales"."ws_item_sk" AS "item_sk", "date_dim"."d_date" AS "d_date", SUM(SUM("web_sales"."ws_sales_price")) OVER ( PARTITION BY "web_sales"."ws_item_sk" ORDER BY "date_dim"."d_date" rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS "cume_sales" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" WHERE NOT "web_sales"."ws_item_sk" IS NULL GROUP BY "web_sales"."ws_item_sk", "date_dim"."d_date" ), "store_v1" AS ( SELECT "store_sales"."ss_item_sk" AS "item_sk", "date_dim"."d_date" AS "d_date", SUM(SUM("store_sales"."ss_sales_price")) OVER ( PARTITION BY "store_sales"."ss_item_sk" ORDER BY "date_dim"."d_date" rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS "cume_sales" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" WHERE NOT "store_sales"."ss_item_sk" IS NULL GROUP BY "store_sales"."ss_item_sk", "date_dim"."d_date" ), "y" AS ( SELECT CASE WHEN NOT "web"."item_sk" IS NULL THEN "web"."item_sk" ELSE "store"."item_sk" END AS "item_sk", CASE WHEN 
NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END AS "d_date", "web"."cume_sales" AS "web_sales", "store"."cume_sales" AS "store_sales", MAX("web"."cume_sales") OVER ( PARTITION BY CASE WHEN NOT "web"."item_sk" IS NULL THEN "web"."item_sk" ELSE "store"."item_sk" END ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS "web_cumulative", MAX("store"."cume_sales") OVER ( PARTITION BY CASE WHEN NOT "web"."item_sk" IS NULL THEN "web"."item_sk" ELSE "store"."item_sk" END ORDER BY CASE WHEN NOT "web"."d_date" IS NULL THEN "web"."d_date" ELSE "store"."d_date" END rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ) AS "store_cumulative" FROM "web_v1" AS "web" FULL JOIN "store_v1" AS "store" ON "store"."d_date" = "web"."d_date" AND "store"."item_sk" = "web"."item_sk" ) SELECT "y"."item_sk" AS "item_sk", "y"."d_date" AS "d_date", "y"."web_sales" AS "web_sales", "y"."store_sales" AS "store_sales", "y"."web_cumulative" AS "web_cumulative", "y"."store_cumulative" AS "store_cumulative" FROM "y" AS "y" WHERE "y"."store_cumulative" < "y"."web_cumulative" ORDER BY "y"."item_sk", "y"."d_date" LIMIT 100; -------------------------------------- -- TPC-DS 52 -------------------------------------- # execute: true SELECT dt.d_year, item.i_brand_id brand_id, item.i_brand brand, Sum(ss_ext_sales_price) ext_price FROM date_dim dt, store_sales, item WHERE dt.d_date_sk = store_sales.ss_sold_date_sk AND store_sales.ss_item_sk = item.i_item_sk AND item.i_manager_id = 1 AND dt.d_moy = 11 AND dt.d_year = 1999 GROUP BY dt.d_year, item.i_brand, item.i_brand_id ORDER BY dt.d_year, ext_price DESC, brand_id LIMIT 100; SELECT "dt"."d_year" AS "d_year", "item"."i_brand_id" AS "brand_id", "item"."i_brand" AS "brand", SUM("store_sales"."ss_ext_sales_price") AS "ext_price" FROM "date_dim" AS "dt" JOIN "store_sales" AS "store_sales" ON "dt"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN 
"item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manager_id" = 1 WHERE "dt"."d_moy" = 11 AND "dt"."d_year" = 1999 GROUP BY "dt"."d_year", "item"."i_brand", "item"."i_brand_id" ORDER BY "d_year", "ext_price" DESC, "brand_id" LIMIT 100; -------------------------------------- -- TPC-DS 53 -------------------------------------- SELECT * FROM (SELECT i_manufact_id, Sum(ss_sales_price) sum_sales, Avg(Sum(ss_sales_price)) OVER ( partition BY i_manufact_id) avg_quarterly_sales FROM item, store_sales, date_dim, store WHERE ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND d_month_seq IN ( 1199, 1199 + 1, 1199 + 2, 1199 + 3, 1199 + 4, 1199 + 5, 1199 + 6, 1199 + 7, 1199 + 8, 1199 + 9, 1199 + 10, 1199 + 11 ) AND ( ( i_category IN ( 'Books', 'Children', 'Electronics' ) AND i_class IN ( 'personal', 'portable', 'reference', 'self-help' ) AND i_brand IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7' , 'exportiunivamalg #9', 'scholaramalgamalg #9' ) ) OR ( i_category IN ( 'Women', 'Music', 'Men' ) AND i_class IN ( 'accessories', 'classical', 'fragrances', 'pants' ) AND i_brand IN ( 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1' ) ) ) GROUP BY i_manufact_id, d_qoy) tmp1 WHERE CASE WHEN avg_quarterly_sales > 0 THEN Abs (sum_sales - avg_quarterly_sales) / avg_quarterly_sales ELSE NULL END > 0.1 ORDER BY avg_quarterly_sales, sum_sales, i_manufact_id LIMIT 100; WITH "tmp1" AS ( SELECT "item"."i_manufact_id" AS "i_manufact_id", SUM("store_sales"."ss_sales_price") AS "sum_sales", AVG(SUM("store_sales"."ss_sales_price")) OVER (PARTITION BY "item"."i_manufact_id") AS "avg_quarterly_sales" FROM "item" AS "item" JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_month_seq" IN (1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 
1210) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" WHERE ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_brand" IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) ) AND ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_category" IN ('Books', 'Children', 'Electronics') ) AND ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) AND ( "item"."i_brand" IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) OR "item"."i_category" IN ('Women', 'Music', 'Men') ) AND ( "item"."i_brand" IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) OR "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') ) AND ( "item"."i_category" IN ('Books', 'Children', 'Electronics') OR "item"."i_category" IN ('Women', 'Music', 'Men') ) AND ( "item"."i_category" IN ('Books', 'Children', 'Electronics') OR "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') ) AND ( "item"."i_category" IN ('Women', 'Music', 'Men') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) AND ( "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) GROUP BY "item"."i_manufact_id", "date_dim"."d_qoy" ) SELECT "tmp1"."i_manufact_id" AS "i_manufact_id", "tmp1"."sum_sales" AS "sum_sales", "tmp1"."avg_quarterly_sales" AS "avg_quarterly_sales" FROM "tmp1" AS "tmp1" WHERE CASE WHEN "tmp1"."avg_quarterly_sales" > 0 THEN ABS("tmp1"."sum_sales" - "tmp1"."avg_quarterly_sales") / "tmp1"."avg_quarterly_sales" ELSE NULL END > 0.1 ORDER BY 
"tmp1"."avg_quarterly_sales", "tmp1"."sum_sales", "tmp1"."i_manufact_id" LIMIT 100; -------------------------------------- -- TPC-DS 54 -------------------------------------- # execute: true WITH my_customers AS (SELECT DISTINCT c_customer_sk, c_current_addr_sk FROM (SELECT cs_sold_date_sk sold_date_sk, cs_bill_customer_sk customer_sk, cs_item_sk item_sk FROM catalog_sales UNION ALL SELECT ws_sold_date_sk sold_date_sk, ws_bill_customer_sk customer_sk, ws_item_sk item_sk FROM web_sales) cs_or_ws_sales, item, date_dim, customer WHERE sold_date_sk = d_date_sk AND item_sk = i_item_sk AND i_category = 'Sports' AND i_class = 'fitness' AND c_customer_sk = cs_or_ws_sales.customer_sk AND d_moy = 5 AND d_year = 2000), my_revenue AS (SELECT c_customer_sk, Sum(ss_ext_sales_price) AS revenue FROM my_customers, store_sales, customer_address, store, date_dim WHERE c_current_addr_sk = ca_address_sk AND ca_county = s_county AND ca_state = s_state AND ss_sold_date_sk = d_date_sk AND c_customer_sk = ss_customer_sk AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 FROM date_dim WHERE d_year = 2000 AND d_moy = 5) AND (SELECT DISTINCT d_month_seq + 3 FROM date_dim WHERE d_year = 2000 AND d_moy = 5) GROUP BY c_customer_sk), segments AS (SELECT Cast(( revenue / 50 ) AS INT) AS segment FROM my_revenue) SELECT segment, Count(*) AS num_customers, segment * 50 AS segment_base FROM segments GROUP BY segment ORDER BY segment, num_customers LIMIT 100; WITH "cs_or_ws_sales" AS ( SELECT "catalog_sales"."cs_sold_date_sk" AS "sold_date_sk", "catalog_sales"."cs_bill_customer_sk" AS "customer_sk", "catalog_sales"."cs_item_sk" AS "item_sk" FROM "catalog_sales" AS "catalog_sales" UNION ALL SELECT "web_sales"."ws_sold_date_sk" AS "sold_date_sk", "web_sales"."ws_bill_customer_sk" AS "customer_sk", "web_sales"."ws_item_sk" AS "item_sk" FROM "web_sales" AS "web_sales" ), "my_customers" AS ( SELECT DISTINCT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_current_addr_sk" AS 
"c_current_addr_sk" FROM "cs_or_ws_sales" AS "cs_or_ws_sales" JOIN "customer" AS "customer" ON "cs_or_ws_sales"."customer_sk" = "customer"."c_customer_sk" JOIN "date_dim" AS "date_dim" ON "cs_or_ws_sales"."sold_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_moy" = 5 AND "date_dim"."d_year" = 2000 JOIN "item" AS "item" ON "cs_or_ws_sales"."item_sk" = "item"."i_item_sk" AND "item"."i_category" = 'Sports' AND "item"."i_class" = 'fitness' ), "_u_0" AS ( SELECT DISTINCT "date_dim"."d_month_seq" + 1 AS "_col_0" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 5 AND "date_dim"."d_year" = 2000 ), "_u_1" AS ( SELECT DISTINCT "date_dim"."d_month_seq" + 3 AS "_col_0" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 5 AND "date_dim"."d_year" = 2000 ), "my_revenue" AS ( SELECT SUM("store_sales"."ss_ext_sales_price") AS "revenue" FROM "my_customers" AS "my_customers" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "my_customers"."c_current_addr_sk" JOIN "store_sales" AS "store_sales" ON "my_customers"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" ON "customer_address"."ca_county" = "store"."s_county" AND "customer_address"."ca_state" = "store"."s_state" JOIN "_u_0" AS "_u_0" ON "_u_0"."_col_0" <= "date_dim"."d_month_seq" JOIN "_u_1" AS "_u_1" ON "_u_1"."_col_0" >= "date_dim"."d_month_seq" GROUP BY "my_customers"."c_customer_sk" ) SELECT CAST(( "my_revenue"."revenue" / 50 ) AS INT) AS "segment", COUNT(*) AS "num_customers", CAST(( "my_revenue"."revenue" / 50 ) AS INT) * 50 AS "segment_base" FROM "my_revenue" AS "my_revenue" GROUP BY CAST(( "my_revenue"."revenue" / 50 ) AS INT) ORDER BY "segment", "num_customers" LIMIT 100; -------------------------------------- -- TPC-DS 55 -------------------------------------- # execute: true SELECT i_brand_id brand_id, i_brand brand, Sum(ss_ext_sales_price) ext_price 
FROM date_dim, store_sales, item WHERE d_date_sk = ss_sold_date_sk AND ss_item_sk = i_item_sk AND i_manager_id = 33 AND d_moy = 12 AND d_year = 1998 GROUP BY i_brand, i_brand_id ORDER BY ext_price DESC, i_brand_id LIMIT 100; SELECT "item"."i_brand_id" AS "brand_id", "item"."i_brand" AS "brand", SUM("store_sales"."ss_ext_sales_price") AS "ext_price" FROM "date_dim" AS "date_dim" JOIN "store_sales" AS "store_sales" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" AND "item"."i_manager_id" = 33 WHERE "date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1998 GROUP BY "item"."i_brand", "item"."i_brand_id" ORDER BY "ext_price" DESC, "brand_id" LIMIT 100; -------------------------------------- -- TPC-DS 56 -------------------------------------- # execute: true WITH ss AS (SELECT i_item_id, Sum(ss_ext_sales_price) total_sales FROM store_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_color IN ( 'firebrick', 'rosy', 'white' ) ) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1998 AND d_moy = 3 AND ss_addr_sk = ca_address_sk AND ca_gmt_offset = -6 GROUP BY i_item_id), cs AS (SELECT i_item_id, Sum(cs_ext_sales_price) total_sales FROM catalog_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_color IN ( 'firebrick', 'rosy', 'white' ) ) AND cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND d_year = 1998 AND d_moy = 3 AND cs_bill_addr_sk = ca_address_sk AND ca_gmt_offset = -6 GROUP BY i_item_id), ws AS (SELECT i_item_id, Sum(ws_ext_sales_price) total_sales FROM web_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_color IN ( 'firebrick', 'rosy', 'white' ) ) AND ws_item_sk = i_item_sk AND ws_sold_date_sk = d_date_sk AND d_year = 1998 AND d_moy = 3 AND ws_bill_addr_sk = ca_address_sk AND ca_gmt_offset = -6 GROUP BY i_item_id) SELECT 
i_item_id, Sum(total_sales) total_sales FROM (SELECT * FROM ss UNION ALL SELECT * FROM cs UNION ALL SELECT * FROM ws) tmp1 GROUP BY i_item_id ORDER BY total_sales LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 3 AND "date_dim"."d_year" = 1998 ), "customer_address_2" AS ( SELECT "customer_address"."ca_address_sk" AS "ca_address_sk", "customer_address"."ca_gmt_offset" AS "ca_gmt_offset" FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -6 ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" ), "_u_0" AS ( SELECT "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" WHERE "item"."i_color" IN ('firebrick', 'rosy', 'white') GROUP BY "item"."i_item_id" ), "ss" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "cs" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" LEFT JOIN "_u_0" AS "_u_1" ON "_u_1"."i_item_id" = "item"."i_item_id" WHERE 
NOT "_u_1"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "ws" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "web_sales"."ws_bill_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" LEFT JOIN "_u_0" AS "_u_2" ON "_u_2"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_2"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "tmp1" AS ( SELECT "ss"."i_item_id" AS "i_item_id", "ss"."total_sales" AS "total_sales" FROM "ss" AS "ss" UNION ALL SELECT "cs"."i_item_id" AS "i_item_id", "cs"."total_sales" AS "total_sales" FROM "cs" AS "cs" UNION ALL SELECT "ws"."i_item_id" AS "i_item_id", "ws"."total_sales" AS "total_sales" FROM "ws" AS "ws" ) SELECT "tmp1"."i_item_id" AS "i_item_id", SUM("tmp1"."total_sales") AS "total_sales" FROM "tmp1" AS "tmp1" GROUP BY "tmp1"."i_item_id" ORDER BY "total_sales" LIMIT 100; -------------------------------------- -- TPC-DS 57 -------------------------------------- WITH v1 AS (SELECT i_category, i_brand, cc_name, d_year, d_moy, Sum(cs_sales_price) sum_sales , Avg(Sum(cs_sales_price)) OVER ( partition BY i_category, i_brand, cc_name, d_year) avg_monthly_sales , Rank() OVER ( partition BY i_category, i_brand, cc_name ORDER BY d_year, d_moy) rn FROM item, catalog_sales, date_dim, call_center WHERE cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND cc_call_center_sk = cs_call_center_sk AND ( d_year = 2000 OR ( d_year = 2000 - 1 AND d_moy = 12 ) OR ( d_year = 2000 + 1 AND d_moy = 1 ) ) GROUP BY i_category, i_brand, cc_name, d_year, d_moy), v2 AS (SELECT v1.i_brand, v1.d_year, v1.avg_monthly_sales, v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum FROM v1, v1 v1_lag, v1 v1_lead WHERE v1.i_category = v1_lag.i_category AND v1.i_category = 
v1_lead.i_category AND v1.i_brand = v1_lag.i_brand AND v1.i_brand = v1_lead.i_brand AND v1. cc_name = v1_lag. cc_name AND v1. cc_name = v1_lead. cc_name AND v1.rn = v1_lag.rn + 1 AND v1.rn = v1_lead.rn - 1) SELECT * FROM v2 WHERE d_year = 2000 AND avg_monthly_sales > 0 AND CASE WHEN avg_monthly_sales > 0 THEN Abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1 ORDER BY sum_sales - avg_monthly_sales, 3 LIMIT 100; WITH "v1" AS ( SELECT "item"."i_category" AS "i_category", "item"."i_brand" AS "i_brand", "call_center"."cc_name" AS "cc_name", "date_dim"."d_year" AS "d_year", SUM("catalog_sales"."cs_sales_price") AS "sum_sales", AVG(SUM("catalog_sales"."cs_sales_price")) OVER ( PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name", "date_dim"."d_year" ) AS "avg_monthly_sales", RANK() OVER ( PARTITION BY "item"."i_category", "item"."i_brand", "call_center"."cc_name" ORDER BY "date_dim"."d_year", "date_dim"."d_moy" ) AS "rn" FROM "item" AS "item" JOIN "catalog_sales" AS "catalog_sales" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "call_center" AS "call_center" ON "call_center"."cc_call_center_sk" = "catalog_sales"."cs_call_center_sk" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" AND ( "date_dim"."d_moy" = 1 OR "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 2000 ) AND ( "date_dim"."d_moy" = 1 OR "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 ) AND ( "date_dim"."d_moy" = 12 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001 ) AND ( "date_dim"."d_year" = 1999 OR "date_dim"."d_year" = 2000 OR "date_dim"."d_year" = 2001 ) GROUP BY "item"."i_category", "item"."i_brand", "call_center"."cc_name", "date_dim"."d_year", "date_dim"."d_moy" ) SELECT "v1"."i_brand" AS "i_brand", "v1"."d_year" AS "d_year", "v1"."avg_monthly_sales" AS "avg_monthly_sales", "v1"."sum_sales" AS "sum_sales", "v1_lag"."sum_sales" AS "psum", "v1_lead"."sum_sales" AS "nsum" FROM "v1" AS 
"v1" JOIN "v1" AS "v1_lag" ON "v1"."cc_name" = "v1_lag"."cc_name" AND "v1"."i_brand" = "v1_lag"."i_brand" AND "v1"."i_category" = "v1_lag"."i_category" AND "v1"."rn" = "v1_lag"."rn" + 1 JOIN "v1" AS "v1_lead" ON "v1"."cc_name" = "v1_lead"."cc_name" AND "v1"."i_brand" = "v1_lead"."i_brand" AND "v1"."i_category" = "v1_lead"."i_category" AND "v1"."rn" = "v1_lead"."rn" - 1 WHERE "v1"."avg_monthly_sales" > 0 AND "v1"."d_year" = 2000 AND CASE WHEN "v1"."avg_monthly_sales" > 0 THEN ABS("v1"."sum_sales" - "v1"."avg_monthly_sales") / "v1"."avg_monthly_sales" ELSE NULL END > 0.1 ORDER BY "v1"."sum_sales" - "v1"."avg_monthly_sales", "avg_monthly_sales" LIMIT 100; -------------------------------------- -- TPC-DS 58 -------------------------------------- WITH ss_items AS (SELECT i_item_id item_id, Sum(ss_ext_sales_price) ss_item_rev FROM store_sales, item, date_dim WHERE ss_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = '2002-02-25' )) AND ss_sold_date_sk = d_date_sk GROUP BY i_item_id), cs_items AS (SELECT i_item_id item_id, Sum(cs_ext_sales_price) cs_item_rev FROM catalog_sales, item, date_dim WHERE cs_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = '2002-02-25' )) AND cs_sold_date_sk = d_date_sk GROUP BY i_item_id), ws_items AS (SELECT i_item_id item_id, Sum(ws_ext_sales_price) ws_item_rev FROM web_sales, item, date_dim WHERE ws_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq = (SELECT d_week_seq FROM date_dim WHERE d_date = '2002-02-25' )) AND ws_sold_date_sk = d_date_sk GROUP BY i_item_id) SELECT ss_items.item_id, ss_item_rev, ss_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 ss_dev, cs_item_rev, cs_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 cs_dev, ws_item_rev, ws_item_rev / ( ss_item_rev + cs_item_rev + ws_item_rev ) / 3 * 100 ws_dev, ( 
ss_item_rev + cs_item_rev + ws_item_rev ) / 3 average FROM ss_items, cs_items, ws_items WHERE ss_items.item_id = cs_items.item_id AND ss_items.item_id = ws_items.item_id AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev ORDER BY item_id, ss_item_rev LIMIT 100; WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" ), "_u_0" AS ( SELECT "date_dim"."d_week_seq" AS "d_week_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" = '2002-02-25' ), "_u_1" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" JOIN "_u_0" AS "_u_0" ON "_u_0"."d_week_seq" = "date_dim"."d_week_seq" GROUP BY "date_dim"."d_date" ), "ss_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("store_sales"."ss_ext_sales_price") AS "ss_item_rev" FROM "store_sales" AS "store_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."d_date" = "date_dim"."d_date" WHERE NOT "_u_1"."d_date" IS NULL GROUP BY "item"."i_item_id" ), "_u_3" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" JOIN "_u_0" AS "_u_2" ON "_u_2"."d_week_seq" = "date_dim"."d_week_seq" GROUP BY "date_dim"."d_date" ), "cs_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("catalog_sales"."cs_ext_sales_price") AS "cs_item_rev" FROM "catalog_sales" AS "catalog_sales" JOIN "item_2" AS "item" ON 
"catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_3" AS "_u_3" ON "_u_3"."d_date" = "date_dim"."d_date" WHERE NOT "_u_3"."d_date" IS NULL GROUP BY "item"."i_item_id" ), "_u_5" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" JOIN "_u_0" AS "_u_4" ON "_u_4"."d_week_seq" = "date_dim"."d_week_seq" GROUP BY "date_dim"."d_date" ), "ws_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("web_sales"."ws_ext_sales_price") AS "ws_item_rev" FROM "web_sales" AS "web_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" LEFT JOIN "_u_5" AS "_u_5" ON "_u_5"."d_date" = "date_dim"."d_date" WHERE NOT "_u_5"."d_date" IS NULL GROUP BY "item"."i_item_id" ) SELECT "ss_items"."item_id" AS "item_id", "ss_items"."ss_item_rev" AS "ss_item_rev", "ss_items"."ss_item_rev" / ( "ss_items"."ss_item_rev" + "cs_items"."cs_item_rev" + "ws_items"."ws_item_rev" ) / 3 * 100 AS "ss_dev", "cs_items"."cs_item_rev" AS "cs_item_rev", "cs_items"."cs_item_rev" / ( "ss_items"."ss_item_rev" + "cs_items"."cs_item_rev" + "ws_items"."ws_item_rev" ) / 3 * 100 AS "cs_dev", "ws_items"."ws_item_rev" AS "ws_item_rev", "ws_items"."ws_item_rev" / ( "ss_items"."ss_item_rev" + "cs_items"."cs_item_rev" + "ws_items"."ws_item_rev" ) / 3 * 100 AS "ws_dev", ( "ss_items"."ss_item_rev" + "cs_items"."cs_item_rev" + "ws_items"."ws_item_rev" ) / 3 AS "average" FROM "ss_items" AS "ss_items" JOIN "cs_items" AS "cs_items" ON "cs_items"."cs_item_rev" <= 1.1 * "ss_items"."ss_item_rev" AND "cs_items"."cs_item_rev" >= 0.9 * "ss_items"."ss_item_rev" AND "cs_items"."item_id" = "ss_items"."item_id" AND "ss_items"."ss_item_rev" <= 1.1 * "cs_items"."cs_item_rev" AND "ss_items"."ss_item_rev" >= 0.9 * "cs_items"."cs_item_rev" JOIN "ws_items" AS "ws_items" ON "cs_items"."cs_item_rev" <= 1.1 * 
"ws_items"."ws_item_rev" AND "cs_items"."cs_item_rev" >= 0.9 * "ws_items"."ws_item_rev" AND "ss_items"."item_id" = "ws_items"."item_id" AND "ss_items"."ss_item_rev" <= 1.1 * "ws_items"."ws_item_rev" AND "ss_items"."ss_item_rev" >= 0.9 * "ws_items"."ws_item_rev" AND "ws_items"."ws_item_rev" <= 1.1 * "cs_items"."cs_item_rev" AND "ws_items"."ws_item_rev" <= 1.1 * "ss_items"."ss_item_rev" AND "ws_items"."ws_item_rev" >= 0.9 * "cs_items"."cs_item_rev" AND "ws_items"."ws_item_rev" >= 0.9 * "ss_items"."ss_item_rev" ORDER BY "item_id", "ss_item_rev" LIMIT 100; -------------------------------------- -- TPC-DS 59 -------------------------------------- # execute: true WITH wss AS (SELECT d_week_seq, ss_store_sk, Sum(CASE WHEN ( d_day_name = 'Sunday' ) THEN ss_sales_price ELSE NULL END) sun_sales, Sum(CASE WHEN ( d_day_name = 'Monday' ) THEN ss_sales_price ELSE NULL END) mon_sales, Sum(CASE WHEN ( d_day_name = 'Tuesday' ) THEN ss_sales_price ELSE NULL END) tue_sales, Sum(CASE WHEN ( d_day_name = 'Wednesday' ) THEN ss_sales_price ELSE NULL END) wed_sales, Sum(CASE WHEN ( d_day_name = 'Thursday' ) THEN ss_sales_price ELSE NULL END) thu_sales, Sum(CASE WHEN ( d_day_name = 'Friday' ) THEN ss_sales_price ELSE NULL END) fri_sales, Sum(CASE WHEN ( d_day_name = 'Saturday' ) THEN ss_sales_price ELSE NULL END) sat_sales FROM store_sales, date_dim WHERE d_date_sk = ss_sold_date_sk GROUP BY d_week_seq, ss_store_sk) SELECT s_store_name1, s_store_id1, d_week_seq1, sun_sales1 / sun_sales2 AS "_col_3", mon_sales1 / mon_sales2 AS "_col_4", tue_sales1 / tue_sales2 AS "_col_5", wed_sales1 / wed_sales2 AS "_col_6", thu_sales1 / thu_sales2 AS "_col_7", fri_sales1 / fri_sales2 AS "_col_8", sat_sales1 / sat_sales2 AS "_col_9" FROM (SELECT s_store_name s_store_name1, wss.d_week_seq d_week_seq1, s_store_id s_store_id1, sun_sales sun_sales1, mon_sales mon_sales1, tue_sales tue_sales1, wed_sales wed_sales1, thu_sales thu_sales1, fri_sales fri_sales1, sat_sales sat_sales1 FROM wss, store, date_dim d 
WHERE d.d_week_seq = wss.d_week_seq AND ss_store_sk = s_store_sk AND d_month_seq BETWEEN 1196 AND 1196 + 11) y, (SELECT s_store_name s_store_name2, wss.d_week_seq d_week_seq2, s_store_id s_store_id2, sun_sales sun_sales2, mon_sales mon_sales2, tue_sales tue_sales2, wed_sales wed_sales2, thu_sales thu_sales2, fri_sales fri_sales2, sat_sales sat_sales2 FROM wss, store, date_dim d WHERE d.d_week_seq = wss.d_week_seq AND ss_store_sk = s_store_sk AND d_month_seq BETWEEN 1196 + 12 AND 1196 + 23) x WHERE s_store_id1 = s_store_id2 AND d_week_seq1 = d_week_seq2 - 52 ORDER BY s_store_name1, s_store_id1, d_week_seq1 LIMIT 100; WITH "wss" AS ( SELECT "date_dim"."d_week_seq" AS "d_week_seq", "store_sales"."ss_store_sk" AS "ss_store_sk", SUM( CASE WHEN "date_dim"."d_day_name" = 'Sunday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "sun_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Monday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "mon_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Tuesday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "tue_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Wednesday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "wed_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Thursday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "thu_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Friday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "fri_sales", SUM( CASE WHEN "date_dim"."d_day_name" = 'Saturday' THEN "store_sales"."ss_sales_price" ELSE NULL END ) AS "sat_sales" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "date_dim"."d_week_seq", "store_sales"."ss_store_sk" ), "x" AS ( SELECT "wss"."d_week_seq" AS "d_week_seq2", "store"."s_store_id" AS "s_store_id2", "wss"."sun_sales" AS "sun_sales2", "wss"."mon_sales" AS "mon_sales2", "wss"."tue_sales" AS "tue_sales2", "wss"."wed_sales" AS "wed_sales2", 
"wss"."thu_sales" AS "thu_sales2", "wss"."fri_sales" AS "fri_sales2", "wss"."sat_sales" AS "sat_sales2" FROM "wss" AS "wss" JOIN "date_dim" AS "d" ON "d"."d_month_seq" <= 1219 AND "d"."d_month_seq" >= 1208 AND "d"."d_week_seq" = "wss"."d_week_seq" JOIN "store" AS "store" ON "store"."s_store_sk" = "wss"."ss_store_sk" ) SELECT "store"."s_store_name" AS "s_store_name1", "store"."s_store_id" AS "s_store_id1", "wss"."d_week_seq" AS "d_week_seq1", "wss"."sun_sales" / "x"."sun_sales2" AS "_col_3", "wss"."mon_sales" / "x"."mon_sales2" AS "_col_4", "wss"."tue_sales" / "x"."tue_sales2" AS "_col_5", "wss"."wed_sales" / "x"."wed_sales2" AS "_col_6", "wss"."thu_sales" / "x"."thu_sales2" AS "_col_7", "wss"."fri_sales" / "x"."fri_sales2" AS "_col_8", "wss"."sat_sales" / "x"."sat_sales2" AS "_col_9" FROM "wss" AS "wss" JOIN "date_dim" AS "d" ON "d"."d_month_seq" <= 1207 AND "d"."d_month_seq" >= 1196 AND "d"."d_week_seq" = "wss"."d_week_seq" JOIN "store" AS "store" ON "store"."s_store_sk" = "wss"."ss_store_sk" JOIN "x" AS "x" ON "store"."s_store_id" = "x"."s_store_id2" AND "wss"."d_week_seq" = "x"."d_week_seq2" - 52 ORDER BY "s_store_name1", "s_store_id1", "d_week_seq1" LIMIT 100; -------------------------------------- -- TPC-DS 60 -------------------------------------- # execute: true WITH ss AS (SELECT i_item_id, Sum(ss_ext_sales_price) total_sales FROM store_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_category IN ( 'Jewelry' )) AND ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 8 AND ss_addr_sk = ca_address_sk AND ca_gmt_offset = -6 GROUP BY i_item_id), cs AS (SELECT i_item_id, Sum(cs_ext_sales_price) total_sales FROM catalog_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_category IN ( 'Jewelry' )) AND cs_item_sk = i_item_sk AND cs_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 8 AND cs_bill_addr_sk = ca_address_sk AND ca_gmt_offset = 
-6 GROUP BY i_item_id), ws AS (SELECT i_item_id, Sum(ws_ext_sales_price) total_sales FROM web_sales, date_dim, customer_address, item WHERE i_item_id IN (SELECT i_item_id FROM item WHERE i_category IN ( 'Jewelry' )) AND ws_item_sk = i_item_sk AND ws_sold_date_sk = d_date_sk AND d_year = 1999 AND d_moy = 8 AND ws_bill_addr_sk = ca_address_sk AND ca_gmt_offset = -6 GROUP BY i_item_id) SELECT i_item_id, Sum(total_sales) total_sales FROM (SELECT * FROM ss UNION ALL SELECT * FROM cs UNION ALL SELECT * FROM ws) tmp1 GROUP BY i_item_id ORDER BY i_item_id, total_sales LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 8 AND "date_dim"."d_year" = 1999 ), "customer_address_2" AS ( SELECT "customer_address"."ca_address_sk" AS "ca_address_sk", "customer_address"."ca_gmt_offset" AS "ca_gmt_offset" FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -6 ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" ), "_u_0" AS ( SELECT "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" WHERE "item"."i_category" IN ('Jewelry') GROUP BY "item"."i_item_id" ), "ss" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("store_sales"."ss_ext_sales_price") AS "total_sales" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_0"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "cs" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("catalog_sales"."cs_ext_sales_price") AS "total_sales" FROM "catalog_sales" 
AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "customer_address_2" AS "customer_address" ON "catalog_sales"."cs_bill_addr_sk" = "customer_address"."ca_address_sk" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" LEFT JOIN "_u_0" AS "_u_1" ON "_u_1"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_1"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "ws" AS ( SELECT "item"."i_item_id" AS "i_item_id", SUM("web_sales"."ws_ext_sales_price") AS "total_sales" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "customer_address_2" AS "customer_address" ON "customer_address"."ca_address_sk" = "web_sales"."ws_bill_addr_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" LEFT JOIN "_u_0" AS "_u_2" ON "_u_2"."i_item_id" = "item"."i_item_id" WHERE NOT "_u_2"."i_item_id" IS NULL GROUP BY "item"."i_item_id" ), "tmp1" AS ( SELECT "ss"."i_item_id" AS "i_item_id", "ss"."total_sales" AS "total_sales" FROM "ss" AS "ss" UNION ALL SELECT "cs"."i_item_id" AS "i_item_id", "cs"."total_sales" AS "total_sales" FROM "cs" AS "cs" UNION ALL SELECT "ws"."i_item_id" AS "i_item_id", "ws"."total_sales" AS "total_sales" FROM "ws" AS "ws" ) SELECT "tmp1"."i_item_id" AS "i_item_id", SUM("tmp1"."total_sales") AS "total_sales" FROM "tmp1" AS "tmp1" GROUP BY "tmp1"."i_item_id" ORDER BY "i_item_id", "total_sales" LIMIT 100; -------------------------------------- -- TPC-DS 61 -------------------------------------- SELECT promotions, total, Cast(promotions AS DECIMAL(15, 4)) / Cast(total AS DECIMAL(15, 4)) * 100 FROM (SELECT Sum(ss_ext_sales_price) promotions FROM store_sales, store, promotion, date_dim, customer, customer_address, item WHERE ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND ss_promo_sk = p_promo_sk AND ss_customer_sk = c_customer_sk AND ca_address_sk = c_current_addr_sk AND 
ss_item_sk = i_item_sk AND ca_gmt_offset = -7 AND i_category = 'Books' AND ( p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y' ) AND s_gmt_offset = -7 AND d_year = 2001 AND d_moy = 12) promotional_sales, (SELECT Sum(ss_ext_sales_price) total FROM store_sales, store, date_dim, customer, customer_address, item WHERE ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND ss_customer_sk = c_customer_sk AND ca_address_sk = c_current_addr_sk AND ss_item_sk = i_item_sk AND ca_gmt_offset = -7 AND i_category = 'Books' AND s_gmt_offset = -7 AND d_year = 2001 AND d_moy = 12) all_sales ORDER BY promotions, total LIMIT 100; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_current_addr_sk" AS "c_current_addr_sk" FROM "customer" AS "customer" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 2001 ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_category" AS "i_category" FROM "item" AS "item" WHERE "item"."i_category" = 'Books' ), "store_2" AS ( SELECT "store"."s_store_sk" AS "s_store_sk", "store"."s_gmt_offset" AS "s_gmt_offset" FROM "store" AS "store" WHERE "store"."s_gmt_offset" = -7 ), "customer_address_2" AS ( SELECT "customer_address"."ca_address_sk" AS "ca_address_sk", "customer_address"."ca_gmt_offset" AS "ca_gmt_offset" FROM "customer_address" AS "customer_address" WHERE "customer_address"."ca_gmt_offset" = -7 ), "promotional_sales" AS ( SELECT SUM("store_sales"."ss_ext_sales_price") AS "promotions" FROM "store_sales" AS "store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "promotion" AS "promotion" 
ON ( "promotion"."p_channel_dmail" = 'Y' OR "promotion"."p_channel_email" = 'Y' OR "promotion"."p_channel_tv" = 'Y' ) AND "promotion"."p_promo_sk" = "store_sales"."ss_promo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "customer_address_2" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" ), "all_sales" AS ( SELECT SUM("store_sales"."ss_ext_sales_price") AS "total" FROM "store_sales" AS "store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "customer_address_2" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" ) SELECT "promotional_sales"."promotions" AS "promotions", "all_sales"."total" AS "total", CAST("promotional_sales"."promotions" AS DECIMAL(15, 4)) / CAST("all_sales"."total" AS DECIMAL(15, 4)) * 100 AS "_col_2" FROM "promotional_sales" AS "promotional_sales" CROSS JOIN "all_sales" AS "all_sales" ORDER BY "promotions", "total" LIMIT 100; -------------------------------------- -- TPC-DS 62 -------------------------------------- # execute: true SELECT SUBSTRING(w_warehouse_name, 1, 20) AS "_col_0", sm_type, web_name, Sum(CASE WHEN ( ws_ship_date_sk - ws_sold_date_sk <= 30 ) THEN 1 ELSE 0 END) AS "30 days", Sum(CASE WHEN ( ws_ship_date_sk - ws_sold_date_sk > 30 ) AND ( ws_ship_date_sk - ws_sold_date_sk <= 60 ) THEN 1 ELSE 0 END) AS "31-60 days", Sum(CASE WHEN ( ws_ship_date_sk - ws_sold_date_sk > 60 ) AND ( ws_ship_date_sk - ws_sold_date_sk <= 90 ) THEN 1 ELSE 0 END) AS "61-90 days", Sum(CASE WHEN ( ws_ship_date_sk - ws_sold_date_sk > 90 ) AND ( ws_ship_date_sk - ws_sold_date_sk <= 120 ) THEN 1 ELSE 0 END) AS "91-120 days", Sum(CASE 
WHEN ( ws_ship_date_sk - ws_sold_date_sk > 120 ) THEN 1 ELSE 0 END) AS ">120 days" FROM web_sales, warehouse, ship_mode, web_site, date_dim WHERE d_month_seq BETWEEN 1222 AND 1222 + 11 AND ws_ship_date_sk = d_date_sk AND ws_warehouse_sk = w_warehouse_sk AND ws_ship_mode_sk = sm_ship_mode_sk AND ws_web_site_sk = web_site_sk GROUP BY SUBSTRING(w_warehouse_name, 1, 20), sm_type, web_name ORDER BY SUBSTRING(w_warehouse_name, 1, 20), sm_type, web_name LIMIT 100; SELECT SUBSTRING("warehouse"."w_warehouse_name", 1, 20) AS "_col_0", "ship_mode"."sm_type" AS "sm_type", "web_site"."web_name" AS "web_name", SUM( CASE WHEN "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" <= 30 THEN 1 ELSE 0 END ) AS "30 days", SUM( CASE WHEN "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" <= 60 AND "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" > 30 THEN 1 ELSE 0 END ) AS "31-60 days", SUM( CASE WHEN "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" <= 90 AND "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" > 60 THEN 1 ELSE 0 END ) AS "61-90 days", SUM( CASE WHEN "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" <= 120 AND "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" > 90 THEN 1 ELSE 0 END ) AS "91-120 days", SUM( CASE WHEN "web_sales"."ws_ship_date_sk" - "web_sales"."ws_sold_date_sk" > 120 THEN 1 ELSE 0 END ) AS ">120 days" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_ship_date_sk" AND "date_dim"."d_month_seq" <= 1233 AND "date_dim"."d_month_seq" >= 1222 JOIN "ship_mode" AS "ship_mode" ON "ship_mode"."sm_ship_mode_sk" = "web_sales"."ws_ship_mode_sk" JOIN "warehouse" AS "warehouse" ON "warehouse"."w_warehouse_sk" = "web_sales"."ws_warehouse_sk" JOIN "web_site" AS "web_site" ON "web_sales"."ws_web_site_sk" = "web_site"."web_site_sk" GROUP BY SUBSTRING("warehouse"."w_warehouse_name", 1, 20), "ship_mode"."sm_type", "web_site"."web_name" ORDER 
BY "_col_0", "sm_type", "web_name" LIMIT 100; -------------------------------------- -- TPC-DS 63 -------------------------------------- SELECT * FROM (SELECT i_manager_id, Sum(ss_sales_price) sum_sales, Avg(Sum(ss_sales_price)) OVER ( partition BY i_manager_id) avg_monthly_sales FROM item, store_sales, date_dim, store WHERE ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND d_month_seq IN ( 1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11 ) AND ( ( i_category IN ( 'Books', 'Children', 'Electronics' ) AND i_class IN ( 'personal', 'portable', 'reference', 'self-help' ) AND i_brand IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7' , 'exportiunivamalg #9', 'scholaramalgamalg #9' ) ) OR ( i_category IN ( 'Women', 'Music', 'Men' ) AND i_class IN ( 'accessories', 'classical', 'fragrances', 'pants' ) AND i_brand IN ( 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1' ) ) ) GROUP BY i_manager_id, d_moy) tmp1 WHERE CASE WHEN avg_monthly_sales > 0 THEN Abs (sum_sales - avg_monthly_sales) / avg_monthly_sales ELSE NULL END > 0.1 ORDER BY i_manager_id, avg_monthly_sales, sum_sales LIMIT 100; WITH "tmp1" AS ( SELECT "item"."i_manager_id" AS "i_manager_id", SUM("store_sales"."ss_sales_price") AS "sum_sales", AVG(SUM("store_sales"."ss_sales_price")) OVER (PARTITION BY "item"."i_manager_id") AS "avg_monthly_sales" FROM "item" AS "item" JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_month_seq" IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" WHERE ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_brand" IN ( 'scholaramalgamalg #14', 
'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) ) AND ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_category" IN ('Books', 'Children', 'Electronics') ) AND ( "item"."i_brand" IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) AND ( "item"."i_brand" IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) OR "item"."i_category" IN ('Women', 'Music', 'Men') ) AND ( "item"."i_brand" IN ( 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9' ) OR "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') ) AND ( "item"."i_category" IN ('Books', 'Children', 'Electronics') OR "item"."i_category" IN ('Women', 'Music', 'Men') ) AND ( "item"."i_category" IN ('Books', 'Children', 'Electronics') OR "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') ) AND ( "item"."i_category" IN ('Women', 'Music', 'Men') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) AND ( "item"."i_class" IN ('accessories', 'classical', 'fragrances', 'pants') OR "item"."i_class" IN ('personal', 'portable', 'reference', 'self-help') ) GROUP BY "item"."i_manager_id", "date_dim"."d_moy" ) SELECT "tmp1"."i_manager_id" AS "i_manager_id", "tmp1"."sum_sales" AS "sum_sales", "tmp1"."avg_monthly_sales" AS "avg_monthly_sales" FROM "tmp1" AS "tmp1" WHERE CASE WHEN "tmp1"."avg_monthly_sales" > 0 THEN ABS("tmp1"."sum_sales" - "tmp1"."avg_monthly_sales") / "tmp1"."avg_monthly_sales" ELSE NULL END > 0.1 ORDER BY "tmp1"."i_manager_id", "tmp1"."avg_monthly_sales", "tmp1"."sum_sales" LIMIT 100; -------------------------------------- -- TPC-DS 64 -------------------------------------- WITH cs_ui AS (SELECT cs_item_sk, Sum(cs_ext_list_price) AS sale, Sum(cr_refunded_cash + 
cr_reversed_charge + cr_store_credit) AS refund FROM catalog_sales, catalog_returns WHERE cs_item_sk = cr_item_sk AND cs_order_number = cr_order_number GROUP BY cs_item_sk HAVING Sum(cs_ext_list_price) > 2 * Sum( cr_refunded_cash + cr_reversed_charge + cr_store_credit)), cross_sales AS (SELECT i_product_name product_name, i_item_sk item_sk, s_store_name store_name, s_zip store_zip, ad1.ca_street_number b_street_number, ad1.ca_street_name b_streen_name, ad1.ca_city b_city, ad1.ca_zip b_zip, ad2.ca_street_number c_street_number, ad2.ca_street_name c_street_name, ad2.ca_city c_city, ad2.ca_zip c_zip, d1.d_year AS syear, d2.d_year AS fsyear, d3.d_year s2year, Count(*) cnt, Sum(ss_wholesale_cost) s1, Sum(ss_list_price) s2, Sum(ss_coupon_amt) s3 FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, store, customer, customer_demographics cd1, customer_demographics cd2, promotion, household_demographics hd1, household_demographics hd2, customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item WHERE ss_store_sk = s_store_sk AND ss_sold_date_sk = d1.d_date_sk AND ss_customer_sk = c_customer_sk AND ss_cdemo_sk = cd1.cd_demo_sk AND ss_hdemo_sk = hd1.hd_demo_sk AND ss_addr_sk = ad1.ca_address_sk AND ss_item_sk = i_item_sk AND ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number AND ss_item_sk = cs_ui.cs_item_sk AND c_current_cdemo_sk = cd2.cd_demo_sk AND c_current_hdemo_sk = hd2.hd_demo_sk AND c_current_addr_sk = ad2.ca_address_sk AND c_first_sales_date_sk = d2.d_date_sk AND c_first_shipto_date_sk = d3.d_date_sk AND ss_promo_sk = p_promo_sk AND hd1.hd_income_band_sk = ib1.ib_income_band_sk AND hd2.hd_income_band_sk = ib2.ib_income_band_sk AND cd1.cd_marital_status <> cd2.cd_marital_status AND i_color IN ( 'cyan', 'peach', 'blush', 'frosted', 'powder', 'orange' ) AND i_current_price BETWEEN 58 AND 58 + 10 AND i_current_price BETWEEN 58 + 1 AND 58 + 15 GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, 
ad1.ca_street_number, ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year) SELECT cs1.product_name, cs1.store_name, cs1.store_zip, cs1.b_street_number, cs1.b_streen_name, cs1.b_city, cs1.b_zip, cs1.c_street_number, cs1.c_street_name, cs1.c_city, cs1.c_zip, cs1.syear, cs1.cnt, cs1.s1, cs1.s2, cs1.s3, cs2.s1, cs2.s2, cs2.s3, cs2.syear, cs2.cnt FROM cross_sales cs1, cross_sales cs2 WHERE cs1.item_sk = cs2.item_sk AND cs1.syear = 2001 AND cs2.syear = 2001 + 1 AND cs2.cnt <= cs1.cnt AND cs1.store_name = cs2.store_name AND cs1.store_zip = cs2.store_zip ORDER BY cs1.product_name, cs1.store_name, cs2.cnt; WITH "cs_ui" AS ( SELECT "catalog_sales"."cs_item_sk" AS "cs_item_sk" FROM "catalog_sales" AS "catalog_sales" JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" GROUP BY "catalog_sales"."cs_item_sk" HAVING 2 * SUM( "catalog_returns"."cr_refunded_cash" + "catalog_returns"."cr_reversed_charge" + "catalog_returns"."cr_store_credit" ) < SUM("catalog_sales"."cs_ext_list_price") ), "cross_sales" AS ( SELECT "item"."i_product_name" AS "product_name", "item"."i_item_sk" AS "item_sk", "store"."s_store_name" AS "store_name", "store"."s_zip" AS "store_zip", "ad1"."ca_street_number" AS "b_street_number", "ad1"."ca_street_name" AS "b_streen_name", "ad1"."ca_city" AS "b_city", "ad1"."ca_zip" AS "b_zip", "ad2"."ca_street_number" AS "c_street_number", "ad2"."ca_street_name" AS "c_street_name", "ad2"."ca_city" AS "c_city", "ad2"."ca_zip" AS "c_zip", "d1"."d_year" AS "syear", COUNT(*) AS "cnt", SUM("store_sales"."ss_wholesale_cost") AS "s1", SUM("store_sales"."ss_list_price") AS "s2", SUM("store_sales"."ss_coupon_amt") AS "s3" FROM "store_sales" AS "store_sales" JOIN "customer_address" AS "ad1" ON "ad1"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN 
"customer_demographics" AS "cd1" ON "cd1"."cd_demo_sk" = "store_sales"."ss_cdemo_sk" JOIN "cs_ui" AS "cs_ui" ON "cs_ui"."cs_item_sk" = "store_sales"."ss_item_sk" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "household_demographics" AS "hd1" ON "hd1"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "item" AS "item" ON "item"."i_color" IN ('cyan', 'peach', 'blush', 'frosted', 'powder', 'orange') AND "item"."i_current_price" <= 68 AND "item"."i_current_price" >= 59 AND "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "promotion" AS "promotion" ON "promotion"."p_promo_sk" = "store_sales"."ss_promo_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "customer_address" AS "ad2" ON "ad2"."ca_address_sk" = "customer"."c_current_addr_sk" JOIN "customer_demographics" AS "cd2" ON "cd1"."cd_marital_status" <> "cd2"."cd_marital_status" AND "cd2"."cd_demo_sk" = "customer"."c_current_cdemo_sk" JOIN "date_dim" AS "d2" ON "customer"."c_first_sales_date_sk" = "d2"."d_date_sk" JOIN "date_dim" AS "d3" ON "customer"."c_first_shipto_date_sk" = "d3"."d_date_sk" JOIN "household_demographics" AS "hd2" ON "customer"."c_current_hdemo_sk" = "hd2"."hd_demo_sk" JOIN "income_band" AS "ib1" ON "hd1"."hd_income_band_sk" = "ib1"."ib_income_band_sk" JOIN "income_band" AS "ib2" ON "hd2"."hd_income_band_sk" = "ib2"."ib_income_band_sk" GROUP BY "item"."i_product_name", "item"."i_item_sk", "store"."s_store_name", "store"."s_zip", "ad1"."ca_street_number", "ad1"."ca_street_name", "ad1"."ca_city", "ad1"."ca_zip", "ad2"."ca_street_number", "ad2"."ca_street_name", "ad2"."ca_city", "ad2"."ca_zip", "d1"."d_year", "d2"."d_year", "d3"."d_year" ) SELECT "cs1"."product_name" 
AS "product_name", "cs1"."store_name" AS "store_name", "cs1"."store_zip" AS "store_zip", "cs1"."b_street_number" AS "b_street_number", "cs1"."b_streen_name" AS "b_streen_name", "cs1"."b_city" AS "b_city", "cs1"."b_zip" AS "b_zip", "cs1"."c_street_number" AS "c_street_number", "cs1"."c_street_name" AS "c_street_name", "cs1"."c_city" AS "c_city", "cs1"."c_zip" AS "c_zip", "cs1"."syear" AS "syear", "cs1"."cnt" AS "cnt", "cs1"."s1" AS "s1", "cs1"."s2" AS "s2", "cs1"."s3" AS "s3", "cs2"."s1" AS "s1", "cs2"."s2" AS "s2", "cs2"."s3" AS "s3", "cs2"."syear" AS "syear", "cs2"."cnt" AS "cnt" FROM "cross_sales" AS "cs1" JOIN "cross_sales" AS "cs2" ON "cs1"."cnt" >= "cs2"."cnt" AND "cs1"."item_sk" = "cs2"."item_sk" AND "cs1"."store_name" = "cs2"."store_name" AND "cs1"."store_zip" = "cs2"."store_zip" AND "cs2"."syear" = 2002 WHERE "cs1"."syear" = 2001 ORDER BY "cs1"."product_name", "cs1"."store_name", "cs2"."cnt"; -------------------------------------- -- TPC-DS 65 -------------------------------------- # execute: true SELECT s_store_name, i_item_desc, sc.revenue, i_current_price, i_wholesale_cost, i_brand FROM store, item, (SELECT ss_store_sk, Avg(revenue) AS ave FROM (SELECT ss_store_sk, ss_item_sk, Sum(ss_sales_price) AS revenue FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1199 AND 1199 + 11 GROUP BY ss_store_sk, ss_item_sk) sa GROUP BY ss_store_sk) sb, (SELECT ss_store_sk, ss_item_sk, Sum(ss_sales_price) AS revenue FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1199 AND 1199 + 11 GROUP BY ss_store_sk, ss_item_sk) sc WHERE sb.ss_store_sk = sc.ss_store_sk AND sc.revenue <= 0.1 * sb.ave AND s_store_sk = sc.ss_store_sk AND i_item_sk = sc.ss_item_sk ORDER BY s_store_name, i_item_desc LIMIT 100; WITH "store_sales_2" AS ( SELECT "store_sales"."ss_sold_date_sk" AS "ss_sold_date_sk", "store_sales"."ss_item_sk" AS "ss_item_sk", "store_sales"."ss_store_sk" AS "ss_store_sk", "store_sales"."ss_sales_price" AS 
"ss_sales_price" FROM "store_sales" AS "store_sales" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_month_seq" AS "d_month_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_month_seq" <= 1210 AND "date_dim"."d_month_seq" >= 1199 ), "sc" AS ( SELECT "store_sales"."ss_store_sk" AS "ss_store_sk", "store_sales"."ss_item_sk" AS "ss_item_sk", SUM("store_sales"."ss_sales_price") AS "revenue" FROM "store_sales_2" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_store_sk", "store_sales"."ss_item_sk" ), "sa" AS ( SELECT "store_sales"."ss_store_sk" AS "ss_store_sk", SUM("store_sales"."ss_sales_price") AS "revenue" FROM "store_sales_2" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_store_sk", "store_sales"."ss_item_sk" ), "sb" AS ( SELECT "sa"."ss_store_sk" AS "ss_store_sk", AVG("sa"."revenue") AS "ave" FROM "sa" AS "sa" GROUP BY "sa"."ss_store_sk" ) SELECT "store"."s_store_name" AS "s_store_name", "item"."i_item_desc" AS "i_item_desc", "sc"."revenue" AS "revenue", "item"."i_current_price" AS "i_current_price", "item"."i_wholesale_cost" AS "i_wholesale_cost", "item"."i_brand" AS "i_brand" FROM "store" AS "store" JOIN "sc" AS "sc" ON "sc"."ss_store_sk" = "store"."s_store_sk" JOIN "item" AS "item" ON "item"."i_item_sk" = "sc"."ss_item_sk" JOIN "sb" AS "sb" ON "sb"."ss_store_sk" = "sc"."ss_store_sk" AND "sc"."revenue" <= 0.1 * "sb"."ave" ORDER BY "s_store_name", "i_item_desc" LIMIT 100; -------------------------------------- -- TPC-DS 66 -------------------------------------- # execute: true SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, ship_carriers, year1, Sum(jan_sales) AS jan_sales, Sum(feb_sales) AS feb_sales, Sum(mar_sales) AS mar_sales, Sum(apr_sales) AS apr_sales, Sum(may_sales) AS may_sales, Sum(jun_sales) AS jun_sales, 
Sum(jul_sales) AS jul_sales, Sum(aug_sales) AS aug_sales, Sum(sep_sales) AS sep_sales, Sum(oct_sales) AS oct_sales, Sum(nov_sales) AS nov_sales, Sum(dec_sales) AS dec_sales, Sum(jan_sales / w_warehouse_sq_ft) AS jan_sales_per_sq_foot, Sum(feb_sales / w_warehouse_sq_ft) AS feb_sales_per_sq_foot, Sum(mar_sales / w_warehouse_sq_ft) AS mar_sales_per_sq_foot, Sum(apr_sales / w_warehouse_sq_ft) AS apr_sales_per_sq_foot, Sum(may_sales / w_warehouse_sq_ft) AS may_sales_per_sq_foot, Sum(jun_sales / w_warehouse_sq_ft) AS jun_sales_per_sq_foot, Sum(jul_sales / w_warehouse_sq_ft) AS jul_sales_per_sq_foot, Sum(aug_sales / w_warehouse_sq_ft) AS aug_sales_per_sq_foot, Sum(sep_sales / w_warehouse_sq_ft) AS sep_sales_per_sq_foot, Sum(oct_sales / w_warehouse_sq_ft) AS oct_sales_per_sq_foot, Sum(nov_sales / w_warehouse_sq_ft) AS nov_sales_per_sq_foot, Sum(dec_sales / w_warehouse_sq_ft) AS dec_sales_per_sq_foot, Sum(jan_net) AS jan_net, Sum(feb_net) AS feb_net, Sum(mar_net) AS mar_net, Sum(apr_net) AS apr_net, Sum(may_net) AS may_net, Sum(jun_net) AS jun_net, Sum(jul_net) AS jul_net, Sum(aug_net) AS aug_net, Sum(sep_net) AS sep_net, Sum(oct_net) AS oct_net, Sum(nov_net) AS nov_net, Sum(dec_net) AS dec_net FROM (SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, 'ZOUROS' || ',' || 'ZHOU' AS ship_carriers, d_year AS year1, Sum(CASE WHEN d_moy = 1 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jan_sales, Sum(CASE WHEN d_moy = 2 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS feb_sales, Sum(CASE WHEN d_moy = 3 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS mar_sales, Sum(CASE WHEN d_moy = 4 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS apr_sales, Sum(CASE WHEN d_moy = 5 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS may_sales, Sum(CASE WHEN d_moy = 6 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jun_sales, Sum(CASE WHEN d_moy = 7 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS jul_sales, Sum(CASE WHEN d_moy = 8 THEN 
ws_ext_sales_price * ws_quantity ELSE 0 END) AS aug_sales, Sum(CASE WHEN d_moy = 9 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS sep_sales, Sum(CASE WHEN d_moy = 10 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS oct_sales, Sum(CASE WHEN d_moy = 11 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS nov_sales, Sum(CASE WHEN d_moy = 12 THEN ws_ext_sales_price * ws_quantity ELSE 0 END) AS dec_sales, Sum(CASE WHEN d_moy = 1 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS jan_net, Sum(CASE WHEN d_moy = 2 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS feb_net, Sum(CASE WHEN d_moy = 3 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS mar_net, Sum(CASE WHEN d_moy = 4 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS apr_net, Sum(CASE WHEN d_moy = 5 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS may_net, Sum(CASE WHEN d_moy = 6 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS jun_net, Sum(CASE WHEN d_moy = 7 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS jul_net, Sum(CASE WHEN d_moy = 8 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS aug_net, Sum(CASE WHEN d_moy = 9 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS sep_net, Sum(CASE WHEN d_moy = 10 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS oct_net, Sum(CASE WHEN d_moy = 11 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS nov_net, Sum(CASE WHEN d_moy = 12 THEN ws_net_paid_inc_ship * ws_quantity ELSE 0 END) AS dec_net FROM web_sales, warehouse, date_dim, time_dim, ship_mode WHERE ws_warehouse_sk = w_warehouse_sk AND ws_sold_date_sk = d_date_sk AND ws_sold_time_sk = t_time_sk AND ws_ship_mode_sk = sm_ship_mode_sk AND d_year = 1998 AND t_time BETWEEN 7249 AND 7249 + 28800 AND sm_carrier IN ( 'ZOUROS', 'ZHOU' ) GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year UNION ALL SELECT w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, 'ZOUROS' || ',' || 'ZHOU' AS ship_carriers, 
d_year AS year1, Sum(CASE WHEN d_moy = 1 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS jan_sales, Sum(CASE WHEN d_moy = 2 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS feb_sales, Sum(CASE WHEN d_moy = 3 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS mar_sales, Sum(CASE WHEN d_moy = 4 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS apr_sales, Sum(CASE WHEN d_moy = 5 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS may_sales, Sum(CASE WHEN d_moy = 6 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS jun_sales, Sum(CASE WHEN d_moy = 7 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS jul_sales, Sum(CASE WHEN d_moy = 8 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS aug_sales, Sum(CASE WHEN d_moy = 9 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS sep_sales, Sum(CASE WHEN d_moy = 10 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS oct_sales, Sum(CASE WHEN d_moy = 11 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS nov_sales, Sum(CASE WHEN d_moy = 12 THEN cs_ext_sales_price * cs_quantity ELSE 0 END) AS dec_sales, Sum(CASE WHEN d_moy = 1 THEN cs_net_paid * cs_quantity ELSE 0 END) AS jan_net, Sum(CASE WHEN d_moy = 2 THEN cs_net_paid * cs_quantity ELSE 0 END) AS feb_net, Sum(CASE WHEN d_moy = 3 THEN cs_net_paid * cs_quantity ELSE 0 END) AS mar_net, Sum(CASE WHEN d_moy = 4 THEN cs_net_paid * cs_quantity ELSE 0 END) AS apr_net, Sum(CASE WHEN d_moy = 5 THEN cs_net_paid * cs_quantity ELSE 0 END) AS may_net, Sum(CASE WHEN d_moy = 6 THEN cs_net_paid * cs_quantity ELSE 0 END) AS jun_net, Sum(CASE WHEN d_moy = 7 THEN cs_net_paid * cs_quantity ELSE 0 END) AS jul_net, Sum(CASE WHEN d_moy = 8 THEN cs_net_paid * cs_quantity ELSE 0 END) AS aug_net, Sum(CASE WHEN d_moy = 9 THEN cs_net_paid * cs_quantity ELSE 0 END) AS sep_net, Sum(CASE WHEN d_moy = 10 THEN cs_net_paid * cs_quantity ELSE 0 END) AS oct_net, Sum(CASE WHEN d_moy = 11 THEN cs_net_paid * cs_quantity ELSE 0 END) AS nov_net, Sum(CASE WHEN d_moy = 12 THEN cs_net_paid * 
cs_quantity ELSE 0 END) AS dec_net FROM catalog_sales, warehouse, date_dim, time_dim, ship_mode WHERE cs_warehouse_sk = w_warehouse_sk AND cs_sold_date_sk = d_date_sk AND cs_sold_time_sk = t_time_sk AND cs_ship_mode_sk = sm_ship_mode_sk AND d_year = 1998 AND t_time BETWEEN 7249 AND 7249 + 28800 AND sm_carrier IN ( 'ZOUROS', 'ZHOU' ) GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) x GROUP BY w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, ship_carriers, year1 ORDER BY w_warehouse_name LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_year" = 1998 ), "ship_mode_2" AS ( SELECT "ship_mode"."sm_ship_mode_sk" AS "sm_ship_mode_sk", "ship_mode"."sm_carrier" AS "sm_carrier" FROM "ship_mode" AS "ship_mode" WHERE "ship_mode"."sm_carrier" IN ('ZOUROS', 'ZHOU') ), "time_dim_2" AS ( SELECT "time_dim"."t_time_sk" AS "t_time_sk", "time_dim"."t_time" AS "t_time" FROM "time_dim" AS "time_dim" WHERE "time_dim"."t_time" <= 36049 AND "time_dim"."t_time" >= 7249 ), "warehouse_2" AS ( SELECT "warehouse"."w_warehouse_sk" AS "w_warehouse_sk", "warehouse"."w_warehouse_name" AS "w_warehouse_name", "warehouse"."w_warehouse_sq_ft" AS "w_warehouse_sq_ft", "warehouse"."w_city" AS "w_city", "warehouse"."w_county" AS "w_county", "warehouse"."w_state" AS "w_state", "warehouse"."w_country" AS "w_country" FROM "warehouse" AS "warehouse" ), "x" AS ( SELECT "warehouse"."w_warehouse_name" AS "w_warehouse_name", "warehouse"."w_warehouse_sq_ft" AS "w_warehouse_sq_ft", "warehouse"."w_city" AS "w_city", "warehouse"."w_county" AS "w_county", "warehouse"."w_state" AS "w_state", "warehouse"."w_country" AS "w_country", 'ZOUROS,ZHOU' AS "ship_carriers", "date_dim"."d_year" AS "year1", SUM( CASE WHEN "date_dim"."d_moy" = 1 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS 
"jan_sales", SUM( CASE WHEN "date_dim"."d_moy" = 2 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "feb_sales", SUM( CASE WHEN "date_dim"."d_moy" = 3 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "mar_sales", SUM( CASE WHEN "date_dim"."d_moy" = 4 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "apr_sales", SUM( CASE WHEN "date_dim"."d_moy" = 5 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "may_sales", SUM( CASE WHEN "date_dim"."d_moy" = 6 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "jun_sales", SUM( CASE WHEN "date_dim"."d_moy" = 7 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "jul_sales", SUM( CASE WHEN "date_dim"."d_moy" = 8 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "aug_sales", SUM( CASE WHEN "date_dim"."d_moy" = 9 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "sep_sales", SUM( CASE WHEN "date_dim"."d_moy" = 10 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "oct_sales", SUM( CASE WHEN "date_dim"."d_moy" = 11 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "nov_sales", SUM( CASE WHEN "date_dim"."d_moy" = 12 THEN "web_sales"."ws_ext_sales_price" * "web_sales"."ws_quantity" ELSE 0 END ) AS "dec_sales", SUM( CASE WHEN "date_dim"."d_moy" = 1 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "jan_net", SUM( CASE WHEN "date_dim"."d_moy" = 2 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "feb_net", SUM( CASE WHEN "date_dim"."d_moy" = 3 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "mar_net", SUM( CASE WHEN "date_dim"."d_moy" = 4 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS 
"apr_net", SUM( CASE WHEN "date_dim"."d_moy" = 5 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "may_net", SUM( CASE WHEN "date_dim"."d_moy" = 6 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "jun_net", SUM( CASE WHEN "date_dim"."d_moy" = 7 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "jul_net", SUM( CASE WHEN "date_dim"."d_moy" = 8 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "aug_net", SUM( CASE WHEN "date_dim"."d_moy" = 9 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "sep_net", SUM( CASE WHEN "date_dim"."d_moy" = 10 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "oct_net", SUM( CASE WHEN "date_dim"."d_moy" = 11 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "nov_net", SUM( CASE WHEN "date_dim"."d_moy" = 12 THEN "web_sales"."ws_net_paid_inc_ship" * "web_sales"."ws_quantity" ELSE 0 END ) AS "dec_net" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "ship_mode_2" AS "ship_mode" ON "ship_mode"."sm_ship_mode_sk" = "web_sales"."ws_ship_mode_sk" JOIN "time_dim_2" AS "time_dim" ON "time_dim"."t_time_sk" = "web_sales"."ws_sold_time_sk" JOIN "warehouse_2" AS "warehouse" ON "warehouse"."w_warehouse_sk" = "web_sales"."ws_warehouse_sk" GROUP BY "warehouse"."w_warehouse_name", "warehouse"."w_warehouse_sq_ft", "warehouse"."w_city", "warehouse"."w_county", "warehouse"."w_state", "warehouse"."w_country", "date_dim"."d_year" UNION ALL SELECT "warehouse"."w_warehouse_name" AS "w_warehouse_name", "warehouse"."w_warehouse_sq_ft" AS "w_warehouse_sq_ft", "warehouse"."w_city" AS "w_city", "warehouse"."w_county" AS "w_county", "warehouse"."w_state" AS "w_state", "warehouse"."w_country" AS "w_country", 'ZOUROS,ZHOU' AS "ship_carriers", 
"date_dim"."d_year" AS "year1", SUM( CASE WHEN "date_dim"."d_moy" = 1 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jan_sales", SUM( CASE WHEN "date_dim"."d_moy" = 2 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "feb_sales", SUM( CASE WHEN "date_dim"."d_moy" = 3 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "mar_sales", SUM( CASE WHEN "date_dim"."d_moy" = 4 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "apr_sales", SUM( CASE WHEN "date_dim"."d_moy" = 5 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "may_sales", SUM( CASE WHEN "date_dim"."d_moy" = 6 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jun_sales", SUM( CASE WHEN "date_dim"."d_moy" = 7 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jul_sales", SUM( CASE WHEN "date_dim"."d_moy" = 8 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "aug_sales", SUM( CASE WHEN "date_dim"."d_moy" = 9 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "sep_sales", SUM( CASE WHEN "date_dim"."d_moy" = 10 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "oct_sales", SUM( CASE WHEN "date_dim"."d_moy" = 11 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "nov_sales", SUM( CASE WHEN "date_dim"."d_moy" = 12 THEN "catalog_sales"."cs_ext_sales_price" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "dec_sales", SUM( CASE WHEN "date_dim"."d_moy" = 1 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jan_net", SUM( CASE WHEN "date_dim"."d_moy" = 2 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "feb_net", SUM( 
CASE WHEN "date_dim"."d_moy" = 3 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "mar_net", SUM( CASE WHEN "date_dim"."d_moy" = 4 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "apr_net", SUM( CASE WHEN "date_dim"."d_moy" = 5 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "may_net", SUM( CASE WHEN "date_dim"."d_moy" = 6 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jun_net", SUM( CASE WHEN "date_dim"."d_moy" = 7 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "jul_net", SUM( CASE WHEN "date_dim"."d_moy" = 8 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "aug_net", SUM( CASE WHEN "date_dim"."d_moy" = 9 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "sep_net", SUM( CASE WHEN "date_dim"."d_moy" = 10 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "oct_net", SUM( CASE WHEN "date_dim"."d_moy" = 11 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "nov_net", SUM( CASE WHEN "date_dim"."d_moy" = 12 THEN "catalog_sales"."cs_net_paid" * "catalog_sales"."cs_quantity" ELSE 0 END ) AS "dec_net" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "ship_mode_2" AS "ship_mode" ON "catalog_sales"."cs_ship_mode_sk" = "ship_mode"."sm_ship_mode_sk" JOIN "time_dim_2" AS "time_dim" ON "catalog_sales"."cs_sold_time_sk" = "time_dim"."t_time_sk" JOIN "warehouse_2" AS "warehouse" ON "catalog_sales"."cs_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY "warehouse"."w_warehouse_name", "warehouse"."w_warehouse_sq_ft", "warehouse"."w_city", "warehouse"."w_county", "warehouse"."w_state", "warehouse"."w_country", "date_dim"."d_year" ) SELECT "x"."w_warehouse_name" AS "w_warehouse_name", 
"x"."w_warehouse_sq_ft" AS "w_warehouse_sq_ft", "x"."w_city" AS "w_city", "x"."w_county" AS "w_county", "x"."w_state" AS "w_state", "x"."w_country" AS "w_country", "x"."ship_carriers" AS "ship_carriers", "x"."year1" AS "year1", SUM("x"."jan_sales") AS "jan_sales", SUM("x"."feb_sales") AS "feb_sales", SUM("x"."mar_sales") AS "mar_sales", SUM("x"."apr_sales") AS "apr_sales", SUM("x"."may_sales") AS "may_sales", SUM("x"."jun_sales") AS "jun_sales", SUM("x"."jul_sales") AS "jul_sales", SUM("x"."aug_sales") AS "aug_sales", SUM("x"."sep_sales") AS "sep_sales", SUM("x"."oct_sales") AS "oct_sales", SUM("x"."nov_sales") AS "nov_sales", SUM("x"."dec_sales") AS "dec_sales", SUM("x"."jan_sales" / "x"."w_warehouse_sq_ft") AS "jan_sales_per_sq_foot", SUM("x"."feb_sales" / "x"."w_warehouse_sq_ft") AS "feb_sales_per_sq_foot", SUM("x"."mar_sales" / "x"."w_warehouse_sq_ft") AS "mar_sales_per_sq_foot", SUM("x"."apr_sales" / "x"."w_warehouse_sq_ft") AS "apr_sales_per_sq_foot", SUM("x"."may_sales" / "x"."w_warehouse_sq_ft") AS "may_sales_per_sq_foot", SUM("x"."jun_sales" / "x"."w_warehouse_sq_ft") AS "jun_sales_per_sq_foot", SUM("x"."jul_sales" / "x"."w_warehouse_sq_ft") AS "jul_sales_per_sq_foot", SUM("x"."aug_sales" / "x"."w_warehouse_sq_ft") AS "aug_sales_per_sq_foot", SUM("x"."sep_sales" / "x"."w_warehouse_sq_ft") AS "sep_sales_per_sq_foot", SUM("x"."oct_sales" / "x"."w_warehouse_sq_ft") AS "oct_sales_per_sq_foot", SUM("x"."nov_sales" / "x"."w_warehouse_sq_ft") AS "nov_sales_per_sq_foot", SUM("x"."dec_sales" / "x"."w_warehouse_sq_ft") AS "dec_sales_per_sq_foot", SUM("x"."jan_net") AS "jan_net", SUM("x"."feb_net") AS "feb_net", SUM("x"."mar_net") AS "mar_net", SUM("x"."apr_net") AS "apr_net", SUM("x"."may_net") AS "may_net", SUM("x"."jun_net") AS "jun_net", SUM("x"."jul_net") AS "jul_net", SUM("x"."aug_net") AS "aug_net", SUM("x"."sep_net") AS "sep_net", SUM("x"."oct_net") AS "oct_net", SUM("x"."nov_net") AS "nov_net", SUM("x"."dec_net") AS "dec_net" FROM "x" AS "x" GROUP BY 
"x"."w_warehouse_name", "x"."w_warehouse_sq_ft", "x"."w_city", "x"."w_county", "x"."w_state", "x"."w_country", "x"."ship_carriers", "x"."year1" ORDER BY "w_warehouse_name" LIMIT 100; -------------------------------------- -- TPC-DS 67 -------------------------------------- select * from (select i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sumsales ,rank() over (partition by i_category order by sumsales desc) rk from (select i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales from store_sales ,date_dim ,store ,item where ss_sold_date_sk=d_date_sk and ss_item_sk=i_item_sk and ss_store_sk = s_store_sk and d_month_seq between 1181 and 1181+11 group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 where rk <= 100 order by i_category ,i_class ,i_brand ,i_product_name ,d_year ,d_qoy ,d_moy ,s_store_id ,sumsales ,rk limit 100; WITH "dw1" AS ( SELECT "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", "item"."i_brand" AS "i_brand", "item"."i_product_name" AS "i_product_name", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_moy" AS "d_moy", "store"."s_store_id" AS "s_store_id", SUM(COALESCE("store_sales"."ss_sales_price" * "store_sales"."ss_quantity", 0)) AS "sumsales" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_month_seq" <= 1192 AND "date_dim"."d_month_seq" >= 1181 JOIN "item" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY ROLLUP ( "item"."i_category", "item"."i_class", "item"."i_brand", "item"."i_product_name", "date_dim"."d_year", "date_dim"."d_qoy", "date_dim"."d_moy", "store"."s_store_id" ) ), "dw2" AS ( SELECT "dw1"."i_category" AS "i_category", "dw1"."i_class" AS 
"i_class", "dw1"."i_brand" AS "i_brand", "dw1"."i_product_name" AS "i_product_name", "dw1"."d_year" AS "d_year", "dw1"."d_qoy" AS "d_qoy", "dw1"."d_moy" AS "d_moy", "dw1"."s_store_id" AS "s_store_id", "dw1"."sumsales" AS "sumsales", RANK() OVER (PARTITION BY "dw1"."i_category" ORDER BY "dw1"."sumsales" DESC) AS "rk" FROM "dw1" AS "dw1" ) SELECT "dw2"."i_category" AS "i_category", "dw2"."i_class" AS "i_class", "dw2"."i_brand" AS "i_brand", "dw2"."i_product_name" AS "i_product_name", "dw2"."d_year" AS "d_year", "dw2"."d_qoy" AS "d_qoy", "dw2"."d_moy" AS "d_moy", "dw2"."s_store_id" AS "s_store_id", "dw2"."sumsales" AS "sumsales", "dw2"."rk" AS "rk" FROM "dw2" AS "dw2" WHERE "dw2"."rk" <= 100 ORDER BY "dw2"."i_category", "dw2"."i_class", "dw2"."i_brand", "dw2"."i_product_name", "dw2"."d_year", "dw2"."d_qoy", "dw2"."d_moy", "dw2"."s_store_id", "dw2"."sumsales", "dw2"."rk" LIMIT 100; -------------------------------------- -- TPC-DS 68 -------------------------------------- # execute: true SELECT c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, extended_price, extended_tax, list_price FROM (SELECT ss_ticket_number, ss_customer_sk, ca_city bought_city, Sum(ss_ext_sales_price) extended_price, Sum(ss_ext_list_price) list_price, Sum(ss_ext_tax) extended_tax FROM store_sales, date_dim, store, household_demographics, customer_address WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND store_sales.ss_addr_sk = customer_address.ca_address_sk AND date_dim.d_dom BETWEEN 1 AND 2 AND ( household_demographics.hd_dep_count = 8 OR household_demographics.hd_vehicle_count = 3 ) AND date_dim.d_year IN ( 1998, 1998 + 1, 1998 + 2 ) AND store.s_city IN ( 'Fairview', 'Midway' ) GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, customer_address current_addr WHERE ss_customer_sk = c_customer_sk AND customer.c_current_addr_sk = 
current_addr.ca_address_sk AND current_addr.ca_city <> bought_city ORDER BY c_last_name, ss_ticket_number LIMIT 100; WITH "dn" AS ( SELECT "store_sales"."ss_ticket_number" AS "ss_ticket_number", "store_sales"."ss_customer_sk" AS "ss_customer_sk", "customer_address"."ca_city" AS "bought_city", SUM("store_sales"."ss_ext_sales_price") AS "extended_price", SUM("store_sales"."ss_ext_list_price") AS "list_price", SUM("store_sales"."ss_ext_tax") AS "extended_tax" FROM "store_sales" AS "store_sales" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "store_sales"."ss_addr_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_dom" <= 2 AND "date_dim"."d_dom" >= 1 AND "date_dim"."d_year" IN (1998, 1999, 2000) JOIN "household_demographics" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND ( "household_demographics"."hd_dep_count" = 8 OR "household_demographics"."hd_vehicle_count" = 3 ) JOIN "store" AS "store" ON "store"."s_city" IN ('Fairview', 'Midway') AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store_sales"."ss_ticket_number", "store_sales"."ss_customer_sk", "store_sales"."ss_addr_sk", "customer_address"."ca_city" ) SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "current_addr"."ca_city" AS "ca_city", "dn"."bought_city" AS "bought_city", "dn"."ss_ticket_number" AS "ss_ticket_number", "dn"."extended_price" AS "extended_price", "dn"."extended_tax" AS "extended_tax", "dn"."list_price" AS "list_price" FROM "dn" AS "dn" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "dn"."ss_customer_sk" JOIN "customer_address" AS "current_addr" ON "current_addr"."ca_address_sk" = "customer"."c_current_addr_sk" AND "current_addr"."ca_city" <> "dn"."bought_city" ORDER BY "c_last_name", "ss_ticket_number" LIMIT 100; -------------------------------------- -- TPC-DS 69 
-------------------------------------- # execute: true SELECT cd_gender, cd_marital_status, cd_education_status, Count(*) cnt1, cd_purchase_estimate, Count(*) cnt2, cd_credit_rating, Count(*) cnt3 FROM customer c, customer_address ca, customer_demographics WHERE c.c_current_addr_sk = ca.ca_address_sk AND ca_state IN ( 'KS', 'AZ', 'NE' ) AND cd_demo_sk = c.c_current_cdemo_sk AND EXISTS (SELECT * FROM store_sales, date_dim WHERE c.c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year = 2004 AND d_moy BETWEEN 3 AND 3 + 2) AND ( NOT EXISTS (SELECT * FROM web_sales, date_dim WHERE c.c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year = 2004 AND d_moy BETWEEN 3 AND 3 + 2) AND NOT EXISTS (SELECT * FROM catalog_sales, date_dim WHERE c.c_customer_sk = cs_ship_customer_sk AND cs_sold_date_sk = d_date_sk AND d_year = 2004 AND d_moy BETWEEN 3 AND 3 + 2) ) GROUP BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating ORDER BY cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date_id" AS "d_date_id", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq", "date_dim"."d_week_seq" AS "d_week_seq", "date_dim"."d_quarter_seq" AS "d_quarter_seq", "date_dim"."d_year" AS "d_year", "date_dim"."d_dow" AS "d_dow", "date_dim"."d_moy" AS "d_moy", "date_dim"."d_dom" AS "d_dom", "date_dim"."d_qoy" AS "d_qoy", "date_dim"."d_fy_year" AS "d_fy_year", "date_dim"."d_fy_quarter_seq" AS "d_fy_quarter_seq", "date_dim"."d_fy_week_seq" AS "d_fy_week_seq", "date_dim"."d_day_name" AS "d_day_name", "date_dim"."d_quarter_name" AS "d_quarter_name", "date_dim"."d_holiday" AS "d_holiday", "date_dim"."d_weekend" AS "d_weekend", "date_dim"."d_following_holiday" AS "d_following_holiday", "date_dim"."d_first_dom" AS "d_first_dom", "date_dim"."d_last_dom" AS "d_last_dom", 
"date_dim"."d_same_day_ly" AS "d_same_day_ly", "date_dim"."d_same_day_lq" AS "d_same_day_lq", "date_dim"."d_current_day" AS "d_current_day", "date_dim"."d_current_week" AS "d_current_week", "date_dim"."d_current_month" AS "d_current_month", "date_dim"."d_current_quarter" AS "d_current_quarter", "date_dim"."d_current_year" AS "d_current_year" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" <= 5 AND "date_dim"."d_moy" >= 3 AND "date_dim"."d_year" = 2004 ), "_u_0" AS ( SELECT "store_sales"."ss_customer_sk" AS "_u_1" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_customer_sk" ), "_u_2" AS ( SELECT "web_sales"."ws_bill_customer_sk" AS "_u_3" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "web_sales"."ws_bill_customer_sk" ), "_u_4" AS ( SELECT "catalog_sales"."cs_ship_customer_sk" AS "_u_5" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_ship_customer_sk" ) SELECT "customer_demographics"."cd_gender" AS "cd_gender", "customer_demographics"."cd_marital_status" AS "cd_marital_status", "customer_demographics"."cd_education_status" AS "cd_education_status", COUNT(*) AS "cnt1", "customer_demographics"."cd_purchase_estimate" AS "cd_purchase_estimate", COUNT(*) AS "cnt2", "customer_demographics"."cd_credit_rating" AS "cd_credit_rating", COUNT(*) AS "cnt3" FROM "customer" AS "c" JOIN "customer_address" AS "ca" ON "c"."c_current_addr_sk" = "ca"."ca_address_sk" AND "ca"."ca_state" IN ('KS', 'AZ', 'NE') JOIN "customer_demographics" AS "customer_demographics" ON "c"."c_current_cdemo_sk" = "customer_demographics"."cd_demo_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "c"."c_customer_sk" LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."_u_3" = "c"."c_customer_sk" LEFT JOIN "_u_4" AS 
"_u_4" ON "_u_4"."_u_5" = "c"."c_customer_sk" WHERE "_u_2"."_u_3" IS NULL AND "_u_4"."_u_5" IS NULL AND NOT "_u_0"."_u_1" IS NULL GROUP BY "customer_demographics"."cd_gender", "customer_demographics"."cd_marital_status", "customer_demographics"."cd_education_status", "customer_demographics"."cd_purchase_estimate", "customer_demographics"."cd_credit_rating" ORDER BY "cd_gender", "cd_marital_status", "cd_education_status", "cd_purchase_estimate", "cd_credit_rating" LIMIT 100; -------------------------------------- -- TPC-DS 70 -------------------------------------- SELECT Sum(ss_net_profit) AS total_sum, s_state, s_county, Grouping(s_state) + Grouping(s_county) AS lochierarchy, Rank() OVER ( partition BY Grouping(s_state)+Grouping(s_county), CASE WHEN Grouping( s_county) = 0 THEN s_state END ORDER BY Sum(ss_net_profit) DESC) AS rank_within_parent FROM store_sales, date_dim d1, store WHERE d1.d_month_seq BETWEEN 1200 AND 1200 + 11 AND d1.d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk AND s_state IN (SELECT s_state FROM (SELECT s_state AS s_state, Rank() OVER ( partition BY s_state ORDER BY Sum(ss_net_profit) DESC) AS ranking FROM store_sales, store, date_dim WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 AND d_date_sk = ss_sold_date_sk AND s_store_sk = ss_store_sk GROUP BY s_state) tmp1 WHERE ranking <= 5) GROUP BY rollup( s_state, s_county ) ORDER BY lochierarchy DESC, CASE WHEN lochierarchy = 0 THEN s_state END, rank_within_parent LIMIT 100; WITH "store_sales_2" AS ( SELECT "store_sales"."ss_sold_date_sk" AS "ss_sold_date_sk", "store_sales"."ss_store_sk" AS "ss_store_sk", "store_sales"."ss_net_profit" AS "ss_net_profit" FROM "store_sales" AS "store_sales" ), "tmp1" AS ( SELECT "store"."s_state" AS "s_state", RANK() OVER (PARTITION BY "store"."s_state" ORDER BY SUM("store_sales"."ss_net_profit") DESC) AS "ranking" FROM "store_sales_2" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND 
"date_dim"."d_month_seq" <= 1211 AND "date_dim"."d_month_seq" >= 1200 JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store"."s_state" ), "_u_0" AS ( SELECT "tmp1"."s_state" AS "s_state" FROM "tmp1" AS "tmp1" WHERE "tmp1"."ranking" <= 5 GROUP BY "tmp1"."s_state" ) SELECT SUM("store_sales"."ss_net_profit") AS "total_sum", "store"."s_state" AS "s_state", "store"."s_county" AS "s_county", GROUPING("store"."s_state") + GROUPING("store"."s_county") AS "lochierarchy", RANK() OVER ( PARTITION BY GROUPING("store"."s_state") + GROUPING("store"."s_county"), CASE WHEN GROUPING("store"."s_county") = 0 THEN "store"."s_state" END ORDER BY SUM("store_sales"."ss_net_profit") DESC ) AS "rank_within_parent" FROM "store_sales_2" AS "store_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "d1"."d_month_seq" <= 1211 AND "d1"."d_month_seq" >= 1200 JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."s_state" = "store"."s_state" WHERE NOT "_u_0"."s_state" IS NULL GROUP BY ROLLUP ( "store"."s_state", "store"."s_county" ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "s_state" END, "rank_within_parent" LIMIT 100; -------------------------------------- -- TPC-DS 71 -------------------------------------- # execute: true SELECT i_brand_id brand_id, i_brand brand, t_hour, t_minute, Sum(ext_price) ext_price FROM item, (SELECT ws_ext_sales_price AS ext_price, ws_sold_date_sk AS sold_date_sk, ws_item_sk AS sold_item_sk, ws_sold_time_sk AS time_sk FROM web_sales, date_dim WHERE d_date_sk = ws_sold_date_sk AND d_moy = 11 AND d_year = 2001 UNION ALL SELECT cs_ext_sales_price AS ext_price, cs_sold_date_sk AS sold_date_sk, cs_item_sk AS sold_item_sk, cs_sold_time_sk AS time_sk FROM catalog_sales, date_dim WHERE d_date_sk = cs_sold_date_sk AND d_moy = 11 AND d_year = 2001 UNION ALL SELECT ss_ext_sales_price AS ext_price, ss_sold_date_sk AS 
sold_date_sk, ss_item_sk AS sold_item_sk, ss_sold_time_sk AS time_sk FROM store_sales, date_dim WHERE d_date_sk = ss_sold_date_sk AND d_moy = 11 AND d_year = 2001) AS tmp, time_dim WHERE sold_item_sk = i_item_sk AND i_manager_id = 1 AND time_sk = t_time_sk AND ( t_meal_time = 'breakfast' OR t_meal_time = 'dinner' ) GROUP BY i_brand, i_brand_id, t_hour, t_minute ORDER BY ext_price DESC, i_brand_id; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_moy" AS "d_moy" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_moy" = 11 AND "date_dim"."d_year" = 2001 ), "tmp" AS ( SELECT "web_sales"."ws_ext_sales_price" AS "ext_price", "web_sales"."ws_item_sk" AS "sold_item_sk", "web_sales"."ws_sold_time_sk" AS "time_sk" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" UNION ALL SELECT "catalog_sales"."cs_ext_sales_price" AS "ext_price", "catalog_sales"."cs_item_sk" AS "sold_item_sk", "catalog_sales"."cs_sold_time_sk" AS "time_sk" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" UNION ALL SELECT "store_sales"."ss_ext_sales_price" AS "ext_price", "store_sales"."ss_item_sk" AS "sold_item_sk", "store_sales"."ss_sold_time_sk" AS "time_sk" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" ) SELECT "item"."i_brand_id" AS "brand_id", "item"."i_brand" AS "brand", "time_dim"."t_hour" AS "t_hour", "time_dim"."t_minute" AS "t_minute", SUM("tmp"."ext_price") AS "ext_price" FROM "item" AS "item" JOIN "tmp" AS "tmp" ON "item"."i_item_sk" = "tmp"."sold_item_sk" JOIN "time_dim" AS "time_dim" ON ( "time_dim"."t_meal_time" = 'breakfast' OR "time_dim"."t_meal_time" = 'dinner' ) AND "time_dim"."t_time_sk" = "tmp"."time_sk" WHERE "item"."i_manager_id" = 1 GROUP BY "item"."i_brand", "item"."i_brand_id", 
"time_dim"."t_hour", "time_dim"."t_minute" ORDER BY "ext_price" DESC, "brand_id"; -------------------------------------- -- TPC-DS 72 -------------------------------------- SELECT i_item_desc, w_warehouse_name, d1.d_week_seq, Sum(CASE WHEN p_promo_sk IS NULL THEN 1 ELSE 0 END) no_promo, Sum(CASE WHEN p_promo_sk IS NOT NULL THEN 1 ELSE 0 END) promo, Count(*) total_cnt FROM catalog_sales JOIN inventory ON ( cs_item_sk = inv_item_sk ) JOIN warehouse ON ( w_warehouse_sk = inv_warehouse_sk ) JOIN item ON ( i_item_sk = cs_item_sk ) JOIN customer_demographics ON ( cs_bill_cdemo_sk = cd_demo_sk ) JOIN household_demographics ON ( cs_bill_hdemo_sk = hd_demo_sk ) JOIN date_dim d1 ON ( cs_sold_date_sk = d1.d_date_sk ) JOIN date_dim d2 ON ( inv_date_sk = d2.d_date_sk ) JOIN date_dim d3 ON ( cs_ship_date_sk = d3.d_date_sk ) LEFT OUTER JOIN promotion ON ( cs_promo_sk = p_promo_sk ) LEFT OUTER JOIN catalog_returns ON ( cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number ) WHERE d1.d_week_seq = d2.d_week_seq AND inv_quantity_on_hand < cs_quantity AND d3.d_date > d1.d_date + INTERVAL '5' day AND hd_buy_potential = '501-1000' AND d1.d_year = 2002 AND cd_marital_status = 'M' GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq LIMIT 100; SELECT "item"."i_item_desc" AS "i_item_desc", "warehouse"."w_warehouse_name" AS "w_warehouse_name", "d1"."d_week_seq" AS "d_week_seq", SUM(CASE WHEN "promotion"."p_promo_sk" IS NULL THEN 1 ELSE 0 END) AS "no_promo", SUM(CASE WHEN NOT "promotion"."p_promo_sk" IS NULL THEN 1 ELSE 0 END) AS "promo", COUNT(*) AS "total_cnt" FROM "catalog_sales" AS "catalog_sales" JOIN "inventory" AS "inventory" ON "catalog_sales"."cs_item_sk" = "inventory"."inv_item_sk" AND "catalog_sales"."cs_quantity" > "inventory"."inv_quantity_on_hand" JOIN "warehouse" AS "warehouse" ON "inventory"."inv_warehouse_sk" = "warehouse"."w_warehouse_sk" JOIN "item" AS "item" ON "catalog_sales"."cs_item_sk" = 
"item"."i_item_sk" JOIN "customer_demographics" AS "customer_demographics" ON "catalog_sales"."cs_bill_cdemo_sk" = "customer_demographics"."cd_demo_sk" AND "customer_demographics"."cd_marital_status" = 'M' JOIN "household_demographics" AS "household_demographics" ON "catalog_sales"."cs_bill_hdemo_sk" = "household_demographics"."hd_demo_sk" AND "household_demographics"."hd_buy_potential" = '501-1000' JOIN "date_dim" AS "d1" ON "catalog_sales"."cs_sold_date_sk" = "d1"."d_date_sk" AND "d1"."d_year" = 2002 JOIN "date_dim" AS "d2" ON "d1"."d_week_seq" = "d2"."d_week_seq" AND "d2"."d_date_sk" = "inventory"."inv_date_sk" JOIN "date_dim" AS "d3" ON "catalog_sales"."cs_ship_date_sk" = "d3"."d_date_sk" AND "d3"."d_date" > "d1"."d_date" + INTERVAL '5' DAY LEFT JOIN "promotion" AS "promotion" ON "catalog_sales"."cs_promo_sk" = "promotion"."p_promo_sk" LEFT JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" GROUP BY "item"."i_item_desc", "warehouse"."w_warehouse_name", "d1"."d_week_seq" ORDER BY "total_cnt" DESC, "i_item_desc", "w_warehouse_name", "d_week_seq" LIMIT 100; -------------------------------------- -- TPC-DS 73 -------------------------------------- # execute: true SELECT c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, ss_ticket_number, cnt FROM (SELECT ss_ticket_number, ss_customer_sk, Count(*) cnt FROM store_sales, date_dim, store, household_demographics WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND date_dim.d_dom BETWEEN 1 AND 2 AND ( household_demographics.hd_buy_potential = '>10000' OR household_demographics.hd_buy_potential = '0-500' ) AND household_demographics.hd_vehicle_count > 0 AND CASE WHEN household_demographics.hd_vehicle_count > 0 THEN household_demographics.hd_dep_count / 
household_demographics.hd_vehicle_count ELSE NULL END > 1 AND date_dim.d_year IN ( 2000, 2000 + 1, 2000 + 2 ) AND store.s_county IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) GROUP BY ss_ticket_number, ss_customer_sk) dj, customer WHERE ss_customer_sk = c_customer_sk AND cnt BETWEEN 1 AND 5 ORDER BY cnt DESC, c_last_name ASC; WITH "dj" AS ( SELECT "store_sales"."ss_ticket_number" AS "ss_ticket_number", "store_sales"."ss_customer_sk" AS "ss_customer_sk", COUNT(*) AS "cnt" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_dom" <= 2 AND "date_dim"."d_dom" >= 1 AND "date_dim"."d_year" IN (2000, 2001, 2002) JOIN "household_demographics" AS "household_demographics" ON ( "household_demographics"."hd_buy_potential" = '0-500' OR "household_demographics"."hd_buy_potential" = '>10000' ) AND "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_vehicle_count" > 0 AND CASE WHEN "household_demographics"."hd_vehicle_count" > 0 THEN "household_demographics"."hd_dep_count" / "household_demographics"."hd_vehicle_count" ELSE NULL END > 1 JOIN "store" AS "store" ON "store"."s_county" IN ( 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County' ) AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store_sales"."ss_ticket_number", "store_sales"."ss_customer_sk" ) SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "customer"."c_salutation" AS "c_salutation", "customer"."c_preferred_cust_flag" AS "c_preferred_cust_flag", "dj"."ss_ticket_number" AS "ss_ticket_number", "dj"."cnt" AS "cnt" FROM "dj" AS "dj" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "dj"."ss_customer_sk" WHERE "dj"."cnt" <= 5 AND "dj"."cnt" >= 1 ORDER BY "cnt" DESC, "c_last_name"; -------------------------------------- -- TPC-DS 74 
-------------------------------------- # execute: true WITH year_total AS (SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, d_year AS year1, Sum(ss_net_paid) year_total, 's' sale_type FROM customer, store_sales, date_dim WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk AND d_year IN ( 1999, 1999 + 1 ) GROUP BY c_customer_id, c_first_name, c_last_name, d_year UNION ALL SELECT c_customer_id customer_id, c_first_name customer_first_name, c_last_name customer_last_name, d_year AS year1, Sum(ws_net_paid) year_total, 'w' sale_type FROM customer, web_sales, date_dim WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk AND d_year IN ( 1999, 1999 + 1 ) GROUP BY c_customer_id, c_first_name, c_last_name, d_year) SELECT t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_w_firstyear, year_total t_w_secyear WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id AND t_s_firstyear.customer_id = t_w_secyear.customer_id AND t_s_firstyear.customer_id = t_w_firstyear.customer_id AND t_s_firstyear.sale_type = 's' AND t_w_firstyear.sale_type = 'w' AND t_s_secyear.sale_type = 's' AND t_w_secyear.sale_type = 'w' AND t_s_firstyear.year1 = 1999 AND t_s_secyear.year1 = 1999 + 1 AND t_w_firstyear.year1 = 1999 AND t_w_secyear.year1 = 1999 + 1 AND t_s_firstyear.year_total > 0 AND t_w_firstyear.year_total > 0 AND CASE WHEN t_w_firstyear.year_total > 0 THEN t_w_secyear.year_total / t_w_firstyear.year_total ELSE NULL END > CASE WHEN t_s_firstyear.year_total > 0 THEN t_s_secyear.year_total / t_s_firstyear.year_total ELSE NULL END ORDER BY 1, 2, 3 LIMIT 100; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_customer_id" AS "c_customer_id", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name" FROM "customer" AS "customer" ), 
"date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_year" IN (1999, 2000) ), "year_total" AS ( SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "date_dim"."d_year" AS "year1", SUM("store_sales"."ss_net_paid") AS "year_total", 's' AS "sale_type" FROM "customer_2" AS "customer" JOIN "store_sales" AS "store_sales" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "date_dim"."d_year" UNION ALL SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_first_name" AS "customer_first_name", "customer"."c_last_name" AS "customer_last_name", "date_dim"."d_year" AS "year1", SUM("web_sales"."ws_net_paid") AS "year_total", 'w' AS "sale_type" FROM "customer_2" AS "customer" JOIN "web_sales" AS "web_sales" ON "customer"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "customer"."c_customer_id", "customer"."c_first_name", "customer"."c_last_name", "date_dim"."d_year" ) SELECT "t_s_secyear"."customer_id" AS "customer_id", "t_s_secyear"."customer_first_name" AS "customer_first_name", "t_s_secyear"."customer_last_name" AS "customer_last_name" FROM "year_total" AS "t_s_firstyear" JOIN "year_total" AS "t_s_secyear" ON "t_s_firstyear"."customer_id" = "t_s_secyear"."customer_id" AND "t_s_secyear"."sale_type" = 's' AND "t_s_secyear"."year1" = 2000 JOIN "year_total" AS "t_w_firstyear" ON "t_s_firstyear"."customer_id" = "t_w_firstyear"."customer_id" AND "t_w_firstyear"."sale_type" = 'w' AND "t_w_firstyear"."year1" = 1999 AND "t_w_firstyear"."year_total" > 0 JOIN "year_total" AS "t_w_secyear" ON 
"t_s_firstyear"."customer_id" = "t_w_secyear"."customer_id" AND "t_w_secyear"."sale_type" = 'w' AND "t_w_secyear"."year1" = 2000 AND CASE WHEN "t_s_firstyear"."year_total" > 0 THEN "t_s_secyear"."year_total" / "t_s_firstyear"."year_total" ELSE NULL END < CASE WHEN "t_w_firstyear"."year_total" > 0 THEN "t_w_secyear"."year_total" / "t_w_firstyear"."year_total" ELSE NULL END WHERE "t_s_firstyear"."sale_type" = 's' AND "t_s_firstyear"."year1" = 1999 AND "t_s_firstyear"."year_total" > 0 ORDER BY "customer_id", "customer_first_name", "customer_last_name" LIMIT 100; -------------------------------------- -- TPC-DS 75 -------------------------------------- # execute: true WITH all_sales AS (SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, Sum(sales_cnt) AS sales_cnt, Sum(sales_amt) AS sales_amt FROM (SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt FROM catalog_sales JOIN item ON i_item_sk = cs_item_sk JOIN date_dim ON d_date_sk = cs_sold_date_sk LEFT JOIN catalog_returns ON ( cs_order_number = cr_order_number AND cs_item_sk = cr_item_sk ) WHERE i_category = 'Men' UNION SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt, ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt FROM store_sales JOIN item ON i_item_sk = ss_item_sk JOIN date_dim ON d_date_sk = ss_sold_date_sk LEFT JOIN store_returns ON ( ss_ticket_number = sr_ticket_number AND ss_item_sk = sr_item_sk ) WHERE i_category = 'Men' UNION SELECT d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt, ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt FROM web_sales JOIN item ON i_item_sk = ws_item_sk JOIN date_dim ON d_date_sk = ws_sold_date_sk LEFT JOIN web_returns ON ( ws_order_number = 
wr_order_number AND ws_item_sk = wr_item_sk ) WHERE i_category = 'Men') sales_detail GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) SELECT prev_yr.d_year AS prev_year, curr_yr.d_year AS year1, curr_yr.i_brand_id, curr_yr.i_class_id, curr_yr.i_category_id, curr_yr.i_manufact_id, prev_yr.sales_cnt AS prev_yr_cnt, curr_yr.sales_cnt AS curr_yr_cnt, curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff FROM all_sales curr_yr, all_sales prev_yr WHERE curr_yr.i_brand_id = prev_yr.i_brand_id AND curr_yr.i_class_id = prev_yr.i_class_id AND curr_yr.i_category_id = prev_yr.i_category_id AND curr_yr.i_manufact_id = prev_yr.i_manufact_id AND curr_yr.d_year = 2002 AND prev_yr.d_year = 2002 - 1 AND Cast(curr_yr.sales_cnt AS DECIMAL(17, 2)) / Cast(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 ORDER BY sales_cnt_diff LIMIT 100; WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", "item"."i_category" AS "i_category", "item"."i_manufact_id" AS "i_manufact_id" FROM "item" AS "item" WHERE "item"."i_category" = 'Men' ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" ), "sales_detail" AS ( SELECT "date_dim"."d_year" AS "d_year", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", "item"."i_manufact_id" AS "i_manufact_id", "catalog_sales"."cs_quantity" - COALESCE("catalog_returns"."cr_return_quantity", 0) AS "sales_cnt", "catalog_sales"."cs_ext_sales_price" - COALESCE("catalog_returns"."cr_return_amount", 0.0) AS "sales_amt" FROM "catalog_sales" AS "catalog_sales" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" LEFT JOIN 
"catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" UNION SELECT "date_dim"."d_year" AS "d_year", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", "item"."i_manufact_id" AS "i_manufact_id", "store_sales"."ss_quantity" - COALESCE("store_returns"."sr_return_quantity", 0) AS "sales_cnt", "store_sales"."ss_ext_sales_price" - COALESCE("store_returns"."sr_return_amt", 0.0) AS "sales_amt" FROM "store_sales" AS "store_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" LEFT JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" UNION SELECT "date_dim"."d_year" AS "d_year", "item"."i_brand_id" AS "i_brand_id", "item"."i_class_id" AS "i_class_id", "item"."i_category_id" AS "i_category_id", "item"."i_manufact_id" AS "i_manufact_id", "web_sales"."ws_quantity" - COALESCE("web_returns"."wr_return_quantity", 0) AS "sales_cnt", "web_sales"."ws_ext_sales_price" - COALESCE("web_returns"."wr_return_amt", 0.0) AS "sales_amt" FROM "web_sales" AS "web_sales" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" LEFT JOIN "web_returns" AS "web_returns" ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk" AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number" ), "all_sales" AS ( SELECT "sales_detail"."d_year" AS "d_year", "sales_detail"."i_brand_id" AS "i_brand_id", "sales_detail"."i_class_id" AS "i_class_id", "sales_detail"."i_category_id" AS "i_category_id", "sales_detail"."i_manufact_id" AS "i_manufact_id", 
SUM("sales_detail"."sales_cnt") AS "sales_cnt", SUM("sales_detail"."sales_amt") AS "sales_amt" FROM "sales_detail" AS "sales_detail" GROUP BY "sales_detail"."d_year", "sales_detail"."i_brand_id", "sales_detail"."i_class_id", "sales_detail"."i_category_id", "sales_detail"."i_manufact_id" ) SELECT "prev_yr"."d_year" AS "prev_year", "curr_yr"."d_year" AS "year1", "curr_yr"."i_brand_id" AS "i_brand_id", "curr_yr"."i_class_id" AS "i_class_id", "curr_yr"."i_category_id" AS "i_category_id", "curr_yr"."i_manufact_id" AS "i_manufact_id", "prev_yr"."sales_cnt" AS "prev_yr_cnt", "curr_yr"."sales_cnt" AS "curr_yr_cnt", "curr_yr"."sales_cnt" - "prev_yr"."sales_cnt" AS "sales_cnt_diff", "curr_yr"."sales_amt" - "prev_yr"."sales_amt" AS "sales_amt_diff" FROM "all_sales" AS "curr_yr" JOIN "all_sales" AS "prev_yr" ON "curr_yr"."i_brand_id" = "prev_yr"."i_brand_id" AND "curr_yr"."i_category_id" = "prev_yr"."i_category_id" AND "curr_yr"."i_class_id" = "prev_yr"."i_class_id" AND "curr_yr"."i_manufact_id" = "prev_yr"."i_manufact_id" AND "prev_yr"."d_year" = 2001 AND CAST("curr_yr"."sales_cnt" AS DECIMAL(17, 2)) / CAST("prev_yr"."sales_cnt" AS DECIMAL(17, 2)) < 0.9 WHERE "curr_yr"."d_year" = 2002 ORDER BY "sales_cnt_diff" LIMIT 100; -------------------------------------- -- TPC-DS 76 -------------------------------------- # execute: true SELECT channel, col_name, d_year, d_qoy, i_category, Count(*) sales_cnt, Sum(ext_sales_price) sales_amt FROM (SELECT 'store' AS channel, 'ss_hdemo_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim WHERE ss_hdemo_sk IS NULL AND ss_sold_date_sk = d_date_sk AND ss_item_sk = i_item_sk UNION ALL SELECT 'web' AS channel, 'ws_ship_hdemo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price FROM web_sales, item, date_dim WHERE ws_ship_hdemo_sk IS NULL AND ws_sold_date_sk = d_date_sk AND ws_item_sk = i_item_sk UNION ALL SELECT 'catalog' AS channel, 'cs_warehouse_sk' col_name, d_year, 
d_qoy, i_category, cs_ext_sales_price ext_sales_price FROM catalog_sales, item, date_dim WHERE cs_warehouse_sk IS NULL AND cs_sold_date_sk = d_date_sk AND cs_item_sk = i_item_sk) foo GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy" FROM "date_dim" AS "date_dim" ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_category" AS "i_category" FROM "item" AS "item" ), "foo" AS ( SELECT 'store' AS "channel", 'ss_hdemo_sk' AS "col_name", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy", "item"."i_category" AS "i_category", "store_sales"."ss_ext_sales_price" AS "ext_sales_price" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" WHERE "store_sales"."ss_hdemo_sk" IS NULL UNION ALL SELECT 'web' AS "channel", 'ws_ship_hdemo_sk' AS "col_name", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy", "item"."i_category" AS "i_category", "web_sales"."ws_ext_sales_price" AS "ext_sales_price" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" WHERE "web_sales"."ws_ship_hdemo_sk" IS NULL UNION ALL SELECT 'catalog' AS "channel", 'cs_warehouse_sk' AS "col_name", "date_dim"."d_year" AS "d_year", "date_dim"."d_qoy" AS "d_qoy", "item"."i_category" AS "i_category", "catalog_sales"."cs_ext_sales_price" AS "ext_sales_price" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" WHERE 
"catalog_sales"."cs_warehouse_sk" IS NULL ) SELECT "foo"."channel" AS "channel", "foo"."col_name" AS "col_name", "foo"."d_year" AS "d_year", "foo"."d_qoy" AS "d_qoy", "foo"."i_category" AS "i_category", COUNT(*) AS "sales_cnt", SUM("foo"."ext_sales_price") AS "sales_amt" FROM "foo" AS "foo" GROUP BY "foo"."channel", "foo"."col_name", "foo"."d_year", "foo"."d_qoy", "foo"."i_category" ORDER BY "channel", "col_name", "d_year", "d_qoy", "i_category" LIMIT 100; -------------------------------------- -- TPC-DS 77 -------------------------------------- WITH ss AS ( SELECT s_store_sk, Sum(ss_ext_sales_price) AS sales, Sum(ss_net_profit) AS profit FROM store_sales, date_dim, store WHERE ss_sold_date_sk = d_date_sk AND d_date BETWEEN Cast('2001-08-16' AS DATE) AND ( Cast('2001-08-16' AS DATE) + INTERVAL '30' day) AND ss_store_sk = s_store_sk GROUP BY s_store_sk) , sr AS ( SELECT s_store_sk, sum(sr_return_amt) AS returns1, sum(sr_net_loss) AS profit_loss FROM store_returns, date_dim, store WHERE sr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2001-08-16' AS date) AND ( cast('2001-08-16' AS date) + INTERVAL '30' day) AND sr_store_sk = s_store_sk GROUP BY s_store_sk), cs AS ( SELECT cs_call_center_sk, sum(cs_ext_sales_price) AS sales, sum(cs_net_profit) AS profit FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2001-08-16' AS date) AND ( cast('2001-08-16' AS date) + INTERVAL '30' day) GROUP BY cs_call_center_sk ), cr AS ( SELECT cr_call_center_sk, sum(cr_return_amount) AS returns1, sum(cr_net_loss) AS profit_loss FROM catalog_returns, date_dim WHERE cr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2001-08-16' AS date) AND ( cast('2001-08-16' AS date) + INTERVAL '30' day) GROUP BY cr_call_center_sk ), ws AS ( SELECT wp_web_page_sk, sum(ws_ext_sales_price) AS sales, sum(ws_net_profit) AS profit FROM web_sales, date_dim, web_page WHERE ws_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2001-08-16' AS date) AND ( 
cast('2001-08-16' AS date) + INTERVAL '30' day) AND ws_web_page_sk = wp_web_page_sk GROUP BY wp_web_page_sk), wr AS ( SELECT wp_web_page_sk, sum(wr_return_amt) AS returns1, sum(wr_net_loss) AS profit_loss FROM web_returns, date_dim, web_page WHERE wr_returned_date_sk = d_date_sk AND d_date BETWEEN cast('2001-08-16' AS date) AND ( cast('2001-08-16' AS date) + INTERVAL '30' day) AND wr_web_page_sk = wp_web_page_sk GROUP BY wp_web_page_sk) SELECT channel , id , sum(sales) AS sales , sum(returns1) AS returns1 , sum(profit) AS profit FROM ( SELECT 'store channel' AS channel , ss.s_store_sk AS id , sales , COALESCE(returns1, 0) AS returns1 , (profit - COALESCE(profit_loss,0)) AS profit FROM ss LEFT JOIN sr ON ss.s_store_sk = sr.s_store_sk UNION ALL SELECT 'catalog channel' AS channel , cs_call_center_sk AS id , sales , returns1 , (profit - profit_loss) AS profit FROM cs , cr UNION ALL SELECT 'web channel' AS channel , ws.wp_web_page_sk AS id , sales , COALESCE(returns1, 0) returns1 , (profit - COALESCE(profit_loss,0)) AS profit FROM ws LEFT JOIN wr ON ws.wp_web_page_sk = wr.wp_web_page_sk ) x GROUP BY rollup (channel, id) ORDER BY channel , id LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE CAST("date_dim"."d_date" AS DATE) <= CAST('2001-09-15' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2001-08-16' AS DATE) ), "store_2" AS ( SELECT "store"."s_store_sk" AS "s_store_sk" FROM "store" AS "store" ), "ss" AS ( SELECT "store"."s_store_sk" AS "s_store_sk", SUM("store_sales"."ss_ext_sales_price") AS "sales", SUM("store_sales"."ss_net_profit") AS "profit" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store"."s_store_sk" ), "sr" AS ( SELECT "store"."s_store_sk" AS "s_store_sk", 
SUM("store_returns"."sr_return_amt") AS "returns1", SUM("store_returns"."sr_net_loss") AS "profit_loss" FROM "store_returns" AS "store_returns" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_returns"."sr_returned_date_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_returns"."sr_store_sk" GROUP BY "store"."s_store_sk" ), "cs" AS ( SELECT "catalog_sales"."cs_call_center_sk" AS "cs_call_center_sk", SUM("catalog_sales"."cs_ext_sales_price") AS "sales", SUM("catalog_sales"."cs_net_profit") AS "profit" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_call_center_sk" ), "cr" AS ( SELECT SUM("catalog_returns"."cr_return_amount") AS "returns1", SUM("catalog_returns"."cr_net_loss") AS "profit_loss" FROM "catalog_returns" AS "catalog_returns" JOIN "date_dim_2" AS "date_dim" ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_returns"."cr_call_center_sk" ), "web_page_2" AS ( SELECT "web_page"."wp_web_page_sk" AS "wp_web_page_sk" FROM "web_page" AS "web_page" ), "ws" AS ( SELECT "web_page"."wp_web_page_sk" AS "wp_web_page_sk", SUM("web_sales"."ws_ext_sales_price") AS "sales", SUM("web_sales"."ws_net_profit") AS "profit" FROM "web_sales" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "web_page_2" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk" GROUP BY "web_page"."wp_web_page_sk" ), "wr" AS ( SELECT "web_page"."wp_web_page_sk" AS "wp_web_page_sk", SUM("web_returns"."wr_return_amt") AS "returns1", SUM("web_returns"."wr_net_loss") AS "profit_loss" FROM "web_returns" AS "web_returns" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_returns"."wr_returned_date_sk" JOIN "web_page_2" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_returns"."wr_web_page_sk" GROUP BY "web_page"."wp_web_page_sk" ), 
"x" AS ( SELECT 'store channel' AS "channel", "ss"."s_store_sk" AS "id", "ss"."sales" AS "sales", COALESCE("sr"."returns1", 0) AS "returns1", "ss"."profit" - COALESCE("sr"."profit_loss", 0) AS "profit" FROM "ss" AS "ss" LEFT JOIN "sr" AS "sr" ON "sr"."s_store_sk" = "ss"."s_store_sk" UNION ALL SELECT 'catalog channel' AS "channel", "cs"."cs_call_center_sk" AS "id", "cs"."sales" AS "sales", "cr"."returns1" AS "returns1", "cs"."profit" - "cr"."profit_loss" AS "profit" FROM "cs" AS "cs" CROSS JOIN "cr" AS "cr" UNION ALL SELECT 'web channel' AS "channel", "ws"."wp_web_page_sk" AS "id", "ws"."sales" AS "sales", COALESCE("wr"."returns1", 0) AS "returns1", "ws"."profit" - COALESCE("wr"."profit_loss", 0) AS "profit" FROM "ws" AS "ws" LEFT JOIN "wr" AS "wr" ON "wr"."wp_web_page_sk" = "ws"."wp_web_page_sk" ) SELECT "x"."channel" AS "channel", "x"."id" AS "id", SUM("x"."sales") AS "sales", SUM("x"."returns1") AS "returns1", SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY ROLLUP ( "x"."channel", "x"."id" ) ORDER BY "channel", "id" LIMIT 100; -------------------------------------- -- TPC-DS 78 -------------------------------------- # execute: true WITH ws AS (SELECT d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, Sum(ws_quantity) ws_qty, Sum(ws_wholesale_cost) ws_wc, Sum(ws_sales_price) ws_sp FROM web_sales LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk JOIN date_dim ON ws_sold_date_sk = d_date_sk WHERE wr_order_number IS NULL GROUP BY d_year, ws_item_sk, ws_bill_customer_sk), cs AS (SELECT d_year AS cs_sold_year, cs_item_sk, cs_bill_customer_sk cs_customer_sk, Sum(cs_quantity) cs_qty, Sum(cs_wholesale_cost) cs_wc, Sum(cs_sales_price) cs_sp FROM catalog_sales LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk JOIN date_dim ON cs_sold_date_sk = d_date_sk WHERE cr_order_number IS NULL GROUP BY d_year, cs_item_sk, cs_bill_customer_sk), ss AS (SELECT d_year AS ss_sold_year, 
ss_item_sk, ss_customer_sk, Sum(ss_quantity) ss_qty, Sum(ss_wholesale_cost) ss_wc, Sum(ss_sales_price) ss_sp FROM store_sales LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk JOIN date_dim ON ss_sold_date_sk = d_date_sk WHERE sr_ticket_number IS NULL GROUP BY d_year, ss_item_sk, ss_customer_sk) SELECT ss_item_sk, Round(ss_qty / ( COALESCE(ws_qty + cs_qty, 1) ), 2) ratio, ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, COALESCE(ws_qty, 0) + COALESCE(cs_qty, 0) other_chan_qty, COALESCE(ws_wc, 0) + COALESCE(cs_wc, 0) other_chan_wholesale_cost, COALESCE(ws_sp, 0) + COALESCE(cs_sp, 0) other_chan_sales_price FROM ss LEFT JOIN ws ON ( ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk ) LEFT JOIN cs ON ( cs_sold_year = ss_sold_year AND cs_item_sk = cs_item_sk AND cs_customer_sk = ss_customer_sk ) WHERE COALESCE(ws_qty, 0) > 0 AND COALESCE(cs_qty, 0) > 0 AND ss_sold_year = 1999 ORDER BY ss_item_sk, ss_qty DESC, ss_wc DESC, ss_sp DESC, other_chan_qty, other_chan_wholesale_cost, other_chan_sales_price, Round(ss_qty / ( COALESCE(ws_qty + cs_qty, 1) ), 2) LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_year" AS "d_year" FROM "date_dim" AS "date_dim" ), "ws" AS ( SELECT "date_dim"."d_year" AS "ws_sold_year", "web_sales"."ws_item_sk" AS "ws_item_sk", "web_sales"."ws_bill_customer_sk" AS "ws_customer_sk", SUM("web_sales"."ws_quantity") AS "ws_qty", SUM("web_sales"."ws_wholesale_cost") AS "ws_wc", SUM("web_sales"."ws_sales_price") AS "ws_sp" FROM "web_sales" AS "web_sales" LEFT JOIN "web_returns" AS "web_returns" ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk" AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" WHERE "web_returns"."wr_order_number" IS NULL GROUP BY "date_dim"."d_year", "web_sales"."ws_item_sk", 
"web_sales"."ws_bill_customer_sk" ), "cs" AS ( SELECT "date_dim"."d_year" AS "cs_sold_year", "catalog_sales"."cs_item_sk" AS "cs_item_sk", "catalog_sales"."cs_bill_customer_sk" AS "cs_customer_sk", SUM("catalog_sales"."cs_quantity") AS "cs_qty", SUM("catalog_sales"."cs_wholesale_cost") AS "cs_wc", SUM("catalog_sales"."cs_sales_price") AS "cs_sp" FROM "catalog_sales" AS "catalog_sales" LEFT JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" WHERE "catalog_returns"."cr_order_number" IS NULL GROUP BY "date_dim"."d_year", "catalog_sales"."cs_item_sk", "catalog_sales"."cs_bill_customer_sk" ), "ss" AS ( SELECT "date_dim"."d_year" AS "ss_sold_year", "store_sales"."ss_item_sk" AS "ss_item_sk", "store_sales"."ss_customer_sk" AS "ss_customer_sk", SUM("store_sales"."ss_quantity") AS "ss_qty", SUM("store_sales"."ss_wholesale_cost") AS "ss_wc", SUM("store_sales"."ss_sales_price") AS "ss_sp" FROM "store_sales" AS "store_sales" LEFT JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" WHERE "store_returns"."sr_ticket_number" IS NULL GROUP BY "date_dim"."d_year", "store_sales"."ss_item_sk", "store_sales"."ss_customer_sk" ) SELECT "ss"."ss_item_sk" AS "ss_item_sk", ROUND("ss"."ss_qty" / COALESCE("ws"."ws_qty" + "cs"."cs_qty", 1), 2) AS "ratio", "ss"."ss_qty" AS "store_qty", "ss"."ss_wc" AS "store_wholesale_cost", "ss"."ss_sp" AS "store_sales_price", COALESCE("ws"."ws_qty", 0) + COALESCE("cs"."cs_qty", 0) AS "other_chan_qty", COALESCE("ws"."ws_wc", 0) + COALESCE("cs"."cs_wc", 0) AS "other_chan_wholesale_cost", COALESCE("ws"."ws_sp", 0) + 
COALESCE("cs"."cs_sp", 0) AS "other_chan_sales_price" FROM "ss" AS "ss" LEFT JOIN "ws" AS "ws" ON "ss"."ss_customer_sk" = "ws"."ws_customer_sk" AND "ss"."ss_item_sk" = "ws"."ws_item_sk" AND "ss"."ss_sold_year" = "ws"."ws_sold_year" LEFT JOIN "cs" AS "cs" ON "cs"."cs_customer_sk" = "ss"."ss_customer_sk" AND "cs"."cs_item_sk" = "cs"."cs_item_sk" AND "cs"."cs_sold_year" = "ss"."ss_sold_year" WHERE "ss"."ss_sold_year" = 1999 AND COALESCE("cs"."cs_qty", 0) > 0 AND COALESCE("ws"."ws_qty", 0) > 0 ORDER BY "ss_item_sk", "ss"."ss_qty" DESC, "ss"."ss_wc" DESC, "ss"."ss_sp" DESC, "other_chan_qty", "other_chan_wholesale_cost", "other_chan_sales_price", ROUND("ss"."ss_qty" / COALESCE("ws"."ws_qty" + "cs"."cs_qty", 1), 2) LIMIT 100; -------------------------------------- -- TPC-DS 79 -------------------------------------- # execute: true SELECT c_last_name, c_first_name, SUBSTRING(s_city, 1, 30) AS "_col_2", ss_ticket_number, amt, profit FROM (SELECT ss_ticket_number, ss_customer_sk, store.s_city, Sum(ss_coupon_amt) amt, Sum(ss_net_profit) profit FROM store_sales, date_dim, store, household_demographics WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk AND store_sales.ss_store_sk = store.s_store_sk AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk AND ( household_demographics.hd_dep_count = 8 OR household_demographics.hd_vehicle_count > 4 ) AND date_dim.d_dow = 1 AND date_dim.d_year IN ( 2000, 2000 + 1, 2000 + 2 ) AND store.s_number_employees BETWEEN 200 AND 295 GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer WHERE ss_customer_sk = c_customer_sk ORDER BY c_last_name, c_first_name, SUBSTRING(s_city, 1, 30), profit LIMIT 100; WITH "ms" AS ( SELECT "store_sales"."ss_ticket_number" AS "ss_ticket_number", "store_sales"."ss_customer_sk" AS "ss_customer_sk", "store"."s_city" AS "s_city", SUM("store_sales"."ss_coupon_amt") AS "amt", SUM("store_sales"."ss_net_profit") AS "profit" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS 
"date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_dow" = 1 AND "date_dim"."d_year" IN (2000, 2001, 2002) JOIN "household_demographics" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND ( "household_demographics"."hd_dep_count" = 8 OR "household_demographics"."hd_vehicle_count" > 4 ) JOIN "store" AS "store" ON "store"."s_number_employees" <= 295 AND "store"."s_number_employees" >= 200 AND "store"."s_store_sk" = "store_sales"."ss_store_sk" GROUP BY "store_sales"."ss_ticket_number", "store_sales"."ss_customer_sk", "store_sales"."ss_addr_sk", "store"."s_city" ) SELECT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", SUBSTRING("ms"."s_city", 1, 30) AS "_col_2", "ms"."ss_ticket_number" AS "ss_ticket_number", "ms"."amt" AS "amt", "ms"."profit" AS "profit" FROM "ms" AS "ms" JOIN "customer" AS "customer" ON "customer"."c_customer_sk" = "ms"."ss_customer_sk" ORDER BY "c_last_name", "c_first_name", SUBSTRING("ms"."s_city", 1, 30), "profit" LIMIT 100; -------------------------------------- -- TPC-DS 80 -------------------------------------- WITH ssr AS ( SELECT s_store_id AS store_id, Sum(ss_ext_sales_price) AS sales, Sum(COALESCE(sr_return_amt, 0)) AS returns1, Sum(ss_net_profit - COALESCE(sr_net_loss, 0)) AS profit FROM store_sales LEFT OUTER JOIN store_returns ON ( ss_item_sk = sr_item_sk AND ss_ticket_number = sr_ticket_number), date_dim, store, item, promotion WHERE ss_sold_date_sk = d_date_sk AND d_date BETWEEN Cast('2000-08-26' AS DATE) AND ( Cast('2000-08-26' AS DATE) + INTERVAL '30' day) AND ss_store_sk = s_store_sk AND ss_item_sk = i_item_sk AND i_current_price > 50 AND ss_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY s_store_id) , csr AS ( SELECT cp_catalog_page_id AS catalog_page_id, sum(cs_ext_sales_price) AS sales, sum(COALESCE(cr_return_amount, 0)) AS returns1, sum(cs_net_profit - COALESCE(cr_net_loss, 0)) AS profit FROM 
catalog_sales LEFT OUTER JOIN catalog_returns ON ( cs_item_sk = cr_item_sk AND cs_order_number = cr_order_number), date_dim, catalog_page, item, promotion WHERE cs_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-26' AS date) AND ( cast('2000-08-26' AS date) + INTERVAL '30' day) AND cs_catalog_page_sk = cp_catalog_page_sk AND cs_item_sk = i_item_sk AND i_current_price > 50 AND cs_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY cp_catalog_page_id) , wsr AS ( SELECT web_site_id, sum(ws_ext_sales_price) AS sales, sum(COALESCE(wr_return_amt, 0)) AS returns1, sum(ws_net_profit - COALESCE(wr_net_loss, 0)) AS profit FROM web_sales LEFT OUTER JOIN web_returns ON ( ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number), date_dim, web_site, item, promotion WHERE ws_sold_date_sk = d_date_sk AND d_date BETWEEN cast('2000-08-26' AS date) AND ( cast('2000-08-26' AS date) + INTERVAL '30' day) AND ws_web_site_sk = web_site_sk AND ws_item_sk = i_item_sk AND i_current_price > 50 AND ws_promo_sk = p_promo_sk AND p_channel_tv = 'N' GROUP BY web_site_id) SELECT channel , id , sum(sales) AS sales , sum(returns1) AS returns1 , sum(profit) AS profit FROM ( SELECT 'store channel' AS channel , 'store' || store_id AS id , sales , returns1 , profit FROM ssr UNION ALL SELECT 'catalog channel' AS channel , 'catalog_page' || catalog_page_id AS id , sales , returns1 , profit FROM csr UNION ALL SELECT 'web channel' AS channel , 'web_site' || web_site_id AS id , sales , returns1 , profit FROM wsr ) x GROUP BY rollup (channel, id) ORDER BY channel , id LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE CAST("date_dim"."d_date" AS DATE) <= CAST('2000-09-25' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-08-26' AS DATE) ), "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_current_price" AS "i_current_price" FROM "item" AS "item" WHERE 
"item"."i_current_price" > 50 ), "promotion_2" AS ( SELECT "promotion"."p_promo_sk" AS "p_promo_sk", "promotion"."p_channel_tv" AS "p_channel_tv" FROM "promotion" AS "promotion" WHERE "promotion"."p_channel_tv" = 'N' ), "ssr" AS ( SELECT "store"."s_store_id" AS "store_id", SUM("store_sales"."ss_ext_sales_price") AS "sales", SUM(COALESCE("store_returns"."sr_return_amt", 0)) AS "returns1", SUM("store_sales"."ss_net_profit" - COALESCE("store_returns"."sr_net_loss", 0)) AS "profit" FROM "store_sales" AS "store_sales" LEFT JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "promotion_2" AS "promotion" ON "promotion"."p_promo_sk" = "store_sales"."ss_promo_sk" GROUP BY "store"."s_store_id" ), "csr" AS ( SELECT "catalog_page"."cp_catalog_page_id" AS "catalog_page_id", SUM("catalog_sales"."cs_ext_sales_price") AS "sales", SUM(COALESCE("catalog_returns"."cr_return_amount", 0)) AS "returns1", SUM("catalog_sales"."cs_net_profit" - COALESCE("catalog_returns"."cr_net_loss", 0)) AS "profit" FROM "catalog_sales" AS "catalog_sales" LEFT JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_item_sk" = "catalog_sales"."cs_item_sk" AND "catalog_returns"."cr_order_number" = "catalog_sales"."cs_order_number" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" JOIN "catalog_page" AS "catalog_page" ON "catalog_page"."cp_catalog_page_sk" = "catalog_sales"."cs_catalog_page_sk" JOIN "item_2" AS "item" ON "catalog_sales"."cs_item_sk" = "item"."i_item_sk" JOIN "promotion_2" AS "promotion" ON "catalog_sales"."cs_promo_sk" = "promotion"."p_promo_sk" GROUP BY 
"catalog_page"."cp_catalog_page_id" ), "wsr" AS ( SELECT "web_site"."web_site_id" AS "web_site_id", SUM("web_sales"."ws_ext_sales_price") AS "sales", SUM(COALESCE("web_returns"."wr_return_amt", 0)) AS "returns1", SUM("web_sales"."ws_net_profit" - COALESCE("web_returns"."wr_net_loss", 0)) AS "profit" FROM "web_sales" AS "web_sales" LEFT JOIN "web_returns" AS "web_returns" ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk" AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" JOIN "web_site" AS "web_site" ON "web_sales"."ws_web_site_sk" = "web_site"."web_site_sk" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" JOIN "promotion_2" AS "promotion" ON "promotion"."p_promo_sk" = "web_sales"."ws_promo_sk" GROUP BY "web_site"."web_site_id" ), "x" AS ( SELECT 'store channel' AS "channel", 'store' || "ssr"."store_id" AS "id", "ssr"."sales" AS "sales", "ssr"."returns1" AS "returns1", "ssr"."profit" AS "profit" FROM "ssr" AS "ssr" UNION ALL SELECT 'catalog channel' AS "channel", 'catalog_page' || "csr"."catalog_page_id" AS "id", "csr"."sales" AS "sales", "csr"."returns1" AS "returns1", "csr"."profit" AS "profit" FROM "csr" AS "csr" UNION ALL SELECT 'web channel' AS "channel", 'web_site' || "wsr"."web_site_id" AS "id", "wsr"."sales" AS "sales", "wsr"."returns1" AS "returns1", "wsr"."profit" AS "profit" FROM "wsr" AS "wsr" ) SELECT "x"."channel" AS "channel", "x"."id" AS "id", SUM("x"."sales") AS "sales", SUM("x"."returns1") AS "returns1", SUM("x"."profit") AS "profit" FROM "x" AS "x" GROUP BY ROLLUP ( "x"."channel", "x"."id" ) ORDER BY "channel", "id" LIMIT 100; -------------------------------------- -- TPC-DS 81 -------------------------------------- # execute: true WITH customer_total_return AS (SELECT cr_returning_customer_sk AS ctr_customer_sk, ca_state AS ctr_state, Sum(cr_return_amt_inc_tax) AS ctr_total_return FROM catalog_returns, 
date_dim, customer_address WHERE cr_returned_date_sk = d_date_sk AND d_year = 1999 AND cr_returning_addr_sk = ca_address_sk GROUP BY cr_returning_customer_sk, ca_state) SELECT c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type, ctr_total_return FROM customer_total_return ctr1, customer_address, customer WHERE ctr1.ctr_total_return > (SELECT Avg(ctr_total_return) * 1.2 FROM customer_total_return ctr2 WHERE ctr1.ctr_state = ctr2.ctr_state) AND ca_address_sk = c_current_addr_sk AND ca_state = 'TX' AND ctr1.ctr_customer_sk = c_customer_sk ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type, ctr_total_return LIMIT 100; WITH "customer_total_return" AS ( SELECT "catalog_returns"."cr_returning_customer_sk" AS "ctr_customer_sk", "customer_address"."ca_state" AS "ctr_state", SUM("catalog_returns"."cr_return_amt_inc_tax") AS "ctr_total_return" FROM "catalog_returns" AS "catalog_returns" JOIN "customer_address" AS "customer_address" ON "catalog_returns"."cr_returning_addr_sk" = "customer_address"."ca_address_sk" JOIN "date_dim" AS "date_dim" ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_year" = 1999 GROUP BY "catalog_returns"."cr_returning_customer_sk", "customer_address"."ca_state" ), "_u_0" AS ( SELECT AVG("ctr2"."ctr_total_return") * 1.2 AS "_col_0", "ctr2"."ctr_state" AS "_u_1" FROM "customer_total_return" AS "ctr2" GROUP BY "ctr2"."ctr_state" ) SELECT "customer"."c_customer_id" AS "c_customer_id", "customer"."c_salutation" AS "c_salutation", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name", "customer_address"."ca_street_number" AS "ca_street_number", "customer_address"."ca_street_name" 
AS "ca_street_name", "customer_address"."ca_street_type" AS "ca_street_type", "customer_address"."ca_suite_number" AS "ca_suite_number", "customer_address"."ca_city" AS "ca_city", "customer_address"."ca_county" AS "ca_county", "customer_address"."ca_state" AS "ca_state", "customer_address"."ca_zip" AS "ca_zip", "customer_address"."ca_country" AS "ca_country", "customer_address"."ca_gmt_offset" AS "ca_gmt_offset", "customer_address"."ca_location_type" AS "ca_location_type", "ctr1"."ctr_total_return" AS "ctr_total_return" FROM "customer_total_return" AS "ctr1" JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_state" = 'TX' JOIN "customer" AS "customer" ON "ctr1"."ctr_customer_sk" = "customer"."c_customer_sk" AND "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "ctr1"."ctr_state" WHERE "_u_0"."_col_0" < "ctr1"."ctr_total_return" ORDER BY "c_customer_id", "c_salutation", "c_first_name", "c_last_name", "ca_street_number", "ca_street_name", "ca_street_type", "ca_suite_number", "ca_city", "ca_county", "ca_state", "ca_zip", "ca_country", "ca_gmt_offset", "ca_location_type", "ctr_total_return" LIMIT 100; -------------------------------------- -- TPC-DS 82 -------------------------------------- SELECT i_item_id , i_item_desc , i_current_price FROM item, inventory, date_dim, store_sales WHERE i_current_price BETWEEN 63 AND 63+30 AND inv_item_sk = i_item_sk AND d_date_sk=inv_date_sk AND d_date BETWEEN Cast('1998-04-27' AS DATE) AND ( Cast('1998-04-27' AS DATE) + INTERVAL '60' day) AND i_manufact_id IN (57,293,427,320) AND inv_quantity_on_hand BETWEEN 100 AND 500 AND ss_item_sk = i_item_sk GROUP BY i_item_id, i_item_desc, i_current_price ORDER BY i_item_id LIMIT 100; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "item"."i_current_price" AS "i_current_price" FROM "item" AS "item" JOIN "inventory" AS "inventory" ON "inventory"."inv_item_sk" = 
"item"."i_item_sk" AND "inventory"."inv_quantity_on_hand" <= 500 AND "inventory"."inv_quantity_on_hand" >= 100 JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "inventory"."inv_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('1998-06-26' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('1998-04-27' AS DATE) WHERE "item"."i_current_price" <= 93 AND "item"."i_current_price" >= 63 AND "item"."i_manufact_id" IN (57, 293, 427, 320) GROUP BY "item"."i_item_id", "item"."i_item_desc", "item"."i_current_price" ORDER BY "i_item_id" LIMIT 100; -------------------------------------- -- TPC-DS 83 -------------------------------------- # execute: true WITH sr_items AS (SELECT i_item_id item_id, Sum(sr_return_quantity) sr_item_qty FROM store_returns, item, date_dim WHERE sr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ( '1999-06-30', '1999-08-28', '1999-11-18' ))) AND sr_returned_date_sk = d_date_sk GROUP BY i_item_id), cr_items AS (SELECT i_item_id item_id, Sum(cr_return_quantity) cr_item_qty FROM catalog_returns, item, date_dim WHERE cr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ( '1999-06-30', '1999-08-28', '1999-11-18' ))) AND cr_returned_date_sk = d_date_sk GROUP BY i_item_id), wr_items AS (SELECT i_item_id item_id, Sum(wr_return_quantity) wr_item_qty FROM web_returns, item, date_dim WHERE wr_item_sk = i_item_sk AND d_date IN (SELECT d_date FROM date_dim WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim WHERE d_date IN ( '1999-06-30', '1999-08-28', '1999-11-18' ))) AND wr_returned_date_sk = d_date_sk GROUP BY i_item_id) SELECT sr_items.item_id, sr_item_qty, sr_item_qty / ( sr_item_qty + cr_item_qty + wr_item_qty ) / 3.0 * 100 sr_dev, cr_item_qty, cr_item_qty / ( sr_item_qty + cr_item_qty + 
wr_item_qty ) / 3.0 * 100 cr_dev, wr_item_qty, wr_item_qty / ( sr_item_qty + cr_item_qty + wr_item_qty ) / 3.0 * 100 wr_dev, ( sr_item_qty + cr_item_qty + wr_item_qty ) / 3.0 average FROM sr_items, cr_items, wr_items WHERE sr_items.item_id = cr_items.item_id AND sr_items.item_id = wr_items.item_id ORDER BY sr_items.item_id, sr_item_qty LIMIT 100; WITH "item_2" AS ( SELECT "item"."i_item_sk" AS "i_item_sk", "item"."i_item_id" AS "i_item_id" FROM "item" AS "item" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" ), "_u_0" AS ( SELECT "date_dim"."d_week_seq" AS "d_week_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" IN ('1999-06-30', '1999-08-28', '1999-11-18') GROUP BY "date_dim"."d_week_seq" ), "_u_1" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."d_week_seq" = "date_dim"."d_week_seq" WHERE NOT "_u_0"."d_week_seq" IS NULL GROUP BY "date_dim"."d_date" ), "sr_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("store_returns"."sr_return_quantity") AS "sr_item_qty" FROM "store_returns" AS "store_returns" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "store_returns"."sr_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_returns"."sr_returned_date_sk" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."d_date" = "date_dim"."d_date" WHERE NOT "_u_1"."d_date" IS NULL GROUP BY "item"."i_item_id" ), "_u_3" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" LEFT JOIN "_u_0" AS "_u_2" ON "_u_2"."d_week_seq" = "date_dim"."d_week_seq" WHERE NOT "_u_2"."d_week_seq" IS NULL GROUP BY "date_dim"."d_date" ), "cr_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("catalog_returns"."cr_return_quantity") AS "cr_item_qty" FROM "catalog_returns" AS "catalog_returns" JOIN "item_2" AS "item" ON "catalog_returns"."cr_item_sk" = "item"."i_item_sk" JOIN "date_dim_2" AS "date_dim" ON 
"catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" LEFT JOIN "_u_3" AS "_u_3" ON "_u_3"."d_date" = "date_dim"."d_date" WHERE NOT "_u_3"."d_date" IS NULL GROUP BY "item"."i_item_id" ), "_u_5" AS ( SELECT "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" LEFT JOIN "_u_0" AS "_u_4" ON "_u_4"."d_week_seq" = "date_dim"."d_week_seq" WHERE NOT "_u_4"."d_week_seq" IS NULL GROUP BY "date_dim"."d_date" ), "wr_items" AS ( SELECT "item"."i_item_id" AS "item_id", SUM("web_returns"."wr_return_quantity") AS "wr_item_qty" FROM "web_returns" AS "web_returns" JOIN "item_2" AS "item" ON "item"."i_item_sk" = "web_returns"."wr_item_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_returns"."wr_returned_date_sk" LEFT JOIN "_u_5" AS "_u_5" ON "_u_5"."d_date" = "date_dim"."d_date" WHERE NOT "_u_5"."d_date" IS NULL GROUP BY "item"."i_item_id" ) SELECT "sr_items"."item_id" AS "item_id", "sr_items"."sr_item_qty" AS "sr_item_qty", "sr_items"."sr_item_qty" / ( "sr_items"."sr_item_qty" + "cr_items"."cr_item_qty" + "wr_items"."wr_item_qty" ) / 3.0 * 100 AS "sr_dev", "cr_items"."cr_item_qty" AS "cr_item_qty", "cr_items"."cr_item_qty" / ( "sr_items"."sr_item_qty" + "cr_items"."cr_item_qty" + "wr_items"."wr_item_qty" ) / 3.0 * 100 AS "cr_dev", "wr_items"."wr_item_qty" AS "wr_item_qty", "wr_items"."wr_item_qty" / ( "sr_items"."sr_item_qty" + "cr_items"."cr_item_qty" + "wr_items"."wr_item_qty" ) / 3.0 * 100 AS "wr_dev", ( "sr_items"."sr_item_qty" + "cr_items"."cr_item_qty" + "wr_items"."wr_item_qty" ) / 3.0 AS "average" FROM "sr_items" AS "sr_items" JOIN "cr_items" AS "cr_items" ON "cr_items"."item_id" = "sr_items"."item_id" JOIN "wr_items" AS "wr_items" ON "sr_items"."item_id" = "wr_items"."item_id" ORDER BY "sr_items"."item_id", "sr_item_qty" LIMIT 100; -------------------------------------- -- TPC-DS 84 -------------------------------------- # execute: true SELECT c_customer_id AS customer_id, c_last_name || ', ' || c_first_name AS customername FROM 
customer, customer_address, customer_demographics, household_demographics, income_band, store_returns WHERE ca_city = 'Green Acres' AND c_current_addr_sk = ca_address_sk AND ib_lower_bound >= 54986 AND ib_upper_bound <= 54986 + 50000 AND ib_income_band_sk = hd_income_band_sk AND cd_demo_sk = c_current_cdemo_sk AND hd_demo_sk = c_current_hdemo_sk AND sr_cdemo_sk = cd_demo_sk ORDER BY c_customer_id LIMIT 100; SELECT "customer"."c_customer_id" AS "customer_id", "customer"."c_last_name" || ', ' || "customer"."c_first_name" AS "customername" FROM "customer" AS "customer" JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" AND "customer_address"."ca_city" = 'Green Acres' JOIN "customer_demographics" AS "customer_demographics" ON "customer"."c_current_cdemo_sk" = "customer_demographics"."cd_demo_sk" JOIN "household_demographics" AS "household_demographics" ON "customer"."c_current_hdemo_sk" = "household_demographics"."hd_demo_sk" JOIN "income_band" AS "income_band" ON "household_demographics"."hd_income_band_sk" = "income_band"."ib_income_band_sk" AND "income_band"."ib_lower_bound" >= 54986 AND "income_band"."ib_upper_bound" <= 104986 JOIN "store_returns" AS "store_returns" ON "customer_demographics"."cd_demo_sk" = "store_returns"."sr_cdemo_sk" ORDER BY "customer"."c_customer_id" LIMIT 100; -------------------------------------- -- TPC-DS 85 -------------------------------------- # execute: true SELECT SUBSTRING(r_reason_desc, 1, 20) AS "_col_0", Avg(ws_quantity) AS "_col_1", Avg(wr_refunded_cash) AS "_col_2", Avg(wr_fee) AS "_col_3" FROM web_sales, web_returns, web_page, customer_demographics cd1, customer_demographics cd2, customer_address, date_dim, reason WHERE ws_web_page_sk = wp_web_page_sk AND ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number AND ws_sold_date_sk = d_date_sk AND d_year = 2001 AND cd1.cd_demo_sk = wr_refunded_cdemo_sk AND cd2.cd_demo_sk = wr_returning_cdemo_sk AND 
ca_address_sk = wr_refunded_addr_sk AND r_reason_sk = wr_reason_sk AND ( ( cd1.cd_marital_status = 'W' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = 'Primary' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 100.00 AND 150.00 ) OR ( cd1.cd_marital_status = 'D' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = 'Secondary' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 50.00 AND 100.00 ) OR ( cd1.cd_marital_status = 'M' AND cd1.cd_marital_status = cd2.cd_marital_status AND cd1.cd_education_status = 'Advanced Degree' AND cd1.cd_education_status = cd2.cd_education_status AND ws_sales_price BETWEEN 150.00 AND 200.00 ) ) AND ( ( ca_country = 'United States' AND ca_state IN ( 'KY', 'ME', 'IL' ) AND ws_net_profit BETWEEN 100 AND 200 ) OR ( ca_country = 'United States' AND ca_state IN ( 'OK', 'NE', 'MN' ) AND ws_net_profit BETWEEN 150 AND 300 ) OR ( ca_country = 'United States' AND ca_state IN ( 'FL', 'WI', 'KS' ) AND ws_net_profit BETWEEN 50 AND 250 ) ) GROUP BY r_reason_desc ORDER BY SUBSTRING(r_reason_desc, 1, 20), Avg(ws_quantity), Avg(wr_refunded_cash), Avg(wr_fee) LIMIT 100; SELECT SUBSTRING("reason"."r_reason_desc", 1, 20) AS "_col_0", AVG("web_sales"."ws_quantity") AS "_col_1", AVG("web_returns"."wr_refunded_cash") AS "_col_2", AVG("web_returns"."wr_fee") AS "_col_3" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "date_dim"."d_year" = 2001 JOIN "web_page" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk" JOIN "web_returns" AS "web_returns" ON "web_returns"."wr_item_sk" = "web_sales"."ws_item_sk" AND "web_returns"."wr_order_number" = "web_sales"."ws_order_number" JOIN "customer_demographics" AS "cd1" ON "cd1"."cd_demo_sk" = "web_returns"."wr_refunded_cdemo_sk" JOIN "customer_address" AS "customer_address" ON 
"customer_address"."ca_address_sk" = "web_returns"."wr_refunded_addr_sk" AND ( ( "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('FL', 'WI', 'KS') AND "web_sales"."ws_net_profit" <= 250 AND "web_sales"."ws_net_profit" >= 50 ) OR ( "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('KY', 'ME', 'IL') AND "web_sales"."ws_net_profit" <= 200 AND "web_sales"."ws_net_profit" >= 100 ) OR ( "customer_address"."ca_country" = 'United States' AND "customer_address"."ca_state" IN ('OK', 'NE', 'MN') AND "web_sales"."ws_net_profit" <= 300 AND "web_sales"."ws_net_profit" >= 150 ) ) JOIN "reason" AS "reason" ON "reason"."r_reason_sk" = "web_returns"."wr_reason_sk" JOIN "customer_demographics" AS "cd2" ON "cd2"."cd_demo_sk" = "web_returns"."wr_returning_cdemo_sk" AND ( ( "cd1"."cd_education_status" = "cd2"."cd_education_status" AND "cd1"."cd_education_status" = 'Advanced Degree' AND "cd1"."cd_marital_status" = "cd2"."cd_marital_status" AND "cd1"."cd_marital_status" = 'M' AND "web_sales"."ws_sales_price" <= 200.00 AND "web_sales"."ws_sales_price" >= 150.00 ) OR ( "cd1"."cd_education_status" = "cd2"."cd_education_status" AND "cd1"."cd_education_status" = 'Primary' AND "cd1"."cd_marital_status" = "cd2"."cd_marital_status" AND "cd1"."cd_marital_status" = 'W' AND "web_sales"."ws_sales_price" <= 150.00 AND "web_sales"."ws_sales_price" >= 100.00 ) OR ( "cd1"."cd_education_status" = "cd2"."cd_education_status" AND "cd1"."cd_education_status" = 'Secondary' AND "cd1"."cd_marital_status" = "cd2"."cd_marital_status" AND "cd1"."cd_marital_status" = 'D' AND "web_sales"."ws_sales_price" <= 100.00 AND "web_sales"."ws_sales_price" >= 50.00 ) ) GROUP BY "reason"."r_reason_desc" ORDER BY "_col_0", "_col_1", "_col_2", "_col_3" LIMIT 100; -------------------------------------- -- TPC-DS 86 -------------------------------------- SELECT Sum(ws_net_paid) AS total_sum, i_category, i_class, Grouping(i_category) + Grouping(i_class) 
AS lochierarchy, Rank() OVER ( partition BY Grouping(i_category)+Grouping(i_class), CASE WHEN Grouping( i_class) = 0 THEN i_category END ORDER BY Sum(ws_net_paid) DESC) AS rank_within_parent FROM web_sales, date_dim d1, item WHERE d1.d_month_seq BETWEEN 1183 AND 1183 + 11 AND d1.d_date_sk = ws_sold_date_sk AND i_item_sk = ws_item_sk GROUP BY rollup( i_category, i_class ) ORDER BY lochierarchy DESC, CASE WHEN lochierarchy = 0 THEN i_category END, rank_within_parent LIMIT 100; SELECT SUM("web_sales"."ws_net_paid") AS "total_sum", "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", GROUPING("item"."i_category") + GROUPING("item"."i_class") AS "lochierarchy", RANK() OVER ( PARTITION BY GROUPING("item"."i_category") + GROUPING("item"."i_class"), CASE WHEN GROUPING("item"."i_class") = 0 THEN "item"."i_category" END ORDER BY SUM("web_sales"."ws_net_paid") DESC ) AS "rank_within_parent" FROM "web_sales" AS "web_sales" JOIN "date_dim" AS "d1" ON "d1"."d_date_sk" = "web_sales"."ws_sold_date_sk" AND "d1"."d_month_seq" <= 1194 AND "d1"."d_month_seq" >= 1183 JOIN "item" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" GROUP BY ROLLUP ( "item"."i_category", "item"."i_class" ) ORDER BY "lochierarchy" DESC, CASE WHEN "lochierarchy" = 0 THEN "i_category" END, "rank_within_parent" LIMIT 100; -------------------------------------- -- TPC-DS 87 -------------------------------------- # execute: true select count(*) as "_col_0" from ((select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_customer_sk = customer.c_customer_sk and d_month_seq between 1188 and 1188+11) except (select distinct c_last_name, c_first_name, d_date from catalog_sales, date_dim, customer where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk and d_month_seq between 1188 and 1188+11) except (select distinct c_last_name, 
c_first_name, d_date from web_sales, date_dim, customer where web_sales.ws_sold_date_sk = date_dim.d_date_sk and web_sales.ws_bill_customer_sk = customer.c_customer_sk and d_month_seq between 1188 and 1188+11) ) cool_cust; WITH "customer_2" AS ( SELECT "customer"."c_customer_sk" AS "c_customer_sk", "customer"."c_first_name" AS "c_first_name", "customer"."c_last_name" AS "c_last_name" FROM "customer" AS "customer" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date", "date_dim"."d_month_seq" AS "d_month_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_month_seq" <= 1199 AND "date_dim"."d_month_seq" >= 1188 ), "cool_cust" AS ( ( SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "store_sales" AS "store_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "store_sales"."ss_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" ) EXCEPT ( SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "catalog_sales" AS "catalog_sales" JOIN "customer_2" AS "customer" ON "catalog_sales"."cs_bill_customer_sk" = "customer"."c_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" ) EXCEPT ( SELECT DISTINCT "customer"."c_last_name" AS "c_last_name", "customer"."c_first_name" AS "c_first_name", "date_dim"."d_date" AS "d_date" FROM "web_sales" AS "web_sales" JOIN "customer_2" AS "customer" ON "customer"."c_customer_sk" = "web_sales"."ws_bill_customer_sk" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" ) ) SELECT COUNT(*) AS "_col_0" FROM "cool_cust" AS "cool_cust"; -------------------------------------- -- TPC-DS 88 -------------------------------------- # execute: true select * from (select count(*) 
h8_30_to_9 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 8 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s1, (select count(*) h9_to_9_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 9 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s2, (select count(*) h9_30_to_10 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 9 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s3, (select count(*) h10_to_10_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 10 and 
time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s4, (select count(*) h10_30_to_11 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 10 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s5, (select count(*) h11_to_11_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 11 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s6, (select count(*) h11_30_to_12 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 11 and time_dim.t_minute >= 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or 
(household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s7, (select count(*) h12_to_12_30 from store_sales, household_demographics , time_dim, store where ss_sold_time_sk = time_dim.t_time_sk and ss_hdemo_sk = household_demographics.hd_demo_sk and ss_store_sk = s_store_sk and time_dim.t_hour = 12 and time_dim.t_minute < 30 and ((household_demographics.hd_dep_count = -1 and household_demographics.hd_vehicle_count<=-1+2) or (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or (household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2)) and store.s_store_name = 'ese') s8; WITH "store_sales_2" AS ( SELECT "store_sales"."ss_sold_time_sk" AS "ss_sold_time_sk", "store_sales"."ss_hdemo_sk" AS "ss_hdemo_sk", "store_sales"."ss_store_sk" AS "ss_store_sk" FROM "store_sales" AS "store_sales" ), "household_demographics_2" AS ( SELECT "household_demographics"."hd_demo_sk" AS "hd_demo_sk", "household_demographics"."hd_dep_count" AS "hd_dep_count", "household_demographics"."hd_vehicle_count" AS "hd_vehicle_count" FROM "household_demographics" AS "household_demographics" WHERE ( "household_demographics"."hd_dep_count" = -1 OR "household_demographics"."hd_dep_count" = 2 OR "household_demographics"."hd_dep_count" = 3 ) AND ( "household_demographics"."hd_dep_count" = 2 OR "household_demographics"."hd_dep_count" = 3 OR "household_demographics"."hd_vehicle_count" <= 1 ) AND ( "household_demographics"."hd_dep_count" = 3 OR "household_demographics"."hd_vehicle_count" <= 4 ) AND "household_demographics"."hd_vehicle_count" <= 5 ), "store_2" AS ( SELECT "store"."s_store_sk" AS "s_store_sk", "store"."s_store_name" AS "s_store_name" FROM "store" AS "store" WHERE "store"."s_store_name" = 'ese' ), "s1" AS ( SELECT COUNT(*) AS "h8_30_to_9" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON 
"household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 8 AND "time_dim"."t_minute" >= 30 ), "s2" AS ( SELECT COUNT(*) AS "h9_to_9_30" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 9 AND "time_dim"."t_minute" < 30 ), "s3" AS ( SELECT COUNT(*) AS "h9_30_to_10" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 9 AND "time_dim"."t_minute" >= 30 ), "s4" AS ( SELECT COUNT(*) AS "h10_to_10_30" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 10 AND "time_dim"."t_minute" < 30 ), "s5" AS ( SELECT COUNT(*) AS "h10_30_to_11" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = 
"time_dim"."t_time_sk" AND "time_dim"."t_hour" = 10 AND "time_dim"."t_minute" >= 30 ), "s6" AS ( SELECT COUNT(*) AS "h11_to_11_30" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 11 AND "time_dim"."t_minute" < 30 ), "s7" AS ( SELECT COUNT(*) AS "h11_30_to_12" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 11 AND "time_dim"."t_minute" >= 30 ), "s8" AS ( SELECT COUNT(*) AS "h12_to_12_30" FROM "store_sales_2" AS "store_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" JOIN "store_2" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 12 AND "time_dim"."t_minute" < 30 ) SELECT "s1"."h8_30_to_9" AS "h8_30_to_9", "s2"."h9_to_9_30" AS "h9_to_9_30", "s3"."h9_30_to_10" AS "h9_30_to_10", "s4"."h10_to_10_30" AS "h10_to_10_30", "s5"."h10_30_to_11" AS "h10_30_to_11", "s6"."h11_to_11_30" AS "h11_to_11_30", "s7"."h11_30_to_12" AS "h11_30_to_12", "s8"."h12_to_12_30" AS "h12_to_12_30" FROM "s1" AS "s1" CROSS JOIN "s2" AS "s2" CROSS JOIN "s3" AS "s3" CROSS JOIN "s4" AS "s4" CROSS JOIN "s5" AS "s5" CROSS JOIN "s6" AS "s6" CROSS JOIN "s7" AS "s7" CROSS JOIN "s8" AS "s8"; -------------------------------------- -- TPC-DS 89 
-------------------------------------- SELECT * FROM (SELECT i_category, i_class, i_brand, s_store_name, s_company_name, d_moy, Sum(ss_sales_price) sum_sales, Avg(Sum(ss_sales_price)) OVER ( partition BY i_category, i_brand, s_store_name, s_company_name ) avg_monthly_sales FROM item, store_sales, date_dim, store WHERE ss_item_sk = i_item_sk AND ss_sold_date_sk = d_date_sk AND ss_store_sk = s_store_sk AND d_year IN ( 2002 ) AND ( ( i_category IN ( 'Home', 'Men', 'Sports' ) AND i_class IN ( 'paint', 'accessories', 'fitness' ) ) OR ( i_category IN ( 'Shoes', 'Jewelry', 'Women' ) AND i_class IN ( 'mens', 'pendants', 'swimwear' ) ) ) GROUP BY i_category, i_class, i_brand, s_store_name, s_company_name, d_moy) tmp1 WHERE CASE WHEN ( avg_monthly_sales <> 0 ) THEN ( Abs(sum_sales - avg_monthly_sales) / avg_monthly_sales ) ELSE NULL END > 0.1 ORDER BY sum_sales - avg_monthly_sales, s_store_name LIMIT 100; WITH "tmp1" AS ( SELECT "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", "item"."i_brand" AS "i_brand", "store"."s_store_name" AS "s_store_name", "store"."s_company_name" AS "s_company_name", "date_dim"."d_moy" AS "d_moy", SUM("store_sales"."ss_sales_price") AS "sum_sales", AVG(SUM("store_sales"."ss_sales_price")) OVER ( PARTITION BY "item"."i_category", "item"."i_brand", "store"."s_store_name", "store"."s_company_name" ) AS "avg_monthly_sales" FROM "item" AS "item" JOIN "store_sales" AS "store_sales" ON "item"."i_item_sk" = "store_sales"."ss_item_sk" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND "date_dim"."d_year" IN (2002) JOIN "store" AS "store" ON "store"."s_store_sk" = "store_sales"."ss_store_sk" WHERE ( "item"."i_category" IN ('Home', 'Men', 'Sports') OR "item"."i_category" IN ('Shoes', 'Jewelry', 'Women') ) AND ( "item"."i_category" IN ('Home', 'Men', 'Sports') OR "item"."i_class" IN ('mens', 'pendants', 'swimwear') ) AND ( "item"."i_category" IN ('Shoes', 'Jewelry', 'Women') OR "item"."i_class" IN 
('paint', 'accessories', 'fitness') ) AND ( "item"."i_class" IN ('mens', 'pendants', 'swimwear') OR "item"."i_class" IN ('paint', 'accessories', 'fitness') ) GROUP BY "item"."i_category", "item"."i_class", "item"."i_brand", "store"."s_store_name", "store"."s_company_name", "date_dim"."d_moy" ) SELECT "tmp1"."i_category" AS "i_category", "tmp1"."i_class" AS "i_class", "tmp1"."i_brand" AS "i_brand", "tmp1"."s_store_name" AS "s_store_name", "tmp1"."s_company_name" AS "s_company_name", "tmp1"."d_moy" AS "d_moy", "tmp1"."sum_sales" AS "sum_sales", "tmp1"."avg_monthly_sales" AS "avg_monthly_sales" FROM "tmp1" AS "tmp1" WHERE CASE WHEN "tmp1"."avg_monthly_sales" <> 0 THEN ( ABS("tmp1"."sum_sales" - "tmp1"."avg_monthly_sales") / "tmp1"."avg_monthly_sales" ) ELSE NULL END > 0.1 ORDER BY "tmp1"."sum_sales" - "tmp1"."avg_monthly_sales", "tmp1"."s_store_name" LIMIT 100; -------------------------------------- -- TPC-DS 90 -------------------------------------- SELECT Cast(amc AS DECIMAL(15, 4)) / Cast(pmc AS DECIMAL(15, 4)) am_pm_ratio FROM (SELECT Count(*) amc FROM web_sales, household_demographics, time_dim, web_page WHERE ws_sold_time_sk = time_dim.t_time_sk AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk AND ws_web_page_sk = web_page.wp_web_page_sk AND time_dim.t_hour BETWEEN 12 AND 12 + 1 AND household_demographics.hd_dep_count = 8 AND web_page.wp_char_count BETWEEN 5000 AND 5200) at1, (SELECT Count(*) pmc FROM web_sales, household_demographics, time_dim, web_page WHERE ws_sold_time_sk = time_dim.t_time_sk AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk AND ws_web_page_sk = web_page.wp_web_page_sk AND time_dim.t_hour BETWEEN 20 AND 20 + 1 AND household_demographics.hd_dep_count = 8 AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt ORDER BY am_pm_ratio LIMIT 100; WITH "web_sales_2" AS ( SELECT "web_sales"."ws_sold_time_sk" AS "ws_sold_time_sk", "web_sales"."ws_ship_hdemo_sk" AS "ws_ship_hdemo_sk", "web_sales"."ws_web_page_sk" AS "ws_web_page_sk" FROM 
"web_sales" AS "web_sales" ), "household_demographics_2" AS ( SELECT "household_demographics"."hd_demo_sk" AS "hd_demo_sk", "household_demographics"."hd_dep_count" AS "hd_dep_count" FROM "household_demographics" AS "household_demographics" WHERE "household_demographics"."hd_dep_count" = 8 ), "web_page_2" AS ( SELECT "web_page"."wp_web_page_sk" AS "wp_web_page_sk", "web_page"."wp_char_count" AS "wp_char_count" FROM "web_page" AS "web_page" WHERE "web_page"."wp_char_count" <= 5200 AND "web_page"."wp_char_count" >= 5000 ), "at1" AS ( SELECT COUNT(*) AS "amc" FROM "web_sales_2" AS "web_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "web_sales"."ws_ship_hdemo_sk" JOIN "time_dim" AS "time_dim" ON "time_dim"."t_hour" <= 13 AND "time_dim"."t_hour" >= 12 AND "time_dim"."t_time_sk" = "web_sales"."ws_sold_time_sk" JOIN "web_page_2" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk" ), "pt" AS ( SELECT COUNT(*) AS "pmc" FROM "web_sales_2" AS "web_sales" JOIN "household_demographics_2" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "web_sales"."ws_ship_hdemo_sk" JOIN "time_dim" AS "time_dim" ON "time_dim"."t_hour" <= 21 AND "time_dim"."t_hour" >= 20 AND "time_dim"."t_time_sk" = "web_sales"."ws_sold_time_sk" JOIN "web_page_2" AS "web_page" ON "web_page"."wp_web_page_sk" = "web_sales"."ws_web_page_sk" ) SELECT CAST("at1"."amc" AS DECIMAL(15, 4)) / CAST("pt"."pmc" AS DECIMAL(15, 4)) AS "am_pm_ratio" FROM "at1" AS "at1" CROSS JOIN "pt" AS "pt" ORDER BY "am_pm_ratio" LIMIT 100; -------------------------------------- -- TPC-DS 91 -------------------------------------- # execute: true SELECT cc_call_center_id call_center, cc_name call_center_name, cc_manager manager, Sum(cr_net_loss) returns_loss FROM call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics WHERE cr_call_center_sk = cc_call_center_sk AND 
cr_returned_date_sk = d_date_sk AND cr_returning_customer_sk = c_customer_sk AND cd_demo_sk = c_current_cdemo_sk AND hd_demo_sk = c_current_hdemo_sk AND ca_address_sk = c_current_addr_sk AND d_year = 1999 AND d_moy = 12 AND ( ( cd_marital_status = 'M' AND cd_education_status = 'Unknown' ) OR ( cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree' ) ) AND hd_buy_potential LIKE 'Unknown%' AND ca_gmt_offset = -7 GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status ORDER BY Sum(cr_net_loss) DESC; SELECT "call_center"."cc_call_center_id" AS "call_center", "call_center"."cc_name" AS "call_center_name", "call_center"."cc_manager" AS "manager", SUM("catalog_returns"."cr_net_loss") AS "returns_loss" FROM "call_center" AS "call_center" JOIN "catalog_returns" AS "catalog_returns" ON "call_center"."cc_call_center_sk" = "catalog_returns"."cr_call_center_sk" JOIN "customer" AS "customer" ON "catalog_returns"."cr_returning_customer_sk" = "customer"."c_customer_sk" JOIN "date_dim" AS "date_dim" ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_moy" = 12 AND "date_dim"."d_year" = 1999 JOIN "customer_address" AS "customer_address" ON "customer"."c_current_addr_sk" = "customer_address"."ca_address_sk" AND "customer_address"."ca_gmt_offset" = -7 JOIN "customer_demographics" AS "customer_demographics" ON "customer"."c_current_cdemo_sk" = "customer_demographics"."cd_demo_sk" AND ( "customer_demographics"."cd_education_status" = 'Advanced Degree' OR "customer_demographics"."cd_education_status" = 'Unknown' ) AND ( "customer_demographics"."cd_education_status" = 'Advanced Degree' OR "customer_demographics"."cd_marital_status" = 'M' ) AND ( "customer_demographics"."cd_education_status" = 'Unknown' OR "customer_demographics"."cd_marital_status" = 'W' ) AND ( "customer_demographics"."cd_marital_status" = 'M' OR "customer_demographics"."cd_marital_status" = 'W' ) JOIN "household_demographics" AS 
"household_demographics" ON "customer"."c_current_hdemo_sk" = "household_demographics"."hd_demo_sk" AND "household_demographics"."hd_buy_potential" LIKE 'Unknown%' GROUP BY "call_center"."cc_call_center_id", "call_center"."cc_name", "call_center"."cc_manager", "customer_demographics"."cd_marital_status", "customer_demographics"."cd_education_status" ORDER BY "returns_loss" DESC; -------------------------------------- -- TPC-DS 92 -------------------------------------- SELECT Sum(ws_ext_discount_amt) AS "Excess Discount Amount" FROM web_sales , item , date_dim WHERE i_manufact_id = 718 AND i_item_sk = ws_item_sk AND d_date BETWEEN '2002-03-29' AND ( Cast('2002-03-29' AS DATE) + INTERVAL '90' day) AND d_date_sk = ws_sold_date_sk AND ws_ext_discount_amt > ( SELECT 1.3 * avg(ws_ext_discount_amt) FROM web_sales , date_dim WHERE ws_item_sk = i_item_sk AND d_date BETWEEN '2002-03-29' AND ( cast('2002-03-29' AS date) + INTERVAL '90' day) AND d_date_sk = ws_sold_date_sk ) ORDER BY sum(ws_ext_discount_amt) LIMIT 100; WITH "web_sales_2" AS ( SELECT "web_sales"."ws_sold_date_sk" AS "ws_sold_date_sk", "web_sales"."ws_item_sk" AS "ws_item_sk", "web_sales"."ws_ext_discount_amt" AS "ws_ext_discount_amt" FROM "web_sales" AS "web_sales" ), "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_date" AS "d_date" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_date" >= '2002-03-29' AND CAST("date_dim"."d_date" AS DATE) <= CAST('2002-06-27' AS DATE) ), "_u_0" AS ( SELECT 1.3 * AVG("web_sales"."ws_ext_discount_amt") AS "_col_0", "web_sales"."ws_item_sk" AS "_u_1" FROM "web_sales_2" AS "web_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" GROUP BY "web_sales"."ws_item_sk" ) SELECT SUM("web_sales"."ws_ext_discount_amt") AS "Excess Discount Amount" FROM "web_sales_2" AS "web_sales" JOIN "item" AS "item" ON "item"."i_item_sk" = "web_sales"."ws_item_sk" AND "item"."i_manufact_id" = 718 JOIN "date_dim_2" AS "date_dim" ON 
"date_dim"."d_date_sk" = "web_sales"."ws_sold_date_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "item"."i_item_sk" WHERE "_u_0"."_col_0" < "web_sales"."ws_ext_discount_amt" ORDER BY SUM("web_sales"."ws_ext_discount_amt") LIMIT 100; -------------------------------------- -- TPC-DS 93 -------------------------------------- # execute: true SELECT ss_customer_sk, Sum(act_sales) sumsales FROM (SELECT ss_item_sk, ss_ticket_number, ss_customer_sk, CASE WHEN sr_return_quantity IS NOT NULL THEN ( ss_quantity - sr_return_quantity ) * ss_sales_price ELSE ( ss_quantity * ss_sales_price ) END act_sales FROM store_sales LEFT OUTER JOIN store_returns ON ( sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number ), reason WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 38') t GROUP BY ss_customer_sk ORDER BY sumsales, ss_customer_sk LIMIT 100; SELECT "store_sales"."ss_customer_sk" AS "ss_customer_sk", SUM( CASE WHEN NOT "store_returns"."sr_return_quantity" IS NULL THEN ( "store_sales"."ss_quantity" - "store_returns"."sr_return_quantity" ) * "store_sales"."ss_sales_price" ELSE ( "store_sales"."ss_quantity" * "store_sales"."ss_sales_price" ) END ) AS "sumsales" FROM "store_sales" AS "store_sales" LEFT JOIN "store_returns" AS "store_returns" ON "store_returns"."sr_item_sk" = "store_sales"."ss_item_sk" AND "store_returns"."sr_ticket_number" = "store_sales"."ss_ticket_number" JOIN "reason" AS "reason" ON "reason"."r_reason_desc" = 'reason 38' WHERE "reason"."r_reason_sk" = "store_returns"."sr_reason_sk" GROUP BY "store_sales"."ss_customer_sk" ORDER BY "sumsales", "ss_customer_sk" LIMIT 100; -------------------------------------- -- TPC-DS 94 -------------------------------------- SELECT Count(DISTINCT ws_order_number) AS "order count" , Sum(ws_ext_ship_cost) AS "total shipping cost" , Sum(ws_net_profit) AS "total net profit" FROM web_sales ws1 , date_dim , customer_address , web_site WHERE d_date BETWEEN '2000-3-01' AND ( Cast('2000-3-01' AS DATE) + INTERVAL 
'60' day) AND ws1.ws_ship_date_sk = d_date_sk AND ws1.ws_ship_addr_sk = ca_address_sk AND ca_state = 'MT' AND ws1.ws_web_site_sk = web_site_sk AND web_company_name = 'pri' AND EXISTS ( SELECT * FROM web_sales ws2 WHERE ws1.ws_order_number = ws2.ws_order_number AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) AND NOT EXISTS ( SELECT * FROM web_returns wr1 WHERE ws1.ws_order_number = wr1.wr_order_number) ORDER BY count(DISTINCT ws_order_number) LIMIT 100; WITH "_u_0" AS ( SELECT "ws2"."ws_order_number" AS "_u_1", ARRAY_AGG("ws2"."ws_warehouse_sk") AS "_u_2" FROM "web_sales" AS "ws2" GROUP BY "ws2"."ws_order_number" ), "_u_3" AS ( SELECT "wr1"."wr_order_number" AS "_u_4" FROM "web_returns" AS "wr1" GROUP BY "wr1"."wr_order_number" ) SELECT COUNT(DISTINCT "ws1"."ws_order_number") AS "order count", SUM("ws1"."ws_ext_ship_cost") AS "total shipping cost", SUM("ws1"."ws_net_profit") AS "total net profit" FROM "web_sales" AS "ws1" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date" >= '2000-3-01' AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk" AND ( CAST('2000-3-01' AS DATE) + INTERVAL '60' DAY ) >= CAST("date_dim"."d_date" AS DATE) JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "ws1"."ws_ship_addr_sk" AND "customer_address"."ca_state" = 'MT' JOIN "web_site" AS "web_site" ON "web_site"."web_company_name" = 'pri' AND "web_site"."web_site_sk" = "ws1"."ws_web_site_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "ws1"."ws_order_number" LEFT JOIN "_u_3" AS "_u_3" ON "_u_3"."_u_4" = "ws1"."ws_order_number" WHERE "_u_3"."_u_4" IS NULL AND ARRAY_ANY("_u_0"."_u_2", "_x" -> "ws1"."ws_warehouse_sk" <> "_x") AND NOT "_u_0"."_u_1" IS NULL ORDER BY COUNT(DISTINCT "ws1"."ws_order_number") LIMIT 100; -------------------------------------- -- TPC-DS 95 -------------------------------------- WITH ws_wh AS ( SELECT ws1.ws_order_number, ws1.ws_warehouse_sk wh1, ws2.ws_warehouse_sk wh2 FROM web_sales ws1, web_sales ws2 WHERE ws1.ws_order_number = 
ws2.ws_order_number AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) SELECT Count(DISTINCT ws_order_number) AS "order count" , Sum(ws_ext_ship_cost) AS "total shipping cost" , Sum(ws_net_profit) AS "total net profit" FROM web_sales ws1 , date_dim , customer_address , web_site WHERE d_date BETWEEN '2000-4-01' AND ( Cast('2000-4-01' AS DATE) + INTERVAL '60' day) AND ws1.ws_ship_date_sk = d_date_sk AND ws1.ws_ship_addr_sk = ca_address_sk AND ca_state = 'IN' AND ws1.ws_web_site_sk = web_site_sk AND web_company_name = 'pri' AND ws1.ws_order_number IN ( SELECT ws_order_number FROM ws_wh) AND ws1.ws_order_number IN ( SELECT wr_order_number FROM web_returns, ws_wh WHERE wr_order_number = ws_wh.ws_order_number) ORDER BY count(DISTINCT ws_order_number) LIMIT 100; WITH "ws_wh" AS ( SELECT "ws1"."ws_order_number" AS "ws_order_number" FROM "web_sales" AS "ws1" JOIN "web_sales" AS "ws2" ON "ws1"."ws_order_number" = "ws2"."ws_order_number" AND "ws1"."ws_warehouse_sk" <> "ws2"."ws_warehouse_sk" ), "_u_0" AS ( SELECT "ws_wh"."ws_order_number" AS "ws_order_number" FROM "ws_wh" AS "ws_wh" GROUP BY "ws_wh"."ws_order_number" ), "_u_1" AS ( SELECT "web_returns"."wr_order_number" AS "wr_order_number" FROM "web_returns" AS "web_returns" JOIN "ws_wh" AS "ws_wh" ON "web_returns"."wr_order_number" = "ws_wh"."ws_order_number" GROUP BY "web_returns"."wr_order_number" ) SELECT COUNT(DISTINCT "ws1"."ws_order_number") AS "order count", SUM("ws1"."ws_ext_ship_cost") AS "total shipping cost", SUM("ws1"."ws_net_profit") AS "total net profit" FROM "web_sales" AS "ws1" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date" >= '2000-4-01' AND "date_dim"."d_date_sk" = "ws1"."ws_ship_date_sk" AND ( CAST('2000-4-01' AS DATE) + INTERVAL '60' DAY ) >= CAST("date_dim"."d_date" AS DATE) JOIN "customer_address" AS "customer_address" ON "customer_address"."ca_address_sk" = "ws1"."ws_ship_addr_sk" AND "customer_address"."ca_state" = 'IN' JOIN "web_site" AS "web_site" ON "web_site"."web_company_name" = 'pri' AND 
"web_site"."web_site_sk" = "ws1"."ws_web_site_sk" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."ws_order_number" = "ws1"."ws_order_number" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."wr_order_number" = "ws1"."ws_order_number" WHERE NOT "_u_0"."ws_order_number" IS NULL AND NOT "_u_1"."wr_order_number" IS NULL ORDER BY COUNT(DISTINCT "ws1"."ws_order_number") LIMIT 100; -------------------------------------- -- TPC-DS 96 -------------------------------------- # execute: true SELECT Count(*) AS "_col_0" FROM store_sales, household_demographics, time_dim, store WHERE ss_sold_time_sk = time_dim.t_time_sk AND ss_hdemo_sk = household_demographics.hd_demo_sk AND ss_store_sk = s_store_sk AND time_dim.t_hour = 15 AND time_dim.t_minute >= 30 AND household_demographics.hd_dep_count = 7 AND store.s_store_name = 'ese' ORDER BY Count(*) LIMIT 100; SELECT COUNT(*) AS "_col_0" FROM "store_sales" AS "store_sales" JOIN "household_demographics" AS "household_demographics" ON "household_demographics"."hd_demo_sk" = "store_sales"."ss_hdemo_sk" AND "household_demographics"."hd_dep_count" = 7 JOIN "store" AS "store" ON "store"."s_store_name" = 'ese' AND "store"."s_store_sk" = "store_sales"."ss_store_sk" JOIN "time_dim" AS "time_dim" ON "store_sales"."ss_sold_time_sk" = "time_dim"."t_time_sk" AND "time_dim"."t_hour" = 15 AND "time_dim"."t_minute" >= 30 ORDER BY COUNT(*) LIMIT 100; -------------------------------------- -- TPC-DS 97 -------------------------------------- # execute: true WITH ssci AS (SELECT ss_customer_sk customer_sk, ss_item_sk item_sk FROM store_sales, date_dim WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1196 AND 1196 + 11 GROUP BY ss_customer_sk, ss_item_sk), csci AS (SELECT cs_bill_customer_sk customer_sk, cs_item_sk item_sk FROM catalog_sales, date_dim WHERE cs_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1196 AND 1196 + 11 GROUP BY cs_bill_customer_sk, cs_item_sk) SELECT Sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL THEN 1 ELSE 0 END) 
store_only, Sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL THEN 1 ELSE 0 END) catalog_only, Sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL THEN 1 ELSE 0 END) store_and_catalog FROM ssci FULL OUTER JOIN csci ON ( ssci.customer_sk = csci.customer_sk AND ssci.item_sk = csci.item_sk ) LIMIT 100; WITH "date_dim_2" AS ( SELECT "date_dim"."d_date_sk" AS "d_date_sk", "date_dim"."d_month_seq" AS "d_month_seq" FROM "date_dim" AS "date_dim" WHERE "date_dim"."d_month_seq" <= 1207 AND "date_dim"."d_month_seq" >= 1196 ), "ssci" AS ( SELECT "store_sales"."ss_customer_sk" AS "customer_sk", "store_sales"."ss_item_sk" AS "item_sk" FROM "store_sales" AS "store_sales" JOIN "date_dim_2" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" GROUP BY "store_sales"."ss_customer_sk", "store_sales"."ss_item_sk" ), "csci" AS ( SELECT "catalog_sales"."cs_bill_customer_sk" AS "customer_sk", "catalog_sales"."cs_item_sk" AS "item_sk" FROM "catalog_sales" AS "catalog_sales" JOIN "date_dim_2" AS "date_dim" ON "catalog_sales"."cs_sold_date_sk" = "date_dim"."d_date_sk" GROUP BY "catalog_sales"."cs_bill_customer_sk", "catalog_sales"."cs_item_sk" ) SELECT SUM( CASE WHEN "csci"."customer_sk" IS NULL AND NOT "ssci"."customer_sk" IS NULL THEN 1 ELSE 0 END ) AS "store_only", SUM( CASE WHEN "ssci"."customer_sk" IS NULL AND NOT "csci"."customer_sk" IS NULL THEN 1 ELSE 0 END ) AS "catalog_only", SUM( CASE WHEN NOT "csci"."customer_sk" IS NULL AND NOT "ssci"."customer_sk" IS NULL THEN 1 ELSE 0 END ) AS "store_and_catalog" FROM "ssci" AS "ssci" FULL JOIN "csci" AS "csci" ON "csci"."customer_sk" = "ssci"."customer_sk" AND "csci"."item_sk" = "ssci"."item_sk" LIMIT 100; -------------------------------------- -- TPC-DS 98 -------------------------------------- SELECT i_item_id, i_item_desc, i_category, i_class, i_current_price, Sum(ss_ext_sales_price) AS itemrevenue, Sum(ss_ext_sales_price) * 100 / Sum(Sum(ss_ext_sales_price)) OVER ( 
PARTITION BY i_class) AS revenueratio FROM store_sales, item, date_dim WHERE ss_item_sk = i_item_sk AND i_category IN ( 'Men', 'Home', 'Electronics' ) AND ss_sold_date_sk = d_date_sk AND d_date BETWEEN CAST('2000-05-18' AS DATE) AND ( CAST('2000-05-18' AS DATE) + INTERVAL '30' DAY ) GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio; SELECT "item"."i_item_id" AS "i_item_id", "item"."i_item_desc" AS "i_item_desc", "item"."i_category" AS "i_category", "item"."i_class" AS "i_class", "item"."i_current_price" AS "i_current_price", SUM("store_sales"."ss_ext_sales_price") AS "itemrevenue", SUM("store_sales"."ss_ext_sales_price") * 100 / SUM(SUM("store_sales"."ss_ext_sales_price")) OVER (PARTITION BY "item"."i_class") AS "revenueratio" FROM "store_sales" AS "store_sales" JOIN "date_dim" AS "date_dim" ON "date_dim"."d_date_sk" = "store_sales"."ss_sold_date_sk" AND CAST("date_dim"."d_date" AS DATE) <= CAST('2000-06-17' AS DATE) AND CAST("date_dim"."d_date" AS DATE) >= CAST('2000-05-18' AS DATE) JOIN "item" AS "item" ON "item"."i_category" IN ('Men', 'Home', 'Electronics') AND "item"."i_item_sk" = "store_sales"."ss_item_sk" GROUP BY "item"."i_item_id", "item"."i_item_desc", "item"."i_category", "item"."i_class", "item"."i_current_price" ORDER BY "i_category", "i_class", "i_item_id", "i_item_desc", "revenueratio"; -------------------------------------- -- TPC-DS 99 -------------------------------------- # execute: true SELECT SUBSTRING(w_warehouse_name, 1, 20) AS "_col_0", sm_type, cc_name, Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk <= 30 ) THEN 1 ELSE 0 END) AS "30 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 30 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 60 ) THEN 1 ELSE 0 END) AS "31-60 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 60 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 90 ) THEN 1 ELSE 0 END) AS "61-90 days", Sum(CASE WHEN ( cs_ship_date_sk - 
cs_sold_date_sk > 90 ) AND ( cs_ship_date_sk - cs_sold_date_sk <= 120 ) THEN 1 ELSE 0 END) AS "91-120 days", Sum(CASE WHEN ( cs_ship_date_sk - cs_sold_date_sk > 120 ) THEN 1 ELSE 0 END) AS ">120 days" FROM catalog_sales, warehouse, ship_mode, call_center, date_dim WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 AND cs_ship_date_sk = d_date_sk AND cs_warehouse_sk = w_warehouse_sk AND cs_ship_mode_sk = sm_ship_mode_sk AND cs_call_center_sk = cc_call_center_sk GROUP BY SUBSTRING(w_warehouse_name, 1, 20), sm_type, cc_name ORDER BY SUBSTRING(w_warehouse_name, 1, 20), sm_type, cc_name LIMIT 100; SELECT SUBSTRING("warehouse"."w_warehouse_name", 1, 20) AS "_col_0", "ship_mode"."sm_type" AS "sm_type", "call_center"."cc_name" AS "cc_name", SUM( CASE WHEN "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" <= 30 THEN 1 ELSE 0 END ) AS "30 days", SUM( CASE WHEN "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" <= 60 AND "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" > 30 THEN 1 ELSE 0 END ) AS "31-60 days", SUM( CASE WHEN "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" <= 90 AND "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" > 60 THEN 1 ELSE 0 END ) AS "61-90 days", SUM( CASE WHEN "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" <= 120 AND "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" > 90 THEN 1 ELSE 0 END ) AS "91-120 days", SUM( CASE WHEN "catalog_sales"."cs_ship_date_sk" - "catalog_sales"."cs_sold_date_sk" > 120 THEN 1 ELSE 0 END ) AS ">120 days" FROM "catalog_sales" AS "catalog_sales" JOIN "call_center" AS "call_center" ON "call_center"."cc_call_center_sk" = "catalog_sales"."cs_call_center_sk" JOIN "date_dim" AS "date_dim" ON "catalog_sales"."cs_ship_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_month_seq" <= 1211 AND "date_dim"."d_month_seq" >= 1200 JOIN "ship_mode" AS "ship_mode" ON "catalog_sales"."cs_ship_mode_sk" = 
"ship_mode"."sm_ship_mode_sk" JOIN "warehouse" AS "warehouse" ON "catalog_sales"."cs_warehouse_sk" = "warehouse"."w_warehouse_sk" GROUP BY SUBSTRING("warehouse"."w_warehouse_name", 1, 20), "ship_mode"."sm_type", "call_center"."cc_name" ORDER BY "_col_0", "sm_type", "cc_name" LIMIT 100; ================================================ FILE: tests/fixtures/optimizer/tpc-h/tpc-h.sql ================================================ -------------------------------------- -- TPC-H 1 -------------------------------------- select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where CAST(l_shipdate AS DATE) <= date '1998-12-01' - interval '90' day group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus; SELECT "lineitem"."l_returnflag" AS "l_returnflag", "lineitem"."l_linestatus" AS "l_linestatus", SUM("lineitem"."l_quantity") AS "sum_qty", SUM("lineitem"."l_extendedprice") AS "sum_base_price", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "sum_disc_price", SUM( "lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" ) * ( 1 + "lineitem"."l_tax" ) ) AS "sum_charge", AVG("lineitem"."l_quantity") AS "avg_qty", AVG("lineitem"."l_extendedprice") AS "avg_price", AVG("lineitem"."l_discount") AS "avg_disc", COUNT(*) AS "count_order" FROM "lineitem" AS "lineitem" WHERE CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1998-09-02' AS DATE) GROUP BY "lineitem"."l_returnflag", "lineitem"."l_linestatus" ORDER BY "l_returnflag", "l_linestatus"; -------------------------------------- -- TPC-H 2 -------------------------------------- select s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment from part, supplier, partsupp, 
nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and p_size = 15 and p_type like '%BRASS' and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' and ps_supplycost = ( select min(ps_supplycost) from partsupp, supplier, nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'EUROPE' ) order by s_acctbal desc, n_name, s_name, p_partkey limit 100; WITH "partsupp_2" AS ( SELECT "partsupp"."ps_partkey" AS "ps_partkey", "partsupp"."ps_suppkey" AS "ps_suppkey", "partsupp"."ps_supplycost" AS "ps_supplycost" FROM "partsupp" AS "partsupp" ), "region_2" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' ), "_u_0" AS ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", "partsupp"."ps_partkey" AS "_u_1" FROM "partsupp_2" AS "partsupp" JOIN "supplier" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" JOIN "region_2" AS "region" ON "nation"."n_regionkey" = "region"."r_regionkey" GROUP BY "partsupp"."ps_partkey" ) SELECT "supplier"."s_acctbal" AS "s_acctbal", "supplier"."s_name" AS "s_name", "nation"."n_name" AS "n_name", "part"."p_partkey" AS "p_partkey", "part"."p_mfgr" AS "p_mfgr", "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM "part" AS "part" CROSS JOIN "supplier" AS "supplier" JOIN "partsupp_2" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" AND "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" JOIN "region_2" AS "region" ON "nation"."n_regionkey" = "region"."r_regionkey" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "part"."p_partkey" WHERE "_u_0"."_col_0" = "partsupp"."ps_supplycost" AND 
"part"."p_size" = 15 AND "part"."p_type" LIKE '%BRASS' ORDER BY "s_acctbal" DESC, "n_name", "s_name", "p_partkey" LIMIT 100; -------------------------------------- -- TPC-H 3 -------------------------------------- select l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, CAST(o_orderdate AS STRING) AS o_orderdate, o_shippriority from customer, orders, lineitem where c_mktsegment = 'BUILDING' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < '1995-03-15' and l_shipdate > '1995-03-15' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate limit 10; SELECT "lineitem"."l_orderkey" AS "l_orderkey", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue", "orders"."o_orderdate" AS "o_orderdate", "orders"."o_shippriority" AS "o_shippriority" FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" AND "orders"."o_orderdate" < '1995-03-15' JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_shipdate" > '1995-03-15' WHERE "customer"."c_mktsegment" = 'BUILDING' GROUP BY "lineitem"."l_orderkey", "orders"."o_orderdate", "orders"."o_shippriority" ORDER BY "revenue" DESC, "o_orderdate" LIMIT 10; -------------------------------------- -- TPC-H 4 -------------------------------------- select o_orderpriority, count(*) as order_count from orders where CAST(o_orderdate AS DATE) >= date '1993-07-01' and CAST(o_orderdate AS DATE) < date '1993-07-01' + interval '3' month and exists ( select * from lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ) group by o_orderpriority order by o_orderpriority; WITH "_u_0" AS ( SELECT "lineitem"."l_orderkey" AS "l_orderkey" FROM "lineitem" AS "lineitem" WHERE "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" GROUP BY "lineitem"."l_orderkey" ) SELECT "orders"."o_orderpriority" AS "o_orderpriority", COUNT(*) AS "order_count" FROM "orders" AS 
"orders" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "orders"."o_orderkey" WHERE CAST("orders"."o_orderdate" AS DATE) < CAST('1993-10-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-07-01' AS DATE) AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "orders"."o_orderpriority" ORDER BY "o_orderpriority"; -------------------------------------- -- TPC-H 5 -------------------------------------- select n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from customer, orders, lineitem, supplier, nation, region where c_custkey = o_custkey and l_orderkey = o_orderkey and l_suppkey = s_suppkey and c_nationkey = s_nationkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'ASIA' and CAST(o_orderdate AS DATE) >= date '1994-01-01' and CAST(o_orderdate AS DATE) < date '1994-01-01' + interval '1' year group by n_name order by revenue desc; SELECT "nation"."n_name" AS "n_name", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue" FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" AND CAST("orders"."o_orderdate" AS DATE) < CAST('1995-01-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1994-01-01' AS DATE) JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" JOIN "supplier" AS "supplier" ON "customer"."c_nationkey" = "supplier"."s_nationkey" AND "lineitem"."l_suppkey" = "supplier"."s_suppkey" JOIN "nation" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" JOIN "region" AS "region" ON "nation"."n_regionkey" = "region"."r_regionkey" AND "region"."r_name" = 'ASIA' GROUP BY "nation"."n_name" ORDER BY "revenue" DESC; -------------------------------------- -- TPC-H 6 -------------------------------------- select sum(l_extendedprice * l_discount) as revenue from lineitem where CAST(l_shipdate AS DATE) >= date '1994-01-01' and CAST(l_shipdate AS DATE) < date '1994-01-01' + interval '1' year and 
l_discount between 0.06 - 0.01 and 0.06 + 0.01 and l_quantity < 24; SELECT SUM("lineitem"."l_extendedprice" * "lineitem"."l_discount") AS "revenue" FROM "lineitem" AS "lineitem" WHERE "lineitem"."l_discount" <= 0.07 AND "lineitem"."l_discount" >= 0.05 AND "lineitem"."l_quantity" < 24 AND CAST("lineitem"."l_shipdate" AS DATE) < CAST('1995-01-01' AS DATE) AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1994-01-01' AS DATE); -------------------------------------- -- TPC-H 7 -------------------------------------- select supp_nation, cust_nation, l_year, sum(volume) as revenue from ( select n1.n_name as supp_nation, n2.n_name as cust_nation, extract(year from cast(l_shipdate as date)) as l_year, l_extendedprice * (1 - l_discount) as volume from supplier, lineitem, orders, customer, nation n1, nation n2 where s_suppkey = l_suppkey and o_orderkey = l_orderkey and c_custkey = o_custkey and s_nationkey = n1.n_nationkey and c_nationkey = n2.n_nationkey and ( (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') ) and CAST(l_shipdate AS DATE) between date '1995-01-01' and date '1996-12-31' ) as shipping group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; SELECT "n1"."n_name" AS "supp_nation", "n2"."n_name" AS "cust_nation", EXTRACT(YEAR FROM CAST("lineitem"."l_shipdate" AS DATE)) AS "l_year", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue" FROM "supplier" AS "supplier" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_suppkey" = "supplier"."s_suppkey" AND CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1996-12-31' AS DATE) AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1995-01-01' AS DATE) JOIN "nation" AS "n1" ON ( "n1"."n_name" = 'FRANCE' OR "n1"."n_name" = 'GERMANY' ) AND "n1"."n_nationkey" = "supplier"."s_nationkey" JOIN "orders" AS "orders" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" JOIN "customer" AS "customer" ON "customer"."c_custkey" = 
"orders"."o_custkey" JOIN "nation" AS "n2" ON "customer"."c_nationkey" = "n2"."n_nationkey" AND ( "n1"."n_name" = 'FRANCE' OR "n2"."n_name" = 'FRANCE' ) AND ( "n1"."n_name" = 'GERMANY' OR "n2"."n_name" = 'GERMANY' ) AND ( "n2"."n_name" = 'FRANCE' OR "n2"."n_name" = 'GERMANY' ) GROUP BY "n1"."n_name", "n2"."n_name", EXTRACT(YEAR FROM CAST("lineitem"."l_shipdate" AS DATE)) ORDER BY "supp_nation", "cust_nation", "l_year"; -------------------------------------- -- TPC-H 8 -------------------------------------- select o_year, sum(case when nation = 'BRAZIL' then volume else 0 end) / sum(volume) as mkt_share from ( select extract(YEAR from cast(o_orderdate as date)) as o_year, l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from part, supplier, lineitem, orders, customer, nation n1, nation n2, region where p_partkey = l_partkey and s_suppkey = l_suppkey and l_orderkey = o_orderkey and o_custkey = c_custkey and c_nationkey = n1.n_nationkey and n1.n_regionkey = r_regionkey and r_name = 'AMERICA' and s_nationkey = n2.n_nationkey and CAST(o_orderdate AS DATE) between date '1995-01-01' and date '1996-12-31' and p_type = 'ECONOMY ANODIZED STEEL' ) as all_nations group by o_year order by o_year; SELECT EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year", SUM( CASE WHEN "n2"."n_name" = 'BRAZIL' THEN "lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" ) ELSE 0 END ) / SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "mkt_share" FROM "part" AS "part" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_partkey" = "part"."p_partkey" JOIN "orders" AS "orders" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND CAST("orders"."o_orderdate" AS DATE) <= CAST('1996-12-31' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1995-01-01' AS DATE) JOIN "supplier" AS "supplier" ON "lineitem"."l_suppkey" = "supplier"."s_suppkey" JOIN "customer" AS "customer" ON "customer"."c_custkey" = "orders"."o_custkey" JOIN "nation" AS 
"n2" ON "n2"."n_nationkey" = "supplier"."s_nationkey" JOIN "nation" AS "n1" ON "customer"."c_nationkey" = "n1"."n_nationkey" JOIN "region" AS "region" ON "n1"."n_regionkey" = "region"."r_regionkey" AND "region"."r_name" = 'AMERICA' WHERE "part"."p_type" = 'ECONOMY ANODIZED STEEL' GROUP BY EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) ORDER BY "o_year"; -------------------------------------- -- TPC-H 9 -------------------------------------- select nation, o_year, sum(amount) as sum_profit from ( select n_name as nation, extract(year from cast(o_orderdate as date)) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from part, supplier, lineitem, partsupp, orders, nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey and p_name like '%green%' ) as profit group by nation, o_year order by nation, o_year desc; SELECT "nation"."n_name" AS "nation", EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) AS "o_year", SUM( "lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" ) - "partsupp"."ps_supplycost" * "lineitem"."l_quantity" ) AS "sum_profit" FROM "part" AS "part" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_partkey" = "part"."p_partkey" JOIN "orders" AS "orders" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" JOIN "partsupp" AS "partsupp" ON "lineitem"."l_partkey" = "partsupp"."ps_partkey" AND "lineitem"."l_suppkey" = "partsupp"."ps_suppkey" JOIN "supplier" AS "supplier" ON "lineitem"."l_suppkey" = "supplier"."s_suppkey" JOIN "nation" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" WHERE "part"."p_name" LIKE '%green%' GROUP BY "nation"."n_name", EXTRACT(YEAR FROM CAST("orders"."o_orderdate" AS DATE)) ORDER BY "nation", "o_year" DESC; -------------------------------------- -- TPC-H 10 -------------------------------------- select c_custkey, c_name, sum(l_extendedprice * (1 - 
l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer, orders, lineitem, nation where c_custkey = o_custkey and l_orderkey = o_orderkey and CAST(o_orderdate AS DATE) >= date '1993-10-01' and CAST(o_orderdate AS DATE) < date '1993-10-01' + interval '3' month and l_returnflag = 'R' and c_nationkey = n_nationkey group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by revenue desc limit 20; SELECT "customer"."c_custkey" AS "c_custkey", "customer"."c_name" AS "c_name", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue", "customer"."c_acctbal" AS "c_acctbal", "nation"."n_name" AS "n_name", "customer"."c_address" AS "c_address", "customer"."c_phone" AS "c_phone", "customer"."c_comment" AS "c_comment" FROM "customer" AS "customer" JOIN "nation" AS "nation" ON "customer"."c_nationkey" = "nation"."n_nationkey" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" AND CAST("orders"."o_orderdate" AS DATE) < CAST('1994-01-01' AS DATE) AND CAST("orders"."o_orderdate" AS DATE) >= CAST('1993-10-01' AS DATE) JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_returnflag" = 'R' GROUP BY "customer"."c_custkey", "customer"."c_name", "customer"."c_acctbal", "customer"."c_phone", "nation"."n_name", "customer"."c_address", "customer"."c_comment" ORDER BY "revenue" DESC LIMIT 20; -------------------------------------- -- TPC-H 11 -------------------------------------- select ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY' group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY' ) order by value desc; WITH "supplier_2" AS ( SELECT 
"supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_nationkey" AS "s_nationkey" FROM "supplier" AS "supplier" ), "nation_2" AS ( SELECT "nation"."n_nationkey" AS "n_nationkey", "nation"."n_name" AS "n_name" FROM "nation" AS "nation" WHERE "nation"."n_name" = 'GERMANY' ), "_u_0" AS ( SELECT SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") * 0.0001 AS "_col_0" FROM "partsupp" AS "partsupp" JOIN "supplier_2" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation_2" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" ) SELECT "partsupp"."ps_partkey" AS "ps_partkey", SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") AS "value" FROM "partsupp" AS "partsupp" CROSS JOIN "_u_0" AS "_u_0" JOIN "supplier_2" AS "supplier" ON "partsupp"."ps_suppkey" = "supplier"."s_suppkey" JOIN "nation_2" AS "nation" ON "nation"."n_nationkey" = "supplier"."s_nationkey" GROUP BY "partsupp"."ps_partkey" HAVING MAX("_u_0"."_col_0") < SUM("partsupp"."ps_supplycost" * "partsupp"."ps_availqty") ORDER BY "value" DESC; -------------------------------------- -- TPC-H 12 -------------------------------------- select l_shipmode, sum(case when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and l_shipmode in ('MAIL', 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and CAST(l_receiptdate AS DATE) >= date '1994-01-01' and CAST(l_receiptdate AS DATE) < date '1994-01-01' + interval '1' year group by l_shipmode order by l_shipmode; SELECT "lineitem"."l_shipmode" AS "l_shipmode", SUM( CASE WHEN "orders"."o_orderpriority" = '1-URGENT' OR "orders"."o_orderpriority" = '2-HIGH' THEN 1 ELSE 0 END ) AS "high_line_count", SUM( CASE WHEN "orders"."o_orderpriority" <> '1-URGENT' AND "orders"."o_orderpriority" <> '2-HIGH' THEN 1 
ELSE 0 END ) AS "low_line_count" FROM "orders" AS "orders" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_commitdate" < "lineitem"."l_receiptdate" AND "lineitem"."l_commitdate" > "lineitem"."l_shipdate" AND "lineitem"."l_orderkey" = "orders"."o_orderkey" AND "lineitem"."l_shipmode" IN ('MAIL', 'SHIP') AND CAST("lineitem"."l_receiptdate" AS DATE) < CAST('1995-01-01' AS DATE) AND CAST("lineitem"."l_receiptdate" AS DATE) >= CAST('1994-01-01' AS DATE) GROUP BY "lineitem"."l_shipmode" ORDER BY "l_shipmode"; -------------------------------------- -- TPC-H 13 -------------------------------------- select c_count, count(*) as custdist from ( select c_custkey, count(o_orderkey) from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%special%requests%' group by c_custkey ) as c_orders (c_custkey, c_count) group by c_count order by custdist desc, c_count desc; WITH "orders_2" AS ( SELECT "orders"."o_orderkey" AS "o_orderkey", "orders"."o_custkey" AS "o_custkey", "orders"."o_comment" AS "o_comment" FROM "orders" AS "orders" WHERE NOT "orders"."o_comment" LIKE '%special%requests%' ), "c_orders" AS ( SELECT COUNT("orders"."o_orderkey") AS "c_count" FROM "customer" AS "customer" LEFT JOIN "orders_2" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" GROUP BY "customer"."c_custkey" ) SELECT "c_orders"."c_count" AS "c_count", COUNT(*) AS "custdist" FROM "c_orders" AS "c_orders" GROUP BY "c_orders"."c_count" ORDER BY "custdist" DESC, "c_count" DESC; -------------------------------------- -- TPC-H 14 -------------------------------------- select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and CAST(l_shipdate AS DATE) >= date '1995-09-01' and CAST(l_shipdate AS DATE) < date '1995-09-01' + interval '1' month; SELECT 100.00 * SUM( CASE WHEN "part"."p_type" LIKE 'PROMO%' THEN 
"lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" ) ELSE 0 END ) / SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "promo_revenue" FROM "lineitem" AS "lineitem" JOIN "part" AS "part" ON "lineitem"."l_partkey" = "part"."p_partkey" WHERE CAST("lineitem"."l_shipdate" AS DATE) < CAST('1995-10-01' AS DATE) AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1995-09-01' AS DATE); -------------------------------------- -- TPC-H 15 -------------------------------------- with revenue (supplier_no, total_revenue) as ( select l_suppkey, sum(l_extendedprice * (1 - l_discount)) from lineitem where CAST(l_shipdate AS DATE) >= date '1996-01-01' and CAST(l_shipdate AS DATE) < date '1996-01-01' + interval '3' month group by l_suppkey) select s_suppkey, s_name, s_address, s_phone, total_revenue from supplier, revenue where s_suppkey = supplier_no and total_revenue = ( select max(total_revenue) from revenue ) order by s_suppkey; WITH "revenue" AS ( SELECT "lineitem"."l_suppkey" AS "supplier_no", SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "total_revenue" FROM "lineitem" AS "lineitem" WHERE CAST("lineitem"."l_shipdate" AS DATE) < CAST('1996-04-01' AS DATE) AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1996-01-01' AS DATE) GROUP BY "lineitem"."l_suppkey" ), "_u_0" AS ( SELECT MAX("revenue"."total_revenue") AS "_col_0" FROM "revenue" AS "revenue" ) SELECT "supplier"."s_suppkey" AS "s_suppkey", "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address", "supplier"."s_phone" AS "s_phone", "revenue"."total_revenue" AS "total_revenue" FROM "supplier" AS "supplier" JOIN "revenue" AS "revenue" ON "revenue"."supplier_no" = "supplier"."s_suppkey" JOIN "_u_0" AS "_u_0" ON "_u_0"."_col_0" = "revenue"."total_revenue" ORDER BY "s_suppkey"; -------------------------------------- -- TPC-H 16 -------------------------------------- select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt from partsupp, part 
where p_partkey = ps_partkey and p_brand <> 'Brand#45' and p_type not like 'MEDIUM POLISHED%' and p_size in (49, 14, 23, 45, 19, 3, 36, 9) and ps_suppkey not in ( select s_suppkey from supplier where s_comment like '%Customer%Complaints%' ) group by p_brand, p_type, p_size order by supplier_cnt desc, p_brand, p_type, p_size; WITH "_u_0" AS ( SELECT "supplier"."s_suppkey" AS "s_suppkey" FROM "supplier" AS "supplier" WHERE "supplier"."s_comment" LIKE '%Customer%Complaints%' GROUP BY "supplier"."s_suppkey" ) SELECT "part"."p_brand" AS "p_brand", "part"."p_type" AS "p_type", "part"."p_size" AS "p_size", COUNT(DISTINCT "partsupp"."ps_suppkey") AS "supplier_cnt" FROM "partsupp" AS "partsupp" JOIN "part" AS "part" ON "part"."p_brand" <> 'Brand#45' AND "part"."p_partkey" = "partsupp"."ps_partkey" AND "part"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) AND NOT "part"."p_type" LIKE 'MEDIUM POLISHED%' LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."s_suppkey" = "partsupp"."ps_suppkey" WHERE "_u_0"."s_suppkey" IS NULL GROUP BY "part"."p_brand", "part"."p_type", "part"."p_size" ORDER BY "supplier_cnt" DESC, "p_brand", "p_type", "p_size"; -------------------------------------- -- TPC-H 17 -------------------------------------- select sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#23' and p_container = 'MED BOX' and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey ); WITH "_u_0" AS ( SELECT 0.2 * AVG("lineitem"."l_quantity") AS "_col_0", "lineitem"."l_partkey" AS "_u_1" FROM "lineitem" AS "lineitem" GROUP BY "lineitem"."l_partkey" ) SELECT SUM("lineitem"."l_extendedprice") / 7.0 AS "avg_yearly" FROM "lineitem" AS "lineitem" JOIN "part" AS "part" ON "lineitem"."l_partkey" = "part"."p_partkey" AND "part"."p_brand" = 'Brand#23' AND "part"."p_container" = 'MED BOX' LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."_u_1" = "part"."p_partkey" WHERE "_u_0"."_col_0" > "lineitem"."l_quantity"; 
-------------------------------------- -- TPC-H 18 -------------------------------------- select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) total_quantity from customer, orders, lineitem where o_orderkey in ( select l_orderkey from lineitem group by l_orderkey having sum(l_quantity) > 300 ) and c_custkey = o_custkey and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100; WITH "_u_0" AS ( SELECT "lineitem"."l_orderkey" AS "l_orderkey" FROM "lineitem" AS "lineitem" GROUP BY "lineitem"."l_orderkey" HAVING SUM("lineitem"."l_quantity") > 300 ) SELECT "customer"."c_name" AS "c_name", "customer"."c_custkey" AS "c_custkey", "orders"."o_orderkey" AS "o_orderkey", "orders"."o_orderdate" AS "o_orderdate", "orders"."o_totalprice" AS "o_totalprice", SUM("lineitem"."l_quantity") AS "total_quantity" FROM "customer" AS "customer" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" JOIN "lineitem" AS "lineitem" ON "lineitem"."l_orderkey" = "orders"."o_orderkey" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "orders"."o_orderkey" WHERE NOT "_u_0"."l_orderkey" IS NULL GROUP BY "customer"."c_name", "customer"."c_custkey", "orders"."o_orderkey", "orders"."o_orderdate", "orders"."o_totalprice" ORDER BY "o_totalprice" DESC, "o_orderdate" LIMIT 100; -------------------------------------- -- TPC-H 19 -------------------------------------- select sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, part where ( p_partkey = l_partkey and p_brand = 'Brand#12' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l_quantity >= 1 and l_quantity <= 11 and p_size between 1 and 5 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#23' and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l_quantity >= 10 and l_quantity <= 20 and p_size 
between 1 and 10 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ) or ( p_partkey = l_partkey and p_brand = 'Brand#34' and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l_quantity >= 20 and l_quantity <= 30 and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' ); SELECT SUM("lineitem"."l_extendedprice" * ( 1 - "lineitem"."l_discount" )) AS "revenue" FROM "lineitem" AS "lineitem" JOIN "part" AS "part" ON ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 11 AND "lineitem"."l_quantity" >= 1 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#12' AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND "part"."p_size" <= 5 AND "part"."p_size" >= 1 ) OR ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 20 AND "lineitem"."l_quantity" >= 10 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#23' AND "part"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND "part"."p_size" <= 10 AND "part"."p_size" >= 1 ) OR ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 30 AND "lineitem"."l_quantity" >= 20 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#34' AND "part"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND "part"."p_size" <= 15 AND "part"."p_size" >= 1 ) WHERE ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 11 AND "lineitem"."l_quantity" >= 1 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#12' AND "part"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') AND "part"."p_size" <= 5 AND 
"part"."p_size" >= 1 ) OR ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 20 AND "lineitem"."l_quantity" >= 10 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#23' AND "part"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') AND "part"."p_size" <= 10 AND "part"."p_size" >= 1 ) OR ( "lineitem"."l_partkey" = "part"."p_partkey" AND "lineitem"."l_quantity" <= 30 AND "lineitem"."l_quantity" >= 20 AND "lineitem"."l_shipinstruct" = 'DELIVER IN PERSON' AND "lineitem"."l_shipmode" IN ('AIR', 'AIR REG') AND "part"."p_brand" = 'Brand#34' AND "part"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') AND "part"."p_size" <= 15 AND "part"."p_size" >= 1 ); -------------------------------------- -- TPC-H 20 -------------------------------------- select s_name, s_address from supplier, nation where s_suppkey in ( select ps_suppkey from partsupp where ps_partkey in ( select p_partkey from part where p_name like 'forest%' ) and ps_availqty > ( select 0.5 * sum(l_quantity) from lineitem where l_partkey = ps_partkey and l_suppkey = ps_suppkey and CAST(l_shipdate AS DATE) >= date '1994-01-01' and CAST(l_shipdate AS DATE) < date '1994-01-01' + interval '1' year ) ) and s_nationkey = n_nationkey and n_name = 'CANADA' order by s_name; WITH "_u_0" AS ( SELECT "part"."p_partkey" AS "p_partkey" FROM "part" AS "part" WHERE "part"."p_name" LIKE 'forest%' GROUP BY "part"."p_partkey" ), "_u_1" AS ( SELECT 0.5 * SUM("lineitem"."l_quantity") AS "_col_0", "lineitem"."l_partkey" AS "_u_2", "lineitem"."l_suppkey" AS "_u_3" FROM "lineitem" AS "lineitem" WHERE CAST("lineitem"."l_shipdate" AS DATE) < CAST('1995-01-01' AS DATE) AND CAST("lineitem"."l_shipdate" AS DATE) >= CAST('1994-01-01' AS DATE) GROUP BY "lineitem"."l_partkey", "lineitem"."l_suppkey" ), "_u_4" AS ( SELECT "partsupp"."ps_suppkey" AS "ps_suppkey" FROM "partsupp" AS "partsupp" LEFT JOIN "_u_0" AS "_u_0" ON 
"_u_0"."p_partkey" = "partsupp"."ps_partkey" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."_u_2" = "partsupp"."ps_partkey" AND "_u_1"."_u_3" = "partsupp"."ps_suppkey" WHERE "_u_1"."_col_0" < "partsupp"."ps_availqty" AND NOT "_u_0"."p_partkey" IS NULL GROUP BY "partsupp"."ps_suppkey" ) SELECT "supplier"."s_name" AS "s_name", "supplier"."s_address" AS "s_address" FROM "supplier" AS "supplier" JOIN "nation" AS "nation" ON "nation"."n_name" = 'CANADA' AND "nation"."n_nationkey" = "supplier"."s_nationkey" LEFT JOIN "_u_4" AS "_u_4" ON "_u_4"."ps_suppkey" = "supplier"."s_suppkey" WHERE NOT "_u_4"."ps_suppkey" IS NULL ORDER BY "s_name"; -------------------------------------- -- TPC-H 21 -------------------------------------- select s_name, count(*) as numwait from supplier, lineitem l1, orders, nation where s_suppkey = l1.l_suppkey and o_orderkey = l1.l_orderkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from lineitem l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from lineitem l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' group by s_name order by numwait desc, s_name limit 100; WITH "_u_0" AS ( SELECT "l2"."l_orderkey" AS "l_orderkey", ARRAY_AGG("l2"."l_suppkey") AS "_u_1" FROM "lineitem" AS "l2" GROUP BY "l2"."l_orderkey" ), "_u_2" AS ( SELECT "l3"."l_orderkey" AS "l_orderkey", ARRAY_AGG("l3"."l_suppkey") AS "_u_3" FROM "lineitem" AS "l3" WHERE "l3"."l_commitdate" < "l3"."l_receiptdate" GROUP BY "l3"."l_orderkey" ) SELECT "supplier"."s_name" AS "s_name", COUNT(*) AS "numwait" FROM "supplier" AS "supplier" JOIN "lineitem" AS "l1" ON "l1"."l_commitdate" < "l1"."l_receiptdate" AND "l1"."l_suppkey" = "supplier"."s_suppkey" JOIN "orders" AS "orders" ON "l1"."l_orderkey" = "orders"."o_orderkey" AND "orders"."o_orderstatus" = 'F' JOIN "nation" AS "nation" ON 
"nation"."n_name" = 'SAUDI ARABIA' AND "nation"."n_nationkey" = "supplier"."s_nationkey" LEFT JOIN "_u_0" AS "_u_0" ON "_u_0"."l_orderkey" = "l1"."l_orderkey" LEFT JOIN "_u_2" AS "_u_2" ON "_u_2"."l_orderkey" = "l1"."l_orderkey" WHERE ( "_u_2"."l_orderkey" IS NULL OR NOT ARRAY_ANY("_u_2"."_u_3", "_x" -> "l1"."l_suppkey" <> "_x") ) AND ARRAY_ANY("_u_0"."_u_1", "_x" -> "l1"."l_suppkey" <> "_x") AND NOT "_u_0"."l_orderkey" IS NULL GROUP BY "supplier"."s_name" ORDER BY "numwait" DESC, "s_name" LIMIT 100; -------------------------------------- -- TPC-H 22 -------------------------------------- select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal from ( select substring(c_phone, 1, 2) as cntrycode, c_acctbal from customer where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > ( select avg(c_acctbal) from customer where c_acctbal > 0.00 and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') ) and not exists ( select * from orders where o_custkey = c_custkey ) ) as custsale group by cntrycode order by cntrycode; WITH "_u_0" AS ( SELECT AVG("customer"."c_acctbal") AS "_col_0" FROM "customer" AS "customer" WHERE "customer"."c_acctbal" > 0.00 AND SUBSTRING("customer"."c_phone", 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') ), "_u_1" AS ( SELECT "orders"."o_custkey" AS "_u_2" FROM "orders" AS "orders" GROUP BY "orders"."o_custkey" ) SELECT SUBSTRING("customer"."c_phone", 1, 2) AS "cntrycode", COUNT(*) AS "numcust", SUM("customer"."c_acctbal") AS "totacctbal" FROM "customer" AS "customer" JOIN "_u_0" AS "_u_0" ON "_u_0"."_col_0" < "customer"."c_acctbal" LEFT JOIN "_u_1" AS "_u_1" ON "_u_1"."_u_2" = "customer"."c_custkey" WHERE "_u_1"."_u_2" IS NULL AND SUBSTRING("customer"."c_phone", 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') GROUP BY SUBSTRING("customer"."c_phone", 1, 2) ORDER BY "cntrycode"; ================================================ FILE: 
tests/fixtures/optimizer/unnest_subqueries.sql ================================================ SELECT * FROM x WHERE x.a = (SELECT SUM(y.a) AS a FROM y); SELECT * FROM x CROSS JOIN (SELECT SUM(y.a) AS a FROM y) AS _u_0 WHERE x.a = _u_0.a; SELECT * FROM x WHERE x.a IN (SELECT y.a AS a FROM y); SELECT * FROM x LEFT JOIN (SELECT y.a AS a FROM y GROUP BY y.a) AS _u_0 ON x.a = _u_0.a WHERE NOT _u_0.a IS NULL; SELECT * FROM x WHERE x.a IN (SELECT y.b AS b FROM y); SELECT * FROM x LEFT JOIN (SELECT y.b AS b FROM y GROUP BY y.b) AS _u_0 ON x.a = _u_0.b WHERE NOT _u_0.b IS NULL; SELECT * FROM x WHERE x.a = ANY (SELECT y.a AS a FROM y); SELECT * FROM x LEFT JOIN (SELECT y.a AS a FROM y GROUP BY y.a) AS _u_0 ON x.a = _u_0.a WHERE NOT _u_0.a IS NULL; SELECT * FROM x WHERE x.a = (SELECT SUM(y.b) AS b FROM y WHERE x.a = y.a); SELECT * FROM x LEFT JOIN (SELECT SUM(y.b) AS b, y.a AS _u_1 FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON x.a = _u_0._u_1 WHERE x.a = _u_0.b; SELECT * FROM x WHERE x.a > (SELECT SUM(y.b) AS b FROM y WHERE x.a = y.a); SELECT * FROM x LEFT JOIN (SELECT SUM(y.b) AS b, y.a AS _u_1 FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON x.a = _u_0._u_1 WHERE x.a > _u_0.b; SELECT * FROM x WHERE x.a <> ANY (SELECT y.a AS a FROM y WHERE y.a = x.a); SELECT * FROM x LEFT JOIN (SELECT y.a AS a FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON _u_0.a = x.a WHERE x.a <> _u_0.a; SELECT * FROM x WHERE x.a NOT IN (SELECT y.a AS a FROM y WHERE y.a = x.a); SELECT * FROM x LEFT JOIN (SELECT y.a AS a FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON _u_0.a = x.a WHERE NOT x.a = _u_0.a; SELECT * FROM x WHERE x.a IN (SELECT y.a AS a FROM y WHERE y.b = x.a); SELECT * FROM x LEFT JOIN (SELECT ARRAY_AGG(y.a) AS a, y.b AS _u_1 FROM y WHERE TRUE GROUP BY y.b) AS _u_0 ON _u_0._u_1 = x.a WHERE ARRAY_ANY(_u_0.a, _x -> _x = x.a); SELECT * FROM x WHERE x.a < (SELECT SUM(y.a) AS a FROM y WHERE y.a = x.a and y.a = x.b and y.b <> x.d); SELECT * FROM x LEFT JOIN (SELECT SUM(y.a) AS a, y.a AS _u_1, ARRAY_AGG(y.b) AS _u_2 
FROM y WHERE TRUE AND TRUE AND TRUE GROUP BY y.a) AS _u_0 ON _u_0._u_1 = x.a AND _u_0._u_1 = x.b WHERE (x.a < _u_0.a AND ARRAY_ANY(_u_0._u_2, _x -> _x <> x.d)); SELECT * FROM x WHERE EXISTS (SELECT y.a AS a, y.b AS b FROM y WHERE x.a = y.a); SELECT * FROM x LEFT JOIN (SELECT y.a AS a FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON x.a = _u_0.a WHERE NOT _u_0.a IS NULL; SELECT * FROM x WHERE x.a IN (SELECT y.a AS a FROM y LIMIT 10); SELECT * FROM x WHERE x.a IN (SELECT y.a AS a FROM y LIMIT 10); SELECT * FROM x.a WHERE x.a IN (SELECT y.a AS a FROM y OFFSET 10); SELECT * FROM x.a WHERE x.a IN (SELECT y.a AS a FROM y OFFSET 10); SELECT * FROM x.a WHERE x.a IN (SELECT y.a AS a, y.b AS b FROM y); SELECT * FROM x.a WHERE x.a IN (SELECT y.a AS a, y.b AS b FROM y); SELECT * FROM x.a WHERE x.a > ANY (SELECT y.a FROM y); SELECT * FROM x.a WHERE x.a > ANY (SELECT y.a FROM y); SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a LIMIT 10); SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a LIMIT 10); SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a OFFSET 10); SELECT * FROM x WHERE x.a = (SELECT SUM(y.c) AS c FROM y WHERE y.a = x.a OFFSET 10); SELECT * FROM x WHERE x.a > ALL (SELECT y.c AS c FROM y WHERE y.a = x.a); SELECT * FROM x LEFT JOIN (SELECT ARRAY_AGG(y.c) AS c, y.a AS _u_1 FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON _u_0._u_1 = x.a WHERE ARRAY_ALL(_u_0.c, _x -> x.a > _x); SELECT * FROM x WHERE x.a > (SELECT COUNT(*) as d FROM y WHERE y.a = x.a); SELECT * FROM x LEFT JOIN (SELECT COUNT(*) AS d, y.a AS _u_1 FROM y WHERE TRUE GROUP BY y.a) AS _u_0 ON _u_0._u_1 = x.a WHERE x.a > COALESCE(_u_0.d, 0); # title: invalid statement left alone SELECT * FROM x WHERE x.a = SUM(SELECT 1); SELECT * FROM x WHERE x.a = SUM(SELECT 1); SELECT * FROM x WHERE x.a IN (SELECT max(y.b) AS b FROM y GROUP BY y.a); SELECT * FROM x LEFT JOIN (SELECT _q.b AS b FROM (SELECT MAX(y.b) AS b FROM y GROUP BY y.a) AS _q GROUP BY _q.b) AS _u_0 
ON x.a = _u_0.b WHERE NOT _u_0.b IS NULL; SELECT x.a > (SELECT SUM(y.a) AS b FROM y) FROM x; SELECT x.a > _u_0.b FROM x CROSS JOIN (SELECT SUM(y.a) AS b FROM y) AS _u_0; SELECT (SELECT MAX(t2.c1) AS c1 FROM t2 WHERE t2.c2 = t1.c2 AND t2.c3 <= TRUNC(t1.c3)) AS c FROM t1; SELECT _u_0.c1 AS c FROM t1 LEFT JOIN (SELECT MAX(t2.c1) AS c1, t2.c2 AS _u_1, MAX(t2.c3) AS _u_2 FROM t2 WHERE TRUE AND TRUE GROUP BY t2.c2) AS _u_0 ON _u_0._u_1 = t1.c2 WHERE _u_0._u_2 <= TRUNC(t1.c3); SELECT s.t AS t FROM s WHERE 1 IN (SELECT t.a AS a FROM t WHERE t.b > 1); SELECT s.t AS t FROM s LEFT JOIN (SELECT t.a AS a FROM t WHERE t.b > 1 GROUP BY t.a) AS _u_0 ON 1 = _u_0.a WHERE NOT _u_0.a IS NULL; # title: can't create GROUP BY clause with an aggregate SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) AS t1 FROM t); SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL; # title: can't create GROUP BY clause with an aggregate (nested) SELECT s.t FROM s WHERE 1 IN (SELECT MAX(t.a) + 1 AS t1 FROM t); SELECT s.t FROM s LEFT JOIN (SELECT MAX(t.a) + 1 AS t1 FROM t) AS _u_0 ON 1 = _u_0.t1 WHERE NOT _u_0.t1 IS NULL; SELECT BIT_COUNT(EXISTS(SELECT 1 WHERE FALSE)) AS col FROM t0; SELECT BIT_COUNT(EXISTS(SELECT 1 WHERE FALSE)) AS col FROM t0; # title: EXISTS in SELECT with GROUP BY - empty subquery should return 0, not eliminate rows SELECT EXISTS (SELECT 1 WHERE FALSE) AS ref0 FROM t1, t0 GROUP BY t0.c2; SELECT NOT MAX(_u_0."1") IS NULL AS ref0 FROM t1, t0 LEFT JOIN (SELECT 1 WHERE FALSE) AS _u_0 ON TRUE GROUP BY t0.c2; # title: EXISTS in SELECT with GROUP BY - non-empty subquery should return 1 SELECT EXISTS (SELECT 1 WHERE TRUE) AS ref0 FROM t1, t0 GROUP BY t0.c2; SELECT NOT MAX(_u_0."1") IS NULL AS ref0 FROM t1, t0 LEFT JOIN (SELECT 1 WHERE TRUE) AS _u_0 ON TRUE GROUP BY t0.c2; # title: Multiple EXISTS in SELECT with GROUP BY SELECT EXISTS (SELECT 1 WHERE FALSE) AS ref0, EXISTS (SELECT 1 WHERE TRUE) AS ref1 FROM t1, t0 GROUP BY t0.c2; SELECT NOT 
MAX(_u_0."1") IS NULL AS ref0, NOT MAX(_u_1."1") IS NULL AS ref1 FROM t1, t0 LEFT JOIN (SELECT 1 WHERE FALSE) AS _u_0 ON TRUE LEFT JOIN (SELECT 1 WHERE TRUE) AS _u_1 ON TRUE GROUP BY t0.c2; # title: EXISTS in SELECT with HAVING clause SELECT EXISTS (SELECT 1 WHERE FALSE) AS ref0 FROM t1 GROUP BY t1.c0 HAVING COUNT(*) > 0; SELECT NOT MAX(_u_0."1") IS NULL AS ref0 FROM t1 LEFT JOIN (SELECT 1 WHERE FALSE) AS _u_0 ON TRUE GROUP BY t1.c0 HAVING COUNT(*) > 0; # title: Skip unnesting GENERATE_SERIES WITH t2 AS (SELECT CAST(t1.c1 AS BIGINT) AS ref1 FROM GENERATE_SERIES((SELECT MAX(x.a) FROM x AS x), 10, 1) AS t1(c1)) SELECT t2.ref1 AS ref1 FROM t2 AS t2; WITH t2 AS (SELECT CAST(t1.c1 AS BIGINT) AS ref1 FROM GENERATE_SERIES((SELECT MAX(x.a) FROM x AS x), 10, 1) AS t1(c1)) SELECT t2.ref1 AS ref1 FROM t2 AS t2; # title: Skip unnesting UNNEST (same issue as GENERATE_SERIES) WITH t2 AS (SELECT t1.c1 FROM UNNEST((SELECT ARRAY(x.a) FROM x)) AS t1(c1)) SELECT t2.c1 FROM t2; WITH t2 AS (SELECT t1.c1 FROM UNNEST((SELECT ARRAY(x.a) FROM x)) AS t1(c1)) SELECT t2.c1 FROM t2; # title: Skip unnesting GENERATE_SERIES but unnesting the rest in the query SELECT t1.c1 > (SELECT SUM(y.a) AS b FROM y) FROM x JOIN GENERATE_SERIES((SELECT MAX(x.a) FROM x AS x), 10, 1) AS t1(c1) ON t1.c1 > x.a; SELECT t1.c1 > _u_0.b FROM x JOIN GENERATE_SERIES((SELECT MAX(x.a) FROM x AS x), 10, 1) AS t1(c1) ON t1.c1 > x.a CROSS JOIN (SELECT SUM(y.a) AS b FROM y) AS _u_0; # title: correlated scalar subquery with EQ + range predicates inside a function in SELECT should not crash (issue #7295) SELECT COALESCE((SELECT MAX(b.val) FROM t b WHERE b.val < a.val AND b.id = a.id), a.val) AS result FROM t a; SELECT COALESCE((SELECT MAX(b.val) FROM t AS b WHERE b.val < a.val AND b.id = a.id), a.val) AS result FROM t AS a; ================================================ FILE: tests/fixtures/partial.sql ================================================ SELECT a FROM SELECT a FROM x WHERE SELECT a + a * SELECT a FROM x GROUP BY 
WITH a AS (SELECT 1), b AS (SELECT 2) SELECT FROM x ================================================ FILE: tests/fixtures/pretty.sql ================================================ SET x TO 1; SET x = 1; SELECT * FROM test; SELECT * FROM test; WITH a AS ((SELECT 1 AS b) UNION ALL (SELECT 2 AS b)) SELECT * FROM a; WITH a AS ( ( SELECT 1 AS b ) UNION ALL ( SELECT 2 AS b ) ) SELECT * FROM a; WITH cte1 AS ( SELECT a, z and e AS b FROM cte WHERE x IN (1, 2, 3) AND z < -1 OR z > 1 AND w = 'AND' ), cte2 AS ( SELECT RANK() OVER (PARTITION BY a, b ORDER BY x DESC) a, b FROM cte CROSS JOIN ( SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT CASE x AND 1 + 1 = 2 WHEN TRUE THEN 1 AND 4 + 3 AND Z WHEN x and y THEN 2 ELSE 3 AND 4 AND g END UNION ALL SELECT 1 FROM (SELECT 1) AS x, y, (SELECT 2) z UNION ALL SELECT MAX(COALESCE(x AND y, a and b and c, d and e)), FOO(CASE WHEN a and b THEN c and d ELSE 3 END) GROUP BY x, GROUPING SETS (a, (b, c)), CUBE(y, z) ) x ) SELECT a, b c FROM ( SELECT a w, 1 + 1 AS c FROM foo WHERE w IN (SELECT z FROM q) GROUP BY a, b ) x LEFT JOIN ( SELECT a, b FROM (SELECT * FROM bar WHERE (c > 1 AND d > 1) OR e > 1 GROUP BY a HAVING a > 1 LIMIT 10) z ) y ON x.a = y.b AND x.a > 1 OR (x.c = y.d OR x.c = y.e); WITH cte1 AS ( SELECT a, z AND e AS b FROM cte WHERE x IN (1, 2, 3) AND z < -1 OR z > 1 AND w = 'AND' ), cte2 AS ( SELECT RANK() OVER (PARTITION BY a, b ORDER BY x DESC) AS a, b FROM cte CROSS JOIN ( SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT CASE x AND 1 + 1 = 2 WHEN TRUE THEN 1 AND 4 + 3 AND Z WHEN x AND y THEN 2 ELSE 3 AND 4 AND g END UNION ALL SELECT 1 FROM ( SELECT 1 ) AS x, y, ( SELECT 2 ) AS z UNION ALL SELECT MAX(COALESCE(x AND y, a AND b AND c, d AND e)), FOO(CASE WHEN a AND b THEN c AND d ELSE 3 END) GROUP BY x, GROUPING SETS ( a, (b, c) ), CUBE ( y, z ) ) AS x ) SELECT a, b AS c FROM ( SELECT a AS w, 1 + 1 AS c FROM foo WHERE w IN ( SELECT z FROM q ) GROUP BY a, b ) AS x LEFT JOIN ( SELECT a, b FROM ( SELECT * FROM bar WHERE ( c > 1 AND d > 1 ) 
OR e > 1 GROUP BY a HAVING a > 1 LIMIT 10 ) AS z ) AS y ON x.a = y.b AND x.a > 1 OR ( x.c = y.d OR x.c = y.e ); SELECT myCol1, myCol2 FROM baseTable LATERAL VIEW OUTER explode(col1) myTable1 AS myCol1 LATERAL VIEW explode(col2) myTable2 AS myCol2 where a > 1 and b > 2 or c > 3; SELECT myCol1, myCol2 FROM baseTable LATERAL VIEW OUTER EXPLODE(col1) myTable1 AS myCol1 LATERAL VIEW EXPLODE(col2) myTable2 AS myCol2 WHERE a > 1 AND b > 2 OR c > 3; SELECT * FROM (WITH y AS ( SELECT 1 AS z) SELECT z from y) x; SELECT * FROM ( WITH y AS ( SELECT 1 AS z ) SELECT z FROM y ) AS x; INSERT OVERWRITE TABLE x VALUES (1, 2.0, '3.0'), (4, 5.0, '6.0'); INSERT OVERWRITE TABLE x VALUES (1, 2.0, '3.0'), (4, 5.0, '6.0'); INSERT INTO TABLE foo REPLACE WHERE cond SELECT * FROM bar; INSERT INTO foo REPLACE WHERE cond SELECT * FROM bar; INSERT OVERWRITE TABLE zipcodes PARTITION(state = '0') VALUES (896, 'US', 'TAMPA', 33607); INSERT OVERWRITE TABLE zipcodes PARTITION(state = '0') VALUES (896, 'US', 'TAMPA', 33607); WITH regional_sales AS ( SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region ), top_regions AS ( SELECT region FROM regional_sales WHERE total_sales > (SELECT SUM(total_sales)/10 FROM regional_sales) ) SELECT region, product, SUM(quantity) AS product_units, SUM(amount) AS product_sales FROM orders WHERE region IN (SELECT region FROM top_regions) GROUP BY region, product; WITH regional_sales AS ( SELECT region, SUM(amount) AS total_sales FROM orders GROUP BY region ), top_regions AS ( SELECT region FROM regional_sales WHERE total_sales > ( SELECT SUM(total_sales) / 10 FROM regional_sales ) ) SELECT region, product, SUM(quantity) AS product_units, SUM(amount) AS product_sales FROM orders WHERE region IN ( SELECT region FROM top_regions ) GROUP BY region, product; CREATE TABLE "t_customer_account" ( "id" int, "customer_id" int, "bank" varchar(100), "account_no" varchar(100)); CREATE TABLE "t_customer_account" ( "id" INT, "customer_id" INT, "bank" VARCHAR(100), 
"account_no" VARCHAR(100) ); SELECT x("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff"), array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff"), array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff", array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff")), array(array("aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff")), ; SELECT X( "aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff" ), ARRAY( "aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff" ), ARRAY( "aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff", ARRAY( "aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff" ) ), ARRAY( ARRAY( "aaaaaaaaaaaaaa", "bbbbbbbbbbbbb", "ccccccccc", "ddddddddddddd", "eeeeeeeeeeeee", "fffffff" ) ); /* multi line comment */ SELECT * FROM foo; /* multi line comment */ SELECT * FROM foo; SELECT x FROM a.b.c /*x*/, e.f.g /*x*/; SELECT x FROM a.b.c /* x */, e.f.g /* x */; SELECT x FROM (SELECT * FROM bla /*x*/WHERE id = 1) /*x*/; SELECT x FROM ( SELECT * FROM bla /* x */ WHERE id = 1 ) /* x */; SELECT * /* multi line comment */; SELECT * /* multi line comment */; WITH table_data AS ( SELECT 'bob' AS name, ARRAY['banana', 'apple', 'orange'] AS fruit_basket ) SELECT name, fruit, basket_index FROM table_data CROSS JOIN UNNEST(fruit_basket) WITH ORDINALITY AS fruit(basket_index); WITH table_data AS ( SELECT 'bob' AS name, ARRAY('banana', 'apple', 'orange') AS fruit_basket ) SELECT name, fruit, basket_index FROM table_data CROSS JOIN UNNEST(fruit_basket) WITH ORDINALITY AS fruit(basket_index); SELECT A.* EXCEPT A.COL_1, A.COL_2 FROM TABLE_1 A; SELECT A.* EXCEPT (A.COL_1), A.COL_2 FROM TABLE_1 AS A; SELECT * FROM a JOIN b JOIN 
c ON b.id = c.id ON a.id = b.id CROSS JOIN d JOIN e ON d.id = e.id; SELECT * FROM a JOIN b JOIN c ON b.id = c.id ON a.id = b.id CROSS JOIN d JOIN e ON d.id = e.id; SELECT * FROM a JOIN b JOIN c USING (e) JOIN d USING (f) USING (g); SELECT * FROM a JOIN b JOIN c USING (e) JOIN d USING (f) USING (g); ('aaaaaaaaaaa', 'bbbbbbbbbbbbbbbb', 'ccccccccccccc', 'ddddddddddd', 'eeeeeeeeeeeeeeeeeeeee'); ( 'aaaaaaaaaaa', 'bbbbbbbbbbbbbbbb', 'ccccccccccccc', 'ddddddddddd', 'eeeeeeeeeeeeeeeeeeeee' ); /* COMMENT */ INSERT FIRST WHEN salary > 4000 THEN INTO emp2 WHEN salary > 5000 THEN INTO emp3 WHEN salary > 6000 THEN INTO emp4 SELECT salary FROM employees; /* COMMENT */ INSERT FIRST WHEN salary > 4000 THEN INTO emp2 WHEN salary > 5000 THEN INTO emp3 WHEN salary > 6000 THEN INTO emp4 SELECT salary FROM employees; SELECT * FROM foo wHERE 1=1 AND -- my comment EXISTS ( SELECT 1 FROM bar ); SELECT * FROM foo WHERE 1 = 1 AND EXISTS( SELECT 1 FROM bar ) /* my comment */; SELECT 1 FROM foo WHERE 1=1 AND -- first comment -- second comment foo.a = 1; SELECT 1 FROM foo WHERE 1 = 1 AND /* first comment */ foo.a /* second comment */ = 1; MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET status = s.status, amount = s.amount; MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET status = s.status, amount = s.amount; SELECT id, -- SUM(total) as all_that, ARRAY_AGG(foo)[0][0] AS first_foo, FROM facts GROUP BY all; SELECT id, ARRAY_AGG(foo)[0][0] AS first_foo /* SUM(total) as all_that, */ FROM facts GROUP BY ALL; ALTER TABLE ct ADD CONSTRAINT ct_id_fk FOREIGN KEY (id) REFERENCES et (fid) DEFERRABLE INITIALLY DEFERRED; ALTER TABLE ct ADD CONSTRAINT ct_id_fk FOREIGN KEY (id) REFERENCES et ( fid ) DEFERRABLE INITIALLY DEFERRED; SELECT * FROM a WHERE /*111*/ b = 1 /*222*/ ORDER BY c; SELECT * FROM a WHERE b /* 111 */ = 1 /* 222 */ ORDER BY c; SELECT COUNT(*) FROM table_a /* join comment */ JOIN table_b ON table_a.id = table_b.id /* group by comment */ GROUP BY table_a.id /* 
having comment */ HAVING table_a.id = 1; SELECT COUNT(*) FROM table_a /* join comment */ JOIN table_b ON table_a.id = table_b.id /* group by comment */ GROUP BY table_a.id /* having comment */ HAVING table_a.id = 1; ================================================ FILE: tests/gen_fixtures.py ================================================ import time from sqlglot.optimizer import optimize TPCH_SCHEMA = { "lineitem": { "l_orderkey": "bigint", "l_partkey": "bigint", "l_suppkey": "bigint", "l_linenumber": "bigint", "l_quantity": "double", "l_extendedprice": "double", "l_discount": "double", "l_tax": "double", "l_returnflag": "string", "l_linestatus": "string", "l_shipdate": "string", "l_commitdate": "string", "l_receiptdate": "string", "l_shipinstruct": "string", "l_shipmode": "string", "l_comment": "string", }, "orders": { "o_orderkey": "bigint", "o_custkey": "bigint", "o_orderstatus": "string", "o_totalprice": "double", "o_orderdate": "string", "o_orderpriority": "string", "o_clerk": "string", "o_shippriority": "int", "o_comment": "string", }, "customer": { "c_custkey": "bigint", "c_name": "string", "c_address": "string", "c_nationkey": "bigint", "c_phone": "string", "c_acctbal": "double", "c_mktsegment": "string", "c_comment": "string", }, "part": { "p_partkey": "bigint", "p_name": "string", "p_mfgr": "string", "p_brand": "string", "p_type": "string", "p_size": "int", "p_container": "string", "p_retailprice": "double", "p_comment": "string", }, "supplier": { "s_suppkey": "bigint", "s_name": "string", "s_address": "string", "s_nationkey": "bigint", "s_phone": "string", "s_acctbal": "double", "s_comment": "string", }, "partsupp": { "ps_partkey": "bigint", "ps_suppkey": "bigint", "ps_availqty": "int", "ps_supplycost": "double", "ps_comment": "string", }, "nation": { "n_nationkey": "bigint", "n_name": "string", "n_regionkey": "bigint", "n_comment": "string", }, "region": { "r_regionkey": "bigint", "r_name": "string", "r_comment": "string", }, } TPCDS_SCHEMA = { 
"catalog_sales": { "cs_sold_date_sk": "bigint", "cs_sold_time_sk": "bigint", "cs_ship_date_sk": "bigint", "cs_bill_customer_sk": "bigint", "cs_bill_cdemo_sk": "bigint", "cs_bill_hdemo_sk": "bigint", "cs_bill_addr_sk": "bigint", "cs_ship_customer_sk": "bigint", "cs_ship_cdemo_sk": "bigint", "cs_ship_hdemo_sk": "bigint", "cs_ship_addr_sk": "bigint", "cs_call_center_sk": "bigint", "cs_catalog_page_sk": "bigint", "cs_ship_mode_sk": "bigint", "cs_warehouse_sk": "bigint", "cs_item_sk": "bigint", "cs_promo_sk": "bigint", "cs_order_number": "bigint", "cs_quantity": "bigint", "cs_wholesale_cost": "double", "cs_list_price": "double", "cs_sales_price": "double", "cs_ext_discount_amt": "double", "cs_ext_sales_price": "double", "cs_ext_wholesale_cost": "double", "cs_ext_list_price": "double", "cs_ext_tax": "double", "cs_coupon_amt": "double", "cs_ext_ship_cost": "double", "cs_net_paid": "double", "cs_net_paid_inc_tax": "double", "cs_net_paid_inc_ship": "double", "cs_net_paid_inc_ship_tax": "double", "cs_net_profit": "double", }, "catalog_returns": { "cr_returned_date_sk": "bigint", "cr_returned_time_sk": "bigint", "cr_item_sk": "bigint", "cr_refunded_customer_sk": "bigint", "cr_refunded_cdemo_sk": "bigint", "cr_refunded_hdemo_sk": "bigint", "cr_refunded_addr_sk": "bigint", "cr_returning_customer_sk": "bigint", "cr_returning_cdemo_sk": "bigint", "cr_returning_hdemo_sk": "bigint", "cr_returning_addr_sk": "bigint", "cr_call_center_sk": "bigint", "cr_catalog_page_sk": "bigint", "cr_ship_mode_sk": "bigint", "cr_warehouse_sk": "bigint", "cr_reason_sk": "bigint", "cr_order_number": "bigint", "cr_return_quantity": "bigint", "cr_return_amount": "double", "cr_return_tax": "double", "cr_return_amt_inc_tax": "double", "cr_fee": "double", "cr_return_ship_cost": "double", "cr_refunded_cash": "double", "cr_reversed_charge": "double", "cr_store_credit": "double", "cr_net_loss": "double", }, "inventory": { "inv_date_sk": "bigint", "inv_item_sk": "bigint", "inv_warehouse_sk": "bigint", 
"inv_quantity_on_hand": "bigint", }, "store_sales": { "ss_sold_date_sk": "bigint", "ss_sold_time_sk": "bigint", "ss_item_sk": "bigint", "ss_customer_sk": "bigint", "ss_cdemo_sk": "bigint", "ss_hdemo_sk": "bigint", "ss_addr_sk": "bigint", "ss_store_sk": "bigint", "ss_promo_sk": "bigint", "ss_ticket_number": "bigint", "ss_quantity": "bigint", "ss_wholesale_cost": "double", "ss_list_price": "double", "ss_sales_price": "double", "ss_ext_discount_amt": "double", "ss_ext_sales_price": "double", "ss_ext_wholesale_cost": "double", "ss_ext_list_price": "double", "ss_ext_tax": "double", "ss_coupon_amt": "double", "ss_net_paid": "double", "ss_net_paid_inc_tax": "double", "ss_net_profit": "double", }, "store_returns": { "sr_returned_date_sk": "bigint", "sr_return_time_sk": "bigint", "sr_item_sk": "bigint", "sr_customer_sk": "bigint", "sr_cdemo_sk": "bigint", "sr_hdemo_sk": "bigint", "sr_addr_sk": "bigint", "sr_store_sk": "bigint", "sr_reason_sk": "bigint", "sr_ticket_number": "bigint", "sr_return_quantity": "bigint", "sr_return_amt": "double", "sr_return_tax": "double", "sr_return_amt_inc_tax": "double", "sr_fee": "double", "sr_return_ship_cost": "double", "sr_refunded_cash": "double", "sr_reversed_charge": "double", "sr_store_credit": "double", "sr_net_loss": "double", }, "web_sales": { "ws_sold_date_sk": "bigint", "ws_sold_time_sk": "bigint", "ws_ship_date_sk": "bigint", "ws_item_sk": "bigint", "ws_bill_customer_sk": "bigint", "ws_bill_cdemo_sk": "bigint", "ws_bill_hdemo_sk": "bigint", "ws_bill_addr_sk": "bigint", "ws_ship_customer_sk": "bigint", "ws_ship_cdemo_sk": "bigint", "ws_ship_hdemo_sk": "bigint", "ws_ship_addr_sk": "bigint", "ws_web_page_sk": "bigint", "ws_web_site_sk": "bigint", "ws_ship_mode_sk": "bigint", "ws_warehouse_sk": "bigint", "ws_promo_sk": "bigint", "ws_order_number": "bigint", "ws_quantity": "bigint", "ws_wholesale_cost": "double", "ws_list_price": "double", "ws_sales_price": "double", "ws_ext_discount_amt": "double", "ws_ext_sales_price": "double", 
"ws_ext_wholesale_cost": "double", "ws_ext_list_price": "double", "ws_ext_tax": "double", "ws_coupon_amt": "double", "ws_ext_ship_cost": "double", "ws_net_paid": "double", "ws_net_paid_inc_tax": "double", "ws_net_paid_inc_ship": "double", "ws_net_paid_inc_ship_tax": "double", "ws_net_profit": "double", }, "web_returns": { "wr_returned_date_sk": "bigint", "wr_returned_time_sk": "bigint", "wr_item_sk": "bigint", "wr_refunded_customer_sk": "bigint", "wr_refunded_cdemo_sk": "bigint", "wr_refunded_hdemo_sk": "bigint", "wr_refunded_addr_sk": "bigint", "wr_returning_customer_sk": "bigint", "wr_returning_cdemo_sk": "bigint", "wr_returning_hdemo_sk": "bigint", "wr_returning_addr_sk": "bigint", "wr_web_page_sk": "bigint", "wr_reason_sk": "bigint", "wr_order_number": "bigint", "wr_return_quantity": "bigint", "wr_return_amt": "double", "wr_return_tax": "double", "wr_return_amt_inc_tax": "double", "wr_fee": "double", "wr_return_ship_cost": "double", "wr_refunded_cash": "double", "wr_reversed_charge": "double", "wr_account_credit": "double", "wr_net_loss": "double", }, "call_center": { "cc_call_center_sk": "bigint", "cc_call_center_id": "string", "cc_rec_start_date": "string", "cc_rec_end_date": "string", "cc_closed_date_sk": "bigint", "cc_open_date_sk": "bigint", "cc_name": "string", "cc_class": "string", "cc_employees": "bigint", "cc_sq_ft": "bigint", "cc_hours": "string", "cc_manager": "string", "cc_mkt_id": "bigint", "cc_mkt_class": "string", "cc_mkt_desc": "string", "cc_market_manager": "string", "cc_division": "bigint", "cc_division_name": "string", "cc_company": "bigint", "cc_company_name": "string", "cc_street_number": "string", "cc_street_name": "string", "cc_street_type": "string", "cc_suite_number": "string", "cc_city": "string", "cc_county": "string", "cc_state": "string", "cc_zip": "string", "cc_country": "string", "cc_gmt_offset": "double", "cc_tax_percentage": "double", }, "catalog_page": { "cp_catalog_page_sk": "bigint", "cp_catalog_page_id": "string", 
"cp_start_date_sk": "bigint", "cp_end_date_sk": "bigint", "cp_department": "string", "cp_catalog_number": "bigint", "cp_catalog_page_number": "bigint", "cp_description": "string", "cp_type": "string", }, "customer": { "c_customer_sk": "bigint", "c_customer_id": "string", "c_current_cdemo_sk": "bigint", "c_current_hdemo_sk": "bigint", "c_current_addr_sk": "bigint", "c_first_shipto_date_sk": "bigint", "c_first_sales_date_sk": "bigint", "c_salutation": "string", "c_first_name": "string", "c_last_name": "string", "c_preferred_cust_flag": "string", "c_birth_day": "bigint", "c_birth_month": "bigint", "c_birth_year": "bigint", "c_birth_country": "string", "c_login": "string", "c_email_address": "string", "c_last_review_date": "string", }, "customer_address": { "ca_address_sk": "bigint", "ca_address_id": "string", "ca_street_number": "string", "ca_street_name": "string", "ca_street_type": "string", "ca_suite_number": "string", "ca_city": "string", "ca_county": "string", "ca_state": "string", "ca_zip": "string", "ca_country": "string", "ca_gmt_offset": "double", "ca_location_type": "string", }, "customer_demographics": { "cd_demo_sk": "bigint", "cd_gender": "string", "cd_marital_status": "string", "cd_education_status": "string", "cd_purchase_estimate": "bigint", "cd_credit_rating": "string", "cd_dep_count": "bigint", "cd_dep_employed_count": "bigint", "cd_dep_college_count": "bigint", }, "date_dim": { "d_date_sk": "bigint", "d_date_id": "string", "d_date": "string", "d_month_seq": "bigint", "d_week_seq": "bigint", "d_quarter_seq": "bigint", "d_year": "bigint", "d_dow": "bigint", "d_moy": "bigint", "d_dom": "bigint", "d_qoy": "bigint", "d_fy_year": "bigint", "d_fy_quarter_seq": "bigint", "d_fy_week_seq": "bigint", "d_day_name": "string", "d_quarter_name": "string", "d_holiday": "string", "d_weekend": "string", "d_following_holiday": "string", "d_first_dom": "bigint", "d_last_dom": "bigint", "d_same_day_ly": "bigint", "d_same_day_lq": "bigint", "d_current_day": "string", 
"d_current_week": "string", "d_current_month": "string", "d_current_quarter": "string", "d_current_year": "string", }, "household_demographics": { "hd_demo_sk": "bigint", "hd_income_band_sk": "bigint", "hd_buy_potential": "string", "hd_dep_count": "bigint", "hd_vehicle_count": "bigint", }, "income_band": { "ib_income_band_sk": "bigint", "ib_lower_bound": "bigint", "ib_upper_bound": "bigint", }, "item": { "i_item_sk": "bigint", "i_item_id": "string", "i_rec_start_date": "string", "i_rec_end_date": "string", "i_item_desc": "string", "i_current_price": "double", "i_wholesale_cost": "double", "i_brand_id": "bigint", "i_brand": "string", "i_class_id": "bigint", "i_class": "string", "i_category_id": "bigint", "i_category": "string", "i_manufact_id": "bigint", "i_manufact": "string", "i_size": "string", "i_formulation": "string", "i_color": "string", "i_units": "string", "i_container": "string", "i_manager_id": "bigint", "i_product_name": "string", }, "promotion": { "p_promo_sk": "bigint", "p_promo_id": "string", "p_start_date_sk": "bigint", "p_end_date_sk": "bigint", "p_item_sk": "bigint", "p_cost": "double", "p_response_target": "bigint", "p_promo_name": "string", "p_channel_dmail": "string", "p_channel_email": "string", "p_channel_catalog": "string", "p_channel_tv": "string", "p_channel_radio": "string", "p_channel_press": "string", "p_channel_event": "string", "p_channel_demo": "string", "p_channel_details": "string", "p_purpose": "string", "p_discount_active": "string", }, "reason": {"r_reason_sk": "bigint", "r_reason_id": "string", "r_reason_desc": "string"}, "ship_mode": { "sm_ship_mode_sk": "bigint", "sm_ship_mode_id": "string", "sm_type": "string", "sm_code": "string", "sm_carrier": "string", "sm_contract": "string", }, "store": { "s_store_sk": "bigint", "s_store_id": "string", "s_rec_start_date": "string", "s_rec_end_date": "string", "s_closed_date_sk": "bigint", "s_store_name": "string", "s_number_employees": "bigint", "s_floor_space": "bigint", "s_hours": 
"string", "s_manager": "string", "s_market_id": "bigint", "s_geography_class": "string", "s_market_desc": "string", "s_market_manager": "string", "s_division_id": "bigint", "s_division_name": "string", "s_company_id": "bigint", "s_company_name": "string", "s_street_number": "string", "s_street_name": "string", "s_street_type": "string", "s_suite_number": "string", "s_city": "string", "s_county": "string", "s_state": "string", "s_zip": "string", "s_country": "string", "s_gmt_offset": "double", "s_tax_precentage": "double", }, "time_dim": { "t_time_sk": "bigint", "t_time_id": "string", "t_time": "bigint", "t_hour": "bigint", "t_minute": "bigint", "t_second": "bigint", "t_am_pm": "string", "t_shift": "string", "t_sub_shift": "string", "t_meal_time": "string", }, "warehouse": { "w_warehouse_sk": "bigint", "w_warehouse_id": "string", "w_warehouse_name": "string", "w_warehouse_sq_ft": "bigint", "w_street_number": "string", "w_street_name": "string", "w_street_type": "string", "w_suite_number": "string", "w_city": "string", "w_county": "string", "w_state": "string", "w_zip": "string", "w_country": "string", "w_gmt_offset": "double", }, "web_page": { "wp_web_page_sk": "bigint", "wp_web_page_id": "string", "wp_rec_start_date": "string", "wp_rec_end_date": "string", "wp_creation_date_sk": "bigint", "wp_access_date_sk": "bigint", "wp_autogen_flag": "string", "wp_customer_sk": "bigint", "wp_url": "string", "wp_type": "string", "wp_char_count": "bigint", "wp_link_count": "bigint", "wp_image_count": "bigint", "wp_max_ad_count": "bigint", }, "web_site": { "web_site_sk": "bigint", "web_site_id": "string", "web_rec_start_date": "string", "web_rec_end_date": "string", "web_name": "string", "web_open_date_sk": "bigint", "web_close_date_sk": "bigint", "web_class": "string", "web_manager": "string", "web_mkt_id": "bigint", "web_mkt_class": "string", "web_mkt_desc": "string", "web_market_manager": "string", "web_company_id": "bigint", "web_company_name": "string", "web_street_number": 
# Directory containing this helpers module, and the fixtures directory beside it.
FILE_DIR = os.path.dirname(__file__)
FIXTURES_DIR = os.path.join(FILE_DIR, "fixtures")


def _filter_comments(s):
    """Return *s* without empty lines and without lines that start with ``--``.

    Only lines whose very first characters are ``--`` are dropped; an indented
    or trailing ``--`` comment is kept. The surviving lines are re-joined with
    a single newline.
    """
    return "\n".join(line for line in s.splitlines() if line and not line.startswith("--"))


def _extract_meta(sql):
    """Split leading ``# key: value`` lines off *sql*.

    Every consecutive line at the top of *sql* that starts with ``#`` is parsed
    as ``key: value`` (split on the first colon, ``#`` and surrounding
    whitespace stripped) and stored in a dict.

    Returns:
        A ``(sql, meta)`` tuple: the remaining lines joined by newlines, and
        the parsed metadata dict (empty when there are no meta lines).

    Raises:
        ValueError: if a leading ``#`` line contains no colon.
    """
    meta = {}
    sql_lines = sql.split("\n")
    i = 0
    # The bounds check matters when *every* line is a meta line: without it the
    # loop would index one past the end and raise IndexError.
    while i < len(sql_lines) and sql_lines[i].startswith("#"):
        key, val = sql_lines[i].split(":", maxsplit=1)
        meta[key.lstrip("#").strip()] = val.strip()
        i += 1
    return "\n".join(sql_lines[i:]), meta
def assert_logger_contains(message, logger, level="error"):
    """Assert that *message* appears in what was logged at *level*.

    *logger* is expected to expose, under the attribute named by *level*, an
    object with a ``call_args_list`` (as a mocked logger method does). The first
    positional argument of every recorded call is stringified and the results
    are joined with newlines; *message* must be a substring of that text.

    Raises:
        AssertionError: if *message* is not found in the recorded output.
    """
    recorded_calls = getattr(logger, level).call_args_list
    output = "\n".join(str(args[0][0]) for args in recorded_calls)
    if message not in output:
        # A bare ``raise`` here has no active exception to re-raise and would
        # surface as an unrelated RuntimeError; fail with the real reason.
        raise AssertionError(f"Expected '{message}' not in {output}")


def load_sql_fixtures(filename):
    """Yield each non-empty, non-``--`` line of the fixture file *filename*.

    *filename* is resolved relative to ``FIXTURES_DIR``.
    """
    with open(os.path.join(FIXTURES_DIR, filename), encoding="utf-8") as f:
        yield from _filter_comments(f.read()).splitlines()


def load_sql_fixture_pairs(filename):
    """Yield ``(meta, sql, expected)`` triples from the fixture file *filename*.

    The file (resolved relative to ``FIXTURES_DIR``) is stripped of comment and
    empty lines, then split on ``;``. Statements are consumed two at a time:
    the first is the input SQL (with any leading ``# key: value`` lines moved
    into *meta*), the second is the expected output. A trailing statement
    without a partner is ignored.
    """
    # Read everything up front so the file is closed before the first yield
    # instead of staying open for as long as the generator is alive.
    with open(os.path.join(FIXTURES_DIR, filename), encoding="utf-8") as f:
        statements = _filter_comments(f.read()).split(";")

    # zip() stops at the shorter slice, which drops an unpaired last statement.
    for raw_sql, raw_expected in zip(statements[::2], statements[1::2]):
        sql, meta = _extract_meta(raw_sql.strip())
        yield meta, sql, raw_expected.strip()


def string_to_bool(string):
    """Interpret *string* as a boolean and always return a real ``bool``.

    ``None`` is ``False``; values equal to ``True``/``False`` are returned as
    the corresponding bool; any other value is ``True`` only when it is a
    non-empty string equal to ``"true"`` or ``"1"`` ignoring case.
    """
    if string is None:
        return False
    if string in (True, False):
        return bool(string)
    # bool(...) keeps the empty string from leaking out as the return value.
    return bool(string) and string.lower() in ("true", "1")


# Integration tests are skipped when the SKIP_INTEGRATION env var is "true" or "1".
SKIP_INTEGRATION = string_to_bool(os.environ.get("SKIP_INTEGRATION", "0").lower())
"string", "s_acctbal": "double", "s_comment": "string", }, "partsupp": { "ps_partkey": "bigint", "ps_suppkey": "bigint", "ps_availqty": "int", "ps_supplycost": "double", "ps_comment": "string", }, "nation": { "n_nationkey": "bigint", "n_name": "string", "n_regionkey": "bigint", "n_comment": "string", }, "region": { "r_regionkey": "bigint", "r_name": "string", "r_comment": "string", }, } TPCDS_SCHEMA = { "catalog_sales": { "cs_sold_date_sk": "bigint", "cs_sold_time_sk": "bigint", "cs_ship_date_sk": "bigint", "cs_bill_customer_sk": "bigint", "cs_bill_cdemo_sk": "bigint", "cs_bill_hdemo_sk": "bigint", "cs_bill_addr_sk": "bigint", "cs_ship_customer_sk": "bigint", "cs_ship_cdemo_sk": "bigint", "cs_ship_hdemo_sk": "bigint", "cs_ship_addr_sk": "bigint", "cs_call_center_sk": "bigint", "cs_catalog_page_sk": "bigint", "cs_ship_mode_sk": "bigint", "cs_warehouse_sk": "bigint", "cs_item_sk": "bigint", "cs_promo_sk": "bigint", "cs_order_number": "bigint", "cs_quantity": "bigint", "cs_wholesale_cost": "double", "cs_list_price": "double", "cs_sales_price": "double", "cs_ext_discount_amt": "double", "cs_ext_sales_price": "double", "cs_ext_wholesale_cost": "double", "cs_ext_list_price": "double", "cs_ext_tax": "double", "cs_coupon_amt": "double", "cs_ext_ship_cost": "double", "cs_net_paid": "double", "cs_net_paid_inc_tax": "double", "cs_net_paid_inc_ship": "double", "cs_net_paid_inc_ship_tax": "double", "cs_net_profit": "double", }, "catalog_returns": { "cr_returned_date_sk": "bigint", "cr_returned_time_sk": "bigint", "cr_item_sk": "bigint", "cr_refunded_customer_sk": "bigint", "cr_refunded_cdemo_sk": "bigint", "cr_refunded_hdemo_sk": "bigint", "cr_refunded_addr_sk": "bigint", "cr_returning_customer_sk": "bigint", "cr_returning_cdemo_sk": "bigint", "cr_returning_hdemo_sk": "bigint", "cr_returning_addr_sk": "bigint", "cr_call_center_sk": "bigint", "cr_catalog_page_sk": "bigint", "cr_ship_mode_sk": "bigint", "cr_warehouse_sk": "bigint", "cr_reason_sk": "bigint", "cr_order_number": 
"bigint", "cr_return_quantity": "bigint", "cr_return_amount": "double", "cr_return_tax": "double", "cr_return_amt_inc_tax": "double", "cr_fee": "double", "cr_return_ship_cost": "double", "cr_refunded_cash": "double", "cr_reversed_charge": "double", "cr_store_credit": "double", "cr_net_loss": "double", }, "inventory": { "inv_date_sk": "bigint", "inv_item_sk": "bigint", "inv_warehouse_sk": "bigint", "inv_quantity_on_hand": "bigint", }, "store_sales": { "ss_sold_date_sk": "bigint", "ss_sold_time_sk": "bigint", "ss_item_sk": "bigint", "ss_customer_sk": "bigint", "ss_cdemo_sk": "bigint", "ss_hdemo_sk": "bigint", "ss_addr_sk": "bigint", "ss_store_sk": "bigint", "ss_promo_sk": "bigint", "ss_ticket_number": "bigint", "ss_quantity": "bigint", "ss_wholesale_cost": "double", "ss_list_price": "double", "ss_sales_price": "double", "ss_ext_discount_amt": "double", "ss_ext_sales_price": "double", "ss_ext_wholesale_cost": "double", "ss_ext_list_price": "double", "ss_ext_tax": "double", "ss_coupon_amt": "double", "ss_net_paid": "double", "ss_net_paid_inc_tax": "double", "ss_net_profit": "double", }, "store_returns": { "sr_returned_date_sk": "bigint", "sr_return_time_sk": "bigint", "sr_item_sk": "bigint", "sr_customer_sk": "bigint", "sr_cdemo_sk": "bigint", "sr_hdemo_sk": "bigint", "sr_addr_sk": "bigint", "sr_store_sk": "bigint", "sr_reason_sk": "bigint", "sr_ticket_number": "bigint", "sr_return_quantity": "bigint", "sr_return_amt": "double", "sr_return_tax": "double", "sr_return_amt_inc_tax": "double", "sr_fee": "double", "sr_return_ship_cost": "double", "sr_refunded_cash": "double", "sr_reversed_charge": "double", "sr_store_credit": "double", "sr_net_loss": "double", }, "web_sales": { "ws_sold_date_sk": "bigint", "ws_sold_time_sk": "bigint", "ws_ship_date_sk": "bigint", "ws_item_sk": "bigint", "ws_bill_customer_sk": "bigint", "ws_bill_cdemo_sk": "bigint", "ws_bill_hdemo_sk": "bigint", "ws_bill_addr_sk": "bigint", "ws_ship_customer_sk": "bigint", "ws_ship_cdemo_sk": "bigint", 
"ws_ship_hdemo_sk": "bigint", "ws_ship_addr_sk": "bigint", "ws_web_page_sk": "bigint", "ws_web_site_sk": "bigint", "ws_ship_mode_sk": "bigint", "ws_warehouse_sk": "bigint", "ws_promo_sk": "bigint", "ws_order_number": "bigint", "ws_quantity": "bigint", "ws_wholesale_cost": "double", "ws_list_price": "double", "ws_sales_price": "double", "ws_ext_discount_amt": "double", "ws_ext_sales_price": "double", "ws_ext_wholesale_cost": "double", "ws_ext_list_price": "double", "ws_ext_tax": "double", "ws_coupon_amt": "double", "ws_ext_ship_cost": "double", "ws_net_paid": "double", "ws_net_paid_inc_tax": "double", "ws_net_paid_inc_ship": "double", "ws_net_paid_inc_ship_tax": "double", "ws_net_profit": "double", }, "web_returns": { "wr_returned_date_sk": "bigint", "wr_returned_time_sk": "bigint", "wr_item_sk": "bigint", "wr_refunded_customer_sk": "bigint", "wr_refunded_cdemo_sk": "bigint", "wr_refunded_hdemo_sk": "bigint", "wr_refunded_addr_sk": "bigint", "wr_returning_customer_sk": "bigint", "wr_returning_cdemo_sk": "bigint", "wr_returning_hdemo_sk": "bigint", "wr_returning_addr_sk": "bigint", "wr_web_page_sk": "bigint", "wr_reason_sk": "bigint", "wr_order_number": "bigint", "wr_return_quantity": "bigint", "wr_return_amt": "double", "wr_return_tax": "double", "wr_return_amt_inc_tax": "double", "wr_fee": "double", "wr_return_ship_cost": "double", "wr_refunded_cash": "double", "wr_reversed_charge": "double", "wr_account_credit": "double", "wr_net_loss": "double", }, "call_center": { "cc_call_center_sk": "bigint", "cc_call_center_id": "string", "cc_rec_start_date": "string", "cc_rec_end_date": "string", "cc_closed_date_sk": "bigint", "cc_open_date_sk": "bigint", "cc_name": "string", "cc_class": "string", "cc_employees": "bigint", "cc_sq_ft": "bigint", "cc_hours": "string", "cc_manager": "string", "cc_mkt_id": "bigint", "cc_mkt_class": "string", "cc_mkt_desc": "string", "cc_market_manager": "string", "cc_division": "bigint", "cc_division_name": "string", "cc_company": "bigint", 
"cc_company_name": "string", "cc_street_number": "string", "cc_street_name": "string", "cc_street_type": "string", "cc_suite_number": "string", "cc_city": "string", "cc_county": "string", "cc_state": "string", "cc_zip": "string", "cc_country": "string", "cc_gmt_offset": "double", "cc_tax_percentage": "double", }, "catalog_page": { "cp_catalog_page_sk": "bigint", "cp_catalog_page_id": "string", "cp_start_date_sk": "bigint", "cp_end_date_sk": "bigint", "cp_department": "string", "cp_catalog_number": "bigint", "cp_catalog_page_number": "bigint", "cp_description": "string", "cp_type": "string", }, "customer": { "c_customer_sk": "bigint", "c_customer_id": "string", "c_current_cdemo_sk": "bigint", "c_current_hdemo_sk": "bigint", "c_current_addr_sk": "bigint", "c_first_shipto_date_sk": "bigint", "c_first_sales_date_sk": "bigint", "c_salutation": "string", "c_first_name": "string", "c_last_name": "string", "c_preferred_cust_flag": "string", "c_birth_day": "bigint", "c_birth_month": "bigint", "c_birth_year": "bigint", "c_birth_country": "string", "c_login": "string", "c_email_address": "string", "c_last_review_date": "string", }, "customer_address": { "ca_address_sk": "bigint", "ca_address_id": "string", "ca_street_number": "string", "ca_street_name": "string", "ca_street_type": "string", "ca_suite_number": "string", "ca_city": "string", "ca_county": "string", "ca_state": "string", "ca_zip": "string", "ca_country": "string", "ca_gmt_offset": "double", "ca_location_type": "string", }, "customer_demographics": { "cd_demo_sk": "bigint", "cd_gender": "string", "cd_marital_status": "string", "cd_education_status": "string", "cd_purchase_estimate": "bigint", "cd_credit_rating": "string", "cd_dep_count": "bigint", "cd_dep_employed_count": "bigint", "cd_dep_college_count": "bigint", }, "date_dim": { "d_date_sk": "bigint", "d_date_id": "string", "d_date": "string", "d_month_seq": "bigint", "d_week_seq": "bigint", "d_quarter_seq": "bigint", "d_year": "bigint", "d_dow": "bigint", 
"d_moy": "bigint", "d_dom": "bigint", "d_qoy": "bigint", "d_fy_year": "bigint", "d_fy_quarter_seq": "bigint", "d_fy_week_seq": "bigint", "d_day_name": "string", "d_quarter_name": "string", "d_holiday": "string", "d_weekend": "string", "d_following_holiday": "string", "d_first_dom": "bigint", "d_last_dom": "bigint", "d_same_day_ly": "bigint", "d_same_day_lq": "bigint", "d_current_day": "string", "d_current_week": "string", "d_current_month": "string", "d_current_quarter": "string", "d_current_year": "string", }, "household_demographics": { "hd_demo_sk": "bigint", "hd_income_band_sk": "bigint", "hd_buy_potential": "string", "hd_dep_count": "bigint", "hd_vehicle_count": "bigint", }, "income_band": { "ib_income_band_sk": "bigint", "ib_lower_bound": "bigint", "ib_upper_bound": "bigint", }, "item": { "i_item_sk": "bigint", "i_item_id": "string", "i_rec_start_date": "string", "i_rec_end_date": "string", "i_item_desc": "string", "i_current_price": "double", "i_wholesale_cost": "double", "i_brand_id": "bigint", "i_brand": "string", "i_class_id": "bigint", "i_class": "string", "i_category_id": "bigint", "i_category": "string", "i_manufact_id": "bigint", "i_manufact": "string", "i_size": "string", "i_formulation": "string", "i_color": "string", "i_units": "string", "i_container": "string", "i_manager_id": "bigint", "i_product_name": "string", }, "promotion": { "p_promo_sk": "bigint", "p_promo_id": "string", "p_start_date_sk": "bigint", "p_end_date_sk": "bigint", "p_item_sk": "bigint", "p_cost": "double", "p_response_target": "bigint", "p_promo_name": "string", "p_channel_dmail": "string", "p_channel_email": "string", "p_channel_catalog": "string", "p_channel_tv": "string", "p_channel_radio": "string", "p_channel_press": "string", "p_channel_event": "string", "p_channel_demo": "string", "p_channel_details": "string", "p_purpose": "string", "p_discount_active": "string", }, "reason": {"r_reason_sk": "bigint", "r_reason_id": "string", "r_reason_desc": "string"}, "ship_mode": { 
"sm_ship_mode_sk": "bigint", "sm_ship_mode_id": "string", "sm_type": "string", "sm_code": "string", "sm_carrier": "string", "sm_contract": "string", }, "store": { "s_store_sk": "bigint", "s_store_id": "string", "s_rec_start_date": "string", "s_rec_end_date": "string", "s_closed_date_sk": "bigint", "s_store_name": "string", "s_number_employees": "bigint", "s_floor_space": "bigint", "s_hours": "string", "s_manager": "string", "s_market_id": "bigint", "s_geography_class": "string", "s_market_desc": "string", "s_market_manager": "string", "s_division_id": "bigint", "s_division_name": "string", "s_company_id": "bigint", "s_company_name": "string", "s_street_number": "string", "s_street_name": "string", "s_street_type": "string", "s_suite_number": "string", "s_city": "string", "s_county": "string", "s_state": "string", "s_zip": "string", "s_country": "string", "s_gmt_offset": "double", "s_tax_precentage": "double", }, "time_dim": { "t_time_sk": "bigint", "t_time_id": "string", "t_time": "bigint", "t_hour": "bigint", "t_minute": "bigint", "t_second": "bigint", "t_am_pm": "string", "t_shift": "string", "t_sub_shift": "string", "t_meal_time": "string", }, "warehouse": { "w_warehouse_sk": "bigint", "w_warehouse_id": "string", "w_warehouse_name": "string", "w_warehouse_sq_ft": "bigint", "w_street_number": "string", "w_street_name": "string", "w_street_type": "string", "w_suite_number": "string", "w_city": "string", "w_county": "string", "w_state": "string", "w_zip": "string", "w_country": "string", "w_gmt_offset": "double", }, "web_page": { "wp_web_page_sk": "bigint", "wp_web_page_id": "string", "wp_rec_start_date": "string", "wp_rec_end_date": "string", "wp_creation_date_sk": "bigint", "wp_access_date_sk": "bigint", "wp_autogen_flag": "string", "wp_customer_sk": "bigint", "wp_url": "string", "wp_type": "string", "wp_char_count": "bigint", "wp_link_count": "bigint", "wp_image_count": "bigint", "wp_max_ad_count": "bigint", }, "web_site": { "web_site_sk": "bigint", 
"web_site_id": "string", "web_rec_start_date": "string", "web_rec_end_date": "string", "web_name": "string", "web_open_date_sk": "bigint", "web_close_date_sk": "bigint", "web_class": "string", "web_manager": "string", "web_mkt_id": "bigint", "web_mkt_class": "string", "web_mkt_desc": "string", "web_market_manager": "string", "web_company_id": "bigint", "web_company_name": "string", "web_street_number": "string", "web_street_name": "string", "web_street_type": "string", "web_suite_number": "string", "web_city": "string", "web_county": "string", "web_state": "string", "web_zip": "string", "web_country": "string", "web_gmt_offset": "string", "web_tax_percentage": "double", }, } ================================================ FILE: tests/test_build.py ================================================ import unittest from sqlglot import ( alias, and_, case, condition, except_, exp, from_, intersect, not_, or_, parse_one, select, union, ) class TestBuild(unittest.TestCase): def test_build(self): x = condition("x") x_plus_one = x + 1 # Make sure we're not mutating x by changing its parent to be x_plus_one self.assertIsNone(x.parent) self.assertNotEqual(id(x_plus_one.this), id(x)) for expression, sql, *dialect in [ (lambda: x + 1, "x + 1"), (lambda: 1 + x, "1 + x"), (lambda: x - 1, "x - 1"), (lambda: 1 - x, "1 - x"), (lambda: x * 1, "x * 1"), (lambda: 1 * x, "1 * x"), (lambda: x / 1, "x / 1"), (lambda: 1 / x, "1 / x"), (lambda: x // 1, "CAST(x / 1 AS INT)"), (lambda: 1 // x, "CAST(1 / x AS INT)"), (lambda: x % 1, "x % 1"), (lambda: 1 % x, "1 % x"), (lambda: x**1, "POWER(x, 1)"), (lambda: 1**x, "POWER(1, x)"), (lambda: x & 1, "x AND 1"), (lambda: 1 & x, "1 AND x"), (lambda: x | 1, "x OR 1"), (lambda: 1 | x, "1 OR x"), (lambda: x < 1, "x < 1"), (lambda: 1 < x, "x > 1"), (lambda: x <= 1, "x <= 1"), (lambda: 1 <= x, "x >= 1"), (lambda: x > 1, "x > 1"), (lambda: 1 > x, "x < 1"), (lambda: x >= 1, "x >= 1"), (lambda: 1 >= x, "x <= 1"), (lambda: x.eq(1), "x = 1"), (lambda: 
x.neq(1), "x <> 1"), (lambda: x.is_(exp.Null()), "x IS NULL"), (lambda: x.as_("y"), "x AS y"), (lambda: x.isin(1, "2"), "x IN (1, '2')"), (lambda: x.isin(query="select 1"), "x IN (SELECT 1)"), (lambda: x.isin(unnest="x"), "x IN (SELECT UNNEST(x))"), (lambda: x.isin(unnest="x"), "x IN UNNEST(x)", "bigquery"), (lambda: x.isin(unnest=["x", "y"]), "x IN (SELECT UNNEST(x, y))"), (lambda: x.between(1, 2), "x BETWEEN 1 AND 2"), (lambda: 1 + x + 2 + 3, "1 + x + 2 + 3"), (lambda: 1 + x * 2 + 3, "1 + (x * 2) + 3"), (lambda: x * 1 * 2 + 3, "(x * 1 * 2) + 3"), (lambda: 1 + (x * 2) / 3, "1 + ((x * 2) / 3)"), (lambda: x & "y", "x AND 'y'"), (lambda: x | "y", "x OR 'y'"), (lambda: -x, "-x"), (lambda: ~x, "NOT x"), (lambda: x[1], "x[1]"), (lambda: x[1, 2], "x[1, 2]"), (lambda: x["y"] + 1, "x['y'] + 1"), (lambda: x.like("y"), "x LIKE 'y'"), (lambda: x.ilike("y"), "x ILIKE 'y'"), (lambda: x.rlike("y"), "REGEXP_LIKE(x, 'y')"), ( lambda: case().when("x = 1", "x").else_("bar"), "CASE WHEN x = 1 THEN x ELSE bar END", ), ( lambda: case("x").when("1", "x").else_("bar"), "CASE x WHEN 1 THEN x ELSE bar END", ), (lambda: exp.func("COALESCE", "x", 1), "COALESCE(x, 1)"), (lambda: exp.column("x").desc(), "x DESC"), (lambda: exp.column("x").desc(nulls_first=True), "x DESC NULLS FIRST"), (lambda: select("x"), "SELECT x"), (lambda: select("x"), "SELECT x"), (lambda: select("x", "y"), "SELECT x, y"), (lambda: select("x").from_("tbl"), "SELECT x FROM tbl"), (lambda: select("x", "y").from_("tbl"), "SELECT x, y FROM tbl"), (lambda: select("x").select("y").from_("tbl"), "SELECT x, y FROM tbl"), (lambda: select("comment", "begin"), "SELECT comment, begin"), ( lambda: select("x").select("y", append=False).from_("tbl"), "SELECT y FROM tbl", ), ( lambda: select("x").from_("tbl").from_("tbl2"), "SELECT x FROM tbl2", ), (lambda: select("SUM(x) AS y"), "SELECT SUM(x) AS y"), ( lambda: select("x").from_("tbl").where("x > 0"), "SELECT x FROM tbl WHERE x > 0", ), ( lambda: select("x").from_("tbl").where("x < 4 
OR x > 5"), "SELECT x FROM tbl WHERE x < 4 OR x > 5", ), ( lambda: select("x").from_("tbl").where("x > 0").where("x < 9"), "SELECT x FROM tbl WHERE x > 0 AND x < 9", ), ( lambda: select("x").from_("tbl").where("x > 0", "x < 9"), "SELECT x FROM tbl WHERE x > 0 AND x < 9", ), ( lambda: select("x").from_("tbl").where(None).where(False, ""), "SELECT x FROM tbl WHERE FALSE", ), ( lambda: select("x").from_("tbl").where("x > 0").where("x < 9", append=False), "SELECT x FROM tbl WHERE x < 9", ), ( lambda: select("x").from_("tbl").where("x > 0").lock(), "SELECT x FROM tbl WHERE x > 0 FOR UPDATE", "mysql", ), ( lambda: select("x").from_("tbl").where("x > 0").lock(update=False), "SELECT x FROM tbl WHERE x > 0 FOR SHARE", "postgres", ), ( lambda: select("x").from_("tbl").hint("repartition(100)"), "SELECT /*+ REPARTITION(100) */ x FROM tbl", "spark", ), ( lambda: select("x").from_("tbl").hint("coalesce(3)", "broadcast(x)"), "SELECT /*+ COALESCE(3), BROADCAST(x) */ x FROM tbl", "spark", ), ( lambda: select("x", "y").from_("tbl").group_by("x"), "SELECT x, y FROM tbl GROUP BY x", ), ( lambda: select("x", "y").from_("tbl").group_by("x, y"), "SELECT x, y FROM tbl GROUP BY x, y", ), ( lambda: select("x", "y", "z", "a").from_("tbl").group_by("x, y", "z").group_by("a"), "SELECT x, y, z, a FROM tbl GROUP BY x, y, z, a", ), ( lambda: select(1).from_("tbl").group_by("x with cube"), "SELECT 1 FROM tbl GROUP BY x WITH CUBE", ), ( lambda: select("x").distinct("a", "b").from_("tbl"), "SELECT DISTINCT ON (a, b) x FROM tbl", ), ( lambda: select("x").distinct(distinct=True).from_("tbl"), "SELECT DISTINCT x FROM tbl", ), (lambda: select("x").distinct(distinct=False).from_("tbl"), "SELECT x FROM tbl"), ( lambda: select("x").lateral("OUTER explode(y) tbl2 AS z").from_("tbl"), "SELECT x FROM tbl LATERAL VIEW OUTER EXPLODE(y) tbl2 AS z", ), ( lambda: select("x").from_("tbl").join("tbl2 ON tbl.y = tbl2.y"), "SELECT x FROM tbl JOIN tbl2 ON tbl.y = tbl2.y", ), ( lambda: 
select("x").from_("tbl").join("tbl2", on="tbl.y = tbl2.y"), "SELECT x FROM tbl JOIN tbl2 ON tbl.y = tbl2.y", ), ( lambda: select("x").from_("tbl").join("tbl2", on=["tbl.y = tbl2.y", "a = b"]), "SELECT x FROM tbl JOIN tbl2 ON tbl.y = tbl2.y AND a = b", ), ( lambda: select("x").from_("tbl").join("tbl2", join_type="left outer"), "SELECT x FROM tbl LEFT OUTER JOIN tbl2", ), ( lambda: ( select("x").from_("tbl").join(exp.Table(this="tbl2"), join_type="left outer") ), "SELECT x FROM tbl LEFT OUTER JOIN tbl2", ), ( lambda: ( select("x") .from_("tbl") .join(exp.Table(this="tbl2"), join_type="left outer", join_alias="foo") ), "SELECT x FROM tbl LEFT OUTER JOIN tbl2 AS foo", ), ( lambda: ( select("x").from_("tbl").join(select("y").from_("tbl2"), join_type="left outer") ), "SELECT x FROM tbl LEFT OUTER JOIN (SELECT y FROM tbl2)", ), ( lambda: ( select("x") .from_("tbl") .join( select("y").from_("tbl2").subquery("aliased"), join_type="left outer", ) ), "SELECT x FROM tbl LEFT OUTER JOIN (SELECT y FROM tbl2) AS aliased", ), ( lambda: ( select("x") .from_("tbl") .join( select("y").from_("tbl2"), join_type="left outer", join_alias="aliased", ) ), "SELECT x FROM tbl LEFT OUTER JOIN (SELECT y FROM tbl2) AS aliased", ), ( lambda: ( select("x").from_("tbl").join(parse_one("left join x", into=exp.Join), on="a=b") ), "SELECT x FROM tbl LEFT JOIN x ON a = b", ), ( lambda: select("x").from_("tbl").join("left join x", on="a=b"), "SELECT x FROM tbl LEFT JOIN x ON a = b", ), ( lambda: ( select("x").from_("tbl").join("select b from tbl2", on="a=b", join_type="left") ), "SELECT x FROM tbl LEFT JOIN (SELECT b FROM tbl2) ON a = b", ), ( lambda: ( select("x") .from_("tbl") .join( "select b from tbl2", on="a=b", join_type="left", join_alias="aliased", ) ), "SELECT x FROM tbl LEFT JOIN (SELECT b FROM tbl2) AS aliased ON a = b", ), ( lambda: ( select("x", "y", "z") .from_("merged_df") .join("vte_diagnosis_df", using=["patient_id", "encounter_id"]) ), "SELECT x, y, z FROM merged_df JOIN 
vte_diagnosis_df USING (patient_id, encounter_id)", ), ( lambda: ( select("x", "y", "z") .from_("merged_df") .join( "vte_diagnosis_df", using=[exp.to_identifier("patient_id"), exp.to_identifier("encounter_id")], ) ), "SELECT x, y, z FROM merged_df JOIN vte_diagnosis_df USING (patient_id, encounter_id)", ), ( lambda: parse_one("JOIN x", into=exp.Join).on("y = 1", "z = 1"), "JOIN x ON y = 1 AND z = 1", ), ( lambda: parse_one("JOIN x", into=exp.Join).on("y = 1"), "JOIN x ON y = 1", ), ( lambda: parse_one("JOIN x", into=exp.Join).using("bar", "bob"), "JOIN x USING (bar, bob)", ), ( lambda: parse_one("JOIN x", into=exp.Join).using("bar"), "JOIN x USING (bar)", ), ( lambda: select("x").from_("foo").join("bla", using="bob"), "SELECT x FROM foo JOIN bla USING (bob)", ), ( lambda: select("x").from_("foo").join("bla", using="bob"), "SELECT x FROM foo JOIN bla USING (bob)", ), ( lambda: select("x", "COUNT(y)").from_("tbl").group_by("x").having("COUNT(y) > 0"), "SELECT x, COUNT(y) FROM tbl GROUP BY x HAVING COUNT(y) > 0", ), ( lambda: select("x").from_("tbl").order_by("y"), "SELECT x FROM tbl ORDER BY y", ), ( lambda: parse_one("select * from x union select * from y").order_by("y"), "SELECT * FROM x UNION SELECT * FROM y ORDER BY y", ), ( lambda: select("x").from_("tbl").cluster_by("y"), "SELECT x FROM tbl CLUSTER BY y", "hive", ), ( lambda: select("x").from_("tbl").sort_by("y"), "SELECT x FROM tbl SORT BY y", "hive", ), ( lambda: select("x").from_("tbl").order_by("x, y DESC"), "SELECT x FROM tbl ORDER BY x, y DESC", ), ( lambda: select("x").from_("tbl").cluster_by("x, y DESC"), "SELECT x FROM tbl CLUSTER BY x, y DESC", "hive", ), ( lambda: select("x").from_("tbl").sort_by("x, y DESC"), "SELECT x FROM tbl SORT BY x, y DESC", "hive", ), ( lambda: select("x", "y", "z", "a").from_("tbl").order_by("x, y", "z").order_by("a"), "SELECT x, y, z, a FROM tbl ORDER BY x, y, z, a", ), ( lambda: ( select("x", "y", "z", "a").from_("tbl").cluster_by("x, y", "z").cluster_by("a") ), "SELECT x, 
y, z, a FROM tbl CLUSTER BY x, y, z, a", "hive", ), ( lambda: select("x", "y", "z", "a").from_("tbl").sort_by("x, y", "z").sort_by("a"), "SELECT x, y, z, a FROM tbl SORT BY x, y, z, a", "hive", ), (lambda: select("x").from_("tbl").limit(10), "SELECT x FROM tbl LIMIT 10"), ( lambda: select("x").from_("tbl").offset(10), "SELECT x FROM tbl OFFSET 10", ), ( lambda: select("x").from_("tbl").with_("tbl", as_="SELECT x FROM tbl2"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_="SELECT x FROM tbl2", materialized=True) ), "WITH tbl AS MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_="SELECT x FROM tbl2", materialized=False) ), "WITH tbl AS NOT MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x").from_("tbl").with_("tbl", as_="SELECT x FROM tbl2", recursive=True) ), "WITH RECURSIVE tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_=select("x").from_("tbl2"), recursive=True, materialized=True) ), "WITH RECURSIVE tbl AS MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_=select("x").from_("tbl2"), recursive=True, materialized=False) ), "WITH RECURSIVE tbl AS NOT MATERIALIZED (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: select("x").from_("tbl").with_("tbl", as_=select("x").from_("tbl2")), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x").from_("tbl").with_("tbl (x, y)", as_=select("x", "y").from_("tbl2")) ), "WITH tbl(x, y) AS (SELECT x, y FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_=select("x").from_("tbl2")) .with_("tbl2", as_=select("x").from_("tbl3")) ), "WITH tbl AS (SELECT x FROM tbl2), tbl2 AS (SELECT x FROM tbl3) SELECT x FROM tbl", ), ( lambda: ( select("x") .from_("tbl") .with_("tbl", as_=select("x", 
"y").from_("tbl2")) .select("y") ), "WITH tbl AS (SELECT x, y FROM tbl2) SELECT x, y FROM tbl", ), ( lambda: select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl"), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl", ), ( lambda: ( select("x") .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .group_by("x") ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl GROUP BY x", ), ( lambda: ( select("x") .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .order_by("x") ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl ORDER BY x", ), ( lambda: ( select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").limit(10) ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl LIMIT 10", ), ( lambda: ( select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").offset(10) ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl OFFSET 10", ), ( lambda: ( select("x") .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .join("tbl3") ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl, tbl3", ), ( lambda: ( select("x").with_("tbl", as_=select("x").from_("tbl2")).from_("tbl").distinct() ), "WITH tbl AS (SELECT x FROM tbl2) SELECT DISTINCT x FROM tbl", ), ( lambda: ( select("x") .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .where("x > 10") ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl WHERE x > 10", ), ( lambda: ( select("x") .with_("tbl", as_=select("x").from_("tbl2")) .from_("tbl") .having("x > 20") ), "WITH tbl AS (SELECT x FROM tbl2) SELECT x FROM tbl HAVING x > 20", ), (lambda: select("x").from_("tbl").subquery(), "(SELECT x FROM tbl)"), ( lambda: select("x").from_("tbl").subquery("y"), "(SELECT x FROM tbl) AS y", ), ( lambda: select("x").from_(select("x").from_("tbl").subquery()), "SELECT x FROM (SELECT x FROM tbl)", ), (lambda: from_("tbl").select("x"), "SELECT x FROM tbl"), ( lambda: parse_one("SELECT a FROM tbl").assert_is(exp.Select).select("b"), "SELECT a, b FROM tbl", ), ( lambda: 
parse_one("SELECT * FROM y").assert_is(exp.Select).ctas("x"), "CREATE TABLE x AS SELECT * FROM y", ), ( lambda: ( parse_one("SELECT * FROM y") .assert_is(exp.Select) .ctas("foo.x", properties={"format": "parquet", "y": "2"}) ), "CREATE TABLE foo.x STORED AS PARQUET TBLPROPERTIES ('y'='2') AS SELECT * FROM y", "hive", ), (lambda: and_("x=1", "y=1"), "x = 1 AND y = 1"), (lambda: condition("x").and_("y['a']").and_("1"), "(x AND y['a']) AND 1"), (lambda: condition("x=1").and_("y=1"), "x = 1 AND y = 1"), (lambda: and_("x=1", "y=1", "z=1"), "x = 1 AND y = 1 AND z = 1"), (lambda: condition("x=1").and_("y=1", "z=1"), "x = 1 AND y = 1 AND z = 1"), (lambda: and_("x=1", and_("y=1", "z=1")), "x = 1 AND (y = 1 AND z = 1)"), ( lambda: condition("x=1").and_("y=1").and_("z=1"), "(x = 1 AND y = 1) AND z = 1", ), (lambda: or_(and_("x=1", "y=1"), "z=1"), "(x = 1 AND y = 1) OR z = 1"), ( lambda: condition("x=1").and_("y=1").or_("z=1"), "(x = 1 AND y = 1) OR z = 1", ), (lambda: or_("z=1", and_("x=1", "y=1")), "z = 1 OR (x = 1 AND y = 1)"), ( lambda: or_("z=1 OR a=1", and_("x=1", "y=1")), "(z = 1 OR a = 1) OR (x = 1 AND y = 1)", ), (lambda: not_("x=1"), "NOT x = 1"), (lambda: condition("x=1").not_(), "NOT x = 1"), (lambda: condition("x=1").and_("y=1").not_(), "NOT (x = 1 AND y = 1)"), ( lambda: select("*").from_("x").where(condition("y=1").and_("z=1")), "SELECT * FROM x WHERE y = 1 AND z = 1", ), ( lambda: exp.subquery("select x from tbl", "foo").select("x").where("x > 0"), "SELECT x FROM (SELECT x FROM tbl) AS foo WHERE x > 0", ), ( lambda: exp.subquery("select x from tbl UNION select x from bar", "unioned").select( "x" ), "SELECT x FROM (SELECT x FROM tbl UNION SELECT x FROM bar) AS unioned", ), (lambda: parse_one("(SELECT 1)").select("2"), "(SELECT 1, 2)"), ( lambda: parse_one("(SELECT 1)").limit(1), "(SELECT 1) LIMIT 1", ), ( lambda: parse_one("WITH t AS (SELECT 1) (SELECT 1)").limit(1), "WITH t AS (SELECT 1) SELECT 1 LIMIT 1", ), ( lambda: parse_one("(SELECT 1 LIMIT 2)").limit(1), 
"(SELECT 1 LIMIT 2) LIMIT 1", ), ( lambda: parse_one("SELECT 1 UNION SELECT 2").limit(5).offset(2), "SELECT 1 UNION SELECT 2 LIMIT 5 OFFSET 2", ), (lambda: parse_one("(SELECT 1)").subquery(), "((SELECT 1))"), (lambda: parse_one("(SELECT 1)").subquery("alias"), "((SELECT 1)) AS alias"), ( lambda: parse_one("(select * from foo)").with_("foo", "select 1 as c"), "WITH foo AS (SELECT 1 AS c) (SELECT * FROM foo)", ), ( lambda: exp.update("tbl", {"x": None, "y": {"x": 1}}), "UPDATE tbl SET x = NULL, y = MAP(ARRAY('x'), ARRAY(1))", ), ( lambda: exp.update("tbl", {"x": 1}, where="y > 0"), "UPDATE tbl SET x = 1 WHERE y > 0", ), ( lambda: exp.update("tbl", {"x": 1}, where=exp.condition("y > 0")), "UPDATE tbl SET x = 1 WHERE y > 0", ), ( lambda: exp.update("tbl", {"x": 1}, from_="tbl2"), "UPDATE tbl SET x = 1 FROM tbl2", ), ( lambda: exp.update("tbl", {"x": 1}, from_="tbl2 cross join tbl3"), "UPDATE tbl SET x = 1 FROM tbl2 CROSS JOIN tbl3", ), ( lambda: exp.update( "my_table", {"x": 1}, from_="baz", where="my_table.id = baz.id", with_={"baz": "SELECT id FROM foo UNION SELECT id FROM bar"}, ), "WITH baz AS (SELECT id FROM foo UNION SELECT id FROM bar) UPDATE my_table SET x = 1 FROM baz WHERE my_table.id = baz.id", ), ( lambda: exp.update("my_table").set_("x = 1"), "UPDATE my_table SET x = 1", ), ( lambda: exp.update("my_table").set_("x = 1").where("y = 2"), "UPDATE my_table SET x = 1 WHERE y = 2", ), ( lambda: exp.update("my_table").set_("a = 1").set_("b = 2"), "UPDATE my_table SET a = 1, b = 2", ), ( lambda: ( exp.update("my_table") .set_("x = 1") .where("my_table.id = baz.id") .from_("baz") .with_("baz", "SELECT id FROM foo") ), "WITH baz AS (SELECT id FROM foo) UPDATE my_table SET x = 1 FROM baz WHERE my_table.id = baz.id", ), ( lambda: union("SELECT * FROM foo", "SELECT * FROM bla"), "SELECT * FROM foo UNION SELECT * FROM bla", ), ( lambda: parse_one("SELECT * FROM foo").union("SELECT * FROM bla"), "SELECT * FROM foo UNION SELECT * FROM bla", ), ( lambda: intersect("SELECT 
* FROM foo", "SELECT * FROM bla"), "SELECT * FROM foo INTERSECT SELECT * FROM bla", ), ( lambda: parse_one("SELECT * FROM foo").intersect("SELECT * FROM bla"), "SELECT * FROM foo INTERSECT SELECT * FROM bla", ), ( lambda: except_("SELECT * FROM foo", "SELECT * FROM bla"), "SELECT * FROM foo EXCEPT SELECT * FROM bla", ), ( lambda: parse_one("SELECT * FROM foo").except_("SELECT * FROM bla"), "SELECT * FROM foo EXCEPT SELECT * FROM bla", ), ( lambda: parse_one("(SELECT * FROM foo)").union("SELECT * FROM bla"), "(SELECT * FROM foo) UNION SELECT * FROM bla", ), ( lambda: parse_one("(SELECT * FROM foo)").union("SELECT * FROM bla", distinct=False), "(SELECT * FROM foo) UNION ALL SELECT * FROM bla", ), ( lambda: alias(parse_one("LAG(x) OVER (PARTITION BY y)"), "a"), "LAG(x) OVER (PARTITION BY y) AS a", ), ( lambda: alias(parse_one("LAG(x) OVER (ORDER BY z)"), "a"), "LAG(x) OVER (ORDER BY z) AS a", ), ( lambda: alias(parse_one("LAG(x) OVER (PARTITION BY y ORDER BY z)"), "a"), "LAG(x) OVER (PARTITION BY y ORDER BY z) AS a", ), ( lambda: alias(parse_one("LAG(x) OVER ()"), "a"), "LAG(x) OVER () AS a", ), (lambda: exp.values([("1", 2)]), "VALUES ('1', 2)"), (lambda: exp.values([("1", 2)], "alias"), "(VALUES ('1', 2)) AS alias"), (lambda: exp.values([("1", 2), ("2", 3)]), "VALUES ('1', 2), ('2', 3)"), ( lambda: exp.values( [("1", 2, None), ("2", 3, None)], "alias", ["col1", "col2", "col3"] ), "(VALUES ('1', 2, NULL), ('2', 3, NULL)) AS alias(col1, col2, col3)", ), (lambda: exp.delete("y", where="x > 1"), "DELETE FROM y WHERE x > 1"), (lambda: exp.delete("y", where=exp.and_("x > 1")), "DELETE FROM y WHERE x > 1"), ( lambda: ( select("AVG(a) OVER b") .from_("table") .window("b AS (PARTITION BY c ORDER BY d)") ), "SELECT AVG(a) OVER b FROM table WINDOW b AS (PARTITION BY c ORDER BY d)", ), ( lambda: ( select("AVG(a) OVER b", "MIN(c) OVER d") .from_("table") .window("b AS (PARTITION BY e ORDER BY f)") .window("d AS (PARTITION BY g ORDER BY h)") ), "SELECT AVG(a) OVER b, MIN(c) OVER 
d FROM table WINDOW b AS (PARTITION BY e ORDER BY f), d AS (PARTITION BY g ORDER BY h)", ), ( lambda: ( select("*") .from_("table") .qualify("row_number() OVER (PARTITION BY a ORDER BY b) = 1") ), "SELECT * FROM table QUALIFY ROW_NUMBER() OVER (PARTITION BY a ORDER BY b) = 1", ), (lambda: exp.delete("tbl1", "x = 1").delete("tbl2"), "DELETE FROM tbl2 WHERE x = 1"), (lambda: exp.delete("tbl").where("x = 1"), "DELETE FROM tbl WHERE x = 1"), (lambda: exp.delete(exp.table_("tbl")), "DELETE FROM tbl"), ( lambda: exp.delete("tbl", "x = 1").where("y = 2"), "DELETE FROM tbl WHERE x = 1 AND y = 2", ), ( lambda: exp.delete("tbl", "x = 1").where(exp.condition("y = 2").or_("z = 3")), "DELETE FROM tbl WHERE x = 1 AND (y = 2 OR z = 3)", ), ( lambda: exp.delete("tbl").where("x = 1").returning("*", dialect="postgres"), "DELETE FROM tbl WHERE x = 1 RETURNING *", "postgres", ), ( lambda: exp.delete("tbl", where="x = 1", returning="*", dialect="postgres"), "DELETE FROM tbl WHERE x = 1 RETURNING *", "postgres", ), ( lambda: exp.insert("SELECT * FROM tbl2", "tbl"), "INSERT INTO tbl SELECT * FROM tbl2", ), ( lambda: exp.insert("SELECT * FROM tbl2", "tbl", returning="*"), "INSERT INTO tbl SELECT * FROM tbl2 RETURNING *", ), ( lambda: exp.insert("SELECT * FROM tbl2", "tbl", overwrite=True), "INSERT OVERWRITE TABLE tbl SELECT * FROM tbl2", ), ( lambda: exp.insert("VALUES (1, 2), (3, 4)", "tbl", columns=["cola", "colb"]), "INSERT INTO tbl (cola, colb) VALUES (1, 2), (3, 4)", ), ( lambda: exp.insert("VALUES (1), (2)", "tbl", columns=["col a"]), 'INSERT INTO tbl ("col a") VALUES (1), (2)', ), ( lambda: exp.insert("SELECT * FROM cte", "t").with_("cte", as_="SELECT x FROM tbl"), "WITH cte AS (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte", ), ( lambda: exp.insert("SELECT * FROM cte", "t").with_( "cte", as_="SELECT x FROM tbl", materialized=True ), "WITH cte AS MATERIALIZED (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte", ), ( lambda: exp.insert("SELECT * FROM cte", "t").with_( "cte", 
as_="SELECT x FROM tbl", materialized=False ), "WITH cte AS NOT MATERIALIZED (SELECT x FROM tbl) INSERT INTO t SELECT * FROM cte", ), ( lambda: exp.convert((exp.column("x"), exp.column("y"))).isin((1, 2), (3, 4)), "(x, y) IN ((1, 2), (3, 4))", "postgres", ), (lambda: exp.cast("CAST(x AS INT)", "int"), "CAST(x AS INT)"), (lambda: exp.cast("CAST(x AS TEXT)", "int"), "CAST(CAST(x AS TEXT) AS INT)"), ( lambda: exp.rename_column("table1", "c1", "c2", True), "ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2", ), ( lambda: exp.rename_column("table1", "c1", "c2", False), "ALTER TABLE table1 RENAME COLUMN c1 TO c2", ), ( lambda: exp.rename_column("table1", "c1", "c2"), "ALTER TABLE table1 RENAME COLUMN c1 TO c2", ), ( lambda: exp.merge( "WHEN MATCHED THEN UPDATE SET col1 = source.col1", "WHEN NOT MATCHED THEN INSERT (col1) VALUES (source.col1)", into="target_table", using="source_table", on="target_table.id = source_table.id", ), "MERGE INTO target_table USING source_table ON target_table.id = source_table.id WHEN MATCHED THEN UPDATE SET col1 = source.col1 WHEN NOT MATCHED THEN INSERT (col1) VALUES (source.col1)", ), ( lambda: exp.merge( "WHEN MATCHED AND source.is_deleted = 1 THEN DELETE", "WHEN MATCHED THEN UPDATE SET val = source.val", "WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)", into="target_table", using="source_table", on="target_table.id = source_table.id", ), "MERGE INTO target_table USING source_table ON target_table.id = source_table.id WHEN MATCHED AND source.is_deleted = 1 THEN DELETE WHEN MATCHED THEN UPDATE SET val = source.val WHEN NOT MATCHED THEN INSERT (id, val) VALUES (source.id, source.val)", ), ( lambda: exp.merge( "WHEN MATCHED THEN UPDATE SET target.name = source.name", into=exp.table_("target_table").as_("target"), using=exp.table_("source_table").as_("source"), on="target.id = source.id", ), "MERGE INTO target_table AS target USING source_table AS source ON target.id = source.id WHEN MATCHED THEN UPDATE SET target.name 
= source.name", ), ( lambda: exp.merge( "WHEN MATCHED THEN UPDATE SET target.name = source.name", into=exp.table_("target_table").as_("target"), using=exp.table_("source_table").as_("source"), on="target.id = source.id", returning="target.*", ), "MERGE INTO target_table AS target USING source_table AS source ON target.id = source.id WHEN MATCHED THEN UPDATE SET target.name = source.name RETURNING target.*", ), ( lambda: exp.merge( exp.When( matched=True, then=exp.Update( expressions=[ exp.column("name", "target").eq(exp.column("name", "source")) ] ), ), into=exp.table_("target_table").as_("target"), using=exp.table_("source_table").as_("source"), on="target.id = source.id", returning="target.*", ), "MERGE INTO target_table AS target USING source_table AS source ON target.id = source.id WHEN MATCHED THEN UPDATE SET target.name = source.name RETURNING target.*", ), ( lambda: exp.union("SELECT 1", "SELECT 2", "SELECT 3", "SELECT 4"), "SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4", ), ( lambda: ( select("x") .with_("var1", as_=select("x").from_("tbl2").subquery(), scalar=True) .from_("tbl") .where("x > var1") ), "WITH (SELECT x FROM tbl2) AS var1 SELECT x FROM tbl WHERE x > var1", "clickhouse", ), ( lambda: ( select("x") .with_("var1", as_=select("x").from_("tbl2"), scalar=True) .from_("tbl") .where("x > var1") ), "WITH (SELECT x FROM tbl2) AS var1 SELECT x FROM tbl WHERE x > var1", "clickhouse", ), ]: with self.subTest(sql): self.assertEqual(expression().sql(dialect[0] if dialect else None), sql) ================================================ FILE: tests/test_dialect_entry_points.py ================================================ import unittest from unittest.mock import Mock, patch from sqlglot import Dialect from sqlglot.dialects.dialect import Dialect as DialectBase class FakeDialect(DialectBase): pass class TestDialectEntryPoints(unittest.TestCase): def setUp(self): Dialect._classes.clear() def tearDown(self): Dialect._classes.clear() def 
test_entry_point_plugin_discovery_modern_api(self): fake_entry_point = Mock() fake_entry_point.name = "fakedb" fake_entry_point.load.return_value = FakeDialect mock_selectable = Mock() mock_selectable.select.return_value = [fake_entry_point] mock_entry_points = Mock(return_value=mock_selectable) with patch("sqlglot.dialects.dialect.entry_points", mock_entry_points): dialect = Dialect.get("fakedb") self.assertIsNotNone(dialect) self.assertEqual(dialect, FakeDialect) fake_entry_point.load.assert_called_once() mock_selectable.select.assert_called_once_with(group="sqlglot.dialects", name="fakedb") def test_entry_point_plugin_discovery_legacy_api(self): fake_entry_point = Mock() fake_entry_point.name = "fakedb" fake_entry_point.load.return_value = FakeDialect mock_dict = Mock(spec=["get"]) mock_dict.get.return_value = [fake_entry_point] mock_entry_points = Mock(return_value=mock_dict) with patch("sqlglot.dialects.dialect.entry_points", mock_entry_points): dialect = Dialect.get("fakedb") self.assertIsNotNone(dialect) self.assertEqual(dialect, FakeDialect) fake_entry_point.load.assert_called_once() mock_dict.get.assert_called_once_with("sqlglot.dialects", []) def test_entry_point_plugin_not_found(self): mock_selectable = Mock() mock_selectable.select.return_value = [] mock_entry_points = Mock(return_value=mock_selectable) with patch("sqlglot.dialects.dialect.entry_points", mock_entry_points): dialect = Dialect.get("nonexistent") self.assertIsNone(dialect) ================================================ FILE: tests/test_dialect_imports.py ================================================ import unittest from unittest.mock import patch from sqlglot.dialects import __getattr__ as dialects_getattr class TestDialectImports(unittest.TestCase): def test_athena_import_no_deadlock(self): """Test that importing Athena dialect doesn't cause a deadlock. Athena imports from other dialects during its module initialization, which could cause a deadlock with a non-reentrant lock. 
""" # This should complete without hanging from sqlglot.dialects import Athena # Verify it imported successfully self.assertIsNotNone(Athena) self.assertTrue(hasattr(Athena, "Parser")) self.assertTrue(hasattr(Athena, "Generator")) def test_nested_dialect_import_with_rlock(self): """Test that nested dialect imports work with RLock.""" import_count = 0 def mock_import_module(name): nonlocal import_count import_count += 1 # Simulate Athena importing Hive if name.endswith(".athena"): # This simulates athena.py trying to import Hive dialects_getattr("Hive") # Return a mock module with the expected attribute import types module = types.ModuleType(name) dialect_name = name.split(".")[-1].title() setattr(module, dialect_name, f"Mock{dialect_name}") return module with patch("importlib.import_module", side_effect=mock_import_module): # This should not deadlock result = dialects_getattr("Athena") self.assertEqual(result, "MockAthena") # Should have imported both Athena and Hive self.assertEqual(import_count, 2) ================================================ FILE: tests/test_diff.py ================================================ import unittest from sqlglot import exp, parse_one from sqlglot.diff import Insert, Move, Remove, Update, diff def diff_delta_only(source, target, matchings=None, **kwargs): return diff(source, target, matchings=matchings, delta_only=True, **kwargs) class TestDiff(unittest.TestCase): def test_simple(self): self._validate_delta_only( diff_delta_only(parse_one("SELECT a + b"), parse_one("SELECT a - b")), [ Remove(expression=parse_one("a + b")), # the Add node Insert(expression=parse_one("a - b")), # the Sub node Move(source=parse_one("a"), target=parse_one("a")), # the `a` Column node Move(source=parse_one("b"), target=parse_one("b")), # the `b` Column node ], ) self._validate_delta_only( diff_delta_only(parse_one("SELECT a, b, c"), parse_one("SELECT a, c")), [ Remove(expression=parse_one("b")), # the Column node ], ) self._validate_delta_only( 
diff_delta_only(parse_one("SELECT a, b"), parse_one("SELECT a, b, c")),
            [
                Insert(expression=parse_one("c")),  # the Column node
            ],
        )

        self._validate_delta_only(
            diff_delta_only(
                parse_one("SELECT a FROM table_one"),
                parse_one("SELECT a FROM table_two"),
            ),
            [
                Update(
                    source=exp.to_table("table_one", quoted=False),
                    target=exp.to_table("table_two", quoted=False),
                ),  # the Table node
            ],
        )

    # Switching a lambda's parameter and body from `a` to `b` is expected to be
    # reported as a single Update between the two Lambda nodes.
    def test_lambda(self):
        self._validate_delta_only(
            diff_delta_only(
                parse_one("SELECT a, b, c, x(a -> a)"), parse_one("SELECT a, b, c, x(b -> b)")
            ),
            [
                Update(
                    source=exp.Lambda(
                        this=exp.to_identifier("a"), expressions=[exp.to_identifier("a")]
                    ),
                    target=exp.Lambda(
                        this=exp.to_identifier("b"), expressions=[exp.to_identifier("b")]
                    ),
                ),
            ],
        )

    # Quoted UDF calls: a different function name is expected as Insert + Remove of the
    # whole call; a different last argument as Insert + Remove of just that column.
    def test_udf(self):
        self._validate_delta_only(
            diff_delta_only(
                parse_one('SELECT a, b, "my.udf1"()'), parse_one('SELECT a, b, "my.udf2"()')
            ),
            [
                Insert(expression=parse_one('"my.udf2"()')),
                Remove(expression=parse_one('"my.udf1"()')),
            ],
        )
        self._validate_delta_only(
            diff_delta_only(
                parse_one('SELECT a, b, "my.udf"(x, y, z)'),
                parse_one('SELECT a, b, "my.udf"(x, y, w)'),
            ),
            [
                Insert(expression=exp.column("w")),
                Remove(expression=exp.column("z")),
            ],
        )

    # Nodes that only change position (projection order, operand order, or moving a
    # column from the projection list into CONCAT) are expected as Move edits.
    def test_node_position_changed(self):
        expr_src = parse_one("SELECT a, b, c")
        expr_tgt = parse_one("SELECT c, a, b")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Move(source=expr_src.selects[2], target=expr_tgt.selects[0]),
            ],
        )

        expr_src = parse_one("SELECT a + b")
        expr_tgt = parse_one("SELECT b + a")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Move(source=expr_src.selects[0].left, target=expr_tgt.selects[0].right),
            ],
        )

        expr_src = parse_one("SELECT aaaa AND bbbb")
        expr_tgt = parse_one("SELECT bbbb AND aaaa")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Move(source=expr_src.selects[0].left, target=expr_tgt.selects[0].right),
            ],
        )

        expr_src = parse_one("SELECT aaaa OR bbbb OR cccc")
        expr_tgt = parse_one("SELECT cccc OR bbbb OR aaaa")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Move(source=expr_src.selects[0].left.left, target=expr_tgt.selects[0].right),
                Move(source=expr_src.selects[0].right, target=expr_tgt.selects[0].left.left),
            ],
        )

        expr_src = parse_one("SELECT a, b FROM t WHERE CONCAT('a', 'b') = 'ab'")
        expr_tgt = parse_one("SELECT a FROM t WHERE CONCAT('a', 'b', b) = 'ab'")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Move(source=expr_src.selects[1], target=expr_tgt.find(exp.Concat).expressions[-1]),
            ],
        )

        expr_src = parse_one("SELECT a as a, b as b FROM t WHERE CONCAT('a', 'b') = 'ab'")
        expr_tgt = parse_one("SELECT a as a FROM t WHERE CONCAT('a', 'b', b) = 'ab'")

        # With aliases, the Alias wrapper is removed and only the column under it moves.
        b_alias = expr_src.selects[1]

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Remove(expression=b_alias),
                Move(source=b_alias.this, target=expr_tgt.find(exp.Concat).expressions[-1]),
            ],
        )

    # The two queries differ only inside cte1: LOWER(c) AS c vs c, and the filter literal.
    def test_cte(self):
        expr_src = """ WITH cte1 AS (SELECT a, b, LOWER(c) AS c FROM table_one WHERE d = 'filter'), cte2 AS (SELECT d, e, f FROM table_two) SELECT a, b, d, e FROM cte1 JOIN cte2 ON f = c """
        expr_tgt = """ WITH cte1 AS (SELECT a, b, c FROM table_one WHERE d = 'different_filter'), cte2 AS (SELECT d, e, f FROM table_two) SELECT a, b, d, e FROM cte1 JOIN cte2 ON f = c """

        self._validate_delta_only(
            diff_delta_only(parse_one(expr_src), parse_one(expr_tgt)),
            [
                Remove(expression=parse_one("LOWER(c) AS c")),  # the Alias node
                Remove(expression=parse_one("LOWER(c)")),  # the Lower node
                Remove(expression=parse_one("'filter'")),  # the Literal node
                Insert(expression=parse_one("'different_filter'")),  # the Literal node
                Move(source=parse_one("c"), target=parse_one("c")),  # the new Column c
            ],
        )

    # LEFT -> RIGHT replaces the Join node while its table and ON condition are moved;
    # keyword case alone (INNER vs inner, LEFT vs left) is expected to give an empty delta.
    def test_join(self):
        expr_src = parse_one("SELECT a, b FROM t1 LEFT JOIN t2 ON t1.key = t2.key")
        expr_tgt = parse_one("SELECT a, b FROM t1 RIGHT JOIN t2 ON t1.key = t2.key")

        src_join = expr_src.find(exp.Join)
        tgt_join = expr_tgt.find(exp.Join)

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Remove(expression=src_join),
                Insert(expression=tgt_join),
                Move(source=exp.to_table("t2"), target=exp.to_table("t2")),
                Move(source=src_join.args["on"], target=tgt_join.args["on"]),
            ],
        )

        expr_src = parse_one("SELECT a.x FROM a INNER JOIN b ON a.x = b.y LEFT JOIN c ON a.p = c.q")
        expr_tgt = parse_one("SELECT a.x FROM a inner JOIN b ON a.x = b.y left JOIN c ON a.p = c.q")

        self._validate_delta_only(diff_delta_only(expr_src, expr_tgt), [])

    def test_window_functions(self):
        expr_src = parse_one("SELECT ROW_NUMBER() OVER (PARTITION BY a ORDER BY b)")
        expr_tgt = parse_one("SELECT RANK() OVER (PARTITION BY a ORDER BY b)")

        # Diffing an expression against itself must produce no edits.
        self._validate_delta_only(diff_delta_only(expr_src, expr_src), [])

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Remove(expression=parse_one("ROW_NUMBER()")),
                Insert(expression=parse_one("RANK()")),
                Update(source=expr_src.selects[0], target=expr_tgt.selects[0]),
            ],
        )

        # Both sides are parsed with the "oracle" dialect: OVER (...) vs KEEP (...).
        expr_src = parse_one("SELECT MAX(x) OVER (ORDER BY y) FROM z", "oracle")
        expr_tgt = parse_one("SELECT MAX(x) KEEP (DENSE_RANK LAST ORDER BY y) FROM z", "oracle")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [Update(source=expr_src.selects[0], target=expr_tgt.selects[0])],
        )

    # Without pre-matchings the root SELECT is removed/inserted and literal 1 is moved;
    # passing (expr_src, expr_tgt) as a matching leaves only the three inserted literals.
    def test_pre_matchings(self):
        expr_src = parse_one("SELECT 1")
        expr_tgt = parse_one("SELECT 1, 2, 3, 4")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Remove(expression=expr_src),
                Insert(expression=expr_tgt),
                Insert(expression=exp.Literal.number(2)),
                Insert(expression=exp.Literal.number(3)),
                Insert(expression=exp.Literal.number(4)),
                Move(source=exp.Literal.number(1), target=exp.Literal.number(1)),
            ],
        )

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt)]),
            [
                Insert(expression=exp.Literal.number(2)),
                Insert(expression=exp.Literal.number(3)),
                Insert(expression=exp.Literal.number(4)),
            ],
        )

        # The same pair listed twice is expected to give the same delta.
        self._validate_delta_only(
            diff_delta_only(
                expr_src, expr_tgt, matchings=[(expr_src, expr_tgt), (expr_src, expr_tgt)]
            ),
            [
                Insert(expression=exp.Literal.number(2)),
                Insert(expression=exp.Literal.number(3)),
                Insert(expression=exp.Literal.number(4)),
            ],
        )

        # Swap the target's first projection for the source's own node, then diff again.
        expr_tgt.selects[0].replace(expr_src.selects[0])

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt, matchings=[(expr_src, expr_tgt)]),
            [
                Insert(expression=exp.Literal.number(2)),
                Insert(expression=exp.Literal.number(3)),
                Insert(expression=exp.Literal.number(4)),
            ],
        )

    def test_identifier(self):
        expr_src = parse_one("SELECT a FROM tbl")
        expr_tgt = parse_one("SELECT a, tbl.b from tbl")

        # Only the new qualified column is expected as an edit.
        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Insert(expression=exp.to_column("tbl.b")),
            ],
        )

        expr_src = parse_one("SELECT 1 AS c1, 2 AS c2")
        expr_tgt = parse_one("SELECT 2 AS c1, 3 AS c2")

        # The alias around literal 2 is expected as an Update (c2 -> c1), not Remove + Insert.
        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Remove(expression=exp.alias_(1, "c1")),
                Remove(expression=exp.Literal.number(1)),
                Insert(expression=exp.alias_(3, "c2")),
                Insert(expression=exp.Literal.number(3)),
                Update(source=exp.alias_(2, "c2"), target=exp.alias_(2, "c1")),
            ],
        )

    def test_dialect_aware_diff(self):
        from sqlglot.generator import logger

        with self.assertLogs(logger) as cm:
            # We want to assert there are no warnings, but the 'assertLogs' method does not support that.
            # Therefore, we are adding a dummy warning, and then we will assert it is the only warning.
            logger.warning("Dummy warning")

            expression = parse_one("SELECT foo FROM bar FOR UPDATE", dialect="oracle")
            self._validate_delta_only(
                diff_delta_only(expression, expression.copy(), dialect="oracle"), []
            )

        self.assertEqual(["WARNING:sqlglot:Dummy warning"], cm.output)

    # UNION vs UNION ALL and ASC vs DESC are expected as an Update of the enclosing node
    # (per the test name, these are differences in non-expression leaf values).
    def test_non_expression_leaf_delta(self):
        expr_src = parse_one("SELECT a UNION SELECT b")
        expr_tgt = parse_one("SELECT a UNION ALL SELECT b")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Update(source=expr_src, target=expr_tgt),
            ],
        )

        expr_src = parse_one("SELECT a FROM t ORDER BY b ASC")
        expr_tgt = parse_one("SELECT a FROM t ORDER BY b DESC")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Update(
                    source=expr_src.find(exp.Order).expressions[0],
                    target=expr_tgt.find(exp.Order).expressions[0],
                ),
            ],
        )

        expr_src = parse_one("SELECT a, b FROM t ORDER BY c ASC")
        expr_tgt = parse_one("SELECT b, a FROM t ORDER BY c DESC")

        self._validate_delta_only(
            diff_delta_only(expr_src, expr_tgt),
            [
                Update(
                    source=expr_src.find(exp.Order).expressions[0],
                    target=expr_tgt.find(exp.Order).expressions[0],
                ),
                Move(source=expr_src.selects[0], target=expr_tgt.selects[1]),
            ],
        )

    # A Column built with db=None/catalog=None keeps those keys in .args, yet must diff
    # as identical to one built without them.
    def test_none_args_are_not_treated_as_leaves(self):
        expr_src = exp.Column(
            this=exp.to_identifier("b"), table=exp.to_identifier("a"), db=None, catalog=None
        )
        expr_tgt = exp.Column(this=exp.to_identifier("b"), table=exp.to_identifier("a"))

        self.assertEqual(set(expr_src.args), {"this", "table", "db", "catalog"})
        self.assertEqual(set(expr_tgt.args), {"this", "table"})
        self._validate_delta_only(diff_delta_only(expr_src, expr_tgt), [])

    def test_comments_do_not_affect_diff(self):
        expr_src = parse_one("select a from tbl")
        expr_tgt = parse_one("select a from tbl -- this is comment")

        # Check first that the comment really was attached to the parsed table node.
        self.assertEqual(expr_tgt.args["from_"].this.comments, [" this is comment"])
        self._validate_delta_only(diff_delta_only(expr_src, expr_tgt), [])

    # Both deltas are compared as sets, so the order of the edits does not matter.
    def _validate_delta_only(self, actual_delta, expected_delta):
        self.assertEqual(set(actual_delta),
set(expected_delta))


================================================ FILE: tests/test_docs.py ================================================
import doctest
import importlib
import pkgutil
import unittest

import sqlglot


def load_tests(loader, tests, ignore):
    """
    Find and run all the doctests in the sqlglot package.

    Walks every module under ``sqlglot.__path__`` (skipping ``sqlglot.__main__``),
    imports it, and adds a ``doctest.DocTestSuite`` for it to ``tests``. Modules that
    fail to import are skipped; the assertion on the module count guards against
    that silently skipping nearly everything.

    Returns:
        The ``tests`` suite passed in, with the doctest suites added.
    """
    modules = set()
    for info in pkgutil.walk_packages(sqlglot.__path__, prefix="sqlglot."):
        if info.name == "sqlglot.__main__":
            continue
        try:
            modules.add(importlib.import_module(info.name))
        except Exception:
            # Best effort: a module that cannot be imported is left out.
            continue

    assert len(modules) >= 20

    # Sort by module name so suites are added in a deterministic order.
    for module in sorted(modules, key=lambda m: m.__name__):
        tests.addTests(doctest.DocTestSuite(module))

    return tests


if __name__ == "__main__":
    unittest.main()


================================================ FILE: tests/test_errors.py ================================================
import unittest

from sqlglot.errors import highlight_sql, ANSI_UNDERLINE, ANSI_RESET


# Tests for highlight_sql. Each call unpacks a 4-tuple
# (formatted, start_ctx, highlight, end_ctx); positions are (start, end) index pairs
# with an inclusive end, as the expected values below show.
class TestErrors(unittest.TestCase):
    # A (7, 7) span highlights exactly one character.
    def test_highlight_sql_single_character(self):
        sql = "SELECT a FROM t"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 7)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "a")
        self.assertEqual(end_ctx, " FROM t")
        self.assertEqual(formatted, f"SELECT {ANSI_UNDERLINE}a{ANSI_RESET} FROM t")

    # A (7, 9) span highlights the three characters "foo".
    def test_highlight_sql_multi_character(self):
        sql = "SELECT foo FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 9)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "foo")
        self.assertEqual(end_ctx, " FROM table")
        self.assertEqual(formatted, f"SELECT {ANSI_UNDERLINE}foo{ANSI_RESET} FROM table")

    # Two separate spans: `highlight` covers first-start through last-end, while
    # `formatted` underlines each span on its own.
    def test_highlight_sql_multiple_highlights(self):
        sql = "SELECT a, b, c FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 7), (10, 10)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "a, b")
        self.assertEqual(end_ctx, ", c FROM table")
        self.assertEqual(
            formatted,
            f"SELECT {ANSI_UNDERLINE}a{ANSI_RESET}, {ANSI_UNDERLINE}b{ANSI_RESET}, c FROM table",
        )

    # Highlighting the final character leaves an empty end context.
    def test_highlight_sql_at_end(self):
        sql = "SELECT a FROM t"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(14, 14)])

        self.assertEqual(start_ctx, "SELECT a FROM ")
        self.assertEqual(highlight, "t")
        self.assertEqual(end_ctx, "")
        self.assertEqual(formatted, f"SELECT a FROM {ANSI_UNDERLINE}t{ANSI_RESET}")

    # Highlighting the whole string leaves both contexts empty.
    def test_highlight_sql_entire_string(self):
        sql = "SELECT a"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(0, 7)])

        self.assertEqual(start_ctx, "")
        self.assertEqual(highlight, "SELECT a")
        self.assertEqual(end_ctx, "")
        self.assertEqual(formatted, f"{ANSI_UNDERLINE}SELECT a{ANSI_RESET}")

    # Adjacent spans are underlined separately rather than merged into one run.
    def test_highlight_sql_adjacent_highlights(self):
        sql = "SELECT ab FROM t"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 7), (8, 8)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "ab")
        self.assertEqual(end_ctx, " FROM t")
        self.assertEqual(
            formatted, f"SELECT {ANSI_UNDERLINE}a{ANSI_RESET}{ANSI_UNDERLINE}b{ANSI_RESET} FROM t"
        )

    # context_length=5 truncates both contexts to five characters.
    def test_highlight_sql_small_context_length(self):
        sql = "SELECT a, b, c FROM table WHERE x = 1"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(
            sql, [(7, 7), (10, 10)], context_length=5
        )

        self.assertEqual(start_ctx, "LECT ")
        self.assertEqual(highlight, "a, b")
        self.assertEqual(end_ctx, ", c F")
        self.assertEqual(
            formatted, f"LECT {ANSI_UNDERLINE}a{ANSI_RESET}, {ANSI_UNDERLINE}b{ANSI_RESET}, c F"
        )

    # An empty position list is rejected with ValueError.
    def test_highlight_sql_empty_positions(self):
        sql = "SELECT a FROM t"
        with self.assertRaises(ValueError):
            highlight_sql(sql, [])

    # Partially overlapping spans: the already-underlined part is not underlined twice.
    def test_highlight_sql_partial_overlap(self):
        sql = "SELECT foo FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(
            sql,
            [(7, 9), (8, 10)],  # "foo" and "oo "
        )

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "foo ")
        self.assertEqual(end_ctx, "FROM table")
        self.assertEqual(
            formatted,
            f"SELECT {ANSI_UNDERLINE}foo{ANSI_RESET}{ANSI_UNDERLINE} {ANSI_RESET}FROM table",
        )

    # A span fully contained in another adds nothing to the output.
    def test_highlight_sql_full_overlap(self):
        sql = "SELECT foobar FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(
            sql,
            [(7, 12), (9, 11)],  # "foobar" and "oba"
        )

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "foobar")
        self.assertEqual(end_ctx, " FROM table")
        self.assertEqual(formatted, f"SELECT {ANSI_UNDERLINE}foobar{ANSI_RESET} FROM table")

    # Duplicate spans behave like a single span.
    def test_highlight_sql_identical_positions(self):
        sql = "SELECT a FROM t"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 7), (7, 7)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "a")
        self.assertEqual(end_ctx, " FROM t")
        self.assertEqual(formatted, f"SELECT {ANSI_UNDERLINE}a{ANSI_RESET} FROM t")

    # Spans given out of order give the same result as if they were sorted.
    def test_highlight_sql_reversed_positions(self):
        sql = "SELECT a, b FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(10, 10), (7, 7)])

        self.assertEqual(start_ctx, "SELECT ")
        self.assertEqual(highlight, "a, b")
        self.assertEqual(end_ctx, " FROM table")
        self.assertEqual(
            formatted,
            f"SELECT {ANSI_UNDERLINE}a{ANSI_RESET}, {ANSI_UNDERLINE}b{ANSI_RESET} FROM table",
        )

    # context_length=0 returns no surrounding context at all.
    def test_highlight_sql_zero_context_length(self):
        sql = "SELECT a, b FROM table"
        formatted, start_ctx, highlight, end_ctx = highlight_sql(sql, [(7, 7)], context_length=0)

        self.assertEqual(start_ctx, "")
        self.assertEqual(end_ctx, "")
        self.assertEqual(highlight, "a")
        self.assertEqual(formatted, f"{ANSI_UNDERLINE}a{ANSI_RESET}")


if __name__ == "__main__":
    unittest.main()


================================================ FILE: tests/test_executor.py ================================================
import ast
import csv
import datetime
import unittest
from datetime import date, time
from concurrent.futures import ProcessPoolExecutor

import duckdb
import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

from sqlglot import exp, find_tables, parse_one, transpile
from sqlglot.errors import ExecuteError
from sqlglot.executor import execute
from 
sqlglot.executor.python import Python from sqlglot.executor.table import Table, ensure_tables from sqlglot.optimizer import optimize from sqlglot.planner import Plan from tests.helpers import ( FIXTURES_DIR, SKIP_INTEGRATION, TPCH_SCHEMA, TPCDS_SCHEMA, load_sql_fixture_pairs, ) DIR_TPCH = FIXTURES_DIR + "/optimizer/tpc-h/" DIR_TPCDS = FIXTURES_DIR + "/optimizer/tpc-ds/" def open_file(file_name): """Open a file that may be compressed as gzip and return it in universal newline mode.""" with open(file_name, "rb") as f: gzipped = f.read(2) == b"\x1f\x8b" if gzipped: import gzip return gzip.open(file_name, "rt", newline="") return open(file_name, encoding="utf-8", newline="") _schema = None _tables = None def initializer(schema, tables): global _schema, _tables _schema = schema _tables = tables def mp_execute(expression, meta): if not meta.get("execute"): return None tables = {} for t in find_tables(expression): name = t.name tables[name] = _tables[name] return execute(expression, schema=_schema, tables=tables) @unittest.skipIf(SKIP_INTEGRATION, "Skipping Integration Tests since `SKIP_INTEGRATION` is set") class TestExecutor(unittest.TestCase): @classmethod def setUpClass(cls): cls.tpch_conn = duckdb.connect() cls.tpcds_conn = duckdb.connect() cls.tpch_tables = {} cls.tpcds_tables = {} def setup(conn, directory, table, columns, tables): file_name = f"{directory}{table}.csv.gz" conn.execute( f""" CREATE VIEW {table} AS SELECT * FROM READ_CSV('{file_name}', delim='|', header=True, columns={columns}) """ ) reader = csv.reader(open_file(file_name), delimiter="|") rows = [] ctypes = [] tables[table] = rows next(reader) for row in reader: if not ctypes: for i, v in enumerate(row): try: ctypes.append(type(ast.literal_eval(v))) except (ValueError, SyntaxError): ctypes.append(str) rows.append( tuple(None if (t is not str and v == "") else t(v) for t, v in zip(ctypes, row)) ) tables[table] = Table(columns=columns, rows=rows) for table, columns in TPCH_SCHEMA.items(): 
setup(cls.tpch_conn, DIR_TPCH, table, columns, cls.tpch_tables) for table, columns in TPCDS_SCHEMA.items(): setup(cls.tpcds_conn, DIR_TPCDS, table, columns, cls.tpcds_tables) cls.cache = {} cls.tpch_sqls = list(load_sql_fixture_pairs("optimizer/tpc-h/tpc-h.sql")) cls.tpcds_sqls = list(load_sql_fixture_pairs("optimizer/tpc-ds/tpc-ds.sql")) @classmethod def tearDownClass(cls): cls.tpch_conn.close() cls.tpcds_conn.close() def cached_execute(self, sql, tpch=True): conn = self.tpch_conn if tpch else self.tpcds_conn if sql not in self.cache: self.cache[sql] = conn.execute(transpile(sql, write="duckdb")[0]).fetchdf() return self.cache[sql] def rename_anonymous(self, source, target): for i, column in enumerate(source.columns): if "_col_" in column: source.rename(columns={column: target.columns[i]}, inplace=True) def test_py_dialect(self): generate = Python().generate self.assertEqual(generate(parse_one("'x '''")), r"'x \''") self.assertEqual(generate(parse_one("MAP([1], [2])")), "MAP([1], [2])") self.assertEqual(generate(parse_one("1 is null")), "1 == None") self.assertEqual(generate(parse_one("x is null")), "scope[None][x] is None") def test_optimized_tpch(self): for i, (_, sql, optimized) in enumerate(self.tpch_sqls, start=1): with self.subTest(f"{i}, {sql}"): a = self.cached_execute(sql, tpch=True) b = self.tpch_conn.execute(transpile(optimized, write="duckdb")[0]).fetchdf() self.rename_anonymous(b, a) assert_frame_equal(a, b) def subtestHelper(self, i, table, tpch=True): with self.subTest(f"{'tpc-h' if tpch else 'tpc-ds'} {i + 1}"): _, sql, _ = self.tpch_sqls[i] if tpch else self.tpcds_sqls[i] a = self.cached_execute(sql, tpch=tpch) b = pd.DataFrame( ((np.nan if c is None else c for c in r) for r in table.rows), columns=table.columns, ) assert_frame_equal(a, b, check_dtype=False, check_index_type=False) def _mp_execute(self, schema, tables, sqls, tpch): with ProcessPoolExecutor( initializer=initializer, initargs=(schema, tables), ) as pool: futures = 
[pool.submit(mp_execute, parse_one(sql), args) for args, sql, _ in sqls]

            # NOTE(review): nesting of this loop relative to the `with` block was
            # reconstructed from whitespace-collapsed source; confirm against the repo.
            for i, future in enumerate(futures):
                table = future.result()
                # mp_execute returns None for fixtures not flagged for execution.
                if table is not None:
                    self.subtestHelper(i, table, tpch=tpch)

    # Run the TPC-H fixture queries through the executor in worker processes.
    def test_execute_tpch(self):
        self._mp_execute(TPCH_SCHEMA, self.tpch_tables, self.tpch_sqls, True)

    # Run the TPC-DS fixture queries through the executor in worker processes.
    def test_execute_tpcds(self):
        self._mp_execute(TPCDS_SCHEMA, self.tpcds_tables, self.tpcds_sqls, False)

    # Table-driven check of `execute` over small in-memory tables: each case is
    # (sql, expected column names, expected rows).
    def test_execute_callable(self):
        tables = {
            "x": [
                {"a": "a", "b": "d"},
                {"a": "b", "b": "e"},
                {"a": "c", "b": "f"},
            ],
            "y": [
                {"b": "d", "c": "g"},
                {"b": "e", "c": "h"},
                {"b": "f", "c": "i"},
            ],
            "z": [],
        }
        schema = {
            "x": {
                "a": "VARCHAR",
                "b": "VARCHAR",
            },
            "y": {
                "b": "VARCHAR",
                "c": "VARCHAR",
            },
            "z": {"d": "VARCHAR"},
        }

        for sql, cols, rows in [
            ("SELECT * FROM x", ["a", "b"], [("a", "d"), ("b", "e"), ("c", "f")]),
            (
                "SELECT * FROM x JOIN y ON x.b = y.b",
                ["a", "b", "b", "c"],
                [("a", "d", "d", "g"), ("b", "e", "e", "h"), ("c", "f", "f", "i")],
            ),
            (
                "SELECT j.c AS d FROM x AS i JOIN y AS j ON i.b = j.b",
                ["d"],
                [("g",), ("h",), ("i",)],
            ),
            (
                "SELECT CONCAT(x.a, y.c) FROM x JOIN y ON x.b = y.b WHERE y.b = 'e'",
                ["_col_0"],
                [("bh",)],
            ),
            (
                "SELECT * FROM x JOIN y ON x.b = y.b WHERE y.b = 'e'",
                ["a", "b", "b", "c"],
                [("b", "e", "e", "h")],
            ),
            (
                "SELECT * FROM z",
                ["d"],
                [],
            ),
            (
                "SELECT d FROM z ORDER BY d",
                ["d"],
                [],
            ),
            (
                "SELECT a FROM x WHERE x.a <> 'b'",
                ["a"],
                [("a",), ("c",)],
            ),
            (
                "SELECT a AS i FROM x ORDER BY a",
                ["i"],
                [("a",), ("b",), ("c",)],
            ),
            (
                "SELECT a AS i FROM x ORDER BY i",
                ["i"],
                [("a",), ("b",), ("c",)],
            ),
            (
                "SELECT 100 - ORD(a) AS a, a AS i FROM x ORDER BY a",
                ["a", "i"],
                [(1, "c"), (2, "b"), (3, "a")],
            ),
            (
                "SELECT a /* test */ FROM x LIMIT 1",
                ["a"],
                [("a",)],
            ),
            (
                "SELECT DISTINCT a FROM (SELECT 1 AS a UNION ALL SELECT 1 AS a)",
                ["a"],
                [(1,)],
            ),
            (
                "SELECT DISTINCT a, SUM(b) AS b "
                "FROM (SELECT 'a' AS a, 1 AS b UNION ALL SELECT 'a' AS a, 2 AS b UNION ALL SELECT 'b' AS a, 1 AS b) "
                "GROUP BY a "
                "LIMIT 1",
                ["a", "b"],
                [("a", 3)],
            ),
            (
                "SELECT COUNT(1) AS a FROM (SELECT 1)",
                ["a"],
                [(1,)],
            ),
            (
                "SELECT COUNT(1) AS a FROM (SELECT 1) LIMIT 0",
                ["a"],
                [],
            ),
            (
                "SELECT a FROM x GROUP BY a LIMIT 0",
                ["a"],
                [],
            ),
            (
                "SELECT a FROM x LIMIT 0",
                ["a"],
                [],
            ),
        ]:
            with self.subTest(sql):
                result = execute(sql, schema=schema, tables=tables)
                self.assertEqual(result.columns, tuple(cols))
                self.assertEqual(result.rows, rows)

    # Table-driven check of set operations and division. A case whose third element
    # is not a list names the exception type expected as the cause of ExecuteError.
    def test_set_operations(self):
        tables = {
            "x": [
                {"a": "a"},
                {"a": "b"},
                {"a": "c"},
            ],
            "y": [
                {"a": "b"},
                {"a": "c"},
                {"a": "d"},
            ],
        }
        schema = {
            "x": {
                "a": "VARCHAR",
            },
            "y": {
                "a": "VARCHAR",
            },
        }

        for sql, cols, rows in [
            (
                "SELECT a FROM x UNION ALL SELECT a FROM y",
                ["a"],
                [("a",), ("b",), ("c",), ("b",), ("c",), ("d",)],
            ),
            (
                "SELECT a FROM x UNION SELECT a FROM y",
                ["a"],
                [("a",), ("b",), ("c",), ("d",)],
            ),
            (
                "SELECT a FROM x EXCEPT SELECT a FROM y",
                ["a"],
                [("a",)],
            ),
            (
                "(SELECT a FROM x) EXCEPT (SELECT a FROM y)",
                ["a"],
                [("a",)],
            ),
            (
                "SELECT a FROM x INTERSECT SELECT a FROM y",
                ["a"],
                [("b",), ("c",)],
            ),
            (
                """SELECT i.a FROM ( SELECT a FROM x UNION SELECT a FROM y ) AS i JOIN ( SELECT a FROM x UNION SELECT a FROM y ) AS j ON i.a = j.a""",
                ["a"],
                [("a",), ("b",), ("c",), ("d",)],
            ),
            (
                "SELECT 1 AS a UNION SELECT 2 AS a UNION SELECT 3 AS a",
                ["a"],
                [(1,), (2,), (3,)],
            ),
            (
                "SELECT 1 / 2 AS a",
                ["a"],
                [
                    (0.5,),
                ],
            ),
            ("SELECT 1 / 0 AS a", ["a"], ZeroDivisionError),
            (
                # Division built with typed=True is expected to yield 0 for 1 / 2.
                exp.select(
                    exp.alias_(exp.Literal.number(1).div(exp.Literal.number(2), typed=True), "a")
                ),
                ["a"],
                [
                    (0,),
                ],
            ),
            (
                # Division built with safe=True is expected to yield NULL for 1 / 0.
                exp.select(
                    exp.alias_(exp.Literal.number(1).div(exp.Literal.number(0), safe=True), "a")
                ),
                ["a"],
                [
                    (None,),
                ],
            ),
            (
                "SELECT a FROM x UNION ALL SELECT a FROM x LIMIT 1",
                ["a"],
                [("a",)],
            ),
        ]:
            with self.subTest(sql):
                if isinstance(rows, list):
                    result = execute(sql, schema=schema, tables=tables)
                    self.assertEqual(result.columns, tuple(cols))
                    # Rows are compared as sets, so row order is not checked here.
                    self.assertEqual(set(result.rows), set(rows))
                else:
                    with self.assertRaises(ExecuteError) as ctx:
                        execute(sql, schema=schema, tables=tables)
self.assertIsInstance(ctx.exception.__cause__, rows) def test_execute_catalog_db_table(self): tables = { "catalog": { "db": { "x": [ {"a": "a"}, {"a": "b"}, {"a": "c"}, ], } } } schema = { "catalog": { "db": { "x": { "a": "VARCHAR", } } } } result1 = execute("SELECT * FROM x", schema=schema, tables=tables) result2 = execute("SELECT * FROM catalog.db.x", schema=schema, tables=tables) assert result1.columns == result2.columns assert result1.rows == result2.rows def test_execute_tables(self): tables = { "sushi": [ {"id": 1, "price": 1.0}, {"id": 2, "price": 2.0}, {"id": 3, "price": 3.0}, ], "order_items": [ {"sushi_id": 1, "order_id": 1}, {"sushi_id": 1, "order_id": 1}, {"sushi_id": 2, "order_id": 1}, {"sushi_id": 3, "order_id": 2}, ], "orders": [ {"id": 1, "user_id": 1}, {"id": 2, "user_id": 2}, ], } self.assertEqual( execute( """ SELECT o.user_id, SUM(s.price) AS price FROM orders o JOIN order_items i ON o.id = i.order_id JOIN sushi s ON i.sushi_id = s.id GROUP BY o.user_id """, tables=tables, ).rows, [ (1, 4.0), (2, 3.0), ], ) self.assertEqual( execute( """ SELECT o.id, x.* FROM orders o LEFT JOIN ( SELECT 1 AS id, 'b' AS x UNION ALL SELECT 3 AS id, 'c' AS x ) x ON o.id = x.id """, tables=tables, ).rows, [(1, 1, "b"), (2, None, None)], ) self.assertEqual( execute( """ SELECT o.id, x.* FROM orders o RIGHT JOIN ( SELECT 1 AS id, 'b' AS x UNION ALL SELECT 3 AS id, 'c' AS x ) x ON o.id = x.id """, tables=tables, ).rows, [ (1, 1, "b"), (None, 3, "c"), ], ) def test_execute_subqueries(self): tables = { "table": [ {"a": 1, "b": 1}, {"a": 2, "b": 2}, ], } self.assertEqual( execute( """ SELECT * FROM table WHERE a = (SELECT MAX(a) FROM table) """, tables=tables, ).rows, [ (2, 2), ], ) table1_view = exp.Select().select("id", "sub_type").from_("table1").subquery() select_from_sub_query = exp.Select().select("id AS id_alias", "sub_type").from_(table1_view) expression = exp.Select().select("*").from_("cte1").with_("cte1", as_=select_from_sub_query) schema = {"table1": {"id": 
"str", "sub_type": "str"}} executed = execute(expression, tables={t: [] for t in schema}, schema=schema) self.assertEqual(executed.rows, []) self.assertEqual(executed.columns, ("id_alias", "sub_type")) def test_correlated_count(self): tables = { "parts": [{"pnum": 0, "qoh": 1}], "supplies": [], } schema = { "parts": {"pnum": "int", "qoh": "int"}, "supplies": {"pnum": "int", "shipdate": "int"}, } self.assertEqual( execute( """ select * from parts where parts.qoh >= ( select count(supplies.shipdate) + 1 from supplies where supplies.pnum = parts.pnum and supplies.shipdate < 10 ) """, tables=tables, schema=schema, ).rows, [ (0, 1), ], ) def test_table_depth_mismatch(self): tables = {"table": []} schema = {"db": {"table": {"col": "VARCHAR"}}} with self.assertRaises(ExecuteError): execute("SELECT * FROM table", schema=schema, tables=tables) def test_tables(self): tables = ensure_tables( { "catalog1": { "db1": { "t1": [ {"a": 1}, ], "t2": [ {"a": 1}, ], }, "db2": { "t3": [ {"a": 1}, ], "t4": [ {"a": 1}, ], }, }, "catalog2": { "db3": { "t5": Table(columns=("a",), rows=[(1,)]), "t6": Table(columns=("a",), rows=[(1,)]), }, "db4": { "t7": Table(columns=("a",), rows=[(1,)]), "t8": Table(columns=("a",), rows=[(1,)]), }, }, } ) t1 = tables.find(exp.table_(table="t1", db="db1", catalog="catalog1")) self.assertEqual(t1.columns, ("a",)) self.assertEqual(t1.rows, [(1,)]) t8 = tables.find(exp.table_(table="t8")) self.assertEqual(t1.columns, t8.columns) self.assertEqual(t1.rows, t8.rows) def test_static_queries(self): for sql, cols, rows in [ ("SELECT 1", ["1"], [(1,)]), ("SELECT 1 + 2 AS x", ["x"], [(3,)]), ("SELECT CONCAT('a', 'b') AS x", ["x"], [("ab",)]), ("SELECT CONCAT('a', 1) AS x", ["x"], [("a1",)]), ("SELECT 1 AS x, 2 AS y", ["x", "y"], [(1, 2)]), ("SELECT 'foo' LIMIT 1", ["foo"], [("foo",)]), ( "SELECT SUM(x), COUNT(x) FROM (SELECT 1 AS x WHERE FALSE)", ["_col_0", "_col_1"], [(None, 0)], ), ]: with self.subTest(sql): result = execute(sql) self.assertEqual(result.columns, 
tuple(cols)) self.assertEqual(result.rows, rows) def test_aggregate_without_group_by(self): result = execute("SELECT SUM(x) FROM t", tables={"t": [{"x": 1}, {"x": 2}]}) self.assertEqual(result.columns, ("_col_0",)) self.assertEqual(result.rows, [(3,)]) def test_scalar_functions(self): now = datetime.datetime.now() for sql, expected in [ ("CONCAT('a', 'b')", "ab"), ("CONCAT('a', NULL)", None), ("CONCAT_WS('_', 'a', 'b')", "a_b"), ("STR_POSITION('foobarbar', 'bar')", 4), ("STR_POSITION('foobarbar', 'bar', 5)", 7), ("STR_POSITION('foobarbar', NULL)", None), ("STR_POSITION(NULL, 'bar')", None), ("UPPER('foo')", "FOO"), ("UPPER(NULL)", None), ("LOWER('FOO')", "foo"), ("LOWER(NULL)", None), ("IFNULL('a', 'b')", "a"), ("IFNULL(NULL, 'b')", "b"), ("IFNULL(NULL, NULL)", None), ("SUBSTRING('12345')", "12345"), ("SUBSTRING('12345', 3)", "345"), ("SUBSTRING('12345', 3, 0)", ""), ("SUBSTRING('12345', 3, 1)", "3"), ("SUBSTRING('12345', 3, 2)", "34"), ("SUBSTRING('12345', 3, 3)", "345"), ("SUBSTRING('12345', 3, 4)", "345"), ("SUBSTRING('12345', -3)", "345"), ("SUBSTRING('12345', -3, 0)", ""), ("SUBSTRING('12345', -3, 1)", "3"), ("SUBSTRING('12345', -3, 2)", "34"), ("SUBSTRING('12345', 0)", ""), ("SUBSTRING('12345', 0, 1)", ""), ("SUBSTRING(NULL)", None), ("SUBSTRING(NULL, 1)", None), ("CAST(1 AS TEXT)", "1"), ("CAST('1' AS LONG)", 1), ("CAST('1.1' AS FLOAT)", 1.1), ("CAST('12:05:01' AS TIME)", time(12, 5, 1)), ("COALESCE(NULL)", None), ("COALESCE(NULL, NULL)", None), ("COALESCE(NULL, 'b')", "b"), ("COALESCE('a', 'b')", "a"), ("1 << 1", 2), ("1 >> 1", 0), ("1 & 1", 1), ("1 | 1", 1), ("1 < 1", False), ("1 <= 1", True), ("1 > 1", False), ("1 >= 1", True), ("1 + NULL", None), ("IF(true, 1, 0)", 1), ("IF(false, 1, 0)", 0), ("CASE WHEN 0 = 1 THEN 'foo' ELSE 'bar' END", "bar"), ("CAST('2022-01-01' AS DATE) + INTERVAL '1' DAY", date(2022, 1, 2)), ("INTERVAL '1' week", datetime.timedelta(weeks=1)), ("1 IN (1, 2, 3)", True), ("1 IN (2, 3)", False), ("1 IN (1)", True), ("NULL IS NULL", 
True), ("NULL IS NOT NULL", False), ("NULL = NULL", None), ("NULL <> NULL", None), ("YEAR(CURRENT_TIMESTAMP)", now.year), ("MONTH(CURRENT_TIME)", now.month), ("DAY(CURRENT_DATETIME())", now.day), ("YEAR(CURRENT_DATE())", now.year), ("MONTH(CURRENT_DATE())", now.month), ("DAY(CURRENT_DATE())", now.day), ("YEAR(CURRENT_TIMESTAMP) + 1", now.year + 1), ( "YEAR(CURRENT_TIMESTAMP) IN (YEAR(CURRENT_TIMESTAMP) + 1, YEAR(CURRENT_TIMESTAMP) * 10)", False, ), ("YEAR(CURRENT_TIMESTAMP) = (YEAR(CURRENT_TIMESTAMP))", True), ("YEAR(CURRENT_TIMESTAMP) <> (YEAR(CURRENT_TIMESTAMP))", False), ("YEAR(CURRENT_DATE()) + 1", now.year + 1), ( "YEAR(CURRENT_DATE()) IN (YEAR(CURRENT_DATE()) + 1, YEAR(CURRENT_DATE()) * 10)", False, ), ("YEAR(CURRENT_DATE()) = (YEAR(CURRENT_DATE()))", True), ("YEAR(CURRENT_DATE()) <> (YEAR(CURRENT_DATE()))", False), ("1::bool", True), ("0::bool", False), ("MAP(['a'], [1]).a", 1), ("MAP()", {}), ("STRFTIME('%j', '2023-03-23 15:00:00')", "082"), ("STRFTIME('%j', NULL)", None), ("DATESTRTODATE('2022-01-01')", date(2022, 1, 1)), ("TIMESTRTOTIME('2022-01-01')", datetime.datetime(2022, 1, 1)), ("LEFT('12345', 3)", "123"), ("RIGHT('12345', 3)", "345"), ("DATEDIFF('2022-01-03'::date, '2022-01-01'::TIMESTAMP::DATE)", 2), ("TRIM(' foo ')", "foo"), ("TRIM('afoob', 'ab')", "foo"), ("ARRAY_JOIN(['foo', 'bar'], ':')", "foo:bar"), ("ARRAY_JOIN(['hello', null ,'world'], ' ', ',')", "hello , world"), ("ARRAY_JOIN(['', null ,'world'], ' ', ',')", " , world"), ("STRUCT('foo', 'bar', null, null)", {"foo": "bar"}), ("ROUND(1.5)", 2), ("ROUND(1.2)", 1), ("ROUND(1.2345, 2)", 1.23), ("ROUND(NULL)", None), ( "UNIXTOTIME(1659981729)", datetime.datetime(2022, 8, 8, 18, 2, 9, tzinfo=datetime.timezone.utc), ), ("TIMESTRTOTIME('2013-04-05 01:02:03')", datetime.datetime(2013, 4, 5, 1, 2, 3)), ( "UNIXTOTIME(40 * 365 * 86400)", datetime.datetime(2009, 12, 22, 00, 00, 00, tzinfo=datetime.timezone.utc), ), ( "STRTOTIME('08/03/2024 12:34:56', '%d/%m/%Y %H:%M:%S')", datetime.datetime(2024, 3, 8, 
12, 34, 56), ), ("STRTOTIME('27/01/2024', '%d/%m/%Y')", datetime.datetime(2024, 1, 27)), ]: with self.subTest(sql): result = execute(f"SELECT {sql}") self.assertEqual(result.rows, [(expected,)]) result = execute( "WITH t AS (SELECT 'a' AS c1, 'b' AS c2) SELECT NVL(c1, c2) FROM t", dialect="oracle", ) self.assertEqual(result.rows, [("a",)]) def test_case_sensitivity(self): result = execute("SELECT A AS A FROM X", tables={"x": [{"a": 1}]}) self.assertEqual(result.columns, ("a",)) self.assertEqual(result.rows, [(1,)]) result = execute('SELECT A AS "A" FROM X', tables={"x": [{"a": 1}]}) self.assertEqual(result.columns, ("A",)) self.assertEqual(result.rows, [(1,)]) def test_nested_table_reference(self): tables = { "some_catalog": { "some_schema": { "some_table": [ {"id": 1, "price": 1.0}, {"id": 2, "price": 2.0}, {"id": 3, "price": 3.0}, ] } } } result = execute("SELECT * FROM some_catalog.some_schema.some_table s", tables=tables) self.assertEqual(result.columns, ("id", "price")) self.assertEqual(result.rows, [(1, 1.0), (2, 2.0), (3, 3.0)]) def test_group_by(self): tables = { "x": [ {"a": 1, "b": 10}, {"a": 2, "b": 20}, {"a": 3, "b": 28}, {"a": 2, "b": 25}, {"a": 1, "b": 40}, ], } for sql, expected, columns in ( ( "SELECT a, AVG(b) FROM x GROUP BY a ORDER BY AVG(b)", [(2, 22.5), (1, 25.0), (3, 28.0)], ("a", "_col_1"), ), ( "SELECT a, AVG(b) FROM x GROUP BY a having avg(b) > 23", [(1, 25.0), (3, 28.0)], ("a", "_col_1"), ), ( "SELECT a, AVG(b) FROM x GROUP BY a having avg(b + 1) > 23", [(1, 25.0), (2, 22.5), (3, 28.0)], ("a", "_col_1"), ), ( "SELECT a, AVG(b) FROM x GROUP BY a having sum(b) + 5 > 50", [(1, 25.0)], ("a", "_col_1"), ), ( "SELECT a + 1 AS a, AVG(b + 1) FROM x GROUP BY a + 1 having AVG(b + 1) > 26", [(4, 29.0)], ("a", "_col_1"), ), ( "SELECT a, avg(b) FROM x GROUP BY a HAVING a = 1", [(1, 25.0)], ("a", "_col_1"), ), ( "SELECT a + 1, avg(b) FROM x GROUP BY a + 1 HAVING a + 1 = 2", [(2, 25.0)], ("_col_0", "_col_1"), ), ( "SELECT a FROM x GROUP BY a ORDER BY 
AVG(b)", [(2,), (1,), (3,)], ("a",), ), ( "SELECT a, SUM(b) FROM x GROUP BY a ORDER BY COUNT(*)", [(3, 28), (1, 50), (2, 45)], ("a", "_col_1"), ), ( "SELECT a, SUM(b) FROM x GROUP BY a ORDER BY COUNT(*) DESC", [(1, 50), (2, 45), (3, 28)], ("a", "_col_1"), ), ( "SELECT a, ARRAY_UNIQUE_AGG(b) FROM x GROUP BY a", [(1, [40, 10]), (2, [25, 20]), (3, [28])], ("a", "_col_1"), ), ): with self.subTest(sql): result = execute(sql, tables=tables) self.assertEqual(result.columns, columns) self.assertEqual(result.rows, expected) def test_nested_values(self): tables = {"foo": [{"raw": {"name": "Hello, World", "a": [{"b": 1}]}}]} result = execute("SELECT raw:name AS name FROM foo", dialect="snowflake", tables=tables) self.assertEqual(result.columns, ("NAME",)) self.assertEqual(result.rows, [("Hello, World",)]) result = execute("SELECT raw:a[0].b AS b FROM foo", dialect="snowflake", tables=tables) self.assertEqual(result.columns, ("B",)) self.assertEqual(result.rows, [(1,)]) result = execute("SELECT raw:a[1].b AS b FROM foo", dialect="snowflake", tables=tables) self.assertEqual(result.columns, ("B",)) self.assertEqual(result.rows, [(None,)]) result = execute("SELECT raw:a[0].c AS c FROM foo", dialect="snowflake", tables=tables) self.assertEqual(result.columns, ("C",)) self.assertEqual(result.rows, [(None,)]) tables = { '"ITEM"': [ {"id": 1, "attributes": {"flavor": "cherry", "taste": "sweet"}}, {"id": 2, "attributes": {"flavor": "lime", "taste": "sour"}}, {"id": 3, "attributes": {"flavor": "apple", "taste": None}}, ] } result = execute( "SELECT i.attributes.flavor FROM `ITEM` i", dialect="bigquery", tables=tables ) self.assertEqual(result.columns, ("flavor",)) self.assertEqual(result.rows, [("cherry",), ("lime",), ("apple",)]) tables = {"t": [{"x": [1, 2, 3]}]} result = execute("SELECT x FROM t", dialect="duckdb", tables=tables) self.assertEqual(result.columns, ("x",)) self.assertEqual(result.rows, [([1, 2, 3],)]) def test_agg_order(self): plan = Plan( optimize(""" SELECT 
AVG(bill_length_mm) AS avg_bill_length, AVG(bill_depth_mm) AS avg_bill_depth FROM penguins """) ) assert [agg.alias for agg in plan.root.aggregations] == [ "avg_bill_length", "avg_bill_depth", ] def test_table_to_pylist(self): columns = ["id", "product", "price"] rows = [[1, "Shirt", 20.0], [2, "Shoes", 60.0]] table = Table(columns=columns, rows=rows) expected = [ {"id": 1, "product": "Shirt", "price": 20.0}, {"id": 2, "product": "Shoes", "price": 60.0}, ] self.assertEqual(table.to_pylist(), expected) ================================================ FILE: tests/test_expressions.py ================================================ import datetime import math import sys import unittest from sqlglot import ParseError, alias, exp, parse_one class TestExprs(unittest.TestCase): maxDiff = None def test_to_s(self): self.assertEqual(repr(parse_one("5")), "Literal(this=5, is_string=False)") self.assertEqual(repr(parse_one("5.3")), "Literal(this=5.3, is_string=False)") self.assertEqual(repr(parse_one("True")), "Boolean(this=True)") self.assertEqual(repr(parse_one("' x'")), "Literal(this=' x', is_string=True)") self.assertEqual(repr(parse_one("' \n x'")), "Literal(this=' \\n x', is_string=True)") self.assertEqual( repr(parse_one(" x ")), "Column(\n this=Identifier(this=x, quoted=False))" ) self.assertEqual( repr(parse_one('" x "')), "Column(\n this=Identifier(this=' x ', quoted=True))" ) def test_arg_key(self): self.assertEqual(parse_one("sum(1)").find(exp.Literal).arg_key, "this") def test_depth(self): self.assertEqual(parse_one("x(1)").find(exp.Literal).depth, 1) def test_iter(self): self.assertEqual([exp.Literal.number(1), exp.Literal.number(2)], list(parse_one("[1, 2]"))) with self.assertRaises(TypeError): for x in parse_one("1"): pass def test_eq(self): query = parse_one("SELECT x FROM t") self.assertEqual(query, query.copy()) self.assertNotEqual(exp.to_identifier("a"), exp.to_identifier("A")) self.assertEqual( exp.Column(table=exp.to_identifier("b"), 
this=exp.to_identifier("b")), exp.Column(this=exp.to_identifier("b"), table=exp.to_identifier("b")), ) self.assertNotEqual(exp.to_identifier("a", quoted=True), exp.to_identifier("A")) self.assertNotEqual(exp.to_identifier("A", quoted=True), exp.to_identifier("A")) self.assertNotEqual( exp.to_identifier("A", quoted=True), exp.to_identifier("a", quoted=True) ) self.assertNotEqual(parse_one("'x'"), parse_one("'X'")) self.assertNotEqual(parse_one("'1'"), parse_one("1")) self.assertEqual(parse_one("`a`", read="hive"), parse_one('"a"')) self.assertEqual(parse_one("`a`", read="hive"), parse_one('"a" ')) self.assertEqual(parse_one("`a`.`b`", read="hive"), parse_one('"a"."b"')) self.assertEqual(parse_one("select a, b+1"), parse_one("SELECT a, b + 1")) self.assertNotEqual(parse_one("`a`.`b`.`c`", read="hive"), parse_one("a.b.c")) self.assertNotEqual(parse_one("a.b.c.d", read="hive"), parse_one("a.b.c")) self.assertEqual(parse_one("a.b.c.d", read="hive"), parse_one("a.b.c.d")) self.assertEqual(parse_one("a + b * c - 1.0"), parse_one("a+b*c-1.0")) self.assertNotEqual(parse_one("a + b * c - 1.0"), parse_one("a + b * c + 1.0")) self.assertEqual(parse_one("a as b"), parse_one("a AS b")) self.assertNotEqual(parse_one("a as b"), parse_one("a")) self.assertEqual( parse_one("ROW() OVER(Partition by y)"), parse_one("ROW() OVER (partition BY y)"), ) self.assertEqual(exp.Table(pivots=[]), exp.Table()) self.assertNotEqual(exp.Table(pivots=[None]), exp.Table()) self.assertEqual( exp.DataType.build("int"), exp.DataType(this=exp.DataType.Type.INT, nested=False) ) self.assertNotEqual( exp.Identifier(this="a", temporary=True), exp.Identifier(this="a"), ) def test_eq_on_same_instance_short_circuits(self): expr = parse_one("1") expr == expr self.assertIsNone(expr._hash) def test_find(self): expression = parse_one("CREATE TABLE x STORED AS PARQUET AS SELECT * FROM y") self.assertTrue(expression.find(exp.Create)) self.assertFalse(expression.find(exp.Group)) self.assertEqual( [table.name for table 
in expression.find_all(exp.Table)], ["x", "y"], ) def test_find_all(self): expression = parse_one( """ SELECT * FROM ( SELECT b.* FROM a.b b ) x JOIN ( SELECT c.foo FROM a.c c WHERE foo = 1 ) y ON x.c = y.foo CROSS JOIN ( SELECT * FROM ( SELECT d.bar FROM d ) nested ) z ON x.c = y.foo """ ) self.assertEqual( [table.name for table in expression.find_all(exp.Table)], ["b", "c", "d"], ) expression = parse_one("select a + b + c + d") self.assertEqual( [column.name for column in expression.find_all(exp.Column)], ["d", "c", "a", "b"], ) self.assertEqual( [column.name for column in expression.find_all(exp.Column, bfs=False)], ["a", "b", "c", "d"], ) def test_find_ancestor(self): column = parse_one("select * from foo where (a + 1 > 2)").find(exp.Column) self.assertIsInstance(column, exp.Column) self.assertIsInstance(column.parent_select, exp.Select) self.assertIsNone(column.find_ancestor(exp.Join)) def test_to_dot(self): orig = parse_one('a.b.c."d".e.f') self.assertEqual(".".join(str(p) for p in orig.parts), 'a.b.c."d".e.f') self.assertEqual( ".".join( str(p) for p in exp.Dot.build( [ exp.to_table("a.b.c"), exp.to_identifier("d"), exp.to_identifier("e"), exp.to_identifier("f"), ] ).parts ), "a.b.c.d.e.f", ) self.assertEqual(".".join(str(p) for p in orig.parts), 'a.b.c."d".e.f') column = orig.find(exp.Column) dot = column.to_dot() self.assertEqual(dot.sql(), 'a.b.c."d".e.f') self.assertEqual( dot, exp.Dot( this=exp.Dot( this=exp.Dot( this=exp.Dot( this=exp.Dot( this=exp.to_identifier("a"), expression=exp.to_identifier("b"), ), expression=exp.to_identifier("c"), ), expression=exp.to_identifier("d", quoted=True), ), expression=exp.to_identifier("e"), ), expression=exp.to_identifier("f"), ), ) def test_root(self): ast = parse_one("select * from (select a from x)") self.assertIs(ast, ast.root()) self.assertIs(ast, ast.find(exp.Column).root()) def test_alias_or_name(self): expression = parse_one( "SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz" ) 
self.assertEqual(
            [e.alias_or_name for e in expression.expressions],
            ["a", "B", "e", "*", "zz", "z"],
        )
        self.assertEqual(
            {e.alias_or_name for e in expression.find_all(exp.Table)},
            {"bar", "baz"},
        )
        expression = parse_one(
            """ WITH first AS (SELECT * FROM foo), second AS (SELECT * FROM bar) SELECT * FROM first, second, (SELECT * FROM baz) AS third """
        )
        self.assertEqual(
            [e.alias_or_name for e in expression.args["with_"].expressions],
            ["first", "second"],
        )
        self.assertEqual("first", expression.args["from_"].alias_or_name)
        self.assertEqual(
            [e.alias_or_name for e in expression.args["joins"]],
            ["second", "third"],
        )
        self.assertEqual(parse_one("x.*").name, "*")
        self.assertEqual(parse_one("NULL").name, "NULL")
        self.assertEqual(parse_one("a.b.c").name, "c")

    # exp.table_name renders a table as a dotted string. The cases below pin:
    # the dashed BigQuery part is double-quoted without a dialect and
    # backtick-quoted with dialect="bigquery"; plain string input is accepted;
    # identify=True quotes every part; a leading /*c*/ comment on the parsed
    # table does not appear in the result.
    def test_table_name(self):
        bq_dashed_table = exp.to_table("a-1.b.c", dialect="bigquery")
        self.assertEqual(exp.table_name(bq_dashed_table), '"a-1".b.c')
        self.assertEqual(exp.table_name(bq_dashed_table, dialect="bigquery"), "`a-1`.b.c")
        self.assertEqual(exp.table_name("a-1.b.c", dialect="bigquery"), "`a-1`.b.c")
        self.assertEqual(exp.table_name(parse_one("a", into=exp.Table)), "a")
        self.assertEqual(exp.table_name(parse_one("a.b", into=exp.Table)), "a.b")
        self.assertEqual(exp.table_name(parse_one("a.b.c", into=exp.Table)), "a.b.c")
        self.assertEqual(exp.table_name("a.b.c"), "a.b.c")
        self.assertEqual(exp.table_name(exp.to_table("a.b.c.d.e", dialect="bigquery")), "a.b.c.d.e")
        self.assertEqual(exp.table_name(exp.to_table("'@foo'", dialect="snowflake")), "'@foo'")
        self.assertEqual(exp.table_name(exp.to_table("@foo", dialect="snowflake")), "@foo")
        self.assertEqual(exp.table_name(bq_dashed_table, identify=True), '"a-1"."b"."c"')
        self.assertEqual(
            exp.table_name(
                parse_one("foo.`{bar,er}`", read="databricks", into=exp.Table), dialect="databricks"
            ),
            "foo.`{bar,er}`",
        )
        self.assertEqual(
            exp.table_name(parse_one("/*c*/foo.bar", into=exp.Table), identify=True), '"foo"."bar"'
        )

    def test_table(self):
        self.assertEqual(exp.table_("a",
alias="b"), parse_one("select * from a b").find(exp.Table))
        self.assertEqual(exp.table_("a", "").sql(), "a")
        self.assertEqual(exp.Table(db=exp.to_identifier("a")).sql(), "a")

    # exp.replace_tables swaps table references according to a name mapping.
    # Expected output shows: the original alias is kept, the replaced name is
    # appended as a trailing comment, unmapped tables (e.a) are left alone,
    # and a `sqlglot.meta replace=false` comment leaves the table untouched.
    def test_replace_tables(self):
        self.assertEqual(
            exp.replace_tables(
                parse_one(
                    'select * from a AS a, b, c.a, d.a cross join e.a cross join "f-F"."A" cross join G'
                ),
                {
                    "a": "a1",
                    "b": "b.a",
                    "c.a": "c.a2",
                    "d.a": "d2",
                    "`f-F`.`A`": '"F"',
                    "g": "g1.a",
                },
                dialect="bigquery",
            ).sql(),
            'SELECT * FROM a1 AS a /* a */, b.a /* b */, c.a2 /* c.a */, d2 /* d.a */ CROSS JOIN e.a CROSS JOIN "F" /* f-F.A */ CROSS JOIN g1.a /* g */',
        )
        self.assertEqual(
            exp.replace_tables(
                parse_one("select * from example.table", dialect="bigquery"),
                {"example.table": "`my-project.example.table`"},
                dialect="bigquery",
            ).sql(),
            'SELECT * FROM "my-project"."example"."table" /* example.table */',
        )
        self.assertEqual(
            exp.replace_tables(
                parse_one("select * from example.table /* sqlglot.meta replace=false */"),
                {"example.table": "a.b"},
            ).sql(),
            "SELECT * FROM example.table /* sqlglot.meta replace=false */",
        )

    # exp.expand replaces a table with the mapped query as an aliased subquery
    # and records the source name in a trailing comment. The mapping key is
    # written with spark backticks and lower-case `c` while the query uses "C".
    def test_expand(self):
        self.assertEqual(
            exp.expand(
                parse_one('select * from "a-b"."C" AS a'),
                {
                    "`a-b`.`c`": parse_one("select 1"),
                },
                dialect="spark",
            ).sql(),
            "SELECT * FROM (SELECT 1) AS a /* source: a-b.c */",
        )

    # Same expectation as test_expand, but the source is supplied as a
    # zero-argument callable instead of an already-parsed expression.
    def test_expand_with_lazy_source_provider(self):
        self.assertEqual(
            exp.expand(
                parse_one('select * from "a-b"."C" AS a'),
                {"`a-b`.c": lambda: parse_one("select 1", dialect="spark")},
                dialect="spark",
            ).sql(),
            "SELECT * FROM (SELECT 1) AS a /* source: a-b.c */",
        )

    def test_replace_placeholders(self):
        self.assertEqual(
            exp.replace_placeholders(
                parse_one("select * from :tbl1 JOIN :tbl2 ON :col1 = :str1 WHERE :col2 > :int1"),
                tbl1=exp.to_identifier("foo"),
                tbl2=exp.to_identifier("bar"),
                col1=exp.to_identifier("a"),
                col2=exp.to_identifier("c"),
                str1="b",
                int1=100,
            ).sql(),
            "SELECT * FROM foo JOIN bar ON a = 'b' WHERE c > 100",
        )
        self.assertEqual(
            exp.replace_placeholders(
                parse_one("select * from ? JOIN ?
ON ? = ? WHERE ? = 'bla'"), exp.to_identifier("foo"), exp.to_identifier("bar"), exp.to_identifier("a"), "b", "bla", ).sql(), "SELECT * FROM foo JOIN bar ON a = 'b' WHERE 'bla' = 'bla'", ) self.assertEqual( exp.replace_placeholders( parse_one("select * from ? WHERE ? > 100"), exp.to_identifier("foo"), ).sql(), "SELECT * FROM foo WHERE ? > 100", ) self.assertEqual( exp.replace_placeholders( parse_one("select * from :name WHERE ? > 100"), another_name="bla" ).sql(), "SELECT * FROM :name WHERE ? > 100", ) self.assertEqual( exp.replace_placeholders( parse_one("select * from (SELECT :col1 FROM ?) WHERE :col2 > ?"), exp.to_identifier("tbl1"), 100, "tbl3", col1=exp.to_identifier("a"), col2=exp.to_identifier("b"), col3="c", ).sql(), "SELECT * FROM (SELECT a FROM tbl1) WHERE b > 100", ) self.assertEqual( exp.replace_placeholders( parse_one("select * from foo WHERE x > ? AND y IS ?"), 0, False ).sql(), "SELECT * FROM foo WHERE x > 0 AND y IS FALSE", ) self.assertEqual( exp.replace_placeholders( parse_one("select * from foo WHERE x > :int1 AND y IS :bool1"), int1=0, bool1=False ).sql(), "SELECT * FROM foo WHERE x > 0 AND y IS FALSE", ) def test_function_building(self): self.assertEqual(exp.func("max", 1).sql(), "MAX(1)") self.assertEqual(exp.func("max", 1, 2).sql(), "MAX(1, 2)") self.assertEqual(exp.func("bla", 1, "foo").sql(), "BLA(1, foo)") self.assertEqual(exp.func("COUNT", exp.Star()).sql(), "COUNT(*)") self.assertEqual(exp.func("bloo").sql(), "BLOO()") self.assertEqual( exp.func("concat", exp.convert("a"), dialect="duckdb").sql("duckdb"), "CONCAT('a')" ) self.assertEqual( exp.func("locate", "'x'", "'xo'", dialect="hive").sql("hive"), "LOCATE('x', 'xo')" ) self.assertEqual( exp.func("log", exp.to_identifier("x"), 2, dialect="bigquery").sql("bigquery"), "LOG(x, 2)", ) self.assertEqual( exp.func("log", dialect="bigquery", expression="x", this=2).sql("bigquery"), "LOG(x, 2)", ) self.assertIsInstance(exp.func("instr", "x", "b", dialect="mysql"), exp.StrPosition) 
self.assertIsInstance(exp.func("instr", "x", "b", dialect="sqlite"), exp.StrPosition)
        self.assertIsInstance(exp.func("bla", 1, "foo"), exp.Anonymous)
        self.assertIsInstance(
            exp.func("cast", this=exp.Literal.number(5), to=exp.DataType.build("DOUBLE")),
            exp.Cast,
        )

        with self.assertRaises(ValueError):
            exp.func("some_func", 1, arg2="foo")

        with self.assertRaises(ValueError):
            exp.func("abs")

        with self.assertRaises(ValueError) as cm:
            exp.func("to_hex", dialect="bigquery", this=5)

        self.assertEqual(
            str(cm.exception),
            "Unable to convert 'to_hex' into a Func. Either manually construct the Func "
            "expression of interest or parse the function call.",
        )

    # named_selects lists the output names of a query's projections. Pinned
    # here: aliases win over column names, `*` and string literals are listed
    # as-is, the un-aliased SUM(x) is not listed, and for a UNION chain the
    # expected names are those of the first SELECT.
    def test_named_selects(self):
        expression = parse_one(
            "SELECT a, b AS B, c + d AS e, *, 'zz', 'zz' AS z FROM foo as bar, baz"
        )
        self.assertEqual(expression.named_selects, ["a", "B", "e", "*", "zz", "z"])

        expression = parse_one(
            """ WITH first AS (SELECT * FROM foo) SELECT foo.bar, foo.baz as bazz, SUM(x) FROM first """
        )
        self.assertEqual(expression.named_selects, ["bar", "bazz"])

        expression = parse_one(
            """ SELECT foo, bar FROM first UNION SELECT "ss" as foo, bar FROM second UNION ALL SELECT foo, bazz FROM third """
        )
        self.assertEqual(expression.named_selects, ["foo", "bar"])

    # `selects` returns the projection list: empty for `SELECT FROM x`, and
    # still available when the SELECT is wrapped in parentheses.
    def test_selects(self):
        expression = parse_one("SELECT FROM x")
        self.assertEqual(expression.selects, [])

        expression = parse_one("SELECT a FROM x")
        self.assertEqual([s.sql() for s in expression.selects], ["a"])

        expression = parse_one("SELECT a, b FROM x")
        self.assertEqual([s.sql() for s in expression.selects], ["a", "b"])

        expression = parse_one("(SELECT a, b FROM x)")
        self.assertEqual([s.sql() for s in expression.selects], ["a", "b"])

    def test_alias_column_names(self):
        expression = parse_one("SELECT * FROM (SELECT * FROM x) AS y")
        subquery = expression.find(exp.Subquery)
        self.assertEqual(subquery.alias_column_names, [])

        expression = parse_one("SELECT * FROM (SELECT * FROM x) AS y(a)")
        subquery = expression.find(exp.Subquery)
self.assertEqual(subquery.alias_column_names, ["a"]) expression = parse_one("SELECT * FROM (SELECT * FROM x) AS y(a, b)") subquery = expression.find(exp.Subquery) self.assertEqual(subquery.alias_column_names, ["a", "b"]) expression = parse_one("WITH y AS (SELECT * FROM x) SELECT * FROM y") cte = expression.find(exp.CTE) self.assertEqual(cte.alias_column_names, []) expression = parse_one("WITH y(a, b) AS (SELECT * FROM x) SELECT * FROM y") cte = expression.find(exp.CTE) self.assertEqual(cte.alias_column_names, ["a", "b"]) expression = parse_one("SELECT * FROM tbl AS tbl(a, b)") table = expression.find(exp.Table) self.assertEqual(table.alias_column_names, ["a", "b"]) def test_cast(self): expression = parse_one("CAST(x AS DATE)") self.assertIs(expression.type, expression.to) expression = parse_one("select cast(x as DATE)") casts = list(expression.find_all(exp.Cast)) self.assertEqual(len(casts), 1) cast = casts[0] self.assertTrue(cast.to.is_type(exp.DataType.Type.DATE)) # check that already cast values arent re-cast if wrapped in a cast to the same type recast = exp.cast(cast, to=exp.DataType.Type.DATE) self.assertEqual(recast, cast) self.assertEqual(recast.sql(), "CAST(x AS DATE)") # however, recasting is fine if the types are different recast = exp.cast(cast, to=exp.DataType.Type.VARCHAR) self.assertNotEqual(recast, cast) self.assertEqual(len(list(recast.find_all(exp.Cast))), 2) self.assertEqual(recast.sql(), "CAST(CAST(x AS DATE) AS VARCHAR)") # check that dialect is used when casting strings self.assertEqual( exp.cast("x", to="regtype", dialect="postgres").sql(), "CAST(x AS REGTYPE)" ) self.assertEqual(exp.cast("`x`", to="date", dialect="hive").sql(), 'CAST("x" AS DATE)') def test_ctes(self): expression = parse_one("SELECT a FROM x") self.assertEqual(expression.ctes, []) expression = parse_one("WITH x AS (SELECT a FROM y) SELECT a FROM x") self.assertEqual([s.sql() for s in expression.ctes], ["x AS (SELECT a FROM y)"]) def test_hash(self): self.assertEqual( { 
parse_one("select a.b"), parse_one("1+2"), parse_one('"a"."b"'), parse_one("a.b.c.d"), }, { parse_one("select a.b"), parse_one("1+2"), parse_one('"a"."b"'), parse_one("a.b.c.d"), }, ) def test_sql(self): self.assertEqual(parse_one("x + y * 2").sql(), "x + y * 2") self.assertEqual(parse_one('select "x"').sql(dialect="hive", pretty=True), "SELECT\n `x`") self.assertEqual(parse_one("X + y").sql(identify=True, normalize=True), '"x" + "y"') self.assertEqual(parse_one('"X" + Y').sql(identify=True, normalize=True), '"X" + "y"') self.assertEqual(parse_one("SUM(X)").sql(identify=True, normalize=True), 'SUM("x")') def test_transform_with_arguments(self): expression = parse_one("a") def fun(node, alias_=True): if alias_: return parse_one("a AS a") return node transformed_expression = expression.transform(fun) self.assertEqual(transformed_expression.sql(dialect="presto"), "a AS a") transformed_expression_2 = expression.transform(fun, alias_=False) self.assertEqual(transformed_expression_2.sql(dialect="presto"), "a") def test_transform_simple(self): expression = parse_one("IF(a > 0, a, b)") def fun(node): if isinstance(node, exp.Column) and node.name == "a": return parse_one("c - 2") return node actual_expression_1 = expression.transform(fun) self.assertEqual(actual_expression_1.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)") self.assertIsNot(actual_expression_1, expression) actual_expression_2 = expression.transform(fun, copy=False) self.assertEqual(actual_expression_2.sql(dialect="presto"), "IF(c - 2 > 0, c - 2, b)") self.assertIs(actual_expression_2, expression) def test_transform_no_infinite_recursion(self): expression = parse_one("a") def fun(node): if isinstance(node, exp.Column) and node.name == "a": return parse_one("FUN(a)") return node self.assertEqual(expression.transform(fun).sql(), "FUN(a)") def test_transform_with_parent_mutation(self): expression = parse_one("SELECT COUNT(1) FROM table") def fun(node): if str(node) == "COUNT(1)": # node gets silently mutated 
here - its parent points to the filter node return exp.Filter(this=node, expression=exp.Where(this=exp.true())) return node transformed = expression.transform(fun) self.assertEqual(transformed.sql(), "SELECT COUNT(1) FILTER(WHERE TRUE) FROM table") def test_transform_multiple_children(self): expression = parse_one("SELECT * FROM x") def fun(node): if isinstance(node, exp.Star): return [parse_one(c) for c in ["a", "b"]] return node self.assertEqual(expression.transform(fun).sql(), "SELECT a, b FROM x") def test_transform_node_removal(self): expression = parse_one("SELECT a, b FROM x") def remove_column_b(node): if isinstance(node, exp.Column) and node.name == "b": return None return node self.assertEqual(expression.transform(remove_column_b).sql(), "SELECT a FROM x") expression = parse_one("CAST(x AS FLOAT)") def remove_non_list_arg(node): if isinstance(node, exp.DataType): return None return node self.assertEqual(expression.transform(remove_non_list_arg).sql(), "CAST(x AS)") expression = parse_one("SELECT a, b FROM x") def remove_all_columns(node): if isinstance(node, exp.Column): return None return node self.assertEqual(expression.transform(remove_all_columns).sql(), "SELECT FROM x") def test_replace(self): expression = parse_one("SELECT a, b FROM x") expression.find(exp.Column).replace(parse_one("c")) self.assertEqual(expression.sql(), "SELECT c, b FROM x") expression.find(exp.Table).replace(parse_one("y")) self.assertEqual(expression.sql(), "SELECT c, b FROM y") # we try to replace a with a list but a's parent is actually ordered, not the ORDER BY node expression = parse_one("SELECT * FROM x ORDER BY a DESC, c") expression.find(exp.Ordered).this.replace([exp.column("a").asc(), exp.column("b").desc()]) self.assertEqual(expression.sql(), "SELECT * FROM x ORDER BY a, b DESC, c") def test_arg_deletion(self): # Using the pop helper method expression = parse_one("SELECT a, b FROM x") expression.find(exp.Column).pop() self.assertEqual(expression.sql(), "SELECT b FROM 
x") expression.find(exp.Column).pop() self.assertEqual(expression.sql(), "SELECT FROM x") expression.pop() self.assertEqual(expression.sql(), "SELECT FROM x") expression = parse_one("WITH x AS (SELECT a FROM x) SELECT * FROM x") expression.find(exp.With).pop() self.assertEqual(expression.sql(), "SELECT * FROM x") # Manually deleting by setting to None expression = parse_one("SELECT * FROM foo JOIN bar") self.assertEqual(len(expression.args.get("joins", [])), 1) expression.set("joins", None) self.assertEqual(expression.sql(), "SELECT * FROM foo") self.assertEqual(expression.args.get("joins", []), []) self.assertIsNone(expression.args.get("joins")) def test_walk(self): expression = parse_one("SELECT * FROM (SELECT * FROM x)") self.assertEqual(len(list(expression.walk())), 9) self.assertEqual(len(list(expression.walk(bfs=False))), 9) self.assertTrue(all(isinstance(e, exp.Expr) for e in expression.walk())) self.assertTrue(all(isinstance(e, exp.Expr) for e in expression.walk(bfs=False))) def test_str_position_order(self): str_position_exp = parse_one("STR_POSITION('mytest', 'test')") self.assertIsInstance(str_position_exp, exp.StrPosition) self.assertEqual(str_position_exp.args.get("this").this, "mytest") self.assertEqual(str_position_exp.args.get("substr").this, "test") def test_functions(self): self.assertIsInstance(parse_one("x LIKE ANY (y)"), exp.Like) self.assertIsInstance(parse_one("x ILIKE ANY (y)"), exp.ILike) self.assertIsInstance(parse_one("ABS(a)"), exp.Abs) self.assertIsInstance(parse_one("APPROX_DISTINCT(a)"), exp.ApproxDistinct) self.assertIsInstance(parse_one("ARRAY(a)"), exp.Array) self.assertIsInstance(parse_one("ARRAY_AGG(a)"), exp.ArrayAgg) self.assertIsInstance(parse_one("ARRAY_CONTAINS(a, 'a')"), exp.ArrayContains) self.assertIsInstance(parse_one("ARRAY_SIZE(a)"), exp.ArraySize) self.assertIsInstance(parse_one("ARRAY_INTERSECTION([1, 2], [2, 3])"), exp.ArrayIntersect) self.assertIsInstance(parse_one("ARRAY_INTERSECT([1, 2], [2, 3])"), 
exp.ArrayIntersect) self.assertIsInstance(parse_one("AVG(a)"), exp.Avg) self.assertIsInstance(parse_one("BEGIN DEFERRED TRANSACTION"), exp.Transaction) self.assertIsInstance(parse_one("CEIL(a)"), exp.Ceil) self.assertIsInstance(parse_one("CEILING(a)"), exp.Ceil) self.assertIsInstance(parse_one("COALESCE(a, b)"), exp.Coalesce) self.assertIsInstance(parse_one("COMMIT"), exp.Commit) self.assertIsInstance(parse_one("COUNT(a)"), exp.Count) self.assertIsInstance(parse_one("COUNT_IF(a > 0)"), exp.CountIf) self.assertIsInstance(parse_one("DATE_ADD(a, 1)"), exp.DateAdd) self.assertIsInstance(parse_one("DATE_DIFF(a, 2)"), exp.DateDiff) self.assertIsInstance(parse_one("DATE_STR_TO_DATE(a)"), exp.DateStrToDate) self.assertIsInstance(parse_one("TS_OR_DS_TO_TIME(a)"), exp.TsOrDsToTime) self.assertIsInstance(parse_one("DAY(a)"), exp.Day) self.assertIsInstance(parse_one("EXP(a)"), exp.Exp) self.assertIsInstance(parse_one("FLOOR(a)"), exp.Floor) self.assertIsInstance(parse_one("GENERATE_SERIES(a, b, c)"), exp.GenerateSeries) self.assertIsInstance(parse_one("GLOB(x, y)"), exp.Glob) self.assertIsInstance(parse_one("GREATEST(a, b)"), exp.Greatest) self.assertIsInstance(parse_one("IF(a, b, c)"), exp.If) self.assertIsInstance(parse_one("INITCAP(a)"), exp.Initcap) self.assertIsInstance(parse_one("JSON_EXTRACT(a, '$.name')"), exp.JSONExtract) self.assertIsInstance(parse_one("JSON_EXTRACT_SCALAR(a, '$.name')"), exp.JSONExtractScalar) self.assertIsInstance(parse_one("LEAST(a, b)"), exp.Least) self.assertIsInstance(parse_one("LIKE(x, y)"), exp.Like) self.assertIsInstance(parse_one("LN(a)"), exp.Ln) self.assertIsInstance(parse_one("LOG(b, n)"), exp.Log) self.assertIsInstance(parse_one("LOG2(a)"), exp.Log) self.assertIsInstance(parse_one("LOG10(a)"), exp.Log) self.assertIsInstance(parse_one("MAX(a)"), exp.Max) self.assertIsInstance(parse_one("MIN(a)"), exp.Min) self.assertIsInstance(parse_one("MONTH(a)"), exp.Month) self.assertIsInstance(parse_one("QUARTER(a)"), exp.Quarter) 
self.assertIsInstance(parse_one("POSITION(' ' IN a)"), exp.StrPosition) self.assertIsInstance(parse_one("POW(a, 2)"), exp.Pow) self.assertIsInstance(parse_one("POWER(a, 2)"), exp.Pow) self.assertIsInstance(parse_one("QUANTILE(a, 0.90)"), exp.Quantile) self.assertIsInstance(parse_one("REGEXP_LIKE(a, 'test')"), exp.RegexpLike) self.assertIsInstance(parse_one("REGEXP_SPLIT(a, 'test')"), exp.RegexpSplit) self.assertIsInstance(parse_one("ROLLBACK"), exp.Rollback) self.assertIsInstance(parse_one("ROUND(a)"), exp.Round) self.assertIsInstance(parse_one("ROUND(a, 2)"), exp.Round) self.assertIsInstance(parse_one("SPLIT(a, 'test')"), exp.Split) self.assertIsInstance(parse_one("ST_POINT(10, 20)"), exp.StPoint) self.assertIsInstance(parse_one("ST_DISTANCE(a, b)"), exp.StDistance) self.assertIsInstance(parse_one("STR_POSITION(a, 'test')"), exp.StrPosition) self.assertIsInstance(parse_one("STR_TO_UNIX(a, 'format')"), exp.StrToUnix) self.assertIsInstance(parse_one("STRUCT_EXTRACT(a, 'test')"), exp.StructExtract) self.assertIsInstance(parse_one("SUBSTR('a', 1, 1)"), exp.Substring) self.assertIsInstance(parse_one("SUBSTRING('a', 1, 1)"), exp.Substring) self.assertIsInstance(parse_one("SUM(a)"), exp.Sum) self.assertIsInstance(parse_one("SQRT(a)"), exp.Sqrt) self.assertIsInstance(parse_one("STDDEV(a)"), exp.Stddev) self.assertIsInstance(parse_one("STDDEV_POP(a)"), exp.StddevPop) self.assertIsInstance(parse_one("STDDEV_SAMP(a)"), exp.StddevSamp) self.assertIsInstance(parse_one("TIME_TO_STR(a, 'format')"), exp.TimeToStr) self.assertIsInstance(parse_one("TIME_TO_TIME_STR(a)"), exp.Cast) self.assertIsInstance(parse_one("TIME_TO_UNIX(a)"), exp.TimeToUnix) self.assertIsInstance(parse_one("TIME_STR_TO_DATE(a)"), exp.TimeStrToDate) self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a)"), exp.TimeStrToTime) self.assertIsInstance(parse_one("TIME_STR_TO_TIME(a, 'some_zone')"), exp.TimeStrToTime) self.assertIsInstance(parse_one("TIME_STR_TO_UNIX(a)"), exp.TimeStrToUnix) 
self.assertIsInstance(parse_one("TRIM(LEADING 'b' FROM 'bla')"), exp.Trim) self.assertIsInstance(parse_one("TS_OR_DS_ADD(a, 1, 'day')"), exp.TsOrDsAdd) self.assertIsInstance(parse_one("TS_OR_DS_TO_DATE(a)"), exp.TsOrDsToDate) self.assertIsInstance(parse_one("TS_OR_DS_TO_DATE_STR(a)"), exp.Substring) self.assertIsInstance(parse_one("UNIX_TO_STR(a, 'format')"), exp.UnixToStr) self.assertIsInstance(parse_one("UNIX_TO_TIME(a)"), exp.UnixToTime) self.assertIsInstance(parse_one("UNIX_TO_TIME_STR(a)"), exp.UnixToTimeStr) self.assertIsInstance(parse_one("VARIANCE(a)"), exp.Variance) self.assertIsInstance(parse_one("VARIANCE_POP(a)"), exp.VariancePop) self.assertIsInstance(parse_one("YEAR(a)"), exp.Year) self.assertIsInstance(parse_one("HLL(a)"), exp.Hll) self.assertIsInstance(parse_one("ARRAY(time, foo)"), exp.Array) self.assertIsInstance(parse_one("STANDARD_HASH('hello', 'sha256')"), exp.StandardHash) self.assertIsInstance(parse_one("DATE(foo)"), exp.Date) self.assertIsInstance(parse_one("HEX(foo)"), exp.Hex) self.assertIsInstance(parse_one("LOWER(HEX(foo))"), exp.LowerHex) self.assertIsInstance(parse_one("TO_HEX(foo)", read="bigquery"), exp.LowerHex) self.assertIsInstance(parse_one("UPPER(TO_HEX(foo))", read="bigquery"), exp.Hex) self.assertIsInstance(parse_one("TO_HEX(MD5(foo))", read="bigquery"), exp.MD5) self.assertIsInstance(parse_one("TRANSFORM(a, b)", read="spark"), exp.Transform) self.assertIsInstance(parse_one("ADD_MONTHS(a, b)"), exp.AddMonths) ast = parse_one("GREATEST(a, b, c)") self.assertIsInstance(ast.expressions, list) self.assertEqual(len(ast.expressions), 2) def test_column(self): column = exp.column(exp.Star(), table="t") self.assertEqual(column.sql(), "t.*") column = parse_one("a.b.c.d") self.assertEqual(column.catalog, "a") self.assertEqual(column.db, "b") self.assertEqual(column.table, "c") self.assertEqual(column.name, "d") column = parse_one("a") self.assertEqual(column.name, "a") self.assertEqual(column.table, "") fields = parse_one("a.b.c.d.e") 
self.assertIsInstance(fields, exp.Dot) self.assertEqual(fields.text("expression"), "e") column = fields.find(exp.Column) self.assertEqual(column.name, "d") self.assertEqual(column.table, "c") self.assertEqual(column.db, "b") self.assertEqual(column.catalog, "a") column = parse_one("a[0].b") self.assertIsInstance(column, exp.Dot) self.assertIsInstance(column.this, exp.Bracket) self.assertIsInstance(column.this.this, exp.Column) column = parse_one("a.*") self.assertIsInstance(column, exp.Column) self.assertIsInstance(column.this, exp.Star) self.assertIsInstance(column.args["table"], exp.Identifier) self.assertEqual(column.table, "a") self.assertIsInstance(parse_one("*"), exp.Star) self.assertEqual(exp.column("a", table="b", db="c", catalog="d"), exp.to_column("d.c.b.a")) dot = exp.column("d", "c", "b", "a", fields=["e", "f"]) self.assertIsInstance(dot, exp.Dot) self.assertEqual(dot.sql(), "a.b.c.d.e.f") dot = exp.column("d", "c", "b", "a", fields=["e", "f"], quoted=True) self.assertEqual(dot.sql(), '"a"."b"."c"."d"."e"."f"') def test_text(self): column = parse_one("a.b.c.d.e") self.assertEqual(column.text("expression"), "e") self.assertEqual(column.text("y"), "") self.assertEqual(parse_one("select * from x.y").find(exp.Table).text("db"), "x") self.assertEqual(parse_one("select *").name, "") self.assertEqual(parse_one("1 + 1").name, "1") self.assertEqual(parse_one("'a'").name, "a") def test_alias(self): self.assertEqual(alias("foo", "bar").sql(), "foo AS bar") self.assertEqual(alias("foo", "bar-1").sql(), 'foo AS "bar-1"') self.assertEqual(alias("foo", "bar_1").sql(), "foo AS bar_1") self.assertEqual(alias("foo * 2", "2bar").sql(), 'foo * 2 AS "2bar"') self.assertEqual(alias('"foo"', "_bar").sql(), '"foo" AS _bar') self.assertEqual(alias("foo", "bar", quoted=True).sql(), 'foo AS "bar"') def test_alias_with_placeholder(self): # Snowflake's `AS :name` syntax parses the alias as a Placeholder node. 
# Regression test: Expression.alias should return the placeholder name, not "". expr = parse_one("SELECT PARSE_JSON(col) AS :userInfo FROM t", dialect="snowflake") select = expr.selects[0] self.assertIsInstance(select.args.get("alias"), exp.Placeholder) self.assertEqual(select.alias, "userInfo") self.assertEqual(select.alias_or_name, "userInfo") self.assertEqual(select.output_name, "userInfo") def test_unit(self): unit = parse_one("timestamp_trunc(current_timestamp, week(thursday))") self.assertIsNotNone(unit.find(exp.CurrentTimestamp)) week = unit.find(exp.Week) self.assertEqual(week.this, exp.var("thursday")) for abbreviated_unit, unnabreviated_unit in exp.TimeUnit.UNABBREVIATED_UNIT_NAME.items(): interval = parse_one(f"interval '500 {abbreviated_unit}'") self.assertIsInstance(interval.unit, exp.Var) self.assertEqual(interval.unit.name, unnabreviated_unit) def test_identifier(self): self.assertTrue(exp.to_identifier('"x"').quoted) self.assertFalse(exp.to_identifier("x").quoted) self.assertTrue(exp.to_identifier("foo ").quoted) self.assertFalse(exp.to_identifier("_x").quoted) def test_function_normalizer(self): self.assertEqual(parse_one("HELLO()").sql(normalize_functions="lower"), "hello()") self.assertEqual(parse_one("hello()").sql(normalize_functions="upper"), "HELLO()") self.assertEqual(parse_one("heLLO()").sql(normalize_functions=False), "heLLO()") self.assertEqual(parse_one("SUM(x)").sql(normalize_functions="lower"), "sum(x)") self.assertEqual(parse_one("sum(x)").sql(normalize_functions="upper"), "SUM(x)") def test_properties_from_dict(self): self.assertEqual( exp.Properties.from_dict( { "FORMAT": "parquet", "PARTITIONED_BY": (exp.to_identifier("a"), exp.to_identifier("b")), "custom": 1, "ENGINE": None, "COLLATE": True, } ), exp.Properties( expressions=[ exp.FileFormatProperty(this=exp.Literal.string("parquet")), exp.PartitionedByProperty( this=exp.Tuple(expressions=[exp.to_identifier("a"), exp.to_identifier("b")]) ), 
exp.Property(this=exp.Literal.string("custom"), value=exp.Literal.number(1)), exp.EngineProperty(this=exp.null()), exp.CollateProperty(this=exp.true()), ] ), ) self.assertRaises(ValueError, exp.Properties.from_dict, {"FORMAT": object}) def test_convert(self): from collections import namedtuple import pytz PointTuple = namedtuple("Point", ["x", "y"]) class PointClass: def __init__(self, x=0, y=0): self.x = x self.y = y for value, expected in [ (1, "1"), ("1", "'1'"), (None, "NULL"), (True, "TRUE"), ((1, "2", None), "(1, '2', NULL)"), ([1, "2", None], "ARRAY(1, '2', NULL)"), ({"x": None}, "MAP(ARRAY('x'), ARRAY(NULL))"), ( datetime.datetime(2022, 10, 1, 1, 1, 1, 1), "TIME_STR_TO_TIME('2022-10-01 01:01:01.000001')", ), ( datetime.datetime(2022, 10, 1, 1, 1, 1, tzinfo=datetime.timezone.utc), "TIME_STR_TO_TIME('2022-10-01 01:01:01+00:00', 'UTC')", ), ( pytz.timezone("America/Los_Angeles").localize( datetime.datetime(2022, 10, 1, 1, 1, 1) ), "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')", ), (datetime.date(2022, 10, 1), "DATE_STR_TO_DATE('2022-10-01')"), (math.nan, "NULL"), (b"\x00\x00\x00\x00\x00\x00\x07\xd3", "2003"), (PointTuple(1, 2), "STRUCT(1 AS x, 2 AS y)"), (PointClass(1, 2), "STRUCT(1 AS x, 2 AS y)"), ]: with self.subTest(value): self.assertEqual(exp.convert(value).sql(), expected) self.assertEqual( exp.convert({"test": "value"}).sql(dialect="spark"), "MAP_FROM_ARRAYS(ARRAY('test'), ARRAY('value'))", ) @unittest.skipUnless(sys.version_info >= (3, 9), "zoneinfo only available from python 3.9+") def test_convert_python39(self): import zoneinfo for value, expected in [ ( datetime.datetime( 2022, 10, 1, 1, 1, 1, tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles") ), "TIME_STR_TO_TIME('2022-10-01 01:01:01-07:00', 'America/Los_Angeles')", ) ]: with self.subTest(value): self.assertEqual(exp.convert(value).sql(), expected) def test_comment_alias(self): sql = """ SELECT a, b AS B, c, /*comment*/ d AS D, -- another comment CAST(x AS INT), -- yet 
another comment y AND /* foo */ w AS E -- final comment FROM foo """ expression = parse_one(sql) self.assertEqual( [e.alias_or_name for e in expression.expressions], ["a", "B", "c", "D", "x", "E"], ) self.assertEqual( expression.sql(), "SELECT a, b AS B, c /* comment */, d AS D /* another comment */, CAST(x AS INT) /* yet another comment */, y AND /* foo */ w AS E /* final comment */ FROM foo", ) self.assertEqual( expression.sql(comments=False), "SELECT a, b AS B, c, d AS D, CAST(x AS INT), y AND w AS E FROM foo", ) self.assertEqual( expression.sql(pretty=True, comments=False), """SELECT a, b AS B, c, d AS D, CAST(x AS INT), y AND w AS E FROM foo""", ) self.assertEqual( expression.sql(pretty=True), """SELECT a, b AS B, c, /* comment */ d AS D, /* another comment */ CAST(x AS INT), /* yet another comment */ y AND /* foo */ w AS E /* final comment */ FROM foo""", ) self.assertEqual(parse_one('max(x) as "a b" -- comment').comments, [" comment"]) def test_to_interval(self): self.assertEqual(exp.to_interval("1day").sql(), "INTERVAL '1' DAY") self.assertEqual(exp.to_interval(" 5 months").sql(), "INTERVAL '5' MONTHS") self.assertEqual(exp.to_interval("-2 day").sql(), "INTERVAL '-2' DAY") self.assertEqual(exp.to_interval(exp.Literal.string("1day")).sql(), "INTERVAL '1' DAY") self.assertEqual(exp.to_interval(exp.Literal.string("-2 day")).sql(), "INTERVAL '-2' DAY") self.assertEqual( exp.to_interval(exp.Literal.string(" 5 months")).sql(), "INTERVAL '5' MONTHS" ) def test_to_table(self): table_only = exp.to_table("table_name") self.assertEqual(table_only.name, "table_name") self.assertIsNone(table_only.args.get("db")) self.assertIsNone(table_only.args.get("catalog")) db_and_table = exp.to_table("db.table_name") self.assertEqual(db_and_table.name, "table_name") self.assertEqual(db_and_table.args.get("db"), exp.to_identifier("db")) self.assertIsNone(db_and_table.args.get("catalog")) catalog_db_and_table = exp.to_table("catalog.db.table_name") 
self.assertEqual(catalog_db_and_table.name, "table_name")
        self.assertEqual(catalog_db_and_table.args.get("db"), exp.to_identifier("db"))
        self.assertEqual(catalog_db_and_table.args.get("catalog"), exp.to_identifier("catalog"))

        table_only_unsafe_identifier = exp.to_table("3e")
        self.assertEqual(table_only_unsafe_identifier.sql(), '"3e"')

    # exp.to_column builds a Column from a (possibly dotted) string. Pinned
    # here: a bare name has no "table" arg; `table.column` splits into both;
    # a name containing a space is rendered quoted; spark backticks are read
    # and re-rendered as double quotes; quoted=True forces quoting; an explicit
    # table= keyword qualifies the column.
    def test_to_column(self):
        column_only = exp.to_column("column_name")
        self.assertEqual(column_only.name, "column_name")
        self.assertIsNone(column_only.args.get("table"))
        table_and_column = exp.to_column("table_name.column_name")
        self.assertEqual(table_and_column.name, "column_name")
        self.assertEqual(table_and_column.args.get("table"), exp.to_identifier("table_name"))

        self.assertEqual(exp.to_column("foo bar").sql(), '"foo bar"')
        self.assertEqual(exp.to_column("`column_name`", dialect="spark").sql(), '"column_name"')
        self.assertEqual(exp.to_column("column_name", quoted=True).sql(), '"column_name"')
        self.assertEqual(
            exp.to_column("column_name", table=exp.to_identifier("table_name")).sql(),
            "table_name.column_name",
        )

    # A parsed UNION is an exp.Union whose named_selects and selects are
    # expected to be those of its left-hand SELECT (cola, colb).
    def test_union(self):
        expression = parse_one("SELECT cola, colb UNION SELECT colx, coly")
        self.assertIsInstance(expression, exp.Union)
        self.assertEqual(expression.named_selects, ["cola", "colb"])
        self.assertEqual(
            expression.selects,
            [
                exp.Column(this=exp.to_identifier("cola")),
                exp.Column(this=exp.to_identifier("colb")),
            ],
        )

    # exp.values builds an aliased VALUES clause. Columns may be given as a
    # list of names or as a name -> DataType mapping; both render the same
    # SQL here. Passing columns without an alias raises ValueError.
    def test_values(self):
        self.assertEqual(
            exp.values([(1, 2), (3, 4)], "t", ["a", "b"]).sql(),
            "(VALUES (1, 2), (3, 4)) AS t(a, b)",
        )
        self.assertEqual(
            exp.values(
                [(1, 2), (3, 4)],
                "t",
                {"a": exp.DataType.build("TEXT"), "b": exp.DataType.build("TEXT")},
            ).sql(),
            "(VALUES (1, 2), (3, 4)) AS t(a, b)",
        )
        with self.assertRaises(ValueError):
            exp.values([(1, 2), (3, 4)], columns=["a"])

    def test_data_type_builder(self):
        self.assertEqual(exp.DataType.build("TEXT").sql(), "TEXT")
        self.assertEqual(exp.DataType.build("DECIMAL(10, 2)").sql(), "DECIMAL(10, 2)")
self.assertEqual(exp.DataType.build("VARCHAR(255)").sql(), "VARCHAR(255)") self.assertEqual(exp.DataType.build("ARRAY").sql(), "ARRAY") self.assertEqual(exp.DataType.build("CHAR").sql(), "CHAR") self.assertEqual(exp.DataType.build("NCHAR").sql(), "CHAR") self.assertEqual(exp.DataType.build("VARCHAR").sql(), "VARCHAR") self.assertEqual(exp.DataType.build("NVARCHAR").sql(), "VARCHAR") self.assertEqual(exp.DataType.build("TEXT").sql(), "TEXT") self.assertEqual(exp.DataType.build("BINARY").sql(), "BINARY") self.assertEqual(exp.DataType.build("VARBINARY").sql(), "VARBINARY") self.assertEqual(exp.DataType.build("INT").sql(), "INT") self.assertEqual(exp.DataType.build("TINYINT").sql(), "TINYINT") self.assertEqual(exp.DataType.build("SMALLINT").sql(), "SMALLINT") self.assertEqual(exp.DataType.build("BIGINT").sql(), "BIGINT") self.assertEqual(exp.DataType.build("FLOAT").sql(), "FLOAT") self.assertEqual(exp.DataType.build("DOUBLE").sql(), "DOUBLE") self.assertEqual(exp.DataType.build("DECIMAL").sql(), "DECIMAL") self.assertEqual(exp.DataType.build("BOOLEAN").sql(), "BOOLEAN") self.assertEqual(exp.DataType.build("JSON").sql(), "JSON") self.assertEqual(exp.DataType.build("JSONB", dialect="postgres").sql(), "JSONB") self.assertEqual(exp.DataType.build("INTERVAL").sql(), "INTERVAL") self.assertEqual(exp.DataType.build("TIME").sql(), "TIME") self.assertEqual(exp.DataType.build("TIMESTAMP").sql(), "TIMESTAMP") self.assertEqual(exp.DataType.build("TIMESTAMPTZ").sql(), "TIMESTAMPTZ") self.assertEqual(exp.DataType.build("TIMESTAMPLTZ").sql(), "TIMESTAMPLTZ") self.assertEqual(exp.DataType.build("DATE").sql(), "DATE") self.assertEqual(exp.DataType.build("DATETIME").sql(), "DATETIME") self.assertEqual(exp.DataType.build("ARRAY").sql(), "ARRAY") self.assertEqual(exp.DataType.build("MAP").sql(), "MAP") self.assertEqual(exp.DataType.build("UUID").sql(), "UUID") self.assertEqual(exp.DataType.build("GEOGRAPHY").sql(), "GEOGRAPHY") self.assertEqual(exp.DataType.build("GEOMETRY").sql(), 
def test_to_py(self):
    # to_py() is expected to turn constant expressions into Python values.
    self.assertEqual(parse_one("- -1").to_py(), 1)
    self.assertIs(parse_one("TRUE").to_py(), True)

    # Ints and strings are compared by value plus exact type rather than by
    # identity: `assertIs(value, 1)` / `assertIs(value, "1")` only succeed
    # because CPython caches small ints and interns short strings, which is
    # an implementation detail. The explicit type check keeps the strictness
    # identity provided (e.g. `True == 1`, but `type(True)` is not `int`).
    for sql, expected in (("1", 1), ("'1'", "1")):
        value = parse_one(sql).to_py()
        self.assertIs(type(value), type(expected))
        self.assertEqual(value, expected)

    # None is a singleton, so an identity check is the right tool here.
    self.assertIsNone(parse_one("null").to_py())

    # A bare column reference must be rejected with ValueError.
    with self.assertRaises(ValueError):
        parse_one("x").to_py()
def test_set_metadata(self):
    tree = parse_one("SELECT foo.col FROM foo")

    # The backing attribute stays unset until `meta` is read for the first time.
    self.assertIsNone(tree._meta)

    # Reading `meta` lazily creates the dictionary stored in `_meta`.
    self.assertEqual(tree.meta, {})
    self.assertEqual(tree._meta, {})

    first_key, first_value = "some_meta_key", "some_meta_value"
    second_key, second_value = "some_other_meta_key", "some_other_meta_value"

    tree.meta[first_key] = first_value
    self.assertEqual(tree.meta.get(first_key), first_value)
    # The second key has not been written yet.
    self.assertEqual(tree.meta.get(second_key), None)

    tree.meta[second_key] = second_value
    self.assertEqual(tree.meta.get(second_key), second_value)
def test_set_meta(self):
    # A trailing `sqlglot.meta` comment must populate the table's meta mapping
    # and still be emitted when the query is generated again.
    sql = "SELECT * FROM foo /* sqlglot.meta x = 1, y = a, z */"
    parsed = parse_one(sql)

    table_meta = parsed.find(exp.Table).meta
    self.assertEqual(table_meta, {"x": True, "y": "a", "z": True})
    self.assertEqual(parsed.sql(), sql)
def test_convert_datetime_time(self):
    # exp.convert() must wrap datetime.time values in TsOrDsToTime nodes that
    # render as a CAST of a string literal to TIME.
    converted = exp.convert(datetime.time(14, 30, 45))
    self.assertIsInstance(converted, exp.TsOrDsToTime)
    self.assertIsInstance(converted.this, exp.Literal)
    self.assertEqual(converted.sql(), "CAST('14:30:45' AS TIME)")
    self.assertTrue(converted.this.is_string)

    remaining_cases = (
        # With microseconds
        (datetime.time(9, 15, 30, 123456), "CAST('09:15:30.123456' AS TIME)"),
        # Midnight
        (datetime.time(0, 0, 0), "CAST('00:00:00' AS TIME)"),
        # Noon
        (datetime.time(12, 0, 0), "CAST('12:00:00' AS TIME)"),
    )
    for time_value, expected_sql in remaining_cases:
        converted = exp.convert(time_value)
        self.assertIsInstance(converted, exp.TsOrDsToTime)
        self.assertEqual(converted.sql(), expected_sql)
@unittest.skipIf(_EXPRESSION_IS_COMPILED, "mypyc compiled expressions cannot be subclassed")
def test_fallback_function_sql(self):
    # A locally defined Func subclass is registered with the Snowflake parser
    # for the duration of the test; the parsed call must be generated back
    # unchanged by the default generator.
    class SpecialUdf(Expression, Func):
        arg_types = {"a": True, "b": False}

    function_name = "SPECIAL_UDF"
    SnowflakeParser.FUNCTIONS[function_name] = SpecialUdf.from_arg_list
    try:
        parsed = parse_one("SELECT SPECIAL_UDF(a) FROM x", dialect="snowflake")
        self.assertEqual(parsed.sql(), "SELECT SPECIAL_UDF(a) FROM x")
    finally:
        # Always unregister so the temporary function does not leak into other tests.
        del SnowflakeParser.FUNCTIONS[function_name]
def test_overlap_operator(self):
    # The `::int4range` casts must come back as CAST(... AS INT4RANGE) with the
    # operator untouched, both for the default generator and for postgres.
    for op in ("&<", "&>"):
        with self.subTest(op=op):
            parsed = parse_one(
                f"SELECT '[1,10]'::int4range {op} '[5,15]'::int4range",
                read="postgres",
            )
            expected_sql = f"SELECT CAST('[1,10]' AS INT4RANGE) {op} CAST('[5,15]' AS INT4RANGE)"

            for dialect_args in ((), ("postgres",)):
                self.assertEqual(parsed.sql(*dialect_args), expected_sql)
class TestHelper(unittest.TestCase):
    def test_tsort(self):
        # (graph, expected ordering) pairs.
        cases = [
            ({"a": set()}, ["a"]),
            ({"a": {"b"}}, ["b", "a"]),
            ({"a": {"c"}, "b": set(), "c": set()}, ["b", "c", "a"]),
            (
                {
                    "a": {"b", "c"},
                    "b": {"c"},
                    "c": set(),
                    "d": {"a"},
                },
                ["c", "b", "a", "d"],
            ),
        ]
        for graph, expected_order in cases:
            self.assertEqual(tsort(graph), expected_order)

        # "a" and "b" reference each other, which must be rejected.
        cyclic_graph = {
            "a": {"b", "c"},
            "b": {"a"},
            "c": set(),
        }
        with self.assertRaises(ValueError):
            tsort(cyclic_graph)

    def test_name_sequence(self):
        next_a = name_sequence("a")
        next_b = name_sequence("b")

        # Calls are interleaved to show that each sequence keeps its own counter.
        interleaved_calls = [
            (next_a, "a0"),
            (next_a, "a1"),
            (next_b, "b0"),
            (next_a, "a2"),
            (next_b, "b1"),
            (next_b, "b2"),
        ]
        for generate, expected_name in interleaved_calls:
            self.assertEqual(generate(), expected_name)

    def test_merge_ranges(self):
        # (expected merged ranges, input ranges) pairs.
        cases = [
            ([], []),
            ([(0, 1)], [(0, 1)]),
            ([(0, 1), (2, 3)], [(0, 1), (2, 3)]),
            ([(0, 3)], [(0, 1), (1, 3)]),
            ([(0, 1), (2, 4)], [(2, 3), (0, 1), (3, 4)]),
        ]
        for expected, ranges in cases:
            self.assertEqual(expected, merge_ranges(ranges))
def test_identity(self):
    # Each selector must be generated back as the expected path wrapped in
    # single quotes.
    cases = (
        ("$.select", "$.select"),
        ("$[(@.length-1)]", "$[(@.length-1)]"),
        ("$[((@.length-1))]", "$[((@.length-1))]"),
    )
    for selector, expected in cases:
        label = f"{selector} -> {expected}"
        quoted_path = f"'{expected}'"
        with self.subTest(label):
            self.assertEqual(parse(selector).sql(), quoted_path)
"""$["\\u263a"]""", """$['\\uD834\\uDD1E']""": """$["\\uD834\\uDD1E"]""", """$['\\uD83D\\uDE00']""": """$["\\uD83D\\uDE00"]""", """$['']""": """$[""]""", """$[? @.a]""": """$[?@.a]""", """$[?\n@.a]""": """$[?@.a]""", """$[?\t@.a]""": """$[?@.a]""", """$[?\r@.a]""": """$[?@.a]""", """$[? (@.a)]""": """$[?(@.a)]""", """$[?\n(@.a)]""": """$[?(@.a)]""", """$[?\t(@.a)]""": """$[?(@.a)]""", """$[?\r(@.a)]""": """$[?(@.a)]""", """$[ ?@.a]""": """$[?@.a]""", """$[\n?@.a]""": """$[?@.a]""", """$[\t?@.a]""": """$[?@.a]""", """$[\r?@.a]""": """$[?@.a]""", """$ ['a']""": """$.a""", """$\n['a']""": """$.a""", """$\t['a']""": """$.a""", """$\r['a']""": """$.a""", """$['a'] ['b']""": """$.a.b""", """$['a'] \n['b']""": """$.a.b""", """$['a'] \t['b']""": """$.a.b""", """$['a'] \r['b']""": """$.a.b""", """$ .a""": """$.a""", """$\n.a""": """$.a""", """$\t.a""": """$.a""", """$\r.a""": """$.a""", """$[ 'a']""": """$.a""", """$[\n'a']""": """$.a""", """$[\t'a']""": """$.a""", """$[\r'a']""": """$.a""", """$['a' ]""": """$.a""", """$['a'\n]""": """$.a""", """$['a'\t]""": """$.a""", """$['a'\r]""": """$.a""", """$['a' ,'b']""": """$["a","b"]""", """$['a'\n,'b']""": """$["a","b"]""", """$['a'\t,'b']""": """$["a","b"]""", """$['a'\r,'b']""": """$["a","b"]""", """$['a', 'b']""": """$["a","b"]""", """$['a',\n'b']""": """$["a","b"]""", """$['a',\t'b']""": """$["a","b"]""", """$['a',\r'b']""": """$["a","b"]""", """$[1 :5:2]""": """$[1:5:2]""", """$[1\n:5:2]""": """$[1:5:2]""", """$[1\t:5:2]""": """$[1:5:2]""", """$[1\r:5:2]""": """$[1:5:2]""", """$[1: 5:2]""": """$[1:5:2]""", """$[1:\n5:2]""": """$[1:5:2]""", """$[1:\t5:2]""": """$[1:5:2]""", """$[1:\r5:2]""": """$[1:5:2]""", """$[1:5 :2]""": """$[1:5:2]""", """$[1:5\n:2]""": """$[1:5:2]""", """$[1:5\t:2]""": """$[1:5:2]""", """$[1:5\r:2]""": """$[1:5:2]""", """$[1:5: 2]""": """$[1:5:2]""", """$[1:5:\n2]""": """$[1:5:2]""", """$[1:5:\t2]""": """$[1:5:2]""", """$[1:5:\r2]""": """$[1:5:2]""", } for test in tests: selector = test["selector"] 
def test_lineage(self) -> None:
    root = lineage(
        "a",
        "SELECT a FROM z",
        schema={"x": {"a": "int"}},
        sources={"y": "SELECT * FROM x", "z": "SELECT a FROM y"},
    )

    # Expected (source SQL, source name) for the root and each first downstream
    # node below it.
    expected_chain = [
        (
            "SELECT z.a AS a FROM (SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y /* source: y */) AS z /* source: z */",
            "",
        ),
        (
            "SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y /* source: y */",
            "z",
        ),
        (
            "SELECT x.a AS a FROM x AS x",
            "y",
        ),
    ]
    current = root
    for depth, (expected_sql, expected_source_name) in enumerate(expected_chain):
        if depth:
            current = current.downstream[0]
        self.assertEqual(current.source.sql(), expected_sql)
        self.assertEqual(current.source_name, expected_source_name)

    # The HTML rendering must be non-trivial and every edge needs both endpoints.
    html_graph = root.to_html()
    self.assertGreater(len(html_graph._repr_html_()), 1000)
    for edge in html_graph.edges:
        for endpoint in ("from", "to"):
            self.assertIn(endpoint, edge)

    # test that sql is not modified
    query_sql = "SELECT a FROM x"
    query_ast = sqlglot.parse_one(query_sql)
    lineage("a", query_ast)
    self.assertEqual(query_ast.sql(), query_sql)

    # test that sources are not modified
    query_ast = sqlglot.parse_one(query_sql)
    source_sql = "SELECT a FROM y"
    source_ast = sqlglot.parse_one(source_sql)
    lineage("a", query_ast, sources={"x": source_ast})
    self.assertEqual(source_ast.sql(), source_sql)
def test_lineage_source_with_cte(self) -> None:
    root = lineage(
        "a",
        "SELECT a FROM z",
        schema={"x": {"a": "int"}},
        sources={"z": "WITH y AS (SELECT * FROM x) SELECT a FROM y"},
    )

    # Expected (source SQL, source name, reference node name) for the root and
    # each first downstream node below it.
    expected_chain = (
        (
            "SELECT z.a AS a FROM (WITH y AS (SELECT x.a AS a FROM x AS x) SELECT y.a AS a FROM y AS y) AS z /* source: z */",
            "",
            "",
        ),
        (
            "WITH y AS (SELECT x.a AS a FROM x AS x) SELECT y.a AS a FROM y AS y",
            "z",
            "",
        ),
        (
            "SELECT x.a AS a FROM x AS x",
            "z",
            "y",
        ),
    )
    current = root
    for depth, (expected_sql, source_name, reference_name) in enumerate(expected_chain):
        if depth:
            current = current.downstream[0]
        self.assertEqual(current.source.sql(), expected_sql)
        self.assertEqual(current.source_name, source_name)
        self.assertEqual(current.reference_node_name, reference_name)
def test_lineage_join_with_star(self) -> None:
    root = lineage(
        "*",
        "SELECT * from x JOIN y USING (uid)",
    )

    self.assertEqual(
        root.source.sql(),
        "SELECT * FROM x AS x JOIN y AS y ON x.uid = y.uid",
    )
    self.assertEqual(root.source_name, "")
    self.assertEqual(root.reference_node_name, "")

    # An unqualified star is expected to fan out to one node per joined table.
    expected_tables = ("x AS x", "y AS y")
    self.assertEqual(len(root.downstream), 2)
    for position, table_sql in enumerate(expected_tables):
        child = root.downstream[position]
        self.assertEqual(child.expression.sql(), table_sql)
        self.assertEqual(child.name, "*")
def test_lineage_cte_name_appears_in_schema(self) -> None:
    # The CTE is named "t1", which is also a table name in the schema.
    schema = {"a": {"b": {"t1": {"c1": "int"}, "t2": {"c2": "int"}}}}

    root = lineage(
        "c2",
        "WITH t1 AS (SELECT * FROM a.b.t2), inter AS (SELECT * FROM t1) SELECT * FROM inter",
        schema=schema,
    )
    self.assertEqual(
        root.source.sql(),
        "WITH t1 AS (SELECT t2.c2 AS c2 FROM a.b.t2 AS t2), inter AS (SELECT t1.c2 AS c2 FROM t1 AS t1) SELECT inter.c2 AS c2 FROM inter AS inter",
    )
    self.assertEqual(root.source_name, "")

    # Expected (source SQL, expression SQL, source name) for each successive
    # first downstream node.
    expected_descendants = (
        ("SELECT t1.c2 AS c2 FROM t1 AS t1", "t1.c2 AS c2", ""),
        ("SELECT t2.c2 AS c2 FROM a.b.t2 AS t2", "t2.c2 AS c2", ""),
        ("a.b.t2 AS t2", "a.b.t2 AS t2", ""),
    )
    current = root
    for source_sql, expression_sql, source_name in expected_descendants:
        current = current.downstream[0]
        self.assertEqual(current.source.sql(), source_sql)
        self.assertEqual(current.expression.sql(), expression_sql)
        self.assertEqual(current.source_name, source_name)

    # The table node is the end of the chain.
    self.assertEqual(current.downstream, [])
SELECT cx FROM c)", ) assert len(node.downstream) == 3 def test_lineage_lateral_flatten(self) -> None: node = lineage( "VALUE", "SELECT FLATTENED.VALUE FROM TEST_TABLE, LATERAL FLATTEN(INPUT => RESULT, OUTER => TRUE) FLATTENED", dialect="snowflake", ) self.assertEqual(node.name, "VALUE") downstream = node.downstream[0] self.assertEqual(downstream.name, "FLATTENED.VALUE") self.assertEqual( downstream.source.sql(dialect="snowflake"), "LATERAL FLATTEN(INPUT => TEST_TABLE.RESULT, OUTER => TRUE) AS FLATTENED(SEQ, KEY, PATH, INDEX, VALUE, THIS)", ) self.assertEqual(downstream.expression.sql(dialect="snowflake"), "VALUE") self.assertEqual(len(downstream.downstream), 1) downstream = downstream.downstream[0] self.assertEqual(downstream.name, "TEST_TABLE.RESULT") self.assertEqual(downstream.source.sql(dialect="snowflake"), "TEST_TABLE AS TEST_TABLE") node = lineage( "FIELD", "SELECT FLATTENED.VALUE:field::text AS FIELD FROM SNOWFLAKE.SCHEMA.MODEL AS MODEL_ALIAS, LATERAL FLATTEN(INPUT => MODEL_ALIAS.A) AS FLATTENED", schema={"SNOWFLAKE": {"SCHEMA": {"TABLE": {"A": "integer"}}}}, sources={"SNOWFLAKE.SCHEMA.MODEL": "SELECT A FROM SNOWFLAKE.SCHEMA.TABLE"}, dialect="snowflake", ) self.assertEqual(node.name, "FIELD") downstream = node.downstream[0] self.assertEqual(downstream.name, "FLATTENED.VALUE") self.assertEqual( downstream.source.sql(dialect="snowflake"), "LATERAL FLATTEN(INPUT => MODEL_ALIAS.A) AS FLATTENED(SEQ, KEY, PATH, INDEX, VALUE, THIS)", ) self.assertEqual(downstream.expression.sql(dialect="snowflake"), "VALUE") self.assertEqual(len(downstream.downstream), 1) downstream = downstream.downstream[0] self.assertEqual(downstream.name, "MODEL_ALIAS.A") self.assertEqual(downstream.source_name, "SNOWFLAKE.SCHEMA.MODEL") self.assertEqual( downstream.source.sql(dialect="snowflake"), "SELECT TABLE.A AS A FROM SNOWFLAKE.SCHEMA.TABLE AS TABLE", ) self.assertEqual(downstream.expression.sql(dialect="snowflake"), "TABLE.A AS A") self.assertEqual(len(downstream.downstream), 1) 
downstream = downstream.downstream[0] self.assertEqual(downstream.name, "TABLE.A") self.assertEqual( downstream.source.sql(dialect="snowflake"), "SNOWFLAKE.SCHEMA.TABLE AS TABLE", ) self.assertEqual( downstream.expression.sql(dialect="snowflake"), "SNOWFLAKE.SCHEMA.TABLE AS TABLE", ) def test_subquery(self) -> None: node = lineage( "output", "SELECT (SELECT max(t3.my_column) my_column FROM foo t3) AS output FROM table3", ) self.assertEqual(node.name, "output") node = node.downstream[0] self.assertEqual(node.name, "my_column") node = node.downstream[0] self.assertEqual(node.name, "t3.my_column") self.assertEqual(node.source.sql(), "foo AS t3") node = lineage( "y", "SELECT SUM((SELECT max(a) a from x) + (SELECT min(b) b from x) + c) AS y FROM x", ) self.assertEqual(node.name, "y") self.assertEqual(len(node.downstream), 3) self.assertEqual(node.downstream[0].name, "a") self.assertEqual(node.downstream[1].name, "b") self.assertEqual(node.downstream[2].name, "x.c") node = lineage( "x", "WITH cte AS (SELECT a, b FROM z) SELECT sum(SELECT a FROM cte) AS x, (SELECT b FROM cte) as y FROM cte", ) self.assertEqual(node.name, "x") self.assertEqual(len(node.downstream), 1) node = node.downstream[0] self.assertEqual(node.name, "a") node = node.downstream[0] self.assertEqual(node.name, "cte.a") self.assertEqual(node.reference_node_name, "cte") node = node.downstream[0] self.assertEqual(node.name, "z.a") node = lineage( "a", """ WITH foo AS ( SELECT 1 AS a ), bar AS ( ( SELECT a + 1 AS a FROM foo ) ) ( SELECT a + b AS a FROM bar CROSS JOIN ( SELECT 2 AS b ) AS baz ) """, ) self.assertEqual(node.name, "a") self.assertEqual(len(node.downstream), 2) a, b = sorted(node.downstream, key=lambda n: n.name) self.assertEqual(a.name, "bar.a") self.assertEqual(len(a.downstream), 1) self.assertEqual(b.name, "baz.b") self.assertEqual(b.downstream, []) node = a.downstream[0] self.assertEqual(node.name, "foo.a") # Select from derived table node = lineage( "a", "SELECT a FROM (SELECT a FROM x) 
def test_lineage_cte_union(self) -> None:
    query = """ WITH dataset AS ( SELECT * FROM catalog.db.table_a UNION SELECT * FROM catalog.db.table_b ) SELECT x, created_at FROM dataset; """
    root = lineage("x", query)
    self.assertEqual(root.name, "x")

    # One downstream node is expected per UNION branch, in branch order, each
    # referencing the CTE it came from.
    expected_branch_sources = (
        "SELECT * FROM catalog.db.table_a AS table_a",
        "SELECT * FROM catalog.db.table_b AS table_b",
    )
    for position, branch_sql in enumerate(expected_branch_sources):
        branch = root.downstream[position]
        self.assertEqual(branch.name, "0")
        self.assertEqual(branch.source.sql(), branch_sql)
        self.assertEqual(branch.reference_node_name, "dataset")
def test_lineage_normalize(self) -> None:
    cte_sql = "WITH x AS (SELECT 1 a) SELECT a FROM x"

    # The unquoted column name must resolve to "A" under snowflake.
    node = lineage("a", cte_sql, dialect="snowflake")
    self.assertEqual(node.name, "A")

    # (column, sql, dialect) combinations that must fail with SqlglotError.
    failing_calls = (
        ('"a"', cte_sql, "snowflake"),
        ("b", "SELECT a,b FROM table1 UNION ALL BY NAME SELECT a FROM table2", "duckdb"),
    )
    for column, sql, dialect in failing_calls:
        with self.assertRaises(sqlglot.errors.SqlglotError):
            lineage(column, sql, dialect=dialect)
# Final assertion of test_trim (which starts before this span): the downstream node's
# source is the inner SELECT, still carrying all three projections.
self.assertEqual(downstream.source.sql(), "SELECT y.a AS a, y.b AS b, y.c AS c FROM y AS y")


# The inner projection carries a /* c */ SQL comment; the leaf node two hops down is
# asserted to be named exactly "t1.x", i.e. without the comment text.
def test_node_name_doesnt_contain_comment(self) -> None:
    sql = "SELECT * FROM (SELECT x /* c */ FROM t1) AS t2"
    node = lineage("x", sql)

    self.assertEqual(len(node.downstream), 1)
    self.assertEqual(len(node.downstream[0].downstream), 1)
    self.assertEqual(node.downstream[0].downstream[0].name, "t1.x")


# PIVOT applied to an unaliased subquery: the pivoted output column `a` (re-aliased to
# other_a) is traced to "_0.value" and from there to sample_data.value.
def test_pivot_without_alias(self) -> None:
    sql = """ SELECT a as other_a FROM (select value,category from sample_data) PIVOT ( sum(value) FOR category IN ('a', 'b') ); """
    node = lineage("other_a", sql)

    self.assertEqual(node.downstream[0].name, "_0.value")
    self.assertEqual(node.downstream[0].downstream[0].name, "sample_data.value")


# PIVOT with an aliased aggregation (`s`) and an aliased IN value (`cat_a`): the
# resulting column cat_a_s has exactly one downstream node, sample_data.value.
def test_pivot_with_alias(self) -> None:
    sql = """ SELECT cat_a_s as other_as FROM sample_data PIVOT ( sum(value) as s, max(price) FOR category IN ('a' as cat_a, 'b') ) """
    node = lineage("other_as", sql)

    self.assertEqual(len(node.downstream), 1)
    self.assertEqual(node.downstream[0].name, "sample_data.value")


# Same pivot wrapped in a CTE `t`: the first hop is the CTE column (with
# reference_node_name "t"), the second hop is sample_data.value.
def test_pivot_with_cte(self) -> None:
    sql = """ WITH t as ( SELECT a as other_a FROM sample_data PIVOT ( sum(value) FOR category IN ('a', 'b') ) ) select other_a from t """
    node = lineage("other_a", sql)

    self.assertEqual(node.downstream[0].name, "t.other_a")
    self.assertEqual(node.downstream[0].reference_node_name, "t")
    self.assertEqual(node.downstream[0].downstream[0].name, "sample_data.value")


# `empid` is not mentioned in the PIVOT clause at all; it is traced directly to
# quarterly_sales.empid.
def test_pivot_with_implicit_column_of_pivoted_source(self) -> None:
    sql = """ SELECT empid FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ( '2023_Q1', '2023_Q2', '2023_Q3')) ORDER BY empid; """
    node = lineage("empid", sql)

    self.assertEqual(node.downstream[0].name, "quarterly_sales.empid")


# Same implicit-column case, with the pivot wrapped in a CTE `t`.
# NOTE: the assertions of this test lie on the line following this span.
def test_pivot_with_implicit_column_of_pivoted_source_and_cte(self) -> None:
    sql = """ WITH t as ( SELECT empid FROM quarterly_sales PIVOT(SUM(amount) FOR quarter IN ( '2023_Q1', '2023_Q2', '2023_Q3')) ) select empid from t """
    node = lineage("empid", sql)
self.assertEqual(node.downstream[0].name, "t.empid") self.assertEqual(node.downstream[0].reference_node_name, "t") self.assertEqual(node.downstream[0].downstream[0].name, "quarterly_sales.empid") def test_table_udtf_snowflake(self) -> None: lateral_flatten = """ SELECT f.value:external_id::string AS external_id FROM database_name.schema_name.table_name AS raw, LATERAL FLATTEN(events) AS f """ table_flatten = """ SELECT f.value:external_id::string AS external_id FROM database_name.schema_name.table_name AS raw JOIN TABLE(FLATTEN(events)) AS f """ lateral_node = lineage("external_id", lateral_flatten, dialect="snowflake") table_node = lineage("external_id", table_flatten, dialect="snowflake") self.assertEqual(lateral_node.name, "EXTERNAL_ID") self.assertEqual(table_node.name, "EXTERNAL_ID") lateral_node = lateral_node.downstream[0] table_node = table_node.downstream[0] self.assertEqual(lateral_node.name, "F.VALUE") self.assertEqual( lateral_node.source.sql("snowflake"), "LATERAL FLATTEN(RAW.EVENTS) AS F(SEQ, KEY, PATH, INDEX, VALUE, THIS)", ) self.assertEqual(table_node.name, "F.VALUE") self.assertEqual(table_node.source.sql("snowflake"), "TABLE(FLATTEN(RAW.EVENTS)) AS F") lateral_node = lateral_node.downstream[0] table_node = table_node.downstream[0] self.assertEqual(lateral_node.name, "RAW.EVENTS") self.assertEqual( lateral_node.source.sql("snowflake"), "DATABASE_NAME.SCHEMA_NAME.TABLE_NAME AS RAW", ) self.assertEqual(table_node.name, "RAW.EVENTS") self.assertEqual( table_node.source.sql("snowflake"), "DATABASE_NAME.SCHEMA_NAME.TABLE_NAME AS RAW", ) def test_pivot_with_subquery(self) -> None: schema = { "loan_ledger": { "product_type": "varchar", "month": "date", "loan_id": "int", } } sql = """ WITH cte AS ( SELECT * FROM ( SELECT product_type, month, loan_id FROM loan_ledger ) PIVOT ( COUNT(loan_id) FOR month IN ('2024-10', '2024-11') ) ) SELECT cte.product_type AS product_type, cte."2024-10" AS "2024-10" FROM cte """ node = lineage("product_type", sql, 
dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.product_type") self.assertEqual(node.downstream[0].downstream[0].name, "_0.product_type") self.assertEqual( node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.product_type", ) node = lineage('"2024-10"', sql, dialect="duckdb", schema=schema) self.assertEqual(node.downstream[0].name, "cte.2024-10") self.assertEqual(node.downstream[0].downstream[0].name, "_0.loan_id") self.assertEqual(node.downstream[0].downstream[0].downstream[0].name, "loan_ledger.loan_id") def test_copy_flag(self) -> None: schema = { "x": { "a": "int", }, } query = sqlglot.parse_one("SELECT a FROM z") sources = { "y": sqlglot.parse_one("SELECT * FROM x"), "z": sqlglot.parse_one("SELECT * FROM y"), } lineage("a", query, schema=schema, sources=sources, copy=False) self.assertEqual(sources["y"].sql(), "SELECT * FROM x") self.assertEqual(sources["z"].sql(), "SELECT * FROM y") self.assertEqual( query.sql(), "SELECT z.a AS a FROM (SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y /* source: y */) AS z /* source: z */", ) query = sqlglot.parse_one("SELECT a FROM z") sources = { "y": sqlglot.parse_one("SELECT * FROM x"), "z": sqlglot.parse_one("SELECT * FROM y"), } lineage("a", query, schema=schema, sources=sources, copy=True) self.assertEqual(sources["y"].sql(), "SELECT * FROM x") self.assertEqual(sources["z"].sql(), "SELECT * FROM y") self.assertEqual(query.sql(), "SELECT a FROM z") query = sqlglot.parse_one("SELECT a FROM z") sources = { "y": sqlglot.parse_one("SELECT * FROM x"), "z": sqlglot.parse_one("SELECT * FROM y"), } query = sqlglot.parse_one("SELECT a FROM x") lineage("a", query, schema=schema, copy=False) self.assertEqual(query.sql(), "SELECT x.a AS a FROM x AS x") query = sqlglot.parse_one("SELECT a FROM x") lineage("a", query, schema=schema, copy=True) self.assertEqual(query.sql(), "SELECT a FROM x") def test_lineage_shared_cte_performance(self) -> None: """Shared CTEs referenced from 
multiple places should not cause exponential expansion. Each cte_k joins cte_{k-1} with itself and references both sides (t1.a + t2.a), so without memoization to_node() is called 2^N times. With N=12 that's 4096 expansions. """ n_levels = 12 ctes = ["cte_0 AS (SELECT a FROM base_table)"] for k in range(1, n_levels): prev = f"cte_{k - 1}" ctes.append( f"cte_{k} AS (SELECT t1.a + t2.a AS a FROM {prev} t1 JOIN {prev} t2 ON t1.a = t2.a)" ) sql = "WITH " + ",\n ".join(ctes) + f"\nSELECT a FROM cte_{n_levels - 1}" node = lineage("a", sql, schema={"base_table": {"a": "int"}}) # Walk the DAG and verify structure. all_nodes = list(node.walk()) # shared references keep node count small (O(N), not O(2^N)). self.assertLess( len(all_nodes), 200, f"got {len(all_nodes)} nodes -- DAG walk may be broken", ) # walk() should yield each node exactly once. all_ids = [id(n) for n in all_nodes] self.assertEqual(len(all_ids), len(set(all_ids))) # Leaf nodes should reference base_table. leaves = [n for n in all_nodes if not n.downstream] self.assertGreater(len(leaves), 0) self.assertTrue(all("base_table" in n.source.sql() for n in leaves)) def test_lineage_cte_self_join_distinct_aliases(self) -> None: node = lineage( "combined", "WITH shared AS (SELECT a FROM x) SELECT s1.a + s2.a AS combined FROM shared s1, shared s2", schema={"x": {"a": "int"}}, ) downstream_names = sorted(d.name for d in node.downstream) self.assertEqual(downstream_names, ["s1.a", "s2.a"]) ================================================ FILE: tests/test_optimizer.py ================================================ import unittest from concurrent.futures import ProcessPoolExecutor, as_completed from functools import partial from unittest.mock import patch import duckdb from pandas.testing import assert_frame_equal import sqlglot from sqlglot import exp, optimizer, parse_one from sqlglot.errors import ANSI_RESET, ANSI_UNDERLINE, OptimizeError, SchemaError from sqlglot.optimizer.annotate_types import annotate_types from 
sqlglot.optimizer.normalize import normalization_distance from sqlglot.optimizer.scope import build_scope, traverse_scope, walk_in_scope from sqlglot.schema import MappingSchema from tests.helpers import ( TPCDS_SCHEMA, TPCH_SCHEMA, assert_logger_contains, load_sql_fixture_pairs, load_sql_fixtures, string_to_bool, ) def parse_and_optimize(func, sql, read_dialect, **kwargs): return func(parse_one(sql, read=read_dialect), **kwargs) def qualify_columns(expression, validate_qualify_columns=True, **kwargs): expression = optimizer.qualify.qualify( expression, infer_schema=True, validate_qualify_columns=validate_qualify_columns, identify=False, **kwargs, ) return expression def pushdown_projections(expression, **kwargs): expression = optimizer.qualify_tables.qualify_tables(expression) expression = optimizer.qualify_columns.qualify_columns(expression, infer_schema=True, **kwargs) expression = optimizer.pushdown_projections.pushdown_projections(expression, **kwargs) return expression def normalize(expression, **kwargs): schema = kwargs.get("schema") expression = optimizer.normalize.normalize(expression, dnf=False) expression = annotate_types(expression, schema=schema) return optimizer.simplify.simplify(expression) def simplify(expression, **kwargs): dialect = kwargs.get("dialect") schema = kwargs.get("schema") expression = annotate_types(expression, schema=schema) return optimizer.simplify.simplify( expression, constant_propagation=True, coalesce_simplification=True, dialect=dialect ) def pushdown_ctes(expression, **kwargs): optimizer.qualify_columns.pushdown_cte_alias_columns(build_scope(expression)) return expression def annotate_functions(expression, **kwargs): dialect = kwargs.get("dialect") schema = kwargs.get("schema") annotated = annotate_types(expression, dialect=dialect, schema=schema) return annotated.expressions[0] class TestOptimizer(unittest.TestCase): maxDiff = None @classmethod def setUpClass(cls): sqlglot.schema = MappingSchema() cls.conn = duckdb.connect() 
cls.conn.execute( """ CREATE TABLE x (a INT, b INT); CREATE TABLE y (b INT, c INT); CREATE TABLE z (b INT, c INT); CREATE TABLE w (d TEXT, e TEXT); INSERT INTO x VALUES (1, 1); INSERT INTO x VALUES (2, 2); INSERT INTO x VALUES (2, 2); INSERT INTO x VALUES (3, 3); INSERT INTO x VALUES (null, null); INSERT INTO y VALUES (2, 2); INSERT INTO y VALUES (2, 2); INSERT INTO y VALUES (3, 3); INSERT INTO y VALUES (4, 4); INSERT INTO y VALUES (null, null); INSERT INTO y VALUES (3, 3); INSERT INTO y VALUES (3, 3); INSERT INTO y VALUES (4, 4); INSERT INTO y VALUES (5, 5); INSERT INTO y VALUES (null, null); INSERT INTO w VALUES ('a', 'b'); """ ) def setUp(self): self.schema = { "x": { "a": "INT", "b": "INT", }, "y": { "b": "INT", "c": "INT", }, "z": { "b": "INT", "c": "INT", }, "w": { "d": "TEXT", "e": "TEXT", }, "temporal": { "d": "DATE", "t": "DATETIME", }, "structs": { "one": "STRUCT", "nested_0": "STRUCT>>", "quoted": 'STRUCT<"foo bar" INT>', }, "t_bool": { "a": "BOOLEAN", }, } def check_file( self, file, func, pretty=False, execute=False, only=None, **kwargs, ): with ProcessPoolExecutor() as pool: results = {} for i, (meta, sql, expected) in enumerate( load_sql_fixture_pairs(f"optimizer/{file}.sql"), start=1 ): title = meta.get("title") or f"{i}, {sql}" if only and title != only: continue dialect = meta.get("dialect") leave_tables_isolated = meta.get("leave_tables_isolated") validate_qualify_columns = meta.get("validate_qualify_columns") canonicalize_table_aliases = meta.get("canonicalize_table_aliases") func_kwargs = kwargs.copy() if leave_tables_isolated is not None: func_kwargs["leave_tables_isolated"] = string_to_bool(leave_tables_isolated) if validate_qualify_columns is not None: func_kwargs["validate_qualify_columns"] = string_to_bool( validate_qualify_columns ) if dialect: func_kwargs["dialect"] = dialect if canonicalize_table_aliases is not None: func_kwargs["canonicalize_table_aliases"] = string_to_bool( canonicalize_table_aliases ) future = 
pool.submit(parse_and_optimize, func, sql, dialect, **func_kwargs) results[future] = ( sql, title, expected, dialect, execute if meta.get("execute") is None else False, ) for future in as_completed(results): sql, title, expected, dialect, execute = results[future] with self.subTest(title): optimized = future.result() actual = optimized.sql(pretty=pretty, dialect=dialect) self.assertEqual( expected, actual, ) for expression in optimized.walk(): for arg_key, arg in expression.args.items(): if isinstance(arg, exp.Expr): self.assertEqual(arg_key, arg.arg_key) self.assertIs(arg.parent, expression) if string_to_bool(execute): with self.subTest(f"(execute) {title}"): df1 = self.conn.execute( sqlglot.transpile(sql, read=dialect, write="duckdb")[0] ).df() df2 = self.conn.execute(optimized.sql(dialect="duckdb")).df() assert_frame_equal(df1, df2) @patch("sqlglot.generator.logger") def test_optimize(self, logger): self.assertEqual(optimizer.optimize("x = 1 + 1", identify=False).sql(), "x = 2") schema = { "x": {"a": "INT", "b": "INT"}, "y": {"b": "INT", "c": "INT"}, "z": {"a": "INT", "c": "INT"}, "u": {"f": "INT", "g": "INT", "h": "TEXT"}, } self.check_file( "optimizer", optimizer.optimize, infer_schema=True, pretty=True, execute=True, schema=schema, ) def test_isolate_table_selects(self): self.check_file( "isolate_table_selects", optimizer.isolate_table_selects.isolate_table_selects, schema=self.schema, ) def test_qualify_tables(self): tables = set() optimizer.qualify.qualify( parse_one("with foo AS (select * from bar) select * from foo join baz"), qualify_columns=False, on_qualify=lambda t: tables.add(t.name), ) self.assertEqual(tables, {"bar", "baz"}) self.assertEqual( optimizer.qualify.qualify( parse_one("WITH tesT AS (SELECT * FROM t1) SELECT * FROM test", "bigquery"), db="db", catalog="catalog", dialect="bigquery", quote_identifiers=False, ).sql("bigquery"), "WITH test AS (SELECT * FROM catalog.db.t1 AS t1) SELECT * FROM test AS test", ) self.assertEqual( 
optimizer.qualify_tables.qualify_tables( parse_one( "WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y'))" ), db="db", catalog="catalog", ).sql(), "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS _0", ) self.assertEqual( optimizer.qualify_tables.qualify_tables( parse_one( "WITH cte AS (SELECT * FROM t) SELECT * FROM cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS pivot_alias" ), db="db", catalog="catalog", ).sql(), "WITH cte AS (SELECT * FROM catalog.db.t AS t) SELECT * FROM cte AS cte PIVOT(SUM(c) FOR v IN ('x', 'y')) AS pivot_alias", ) self.assertEqual( optimizer.qualify_tables.qualify_tables( parse_one("select a from b"), catalog="catalog" ).sql(), "SELECT a FROM b AS b", ) self.assertEqual( optimizer.qualify_tables.qualify_tables(parse_one("select a from b"), db='"DB"').sql(), 'SELECT a FROM "DB".b AS b', ) self.check_file( "qualify_tables", optimizer.qualify_tables.qualify_tables, db="db", catalog="c", ) def test_normalize(self): self.assertEqual( optimizer.normalize.normalize( parse_one("x AND (y OR z)"), dnf=True, ).sql(), "(x AND y) OR (x AND z)", ) self.assertEqual( optimizer.normalize.normalize( parse_one("x AND (y OR z)"), ).sql(), "x AND (y OR z)", ) self.check_file("normalize", normalize, schema=self.schema) @patch("sqlglot.generator.logger") def test_qualify_columns(self, logger): self.assertEqual( optimizer.qualify.qualify( parse_one( """ SELECT Teams.Name, count(*) FROM raw.TeamMemberships as TeamMemberships join raw.Teams on Teams.Id = TeamMemberships.TeamId GROUP BY 1 """, read="bigquery", ), schema={ "raw": { "TeamMemberships": { "Id": "INTEGER", "UserId": "INTEGER", "TeamId": "INTEGER", }, "Teams": { "Id": "INTEGER", "Name": "STRING", }, } }, dialect="bigquery", ).sql(dialect="bigquery"), "SELECT `teams`.`name` AS `name`, count(*) AS `_col_1` FROM `raw`.`TeamMemberships` AS `teammemberships` JOIN `raw`.`Teams` AS `teams` ON `teams`.`id` = `teammemberships`.`teamid` 
GROUP BY `teams`.`name`", ) self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT `my_db.my_table`.`my_column` FROM `my_db.my_table`", read="bigquery", ), dialect="bigquery", ).sql(dialect="bigquery"), "SELECT `my_table`.`my_column` AS `my_column` FROM `my_db.my_table` AS `my_table`", ) self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one( "WITH RECURSIVE t AS (SELECT 1 AS x UNION ALL SELECT x + 1 FROM t AS child WHERE x < 10) SELECT * FROM t" ), schema={}, infer_schema=False, ).sql(), "WITH RECURSIVE t AS (SELECT 1 AS x UNION ALL SELECT child.x + 1 AS _col_0 FROM t AS child WHERE child.x < 10) SELECT t.x AS x FROM t", ) self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one("WITH x AS (SELECT a FROM db.y) SELECT * FROM db.x"), schema={"db": {"x": {"z": "int"}, "y": {"a": "int"}}}, expand_stars=False, ).sql(), "WITH x AS (SELECT y.a AS a FROM db.y) SELECT * FROM db.x", ) self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one("WITH x AS (SELECT a FROM db.y) SELECT z FROM db.x"), schema={"db": {"x": {"z": "int"}, "y": {"a": "int"}}}, infer_schema=False, ).sql(), "WITH x AS (SELECT y.a AS a FROM db.y) SELECT x.z AS z FROM db.x", ) self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one("select y from x"), schema={}, infer_schema=False, ).sql(), "SELECT y AS y FROM x", ) self.assertEqual( optimizer.qualify.qualify( parse_one( "WITH X AS (SELECT Y.A FROM DB.y CROSS JOIN a.b.INFORMATION_SCHEMA.COLUMNS) SELECT `A` FROM X", read="bigquery", ), dialect="bigquery", ).sql(), 'WITH "x" AS (SELECT "y"."a" AS "a" FROM "DB"."y" AS "y" CROSS JOIN "a"."b"."INFORMATION_SCHEMA.COLUMNS" AS "columns") SELECT "x"."a" AS "a" FROM "x" AS "x"', ) self.assertEqual( optimizer.qualify.qualify( parse_one( "CREATE FUNCTION udfs.`myTest`(`x` FLOAT64) AS (1)", read="bigquery", ), dialect="bigquery", ).sql(dialect="bigquery"), "CREATE FUNCTION `udfs`.`myTest`(`x` FLOAT64) AS (1)", ) self.assertEqual( 
optimizer.qualify.qualify( parse_one("SELECT `bar_bazfoo_$id` FROM test", read="spark"), schema={"test": {"bar_bazFoo_$id": "BIGINT"}}, dialect="spark", ).sql(dialect="spark"), "SELECT `test`.`bar_bazfoo_$id` AS `bar_bazfoo_$id` FROM `test` AS `test`", ) qualified = optimizer.qualify.qualify( parse_one("WITH t AS (SELECT 1 AS c) (SELECT c FROM t)") ) self.assertIs(qualified.selects[0].parent, qualified) self.assertEqual( qualified.sql(), 'WITH "t" AS (SELECT 1 AS "c") SELECT "t"."c" AS "c" FROM "t" AS "t"', ) # can't coalesce USING columns because they don't exist in every already-joined table self.assertEqual( optimizer.qualify_columns.qualify_columns( parse_one( "SELECT id, dt, v FROM (SELECT t1.id, t1.dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp USING (id) LEFT JOIN t2 AS t2 USING (other_id, dt, common) WHERE t1.id > 10 GROUP BY 1, 2) AS `_0`", dialect="bigquery", ), schema=MappingSchema( schema={ "t1": {"id": "int64", "dt": "date", "common": "int64"}, "lkp": {"id": "int64", "other_id": "int64", "common": "int64"}, "t2": {"other_id": "int64", "dt": "date", "v": "int64", "common": "int64"}, }, dialect="bigquery", ), ).sql(dialect="bigquery"), "SELECT `_0`.id AS id, `_0`.dt AS dt, `_0`.v AS v FROM (SELECT t1.id AS id, t1.dt AS dt, sum(coalesce(t2.v, 0)) AS v FROM t1 AS t1 LEFT JOIN lkp AS lkp ON t1.id = lkp.id LEFT JOIN t2 AS t2 ON lkp.other_id = t2.other_id AND t1.dt = t2.dt AND COALESCE(t1.common, lkp.common) = t2.common WHERE t1.id > 10 GROUP BY t1.id, t1.dt) AS `_0`", ) # Detection of correlation where columns are referenced in derived tables nested within subqueries self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT a.g FROM a WHERE a.e < (SELECT MAX(u) FROM (SELECT SUM(c.b) AS u FROM c WHERE c.d = f GROUP BY c.e) w)" ), schema={ "a": {"g": "INT", "e": "INT", "f": "INT"}, "c": {"d": "INT", "e": "INT", "b": "INT"}, }, quote_identifiers=False, ).sql(), "SELECT a.g AS g FROM a AS a WHERE a.e < (SELECT MAX(w.u) AS _col_0 FROM 
(SELECT SUM(c.b) AS u FROM c AS c WHERE c.d = a.f GROUP BY c.e) AS w)", ) # Detection of correlation where columns are referenced in derived tables nested within lateral joins self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT u.user_id, l.log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date FROM (SELECT l.log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l", dialect="postgres", ), schema={ "users": {"user_id": "text", "log_date": "date"}, "logs": {"user_id": "text", "log_date": "date"}, }, quote_identifiers=False, ).sql("postgres"), "SELECT u.user_id AS user_id, l.log_date AS log_date FROM users AS u CROSS JOIN LATERAL (SELECT l1.log_date AS log_date FROM (SELECT l.log_date AS log_date FROM logs AS l WHERE l.user_id = u.user_id AND l.log_date <= 100 ORDER BY l.log_date LIMIT 1) AS l1) AS l", ) self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT A.b_id FROM A JOIN B ON A.b_id=B.b_id JOIN C USING(c_id)", dialect="postgres", ), schema={ "A": {"b_id": "int"}, "B": {"b_id": "int", "c_id": "int"}, "C": {"c_id": "int"}, }, quote_identifiers=False, ).sql("postgres"), "SELECT a.b_id AS b_id FROM a AS a JOIN b AS b ON a.b_id = b.b_id JOIN c AS c ON b.c_id = c.c_id", ) self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT A.b_id FROM A JOIN B ON A.b_id=B.b_id JOIN C ON B.b_id = C.b_id JOIN D USING(d_id)", dialect="postgres", ), schema={ "A": {"b_id": "int"}, "B": {"b_id": "int", "d_id": "int"}, "C": {"b_id": "int"}, "D": {"d_id": "int"}, }, quote_identifiers=False, ).sql("postgres"), "SELECT a.b_id AS b_id FROM a AS a JOIN b AS b ON a.b_id = b.b_id JOIN c AS c ON b.b_id = c.b_id JOIN d AS d ON b.d_id = d.d_id", ) self.assertEqual( optimizer.qualify.qualify( parse_one( """ SELECT (SELECT SUM(c.amount) FROM UNNEST(credits) AS c WHERE type != 'promotion') as total FROM billing """, read="bigquery", ), schema={"billing": {"credits": "ARRAY>"}}, dialect="bigquery", 
).sql(dialect="bigquery"), "SELECT (SELECT SUM(`c`.`amount`) AS `_col_0` FROM UNNEST(`billing`.`credits`) AS `c` WHERE `type` <> 'promotion') AS `total` FROM `billing` AS `billing`", ) self.assertEqual( optimizer.qualify.qualify( parse_one( """ WITH cte AS (SELECT * FROM base_table) SELECT (SELECT SUM(item.price) FROM UNNEST(items) AS item WHERE category = 'electronics') as electronics_total FROM cte """, read="bigquery", ), schema={ "base_table": { "id": "INT64", "items": "ARRAY>", } }, dialect="bigquery", ).sql(dialect="bigquery"), "WITH `cte` AS (SELECT `base_table`.`id` AS `id`, `base_table`.`items` AS `items` FROM `base_table` AS `base_table`) SELECT (SELECT SUM(`item`.`price`) AS `_col_0` FROM UNNEST(`cte`.`items`) AS `item` WHERE `category` = 'electronics') AS `electronics_total` FROM `cte` AS `cte`", ) self.check_file( "qualify_columns", qualify_columns, execute=True, schema=self.schema, ) self.check_file("qualify_columns_ddl", qualify_columns, schema=self.schema) self.assertEqual( optimizer.qualify.qualify( parse_one( """ SELECT ( SELECT col_st.value FROM UNNEST(col_st) AS col_st ) AS vcol1 FROM t AS b """, read="bigquery", ), schema={ "t": { "col_st": "ARRAY>", } }, dialect="bigquery", ).sql(dialect="bigquery"), "SELECT (SELECT `col_st`.`value` AS `value` FROM UNNEST(`b`.`col_st`) AS `col_st`) AS `vcol1` FROM `t` AS `b`", ) # Schema-qualified table joined twice (once unaliased, once aliased) should resolve correctly self.assertEqual( optimizer.qualify.qualify( parse_one( "SELECT 1 FROM dbo.a JOIN dbo.b ON dbo.b.id = dbo.a.id JOIN dbo.b AS x ON x.id = dbo.a.id" ), ).sql(), 'SELECT 1 AS "1" FROM "dbo"."a" AS "a" JOIN "dbo"."b" AS "b" ON "b"."id" = "a"."id" JOIN "dbo"."b" AS "x" ON "x"."id" = "a"."id"', ) def test_validate_columns(self): with self.assertRaisesRegex( OptimizeError, "Column 'foo' could not be resolved. 
Line: 1, Col: 10" ): optimizer.qualify.qualify( parse_one("select foo from x"), schema={"foo": {"y": "int"}}, ) # Test ambiguous columns error with PIVOT (which skips "could not be resolved" check) with self.assertRaisesRegex(OptimizeError, "Ambiguous column 'a'"): expression = parse_one( "SELECT * FROM (SELECT a, b, c FROM x) PIVOT (SUM(b) FOR c IN ('x', 'y'))" ) qualified = optimizer.qualify_columns.qualify_columns( expression, schema={"x": {"a": "int", "b": "int", "c": "str"}} ) optimizer.qualify_columns.validate_qualify_columns(qualified) def test_qualify_columns__with_invisible(self): schema = MappingSchema(self.schema, {"x": {"a"}, "y": {"b"}, "z": {"b"}}) self.check_file("qualify_columns__with_invisible", qualify_columns, schema=schema) def test_pushdown_cte_alias_columns(self): self.check_file( "pushdown_cte_alias_columns", pushdown_ctes, ) def test_qualify_columns__invalid(self): for sql in load_sql_fixtures("optimizer/qualify_columns__invalid.sql"): with self.subTest(sql): with self.assertRaises((OptimizeError, SchemaError)): expression = optimizer.qualify_columns.qualify_columns( parse_one(sql), schema=self.schema ) optimizer.qualify_columns.validate_qualify_columns(expression) # this makes sure the fallback scenario in get_table in resolver is covered # and the error message is column cannot be resolved instead of unknown table sql = """ SELECT INLINE_VIEW.a AS ACCOUNT FROM ( ( SELECT a FROM table1 ) inline_view LEFT JOIN table2 ON a = table2.id ) LEFT JOIN table3 ON inline_view.a = table3.a """ with self.assertRaises(OptimizeError) as ctx: schema = MappingSchema() schema.add_table("table3", ["a"]) expression = optimizer.qualify_columns.qualify_columns(parse_one(sql), schema=schema) optimizer.qualify_columns.validate_qualify_columns(expression) error_msg = str(ctx.exception) self.assertIn("Column 'a' could not be resolved", error_msg) def test_optimize_error_highlighting(self): # highlighting works with sql parameter sql = "SELECT nonexistent FROM x" 
# Continuation of test_optimize_error_highlighting (its `def` line and the first
# `sql = ...` assignment precede this span). Here the original SQL text is passed via
# the `sql` keyword, and the message is expected to contain the ANSI-underlined column.
with self.assertRaises(OptimizeError) as ctx:
    optimizer.optimize(sql, schema=self.schema, sql=sql)
# ctx.exception is read after the `with` block has caught the error.
error_msg = str(ctx.exception)
self.assertIn("Column 'nonexistent' could not be resolved", error_msg)
self.assertIn(f"{ANSI_UNDERLINE}nonexistent{ANSI_RESET}", error_msg)

# no highlighting when sql is None
sql = "SELECT nonexistent FROM x"
with self.assertRaises(OptimizeError) as ctx:
    optimizer.optimize(sql, schema=self.schema, sql=None)
error_msg = str(ctx.exception)
self.assertIn("Column 'nonexistent' could not be resolved", error_msg)
self.assertNotIn(f"{ANSI_UNDERLINE}nonexistent{ANSI_RESET}", error_msg)


# Runs the "normalize_identifiers" fixture through check_file, then checks that
# normalizing the bare string "a%" renders as the quoted identifier "a%".
def test_normalize_identifiers(self):
    self.check_file(
        "normalize_identifiers",
        optimizer.normalize_identifiers.normalize_identifiers,
    )

    self.assertEqual(optimizer.normalize_identifiers.normalize_identifiers("a%").sql(), '"a%"')


# Runs the "quote_identifiers" fixture against qualify_columns.quote_identifiers.
def test_quote_identifiers(self):
    self.check_file(
        "quote_identifiers",
        optimizer.qualify_columns.quote_identifiers,
    )


# Runs the "pushdown_projections" fixture. The callable passed here is the bare name
# `pushdown_projections`, not an attribute of `optimizer`.
def test_pushdown_projection(self):
    self.check_file("pushdown_projections", pushdown_projections, schema=self.schema)


# Fixture-driven checks followed by targeted simplify() assertions. The bare name
# `simplify` is distinct from `optimizer.simplify.simplify`, which is also called below.
# NOTE: this test continues on the lines following this span.
def test_simplify(self):
    self.check_file("simplify", simplify, schema=self.schema)

    # Stress test with huge union query
    union_sql = "SELECT 1 UNION ALL " * 1000 + "SELECT 1"
    expression = parse_one(union_sql)
    self.assertEqual(optimizer.simplify.simplify(expression).sql(), union_sql)

    # Ensure simplify mutates the AST properly
    expression = parse_one("SELECT 1 + 2")
    simplify(expression.selects[0])
    self.assertEqual(expression.sql(), "SELECT 3")

    # Simplifying an already simplified WHERE node a second time gives "WHERE TRUE".
    expression = parse_one("SELECT a, c, b FROM table1 WHERE 1 = 1")
    self.assertEqual(simplify(simplify(expression.find(exp.Where))).sql(), "WHERE TRUE")

    expression = parse_one("TRUE AND TRUE AND TRUE")
    self.assertEqual(exp.true(), optimizer.simplify.simplify(expression))

    # CONCAT in (e.g.) Presto is parsed as Concat instead of SafeConcat which is the default type
    # This test checks that simplify_concat preserves the corresponding expression types.
concat = parse_one("CONCAT('a', x, 'b', 'c')", read="presto") simplified_concat = optimizer.simplify.simplify(concat) safe_concat = parse_one("CONCAT('a', x, 'b', 'c')") simplified_safe_concat = optimizer.simplify.simplify(safe_concat) self.assertEqual(simplified_concat.args["safe"], False) self.assertEqual(simplified_safe_concat.args["safe"], True) self.assertEqual("CONCAT('a', x, 'bc')", simplified_concat.sql(dialect="presto")) self.assertEqual("CONCAT('a', x, 'bc')", simplified_safe_concat.sql()) anon_unquoted_str = parse_one("anonymous(x, y)") self.assertEqual(optimizer.simplify.gen(anon_unquoted_str), "ANONYMOUS(x,y)") query = parse_one("SELECT x FROM t") self.assertEqual(optimizer.simplify.gen(query), optimizer.simplify.gen(query.copy())) anon_unquoted_identifier = exp.Anonymous( this=exp.to_identifier("anonymous"), expressions=[exp.column("x"), exp.column("y")], ) self.assertEqual(optimizer.simplify.gen(anon_unquoted_identifier), "ANONYMOUS(x,y)") anon_quoted = parse_one('"anonymous"(x, y)') self.assertEqual(optimizer.simplify.gen(anon_quoted), '"anonymous"(x,y)') with self.assertRaises(ValueError) as e: anon_invalid = exp.Anonymous(this=5) optimizer.simplify.gen(anon_invalid) self.assertIn( "Anonymous.this expects a str or an Identifier, got 'int'.", str(e.exception), ) sql = parse_one( """ WITH cte AS (select 1 union select 2), cte2 AS ( SELECT ROW() OVER (PARTITION BY y) FROM ( (select 1) limit 10 ) ) SELECT *, a + 1, a div 1, filter("B", (x, y) -> x + y) FROM (z AS z CROSS JOIN z) AS f(a) LEFT JOIN a.b.c.d.e.f.g USING(n) ORDER BY 1 """ ) self.assertEqual( optimizer.simplify.gen(sql), """ SELECT :with_,WITH :expressions,CTE :this,UNION :this,SELECT :expressions,1,:expression,SELECT :expressions,2,:distinct,True,:alias, AS cte,CTE :this,SELECT :expressions,WINDOW :this,ROW(),:partition_by,y,:over,OVER,:from_,FROM ((SELECT :expressions,1):limit,LIMIT :expression,10),:alias, AS cte2,:expressions,STAR,a + 1,a DIV 1,FILTER("B",LAMBDA :this,x + 
y,:expressions,x,y),:from_,FROM (z AS z:joins,JOIN :this,z,:kind,CROSS) AS f(a),:joins,JOIN :this,a.b.c.d.e.f.g,:side,LEFT,:using,n,:order,ORDER :expressions,ORDERED :this,1,:nulls_first,True """.strip(),
        )
        # NOTE(review): the tests from here on were re-broken into lines from whitespace-collapsed
        # text. Multi-line SQL literals are kept on a single line, and loop/`with` nesting was
        # rebuilt from data flow -- confirm against the upstream file.

        self.assertEqual(
            optimizer.simplify.gen(parse_one("select item_id /* description */"), comments=True),
            "SELECT :expressions,item_id /* description */",
        )

    def test_simplify_nested(self):
        """Compare simplify() output on a query with a parenthesized subquery to the expected SQL."""
        sql = """ SELECT x, 1 + 1 FROM foo WHERE x > (((select x + 1 + 1, sum(y + 1 + 1) FROM bar GROUP BY x + 1 + 1))) """
        self.assertEqual(
            parse_one(""" SELECT x, 2 FROM foo WHERE x > ((( select x + 1 + 1, sum(y + 2) FROM bar GROUP BY x + 1 + 1 ))) """).sql(pretty=True),
            optimizer.simplify.simplify(parse_one(sql)).sql(pretty=True),
        )

    def test_unnest_subqueries(self):
        """Run the "unnest_subqueries" fixture through check_file."""
        self.check_file("unnest_subqueries", optimizer.unnest_subqueries.unnest_subqueries)

    def test_pushdown_predicates(self):
        """Run the "pushdown_predicates" fixture through check_file."""
        self.check_file("pushdown_predicates", optimizer.pushdown_predicates.pushdown_predicates)

    def test_expand_alias_refs(self):
        """Check how alias references are expanded by optimize() and qualify_columns()."""
        # check negative integer literal as group by column
        self.assertEqual(
            optimizer.optimize("SELECT -99 AS e GROUP BY e").sql(),
            'SELECT -99 AS "e" GROUP BY 1',
        )

        # check order of lateral expansion with no schema
        self.assertEqual(
            optimizer.optimize("SELECT a + 1 AS d, d + 1 AS e FROM x WHERE e > 1 GROUP BY e").sql(),
            'SELECT "x"."a" + 1 AS "d", "x"."a" + 1 + 1 AS "e" FROM "x" AS "x" WHERE ("x"."a" + 2) > 1 GROUP BY "x"."a" + 1 + 1',
        )

        # The schema below describes table "l" only; none of the queries in this test read from it.
        unused_schema = {"l": {"c": "int"}}
        self.assertEqual(
            optimizer.qualify_columns.qualify_columns(
                parse_one("SELECT CAST(x AS INT) AS y FROM z AS z"),
                schema=unused_schema,
                infer_schema=False,
            ).sql(),
            "SELECT CAST(x AS INT) AS y FROM z AS z",
        )

        # BigQuery expands overlapping alias only for GROUP BY + HAVING
        sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT id AS my_id, CONCAT(id, name) AS full_name FROM data WHERE my_id = 1 GROUP BY my_id, full_name HAVING my_id = 1"
        self.assertEqual(
            optimizer.qualify_columns.qualify_columns(
                parse_one(sql, dialect="bigquery"),
                schema=MappingSchema(schema=unused_schema, dialect="bigquery"),
            ).sql(),
            "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.my_id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1",
        )

        # Clickhouse expands overlapping alias across the entire query
        self.assertEqual(
            optimizer.qualify_columns.qualify_columns(
                parse_one(sql, dialect="clickhouse"),
                schema=MappingSchema(schema=unused_schema, dialect="clickhouse"),
            ).sql(),
            "WITH data AS (SELECT 1 AS id, 2 AS my_id, 'a' AS name, 'b' AS full_name) SELECT data.id AS my_id, CONCAT(data.id, data.name) AS full_name FROM data WHERE data.id = 1 GROUP BY data.id, CONCAT(data.id, data.name) HAVING data.id = 1",
        )

        # Edge case: BigQuery shouldn't expand aliases in complex expressions
        sql = "WITH data AS (SELECT 1 AS id) SELECT FUNC(id) AS id FROM data GROUP BY FUNC(id)"
        self.assertEqual(
            optimizer.qualify_columns.qualify_columns(
                parse_one(sql, dialect="bigquery"),
                schema=MappingSchema(schema=unused_schema, dialect="bigquery"),
            ).sql(),
            "WITH data AS (SELECT 1 AS id) SELECT FUNC(data.id) AS id FROM data GROUP BY FUNC(data.id)",
        )

        sql = "SELECT x.a, max(x.b) as x FROM x AS x GROUP BY 1 HAVING x > 1"
        self.assertEqual(
            optimizer.qualify_columns.qualify_columns(
                parse_one(sql, dialect="bigquery"),
                schema=MappingSchema(schema=unused_schema, dialect="bigquery"),
            ).sql(),
            "SELECT x.a AS a, MAX(x.b) AS x FROM x AS x GROUP BY 1 HAVING x > 1",
        )

    def test_optimize_joins(self):
        """Run the "optimize_joins" fixture through check_file."""
        self.check_file(
            "optimize_joins",
            optimizer.optimize_joins.optimize_joins,
        )

    def test_eliminate_joins(self):
        """Run the "eliminate_joins" fixture through check_file with pretty=True."""
        self.check_file(
            "eliminate_joins",
            optimizer.eliminate_joins.eliminate_joins,
            pretty=True,
        )

    def test_eliminate_ctes(self):
        """Run the "eliminate_ctes" fixture through check_file with pretty=True."""
        self.check_file(
            "eliminate_ctes",
            optimizer.eliminate_ctes.eliminate_ctes,
            pretty=True,
        )

    @patch("sqlglot.generator.logger")
    def test_merge_subqueries(self, logger):
        """Run the "merge_subqueries" fixture through qualify_tables, qualify_columns, merge_subqueries."""
        optimize = partial(
            optimizer.optimize,
            rules=[
                optimizer.qualify_tables.qualify_tables,
                optimizer.qualify_columns.qualify_columns,
                optimizer.merge_subqueries.merge_subqueries,
            ],
        )

        self.check_file("merge_subqueries", optimize, execute=True, schema=self.schema)

    def test_eliminate_subqueries(self):
        """Run the "eliminate_subqueries" fixture through check_file."""
        self.check_file("eliminate_subqueries", optimizer.eliminate_subqueries.eliminate_subqueries)

    def test_canonicalize(self):
        """Run the "canonicalize" fixture, then check a T-SQL string addition rendered as Postgres."""
        optimize = partial(
            optimizer.optimize,
            rules=[
                optimizer.qualify.qualify,
                optimizer.qualify_columns.quote_identifiers,
                annotate_types,
                optimizer.canonicalize.canonicalize,
            ],
        )
        self.check_file("canonicalize", optimize, schema=self.schema)

        # In T-SQL and Redshift, SELECT a + b can produce a NULL, so we can't transpile it
        # into a CONCAT in Postgres, because that coalesces NULL values with empty strings
        ast = optimize("SELECT CAST(a AS TEXT) + CAST(b AS TEXT) FROM t", dialect="tsql")
        self.assertEqual(
            ast.sql("postgres"),
            'SELECT CAST("t"."a" AS TEXT) || CAST("t"."b" AS TEXT) AS "_col_0" FROM "t" AS "t"',
        )

    def test_tpch(self):
        """Run the "tpc-h/tpc-h" fixture through optimizer.optimize with TPCH_SCHEMA."""
        self.check_file("tpc-h/tpc-h", optimizer.optimize, schema=TPCH_SCHEMA, pretty=True)

    def test_tpcds(self):
        """Run the "tpc-ds/tpc-ds" fixture through optimizer.optimize with TPCDS_SCHEMA."""
        self.check_file("tpc-ds/tpc-ds", optimizer.optimize, schema=TPCDS_SCHEMA, pretty=True)

    def test_file_schema(self):
        """Check that an on_qualify callback replacing the table with "bar" shows up in the output."""
        self.assertEqual(
            optimizer.optimize(
                "SELECT * FROM foo",
                on_qualify=lambda table: table.replace(exp.to_table("bar")),
            ).sql(),
            'SELECT * FROM "bar"',
        )

    def test_scope(self):
        """Check scope construction and traversal via both traverse_scope and build_scope().traverse()."""
        ast = parse_one("SELECT IF(a IN UNNEST(b), 1, 0) AS c FROM t", dialect="bigquery")
        self.assertEqual(build_scope(ast).columns, [exp.column("a"), exp.column("b")])

        # Both traversal entry points must yield the same scopes for a 10000-branch UNION ALL.
        many_unions = parse_one(" UNION ALL ".join(["SELECT x FROM t"] * 10000))
        scopes_using_traverse = list(build_scope(many_unions).traverse())
        scopes_using_traverse_scope = traverse_scope(many_unions)
        self.assertEqual(len(scopes_using_traverse), len(scopes_using_traverse_scope))
        assert all(
            x.expression is y.expression
            for x, y in zip(scopes_using_traverse, scopes_using_traverse_scope)
        )

        sql = """ WITH q AS ( SELECT x.b FROM x ), r AS ( SELECT y.b FROM y ), z as ( SELECT cola, colb FROM (VALUES(1, 'test')) AS tab(cola, colb) ) SELECT r.b, s.b FROM r JOIN ( SELECT y.c AS b FROM y ) s ON s.b = r.b WHERE s.b > (SELECT MAX(x.a) FROM x WHERE x.b = s.b) """
        expression = parse_one(sql)
        for scopes in traverse_scope(expression), list(build_scope(expression).traverse()):
            self.assertEqual(len(scopes), 7)
            self.assertEqual(scopes[0].expression.sql(), "SELECT x.b FROM x")
            self.assertEqual(scopes[1].expression.sql(), "SELECT y.b FROM y")
            self.assertEqual(scopes[2].expression.sql(), "(VALUES (1, 'test')) AS tab(cola, colb)")
            self.assertEqual(
                scopes[3].expression.sql(),
                "SELECT cola, colb FROM (VALUES (1, 'test')) AS tab(cola, colb)",
            )
            self.assertEqual(scopes[4].expression.sql(), "SELECT y.c AS b FROM y")
            self.assertEqual(scopes[5].expression.sql(), "SELECT MAX(x.a) FROM x WHERE x.b = s.b")
            self.assertEqual(scopes[6].expression.sql(), parse_one(sql).sql())

            self.assertEqual(set(scopes[6].sources), {"q", "z", "r", "s"})
            self.assertEqual(len(scopes[6].columns), 6)
            self.assertEqual({c.table for c in scopes[6].columns}, {"r", "s"})
            self.assertEqual(scopes[6].source_columns("q"), [])
            self.assertEqual(len(scopes[6].source_columns("r")), 2)
            self.assertEqual({c.table for c in scopes[6].source_columns("r")}, {"r"})

            self.assertEqual({c.sql() for c in scopes[-1].find_all(exp.Column)}, {"r.b", "s.b"})
            self.assertEqual(scopes[-1].find(exp.Column).sql(), "r.b")
            self.assertEqual({c.sql() for c in scopes[0].find_all(exp.Column)}, {"x.b"})

            # NOTE(review): placement of the next assertion inside this loop is a reconstruction;
            # it does not use `scopes`, so it may belong after the loop -- confirm.
            # Check that we can walk in scope from an arbitrary node
            self.assertEqual(
                {
                    node.sql()
                    for node in walk_in_scope(expression.find(exp.Where))
                    if isinstance(node, exp.Column)
                },
                {"s.b"},
            )

        # Check that parentheses don't introduce a new scope unless an alias is attached
        sql = "SELECT * FROM (((SELECT * FROM (t1 JOIN t2) AS t3) JOIN (SELECT * FROM t4)))"
        expression = parse_one(sql)
        for scopes in traverse_scope(expression), list(build_scope(expression).traverse()):
            self.assertEqual(len(scopes), 4)

            self.assertEqual(scopes[0].expression.sql(), "t1, t2")
            self.assertEqual(set(scopes[0].sources), {"t1", "t2"})

            self.assertEqual(scopes[1].expression.sql(), "SELECT * FROM (t1, t2) AS t3")
            self.assertEqual(set(scopes[1].sources), {"t3"})

            self.assertEqual(scopes[2].expression.sql(), "SELECT * FROM t4")
            self.assertEqual(set(scopes[2].sources), {"t4"})

            self.assertEqual(
                scopes[3].expression.sql(),
                "SELECT * FROM (((SELECT * FROM (t1, t2) AS t3), (SELECT * FROM t4)))",
            )
            self.assertEqual(set(scopes[3].sources), {""})

        inner_query = "SELECT bar FROM baz"
        for udtf in (f"UNNEST(({inner_query}))", f"LATERAL ({inner_query})"):
            sql = f"SELECT a FROM foo CROSS JOIN {udtf}"
            expression = parse_one(sql)

            for scopes in traverse_scope(expression), list(build_scope(expression).traverse()):
                self.assertEqual(len(scopes), 3)

                self.assertEqual(scopes[0].expression.sql(), inner_query)
                self.assertEqual(set(scopes[0].sources), {"baz"})

                self.assertEqual(scopes[1].expression.sql(), udtf)
                self.assertEqual(set(scopes[1].sources), {"", "foo"})  # foo is a lateral source

                self.assertEqual(scopes[2].expression.sql(), f"SELECT a FROM foo CROSS JOIN {udtf}")
                self.assertEqual(set(scopes[2].sources), {"", "foo"})

        # Check DML statement scopes
        sql = (
            "UPDATE customers SET total_spent = (SELECT 1 FROM t1) WHERE EXISTS (SELECT 1 FROM t2)"
        )
        self.assertEqual(len(traverse_scope(parse_one(sql))), 3)

        sql = "UPDATE tbl1 SET col = 1 WHERE EXISTS (SELECT 1 FROM tbl2 WHERE tbl1.id = tbl2.id)"
        self.assertEqual(len(traverse_scope(parse_one(sql))), 1)

        sql = "UPDATE tbl1 SET col = 0"
        self.assertEqual(len(traverse_scope(parse_one(sql))), 0)

        sql = "SELECT * FROM t LEFT JOIN UNNEST(a) AS a1 LEFT JOIN UNNEST(a1.a) AS a2"
        scope = build_scope(parse_one(sql, read="bigquery"))
        self.assertEqual(set(scope.selected_sources), {"t", "a1", "a2"})

    @patch("sqlglot.optimizer.scope.logger")
    def test_scope_warning(self, logger):
        """Check that traversing a CTE whose body is `@y` yields one scope and logs a warning."""
        self.assertEqual(len(traverse_scope(parse_one("WITH q AS (@y) SELECT * FROM q"))), 1)
        assert_logger_contains(
            "Cannot traverse scope %s with type '%s'",
            logger,
            level="warning",
        )

    def test_annotate_types(self):
        """Check each (sql, expected type) pair loaded from optimizer/annotate_types.sql."""
        for i, (meta, sql, expected) in enumerate(
            load_sql_fixture_pairs("optimizer/annotate_types.sql"), start=1
        ):
            title = meta.get("title") or f"{i}, {sql}"
            dialect = meta.get("dialect")
            result = parse_and_optimize(annotate_types, sql, dialect, dialect=dialect)

            with self.subTest(title):
                self.assertEqual(
                    result.type.sql(dialect),
                    exp.DataType.build(expected, dialect=dialect).sql(dialect),
                )

    def test_annotate_funcs(self):
        """Check each fixture pair from optimizer/annotate_functions.sql against table `tbl`."""
        test_schema = {
            "tbl": {
                "bin_col": "BINARY",
                "str_col": "STRING",
                "bignum_col": "BIGNUMERIC",
                "date_col": "DATE",
                "decfloat_col": "DECFLOAT",
                "float_col": "FLOAT",
                "timestamp_col": "TIMESTAMP",
                "double_col": "DOUBLE",
                "bigint_col": "BIGINT",
                "obj_col": "OBJECT",
                "int_col": "INT",
                "bool_col": "BOOLEAN",
                "bytes_col": "BYTES",
                "interval_col": "INTERVAL",
                "array_col": "ARRAY",
            }
        }

        for i, (meta, sql, expected) in enumerate(
            load_sql_fixture_pairs("optimizer/annotate_functions.sql"), start=1
        ):
            title = meta.get("title") or f"{i}, {sql}"
            dialect = meta.get("dialect") or ""
            sql = f"SELECT {sql} FROM tbl"

            # The fixture's dialect entry is split on ", " and each resulting dialect is tested.
            for dialect in dialect.split(", "):
                with self.subTest(title):
                    result = parse_and_optimize(
                        annotate_functions, sql, dialect, schema=test_schema, dialect=dialect
                    )

                    self.assertEqual(
                        result.type.sql(dialect),
                        exp.DataType.build(expected, dialect=dialect).sql(dialect),
                    )

    def test_cast_type_annotation(self):
        """Check types annotated on CAST expressions, their operands and their target types."""
        expression = annotate_types(parse_one("CAST('2020-01-01' AS TIMESTAMPTZ(9))"))
        self.assertEqual(expression.type.this, exp.DataType.Type.TIMESTAMPTZ)
        self.assertEqual(expression.this.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(expression.args["to"].type.this, exp.DataType.Type.TIMESTAMPTZ)
        self.assertEqual(expression.args["to"].expressions[0].this.type.this, exp.DataType.Type.INT)

        # NOTE(review): both "ARRAY" strings below may have lost a type parameter in extraction
        # (other strings in this file end in a stray '>') -- confirm against upstream.
        expression = annotate_types(parse_one("ARRAY(1)::ARRAY"))
        self.assertEqual(expression.type, parse_one("ARRAY", into=exp.DataType))

        expression = annotate_types(parse_one("CAST(x AS INTERVAL)"))
        self.assertEqual(expression.type.this, exp.DataType.Type.INTERVAL)
        self.assertEqual(expression.this.type.this, exp.DataType.Type.UNKNOWN)
        self.assertEqual(expression.args["to"].type.this, exp.DataType.Type.INTERVAL)

    def test_cache_annotation(self):
        """Check that the SELECT inside a CACHE LAZY TABLE statement gets its literal typed INT."""
        expression = annotate_types(
            parse_one("CACHE LAZY TABLE x OPTIONS('storageLevel' = 'value') AS SELECT 1")
        )
        self.assertEqual(expression.expression.expressions[0].type.this, exp.DataType.Type.INT)

    def test_binary_annotation(self):
        """Check types annotated on a nested addition and on string-plus-numeric additions."""
        expression = annotate_types(parse_one("SELECT 0.0 + (2 + 3)")).expressions[0]

        self.assertEqual(expression.type.this, exp.DataType.Type.DOUBLE)
        self.assertEqual(expression.left.type.this, exp.DataType.Type.DOUBLE)
        self.assertEqual(expression.right.type.this, exp.DataType.Type.INT)
        self.assertEqual(expression.right.this.type.this, exp.DataType.Type.INT)
        self.assertEqual(expression.right.this.left.type.this, exp.DataType.Type.INT)
        self.assertEqual(expression.right.this.right.type.this, exp.DataType.Type.INT)

        for numeric_type in ("BIGINT", "DOUBLE", "INT"):
            query = f"SELECT '1' + CAST(x AS {numeric_type})"
            expression = annotate_types(parse_one(query)).expressions[0]
            self.assertEqual(expression.type, exp.DataType.build(numeric_type))

    def test_typeddiv_annotation(self):
        """Check division result types for queries parsed with the presto and mysql dialects."""
        expressions = annotate_types(
            parse_one("SELECT 2 / 3, 2 / 3.0", dialect="presto")
        ).expressions

        self.assertEqual(expressions[0].type.this, exp.DataType.Type.BIGINT)
        self.assertEqual(expressions[1].type.this, exp.DataType.Type.DOUBLE)

        expressions = annotate_types(
            parse_one("SELECT SUM(2 / 3), CAST(2 AS DECIMAL) / 3", dialect="mysql")
        ).expressions

        self.assertEqual(expressions[0].type.this, exp.DataType.Type.DOUBLE)
        self.assertEqual(expressions[0].this.type.this, exp.DataType.Type.DOUBLE)
        self.assertEqual(expressions[1].type.this, exp.DataType.Type.DECIMAL)

    def test_bracket_annotation(self):
        """Check types annotated on bracket (index/slice) expressions over arrays and maps."""
        expression = annotate_types(parse_one("SELECT A[:]")).expressions[0]

        self.assertEqual(expression.type.this, exp.DataType.Type.UNKNOWN)
        self.assertEqual(expression.expressions[0].type.this, exp.DataType.Type.UNKNOWN)

        # NOTE(review): the expected type strings in this test ("ARRAY", "ARRAY>") look truncated;
        # "ARRAY>" has an unbalanced '>'. Type parameters were probably lost in extraction --
        # confirm against upstream before relying on these assertions.
        expression = annotate_types(parse_one("SELECT ARRAY[1, 2, 3][1]")).expressions[0]
        self.assertEqual(expression.this.type.sql(), "ARRAY")
        self.assertEqual(expression.type.this, exp.DataType.Type.INT)

        expression = annotate_types(parse_one("SELECT ARRAY[1, 2, 3][1 : 2]")).expressions[0]
        self.assertEqual(expression.this.type.sql(), "ARRAY")
        self.assertEqual(expression.type.sql(), "ARRAY")

        expression = annotate_types(
            parse_one("SELECT ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]][1][2]")
        ).expressions[0]
        self.assertEqual(expression.this.this.type.sql(), "ARRAY>")
        self.assertEqual(expression.this.type.sql(), "ARRAY")
        self.assertEqual(expression.type.this, exp.DataType.Type.INT)

        expression = annotate_types(
            parse_one("SELECT ARRAY[ARRAY[1], ARRAY[2], ARRAY[3]][1:2]")
        ).expressions[0]
        self.assertEqual(expression.type.sql(), "ARRAY>")

        expression = annotate_types(parse_one("MAP(1.0, 2, '2', 3.0)['2']", read="spark"))
        self.assertEqual(expression.type.this, exp.DataType.Type.DOUBLE)

        expression = annotate_types(parse_one("MAP(1.0, 2, x, 3.0)[2]", read="spark"))
        self.assertEqual(expression.type.this, exp.DataType.Type.UNKNOWN)

        expression = annotate_types(parse_one("MAP(ARRAY(1.0, x), ARRAY(2, 3.0))[x]"))
        self.assertEqual(expression.type.this, exp.DataType.Type.DOUBLE)

        expression = annotate_types(
            parse_one("SELECT MAP(1.0, 2, 2, t.y)[2] FROM t", read="spark"),
            schema={"t": {"y": "int"}},
        ).expressions[0]
        self.assertEqual(expression.type.this, exp.DataType.Type.INT)

    def test_interval_math_annotation(self):
        """Check result types of date/interval arithmetic and date functions, and SQL round-trip."""
        schema = {
            "x": {
                "a": "DATE",
                "b": "DATETIME",
            }
        }
        for sql, expected_type in [
            (
                "SELECT '2023-01-01' + INTERVAL '1' DAY",
                exp.DataType.Type.DATE,
            ),
            (
                "SELECT '2023-01-01' + INTERVAL '1' HOUR",
                exp.DataType.Type.DATETIME,
            ),
            (
                "SELECT '2023-01-01 00:00:01' + INTERVAL '1' HOUR",
                exp.DataType.Type.DATETIME,
            ),
            ("SELECT 'nonsense' + INTERVAL '1' DAY", exp.DataType.Type.UNKNOWN),
            ("SELECT x.a + INTERVAL '1' DAY FROM x AS x", exp.DataType.Type.DATE),
            (
                "SELECT x.a + INTERVAL '1' HOUR FROM x AS x",
                exp.DataType.Type.DATETIME,
            ),
            ("SELECT x.b + INTERVAL '1' DAY FROM x AS x", exp.DataType.Type.DATETIME),
            ("SELECT x.b + INTERVAL '1' HOUR FROM x AS x", exp.DataType.Type.DATETIME),
            (
                "SELECT DATE_ADD('2023-01-01', 1, 'DAY')",
                exp.DataType.Type.DATE,
            ),
            (
                "SELECT DATE_ADD('2023-01-01 00:00:00', 1, 'DAY')",
                exp.DataType.Type.DATETIME,
            ),
            ("SELECT DATE_ADD(x.a, 1, 'DAY') FROM x AS x", exp.DataType.Type.DATE),
            (
                "SELECT DATE_ADD(x.a, 1, 'HOUR') FROM x AS x",
                exp.DataType.Type.DATETIME,
            ),
            ("SELECT DATE_ADD(x.b, 1, 'DAY') FROM x AS x", exp.DataType.Type.DATETIME),
            ("SELECT DATE_TRUNC('DAY', x.a) FROM x AS x", exp.DataType.Type.DATE),
            ("SELECT DATE_TRUNC('DAY', x.b) FROM x AS x", exp.DataType.Type.DATETIME),
            (
                "SELECT DATE_TRUNC('SECOND', x.a) FROM x AS x",
                exp.DataType.Type.DATETIME,
            ),
            (
                "SELECT DATE_TRUNC('DAY', '2023-01-01') FROM x AS x",
                exp.DataType.Type.DATE,
            ),
            (
                "SELECT DATEDIFF('2023-01-01', '2023-01-02', DAY) FROM x AS x",
                exp.DataType.Type.INT,
            ),
        ]:
            with self.subTest(sql):
                expression = annotate_types(parse_one(sql), schema=schema)
                self.assertEqual(expected_type, expression.expressions[0].type.this)
                # NOTE(review): nesting of this second assertion under subTest is a reconstruction.
                self.assertEqual(sql, expression.sql())

    def test_lateral_annotation(self):
        """Check that a column produced by LATERAL VIEW EXPLODE is typed INT after optimize()."""
        expression = optimizer.optimize(
            parse_one("SELECT c FROM (select 1 a) as x LATERAL VIEW EXPLODE (a) AS c")
        ).expressions[0]
        self.assertEqual(expression.type.this, exp.DataType.Type.INT)

    def test_derived_tables_column_annotation(self):
        """Check types annotated on columns that pass through nested derived tables."""
        schema = {"x": {"cola": "INT"}, "y": {"cola": "FLOAT"}}
        sql = """ SELECT a.cola AS cola FROM ( SELECT x.cola + y.cola AS cola FROM ( SELECT x.cola AS cola FROM x AS x ) AS x JOIN ( SELECT y.cola AS cola FROM y AS y ) AS y ) AS a """

        expression = annotate_types(parse_one(sql), schema=schema)
        self.assertEqual(
            expression.expressions[0].type.this, exp.DataType.Type.FLOAT
        )  # a.cola AS cola

        addition_alias = expression.args["from_"].this.this.expressions[0]
        self.assertEqual(
            addition_alias.type.this, exp.DataType.Type.FLOAT
        )  # x.cola + y.cola AS cola

        addition = addition_alias.this
        self.assertEqual(addition.type.this, exp.DataType.Type.FLOAT)
        self.assertEqual(addition.this.type.this, exp.DataType.Type.INT)
        self.assertEqual(addition.expression.type.this, exp.DataType.Type.FLOAT)

    def test_cte_column_annotation(self):
        """Check types annotated on columns that pass through a CTE built from joined subqueries."""
        schema = {"x": {"cola": "CHAR"}, "y": {"colb": "TEXT", "colc": "BOOLEAN"}}
        sql = """ WITH tbl AS ( SELECT x.cola + 'bla' AS cola, y.colb AS colb, y.colc AS colc FROM ( SELECT x.cola AS cola FROM x AS x ) AS x JOIN ( SELECT y.colb AS colb, y.colc AS colc FROM y AS y ) AS y ) SELECT tbl.cola + tbl.colb + 'foo' AS col FROM tbl AS tbl WHERE tbl.colc = True """

        expression = annotate_types(parse_one(sql), schema=schema)
        self.assertEqual(
            expression.expressions[0].type.this, exp.DataType.Type.TEXT
        )  # tbl.cola + tbl.colb + 'foo' AS col

        outer_addition = expression.expressions[0].this  # (tbl.cola + tbl.colb) + 'foo'
        self.assertEqual(outer_addition.type.this, exp.DataType.Type.TEXT)
        self.assertEqual(outer_addition.left.type.this, exp.DataType.Type.TEXT)
        self.assertEqual(outer_addition.right.type.this, exp.DataType.Type.VARCHAR)

        inner_addition = expression.expressions[0].this.left  # tbl.cola + tbl.colb
        self.assertEqual(inner_addition.left.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(inner_addition.right.type.this, exp.DataType.Type.TEXT)

        # WHERE tbl.colc = True
        self.assertEqual(expression.args["where"].this.type.this, exp.DataType.Type.BOOLEAN)

        cte_select = expression.args["with_"].expressions[0].this
        self.assertEqual(
            cte_select.expressions[0].type.this, exp.DataType.Type.VARCHAR
        )  # x.cola + 'bla' AS cola
        self.assertEqual(
            cte_select.expressions[1].type.this, exp.DataType.Type.TEXT
        )  # y.colb AS colb
        self.assertEqual(
            cte_select.expressions[2].type.this, exp.DataType.Type.BOOLEAN
        )  # y.colc AS colc

        cte_select_addition = cte_select.expressions[0].this  # x.cola + 'bla'
        self.assertEqual(cte_select_addition.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(cte_select_addition.left.type.this, exp.DataType.Type.CHAR)
        self.assertEqual(cte_select_addition.right.type.this, exp.DataType.Type.VARCHAR)

        # Check that x.cola AS cola and y.colb AS colb have types CHAR and TEXT, respectively
        for d, t in zip(
            cte_select.find_all(exp.Subquery),
            [exp.DataType.Type.CHAR, exp.DataType.Type.TEXT],
        ):
            self.assertEqual(d.this.expressions[0].this.type.this, t)

    def test_function_annotation(self):
        """Check types annotated on string concat, TRIM, DATE, DATEFROMPARTS, CASE and TIMESTAMP."""
        schema = {"x": {"cola": "VARCHAR", "colb": "CHAR"}}
        sql = (
            "SELECT x.cola || TRIM(x.colb) AS col, DATE(x.colb), DATEFROMPARTS(y, m, d) FROM x AS x"
        )

        expression = annotate_types(parse_one(sql), schema=schema)
        concat_expr_alias = expression.expressions[0]
        self.assertEqual(concat_expr_alias.type.this, exp.DataType.Type.VARCHAR)

        concat_expr = concat_expr_alias.this
        self.assertEqual(concat_expr.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(concat_expr.left.type.this, exp.DataType.Type.VARCHAR)  # x.cola
        self.assertEqual(concat_expr.right.type.this, exp.DataType.Type.VARCHAR)  # TRIM(x.colb)
        self.assertEqual(concat_expr.right.this.type.this, exp.DataType.Type.CHAR)  # x.colb

        date_expr = expression.expressions[1]
        self.assertEqual(date_expr.type.this, exp.DataType.Type.DATE)

        date_expr = expression.expressions[2]
        self.assertEqual(date_expr.type.this, exp.DataType.Type.DATE)

        sql = "SELECT CASE WHEN 1=1 THEN x.cola ELSE x.colb END AS col FROM x AS x"

        case_expr_alias = annotate_types(parse_one(sql), schema=schema).expressions[0]
        self.assertEqual(case_expr_alias.type.this, exp.DataType.Type.VARCHAR)

        case_expr = case_expr_alias.this
        self.assertEqual(case_expr.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(case_expr.args["default"].type.this, exp.DataType.Type.CHAR)

        case_ifs_expr = case_expr.args["ifs"][0]
        self.assertEqual(case_ifs_expr.type.this, exp.DataType.Type.VARCHAR)
        self.assertEqual(case_ifs_expr.args["true"].type.this, exp.DataType.Type.VARCHAR)

        timestamp = annotate_types(parse_one("TIMESTAMP(x)"))
        self.assertEqual(timestamp.type.this, exp.DataType.Type.TIMESTAMP)

        timestamptz = annotate_types(parse_one("TIMESTAMP(x)", read="bigquery"))
        self.assertEqual(timestamptz.type.this, exp.DataType.Type.TIMESTAMPTZ)

    def test_unknown_annotation(self):
        """Check that expressions involving an anonymous function are typed UNKNOWN."""
        schema = {"x": {"cola": "VARCHAR"}}
        sql = "SELECT x.cola + SOME_ANONYMOUS_FUNC(x.cola) AS col FROM x AS x"

        concat_expr_alias = annotate_types(parse_one(sql), schema=schema).expressions[0]
        self.assertEqual(concat_expr_alias.type.this, exp.DataType.Type.UNKNOWN)

        concat_expr = concat_expr_alias.this
        self.assertEqual(concat_expr.type.this, exp.DataType.Type.UNKNOWN)
        self.assertEqual(concat_expr.left.type.this, exp.DataType.Type.VARCHAR)  # x.cola
        self.assertEqual(
            concat_expr.right.type.this, exp.DataType.Type.UNKNOWN
        )  # SOME_ANONYMOUS_FUNC(x.cola)
        self.assertEqual(
            concat_expr.right.expressions[0].type.this, exp.DataType.Type.VARCHAR
        )  # x.cola (arg)

        # Ensures we don't raise if there are unqualified columns
        annotate_types(parse_one("select x from y lateral view explode(y) as x")).expressions[0]

        # NULL <op> UNKNOWN should yield UNKNOWN
        # NOTE(review): SOURCE reads "NULL UNKNOWN"; the "<op>" above is a guess at text that
        # appears to have been lost in extraction -- confirm against upstream.
        self.assertEqual(
            annotate_types(parse_one("SELECT NULL + ANONYMOUS_FUNC()")).expressions[0].type.this,
            exp.DataType.Type.UNKNOWN,
        )

    def test_udf_annotation(self):
        """Check UDF return types resolved from MappingSchema's udf_mapping at several nesting levels."""
        # Unqualified UDF
        schema = MappingSchema(
            schema={"t": {"col": "INT"}},
            udf_mapping={"my_func": "VARCHAR"},
        )
        expr = annotate_types(parse_one("SELECT my_func(col) FROM t"), schema=schema)
        self.assertEqual(expr.selects[0].type.this, exp.DataType.Type.VARCHAR)

        # Qualified UDF (2-level)
        schema = MappingSchema(
            schema={"db": {"t": {"col": "INT"}}},
            udf_mapping={"db": {"my_func": "DOUBLE"}},
        )
        expr = annotate_types(parse_one("SELECT db.my_func(col) FROM db.t"), schema=schema)
        anon = expr.selects[0].find(exp.Anonymous)
        self.assertEqual(anon.type.this, exp.DataType.Type.DOUBLE)
        # Dot parent should also have the type
        self.assertEqual(expr.selects[0].type.this, exp.DataType.Type.DOUBLE)

        # Qualified UDF (3-level)
        schema = MappingSchema(
            schema={"cat": {"db": {"t": {"col": "INT"}}}},
            udf_mapping={"cat": {"db": {"my_func": "BOOLEAN"}}},
        )
        expr = annotate_types(parse_one("SELECT cat.db.my_func(col) FROM cat.db.t"), schema=schema)
        anon = expr.selects[0].find(exp.Anonymous)
        self.assertEqual(anon.type.this, exp.DataType.Type.BOOLEAN)

        # Unknown UDF returns UNKNOWN
        schema = MappingSchema(
            schema={"t": {"col": "INT"}},
            udf_mapping={"known_func": "DATE"},
        )
        expr = annotate_types(parse_one("SELECT unknown_func(col) FROM t"), schema=schema)
        self.assertEqual(expr.selects[0].type.this, exp.DataType.Type.UNKNOWN)

        # Test get_udf_type with string input
        schema = MappingSchema(udf_mapping={"my_func": "INT"})
        self.assertEqual(schema.get_udf_type("my_func(x)").this, exp.DataType.Type.INT)

        schema = MappingSchema(udf_mapping={"db": {"my_func": "FLOAT"}})
        self.assertEqual(schema.get_udf_type("db.my_func(x, y)").this, exp.DataType.Type.FLOAT)

        schema = MappingSchema(udf_mapping={"cat": {"db": {"my_func": "DATE"}}})
        self.assertEqual(
            schema.get_udf_type("cat.db.my_func(a, b, c)").this, exp.DataType.Type.DATE
        )

        # Unknown UDF string returns UNKNOWN
        schema = MappingSchema(udf_mapping={"known": "INT"})
        self.assertEqual(schema.get_udf_type("unknown(x)").this, exp.DataType.Type.UNKNOWN)

    def test_predicate_annotation(self):
        """Check that BETWEEN and IN predicates are typed BOOLEAN."""
        expression = annotate_types(parse_one("x BETWEEN a AND b"))
        self.assertEqual(expression.type.this, exp.DataType.Type.BOOLEAN)

        expression = annotate_types(parse_one("x IN (a, b, c, d)"))
        self.assertEqual(expression.type.this, exp.DataType.Type.BOOLEAN)

    def test_aggfunc_annotation(self):
        """Check result types of several functions applied to columns of table `x`."""
        schema = {"x": {"cola": "SMALLINT", "colb": "FLOAT", "colc": "TEXT", "cold": "DATE"}}

        # Maps (function name, column) to the type expected for `function(x.column)`.
        tests = {
            ("AVG", "cola"): exp.DataType.Type.DOUBLE,
            ("SUM", "cola"): exp.DataType.Type.BIGINT,
            ("SUM", "colb"): exp.DataType.Type.DOUBLE,
            ("MIN", "cola"): exp.DataType.Type.SMALLINT,
            ("MIN", "colb"): exp.DataType.Type.FLOAT,
            ("MAX", "colc"): exp.DataType.Type.TEXT,
            ("MAX", "cold"): exp.DataType.Type.DATE,
            ("COUNT", "colb"): exp.DataType.Type.BIGINT,
            ("STDDEV", "cola"): exp.DataType.Type.DOUBLE,
            ("ABS", "cola"): exp.DataType.Type.SMALLINT,
            ("ABS", "colb"): exp.DataType.Type.FLOAT,
        }

        for (func, col), target_type in tests.items():
            expression = annotate_types(
                parse_one(f"SELECT {func}(x.{col}) AS _col_0 FROM x AS x"),
                schema=schema,
            )
            self.assertEqual(expression.expressions[0].type.this, target_type)

    def test_concat_annotation(self):
        """Check that CONCAT of two string literals is typed VARCHAR."""
        expression = annotate_types(parse_one("CONCAT('A', 'B')"))
        self.assertEqual(expression.type.this, exp.DataType.Type.VARCHAR)

    def test_root_subquery_annotation(self):
        """Check that a parenthesized root SELECT parses to a Subquery whose selects are typed INT."""
        expression = annotate_types(parse_one("(SELECT 1, 2 FROM x) LIMIT 0"))
        self.assertIsInstance(expression, exp.Subquery)
        self.assertEqual(exp.DataType.Type.INT, expression.selects[0].type.this)
        self.assertEqual(exp.DataType.Type.INT, expression.selects[1].type.this)

    def test_nested_type_annotation(self):
        """Check nested (ARRAY/STRUCT) types annotated on aggregates, array functions and UNNEST."""
        schema = {
            "order": {
                "customer_id": "bigint",
                "item_id": "bigint",
                "item_price": "numeric",
            }
        }
        sql = """ SELECT ARRAY_AGG(DISTINCT order.item_id) FILTER (WHERE order.item_price > 10) AS items, FROM order AS order GROUP BY order.customer_id """
        expression = annotate_types(parse_one(sql), schema=schema)

        # NOTE(review): the "ARRAY", "ARRAY>" and "array" strings in this test look truncated
        # (lost type parameters) -- confirm against upstream.
        self.assertEqual(exp.DataType.Type.ARRAY, expression.selects[0].type.this)
        self.assertEqual(expression.selects[0].type.sql(), "ARRAY")

        expression = annotate_types(
            parse_one("SELECT ARRAY_CAT(ARRAY[1,2,3], ARRAY[4,5])", read="postgres")
        )
        self.assertEqual(exp.DataType.Type.ARRAY, expression.selects[0].type.this)
        self.assertEqual(expression.selects[0].type.sql(), "ARRAY")

        schema = MappingSchema({"t": {"c": "STRUCT<`f` STRING>"}}, dialect="bigquery")
        expression = annotate_types(parse_one("SELECT t.c, [t.c] FROM t"), schema=schema)

        self.assertEqual(expression.selects[0].type.sql(dialect="bigquery"), "STRUCT<`f` STRING>")
        self.assertEqual(
            expression.selects[1].type.sql(dialect="bigquery"),
            "ARRAY>",
        )

        expression = annotate_types(
            parse_one("SELECT unnest(t.x) FROM t AS t", dialect="postgres"),
            schema={"t": {"x": "array"}},
        )
        self.assertTrue(expression.selects[0].is_type("int"))

    def test_type_annotation_cache(self):
        """Check that re-annotating after replacing a node yields the new node's type."""
        sql = "SELECT 1 + 1"
        expression = annotate_types(parse_one(sql))

        self.assertEqual(exp.DataType.Type.INT, expression.selects[0].type.this)

        expression.selects[0].this.replace(parse_one("1.2"))
        expression = annotate_types(expression)

        self.assertEqual(exp.DataType.Type.DOUBLE, expression.selects[0].type.this)

    def test_user_defined_type_annotation(self):
        """Check that a CAST to IPADDRESS is typed USERDEFINED and renders as IPADDRESS."""
        schema = MappingSchema({"t": {"x": "int"}}, dialect="postgres")
        expression = annotate_types(parse_one("SELECT CAST(x AS IPADDRESS) FROM t"), schema=schema)

        self.assertEqual(exp.DataType.Type.USERDEFINED, expression.selects[0].type.this)
        self.assertEqual(expression.selects[0].type.sql(dialect="postgres"), "IPADDRESS")

    def test_unnest_annotation(self):
        """Check types of columns selected from UNNEST sources."""
        # NOTE(review): the schema type "ARRAY>>" and the expected "STRUCT>" / "STRUCT" strings
        # have unbalanced or missing type parameters, probably lost in extraction -- confirm
        # against upstream.
        expression = annotate_types(
            optimizer.qualify.qualify(
                parse_one(
                    """ SELECT a, a.b, a.b.c FROM x, UNNEST(x.a) AS a """,
                    read="bigquery",
                )
            ),
            schema={"x": {"a": "ARRAY>>"}},
        )
        self.assertEqual(expression.selects[0].type, exp.DataType.build("STRUCT>"))
        self.assertEqual(expression.selects[1].type, exp.DataType.build("STRUCT"))
        self.assertEqual(expression.selects[2].type, exp.DataType.build("int"))

        self.assertEqual(
            annotate_types(
                optimizer.qualify.qualify(
                    parse_one(
                        "SELECT x FROM UNNEST(GENERATE_DATE_ARRAY('2021-01-01', current_date(), interval 1 day)) AS x"
                    )
                )
            )
            .selects[0]
            .type,
            exp.DataType.build("date"),
        )

        self.assertEqual(
            annotate_types(
                optimizer.qualify.qualify(
                    parse_one(
                        "SELECT x FROM UNNEST(GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-06 02:00:00', interval 1 day)) AS x"
                    )
                )
            )
            .selects[0]
            .type,
            exp.DataType.build("timestamp"),
        )

    def test_unnest_struct_field_annotation(self):
        """Test that UNNEST of struct array without column aliases exposes struct fields with proper types"""
        expression = annotate_types(
            optimizer.qualify.qualify(
                parse_one(
                    """ WITH data AS ( SELECT [STRUCT('Bob' AS first_name, 'Smith' AS last_name)] AS users ) SELECT first_name, last_name FROM data, UNNEST(users) """,
                    dialect="bigquery",
                ),
                dialect="bigquery",
            ),
            dialect="bigquery",
        )

        self.assertEqual(
            expression.selects[0].type, exp.DataType.build("VARCHAR", dialect="bigquery")
        )
        self.assertEqual(
            expression.selects[1].type, exp.DataType.build("VARCHAR", dialect="bigquery")
        )

        # Selecting the UNNEST alias itself should give the whole struct.
        expression = annotate_types(
            optimizer.qualify.qualify(
                parse_one(
                    """ SELECT person FROM UNNEST([STRUCT('Charlie' AS name, 40 AS age)]) AS person """,
                    dialect="bigquery",
                ),
                dialect="bigquery",
            ),
            dialect="bigquery",
        )

        select_type = expression.selects[0].type
        self.assertTrue(select_type.is_type(exp.DataType.Type.STRUCT))
        self.assertEqual(len(select_type.expressions), 2)

        fields = {col_def.name: col_def.kind for col_def in select_type.expressions}
        self.assertEqual(fields.get("name"), exp.DataType.build("VARCHAR", dialect="bigquery"))
        self.assertEqual(fields.get("age"), exp.DataType.build("INT", dialect="bigquery"))

        # Same query as the first case, but with a table alias on the UNNEST.
        expression = annotate_types(
            optimizer.qualify.qualify(
                parse_one(
                    """ WITH data AS ( SELECT [STRUCT('Bob' AS first_name, 'Smith' AS last_name)] AS users ) SELECT first_name, last_name FROM data, UNNEST(users) AS p """,
                    dialect="bigquery",
                ),
                dialect="bigquery",
            ),
            dialect="bigquery",
        )

        self.assertEqual(
            expression.selects[0].type, exp.DataType.build("VARCHAR", dialect="bigquery")
        )
        self.assertEqual(
            expression.selects[1].type, exp.DataType.build("VARCHAR", dialect="bigquery")
        )

        expression = annotate_types(
            optimizer.qualify.qualify(
                parse_one(
                    """ SELECT name FROM UNNEST([STRUCT('Charlie' AS name, 40 AS age)]) AS person """,
                    dialect="bigquery",
                ),
                dialect="bigquery",
            ),
            dialect="bigquery",
        )

        select_type = expression.selects[0].type
        self.assertTrue(select_type.is_type(exp.DataType.build("VARCHAR", dialect="bigquery")))

    def test_map_annotation(self):
        """Check MAP types annotated for DuckDB and Spark map constructors."""
        # ToMap annotation
        expression = annotate_types(parse_one("SELECT MAP {'x': 1}", read="duckdb"))
        self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)"))

        # Map annotation
        expression = annotate_types(
            parse_one("SELECT MAP(['key1', 'key2', 'key3'], [10, 20, 30])", read="duckdb")
        )
        self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, INT)"))

        # VarMap annotation
        expression = annotate_types(parse_one("SELECT MAP('a', 'b')", read="spark"))
        self.assertEqual(expression.selects[0].type, exp.DataType.build("MAP(VARCHAR, VARCHAR)"))

    def test_union_annotation(self):
        """Check the column type resulting from UNIONs of differently typed branches."""
        for left, right, expected_type in (
            ("SELECT 1::INT AS c", "SELECT 2::BIGINT AS c", "BIGINT"),
            ("SELECT 1::INT AS c", "SELECT 2::BIGDECIMAL AS c", "BIGDECIMAL"),
            ("SELECT 1 AS c", "SELECT NULL AS c", "INT"),
            ("SELECT FOO() AS c", "SELECT 1 AS c", "UNKNOWN"),
            ("SELECT FOO() AS c", "SELECT BAR() AS c", "UNKNOWN"),
        ):
            with self.subTest(f"left: {left}, right: {right}, expected: {expected_type}"):
                # Each pair is checked in both branch orders.
                lr = annotate_types(parse_one(f"SELECT t.c FROM ({left} UNION ALL {right}) t(c)"))
                rl = annotate_types(parse_one(f"SELECT t.c FROM ({right} UNION ALL {left}) t(c)"))
                assert lr.selects[0].type == rl.selects[0].type == exp.DataType.build(expected_type)

        union_by_name = annotate_types(
            parse_one(
                "SELECT t.a, t.d FROM (SELECT 1 a, 3 d, UNION ALL BY NAME SELECT 7.0 d, 8::BIGINT a) AS t(a, d)"
            )
        )
        self.assertEqual(union_by_name.selects[0].type.this, exp.DataType.Type.BIGINT)
        self.assertEqual(union_by_name.selects[1].type.this, exp.DataType.Type.DOUBLE)

        # Test chained UNIONs
        sql = """ WITH t AS ( SELECT NULL AS col UNION SELECT NULL AS col UNION SELECT 'a' AS col UNION SELECT NULL AS col UNION SELECT NULL AS col ) SELECT col FROM t; """
        self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)

        # Test UNIONs with nested subqueries
        sql = """ WITH t AS ( SELECT NULL AS col UNION (SELECT NULL AS col UNION ALL SELECT 'a' AS col) ) SELECT col FROM t; """
        self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)

        sql = """ WITH t AS ( (SELECT NULL AS col UNION ALL SELECT 'a' AS col) UNION SELECT NULL AS col ) SELECT col FROM t; """
        self.assertEqual(optimizer.optimize(sql).selects[0].type.this, exp.DataType.Type.VARCHAR)

        # BigQuery: STRING coerces to temporal types in UNION
        for left, right, expected_type in (
            ("SELECT '2010-01-01' AS c", "SELECT DATE '2020-02-02' AS c", "DATE"),
            (
                "SELECT '2010-01-01 00:00:00' AS c",
                "SELECT DATETIME '2020-02-02 00:00:00' AS c",
                "DATETIME",
            ),
            ("SELECT '00:00:00' AS c", "SELECT TIME '00:01:00' AS c", "TIME"),
            (
                "SELECT '2010-01-01 00:00:00' AS c",
                "SELECT TIMESTAMP '2020-02-02 00:00:00' AS c",
                "TIMESTAMP",
            ),
        ):
            with self.subTest(f"left: {left}, right: {right}, expected: {expected_type}"):
                lr = annotate_types(
                    parse_one(
                        f"SELECT t.c FROM ({left} UNION ALL {right}) t(c)", dialect="bigquery"
                    ),
                    dialect="bigquery",
                )
                rl = annotate_types(
                    parse_one(
                        f"SELECT t.c FROM ({right} UNION ALL {left}) t(c)", dialect="bigquery"
                    ),
                    dialect="bigquery",
                )
                assert (
                    lr.selects[0].type
                    == rl.selects[0].type
                    == exp.DataType.build(expected_type, dialect="bigquery")
                )

    def test_udtf_annotation(self):
        """Check that annotating a Snowflake TABLE(GENERATOR(...)) query leaves its SQL unchanged."""
        table_udtf = parse_one(
            "SELECT * FROM TABLE(GENERATOR(ROWCOUNT => 100000))",
            read="snowflake",
        )
        self.assertEqual(
            annotate_types(table_udtf, dialect="snowflake").sql("snowflake"),
            "SELECT * FROM TABLE(GENERATOR(ROWCOUNT => 100000))",
        )

    def test_recursive_cte(self):
        """Check which CTE sources are visible inside each CTE scope of a recursive WITH."""
        query = parse_one(
            """ with recursive t(n) AS ( select 1 union all select n + 1 FROM t where n < 3 ), y AS ( select n FROM t union all select n + 1 FROM y where n < 2 ) select * from y """
        )

        scope_t, scope_y = build_scope(query).cte_scopes
        self.assertEqual(set(scope_t.cte_sources), {"t"})
        self.assertEqual(set(scope_y.cte_sources), {"t", "y"})

    def test_schema_with_spaces(self):
        """Check star expansion when schema column names contain spaces."""
        schema = {
            "a": {
                "b c": "text",
                '"d e"': "text",
            }
        }

        self.assertEqual(
            optimizer.optimize(parse_one("SELECT * FROM a"), schema=schema),
            parse_one('SELECT "a"."b c" AS "b c", "a"."d e" AS "d e" FROM "a" AS "a"'),
        )

    def test_quotes(self):
        """Check Snowflake star expansion with quoted and unquoted schema names."""
        schema = {
            "example": {
                '"source"': {
                    "id": "text",
                    '"name"': "text",
                    '"payload"': "text",
                }
            }
        }

        expected = parse_one(
            """ SELECT "source"."ID" AS "ID", "source"."name" AS "name", "source"."payload" AS "payload" FROM "EXAMPLE"."source" AS "source" """,
            read="snowflake",
        ).sql(pretty=True, dialect="snowflake")

        # qualify() and the full optimize() must produce the same expected SQL.
        for func in (optimizer.qualify.qualify, optimizer.optimize):
            source_query = parse_one('SELECT * FROM example."source" AS "source"', read="snowflake")
            transformed = func(source_query, dialect="snowflake", schema=schema)
            self.assertEqual(transformed.sql(pretty=True, dialect="snowflake"), expected)

    def test_no_pseudocolumn_expansion(self):
        """Check that `SELECT *` with a BigQuery schema omits _PARTITIONDATE and _PARTITIONTIME."""
        schema = {
            "a": {
                "a": "text",
                "b": "text",
                "_PARTITIONDATE": "date",
                "_PARTITIONTIME": "timestamp",
            }
        }

        self.assertEqual(
            optimizer.optimize(
                parse_one("SELECT * FROM a"),
                schema=MappingSchema(schema, dialect="bigquery"),
            ),
            parse_one('SELECT "a"."a" AS "a", "a"."b" AS "b" FROM "a" AS "a"'),
        )

    def test_semistructured(self):
        """Check that qualifying a Snowflake `a.b:c` selection gives it the alias "c"."""
        query = parse_one("select a.b:c from d", read="snowflake")
        qualified = optimizer.qualify.qualify(query)
        self.assertEqual(qualified.expressions[0].alias, "c")

    def test_gen(self):
        """Check that simplify.gen returns a str for a no-argument instance of every function class."""
        for func in exp.ALL_FUNCTIONS:
            self.assertIsInstance(optimizer.simplify.gen(func()), str)

    def test_normalization_distance(self):
        """Check normalization_distance for OR-chains of `a AND b` of several lengths."""

        def gen_expr(depth: int) -> exp.Expr:
            # Builds "a AND b OR a AND b OR ..." with `depth` conjunctions.
            return parse_one(" OR ".join("a AND b" for _ in range(depth)))

        self.assertEqual(4, normalization_distance(gen_expr(2), max_=100))
        self.assertEqual(18, normalization_distance(gen_expr(3), max_=100))
        self.assertEqual(110, normalization_distance(gen_expr(10), max_=100))

    def test_manually_annotate_snowflake(self):
        """Check column types after running qualify and annotate_types by hand for Snowflake."""
        dialect = "snowflake"
        schema = {
            "SCHEMA": {
                "TBL": {"COL": "INT", "col2": "VARCHAR"},
            }
        }
        example_query = 'SELECT * FROM "SCHEMA"."TBL"'

        expression = parse_one(example_query, dialect=dialect)
        qual = optimizer.qualify.qualify(expression, schema=schema, dialect=dialect)
        annotated = optimizer.annotate_types.annotate_types(qual, schema=schema, dialect=dialect)

        self.assertTrue(annotated.selects[0].is_type("INT"))
        self.assertTrue(annotated.selects[1].is_type("VARCHAR"))

    def test_annotate_table_as_struct_bigquery(self):
        """Check the type BigQuery annotation gives to a table or subquery alias selected as a value."""
        dialect = "bigquery"
        # NOTE(review): "struct" here and the "STRUCT>" / "STRUCT" / "ARRAY" strings below look
        # truncated (lost type parameters) -- confirm against upstream.
        schema = {"d": {"s": {"t": {"c1": "int64", "c2": "struct"}}}}

        def _annotate(query: str) -> exp.Expr:
            # Parse, qualify and annotate `query` using the dialect and schema defined above.
            expression = parse_one(query, dialect=dialect)
            qual = optimizer.qualify.qualify(expression, schema=schema, dialect=dialect)
            return optimizer.annotate_types.annotate_types(qual, schema=schema, dialect=dialect)

        example_query = "SELECT t FROM d.s.t"
        annotated = _annotate(example_query)

        self.assertIsInstance(annotated.selects[0].this, exp.TableColumn)
        self.assertEqual(
            annotated.sql("bigquery"), "SELECT `t` AS `_col_0` FROM `d`.`s`.`t` AS `t`"
        )
        self.assertTrue(
            annotated.selects[0].is_type("STRUCT>")
        )

        example_query = "SELECT subq FROM (SELECT * from d.s.t) subq"
        annotated = _annotate(example_query)

        self.assertTrue(
            annotated.selects[0].is_type("STRUCT>")
        )

        example_query = "WITH t AS (SELECT 1 AS c) SELECT t FROM t"
        annotated = _annotate(example_query)

        self.assertTrue(annotated.selects[0].is_type("STRUCT"))

        example_query = "WITH t AS (SELECT FOO() AS c) SELECT t FROM t"
        annotated = _annotate(example_query)

        self.assertTrue(annotated.selects[0].is_type("UNKNOWN"))

        for query in ("SELECT 'foo'", "(SELECT 'foo')"):
            query = f"SELECT ARRAY({query})"
            with self.subTest(f"Annotating '{query}' in BigQuery"):
                self.assertTrue(_annotate(query).selects[0].is_type("ARRAY"))

    def test_semi_anti_join(self):
        """Check that optimize() keeps semi/anti joins and the sources they reference."""
        # - Do not remove semi/anti join
        # - Do not remove CTEs/subqueries that participate in anti/semi joins, even though they do not count as selected sources
        for join_kind in ("LEFT ANTI", "ANTI", "SEMI"):
            query = f"WITH x AS (SELECT 1 AS b UNION ALL SELECT 2 AS b) SELECT x.b FROM x {join_kind} JOIN (SELECT 1 AS b) AS sub ON x.b = sub.b"
            self.assertEqual(
                optimizer.optimize(query).sql(),
                f'WITH "x" AS (SELECT 1 AS "b" UNION ALL SELECT 2 AS "b"), "sub" AS (SELECT 1 AS "b") SELECT "x"."b" AS "b" FROM "x" AS "x" {join_kind} JOIN "sub" AS "sub" ON "sub"."b" = "x"."b"',
            )

    def test_qualify_group_by_conflict_bigquery(self):
        """Check BigQuery qualification when a select alias has the same name as the source table."""
        dialect = "bigquery"
        # NOTE(review): "struct" may have lost its type parameters in extraction -- confirm.
        schema = {"custom_fields": {"id": "int", "col": "struct"}}
        query = "SELECT id, ARRAY_AGG(col) AS custom_fields FROM custom_fields AS custom_fields GROUP BY id HAVING id >= 1"

        qual = optimizer.qualify.qualify(
            parse_one(query, dialect=dialect),
            schema=schema,
            dialect=dialect,
        )
        sql = qual.sql(dialect=dialect)

        self.assertEqual(
            sql,
            "SELECT `custom_fields`.`id` AS `id`, ARRAY_AGG(`custom_fields`.`col`) AS `custom_fields` FROM `custom_fields` AS `custom_fields` GROUP BY `id` HAVING `id` >= 1",
        )

    def test_struct_annotation_bigquery(self):
        """Check the type of a struct field read from an element of an ARRAY_AGG(STRUCT(...)) column."""
        sql = """ WITH t1 AS (SELECT 'foo' AS c), t2 AS (SELECT ARRAY_AGG(STRUCT(c)) AS arr FROM t1) SELECT arr[0].c FROM t2 """

        query = parse_one(sql, dialect="bigquery")
        qualified = optimizer.qualify.qualify(query, dialect="bigquery")
        annotated = optimizer.annotate_types.annotate_types(qualified, dialect="bigquery")

        assert annotated.selects[0].type == exp.DataType.build("VARCHAR")

    def test_bigquery_unnest_alias_shadowing(self):
        """Test that BigQuery UNNEST table alias shadows column names from other tables."""
        sql = """ SELECT timeline_date FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03')) AS timeline_date LEFT JOIN production_tier ON production_tier.timeline_date = timeline_date """
        schema = {"production_tier": {"timeline_date": "DATE", "id": "INT"}}
        result = optimizer.qualify.qualify(
            parse_one(sql, dialect="bigquery"),
            schema=schema,
            dialect="bigquery",
        )
        result_sql = result.sql(dialect="bigquery")
        self.assertEqual(
            result_sql,
            "SELECT `timeline_date` AS `timeline_date` "
            "FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03', INTERVAL '1' DAY)) AS `timeline_date` "
            "LEFT JOIN
`production_tier` AS `production_tier` " "ON `production_tier`.`timeline_date` = `timeline_date`", ) def test_struct_field_case_sensitivity_annotation(self): schema = {"t": {"struct_col": "STRUCT"}} def _assert_dot_annotation(query: str, dialect: str, expected: exp.DataType.Type): parsed = parse_one(query, dialect=dialect) qualified = optimizer.qualify.qualify(parsed, schema=schema, dialect=dialect) annotated = optimizer.annotate_types.annotate_types( qualified, schema=schema, dialect=dialect ) self.assertEqual(annotated.selects[0].type.this, expected) # BigQuery is case-insensitive: exact field name match _assert_dot_annotation( "SELECT struct_col.fooBar FROM t", "bigquery", exp.DataType.Type.TEXT ) # BigQuery: lower case _assert_dot_annotation( "SELECT struct_col.foobar FROM t", "bigquery", exp.DataType.Type.TEXT ) # BigQuery: different case _assert_dot_annotation( "SELECT struct_col.Foobar FROM t", "bigquery", exp.DataType.Type.TEXT ) # ClickHouse is case-sensitive: exact field name match _assert_dot_annotation( "SELECT struct_col.fooBar FROM t", "clickhouse", exp.DataType.Type.TEXT ) # ClickHouse: lower case _assert_dot_annotation( "SELECT struct_col.foobar FROM t", "clickhouse", exp.DataType.Type.UNKNOWN ) def test_annotate_object_construct(self): sql = "SELECT OBJECT_CONSTRUCT('foo', 'bar', 'a b', 'c d') AS c" query = parse_one(sql, dialect="snowflake") annotated = optimizer.annotate_types.annotate_types(query, dialect="snowflake") self.assertEqual( annotated.selects[0].type.sql("snowflake"), 'OBJECT("foo" VARCHAR, "a b" VARCHAR)' ) def test_nonnull_annotation(self): for literal_sql in ("1", "'foo'", "2.5"): with self.subTest(f"Test NULL annotation for literal: {literal_sql}"): sql = f"SELECT {literal_sql}" query = parse_one(sql) annotated = annotate_types(query) assert annotated.selects[0].meta.get("nonnull") is True schema = {"foo": {"id": "INT"}} operand_pairs = ( ("1", "1", True), ("foo.id", "foo.id", None), ("1", "foo.id", None), ("foo.id", "1", None), ) 
for predicate in (">", "<", ">=", "<=", "=", "!=", "<>", "LIKE", "NOT LIKE"): for operand1, operand2, nonnull in operand_pairs: sql_predicate = f"{operand1} {predicate} {operand2}" with self.subTest(f"Test NULL propagation for predicate: {predicate}"): sql = f"SELECT {sql_predicate} FROM foo" query = parse_one(sql) annotated = annotate_types(query, schema=schema) assert annotated.selects[0].meta.get("nonnull") is nonnull for predicate in ("IS NULL", "IS NOT NULL"): sql_predicate = f"foo.id {predicate}" with self.subTest(f"Test NULL propagation for predicate: {predicate}"): sql = f"SELECT {sql_predicate} FROM foo" query = parse_one(sql) annotated = annotate_types(query, schema=schema) assert annotated.selects[0].meta.get("nonnull") is True for connector in ("AND", "OR"): for predicate in (">", "<", ">=", "<=", "=", "!=", "<>", "LIKE", "NOT LIKE"): for operand1, operand2, nonnull in operand_pairs: sql_predicate = f"({operand1} {predicate} {operand2})" sql_connector = f"{sql_predicate} {connector} {sql_predicate}" with self.subTest( f"Test NULL propagation for connector: {connector} with predicates: {predicate}" ): sql = f"SELECT {sql_connector} FROM foo" query = parse_one(sql) annotated = annotate_types(query, schema=schema) assert annotated.selects[0].meta.get("nonnull") is nonnull for unary in ("NOT", "-"): for value, nonnull in (("1", True), ("foo.id", None)): with self.subTest(f"Test NULL propagation for unary: {unary} with value: {value}"): sql = f"SELECT {unary} {value} FROM foo" query = parse_one(sql) annotated = annotate_types(query, schema=schema) assert annotated.selects[0].meta.get("nonnull") is nonnull ch_query = parse_one("select c1, c2 from t") ch_schema = {"t": {"c1": "Int32", "c2": "Nullable(Int32)"}} qualified_query = qualify_columns(ch_query, schema=ch_schema, dialect="clickhouse") annotated = annotate_types(qualified_query, schema=ch_schema, dialect="clickhouse") assert annotated.selects[0].meta.get("nonnull") is True assert 
annotated.selects[1].meta.get("nonnull") is None def test_case_sensitive_json_dot_access(self): schema = { "t": { "col": "JSON", "struct_col": "STRUCT>>", } } def _parse_and_optimize(query: str, dialect: str) -> exp.Expr: query = parse_one(query, dialect=dialect) optimized = optimizer.optimize(query, schema=schema, dialect=dialect) return optimized.sql(dialect=dialect) # BigQuery for dot_access in ("col.fOo.BaR.BaZ", "t.col.fOo.BaR.BaZ"): with self.subTest(f"Test case sensitive JSON dot access for BigQuery: {dot_access}"): dot_access_normalized = "`t`.`col`.`fOo`.`BaR`.`BaZ`" sql = _parse_and_optimize( f"SELECT JSON_VALUE({dot_access}, '$') AS col FROM t", dialect="bigquery" ) assert ( sql == f"SELECT JSON_VALUE({dot_access_normalized}, '$') AS `col` FROM `t` AS `t`" ) sql = _parse_and_optimize(f"SELECT {dot_access} AS col FROM t", dialect="bigquery") assert sql == f"SELECT {dot_access_normalized} AS `col` FROM `t` AS `t`" # BigQuery: STRUCT field accesses are still normalized sql = _parse_and_optimize( "SELECT struct_col.FlD1.flD2.FLD3 AS col FROM t", dialect="bigquery" ) assert sql == "SELECT `t`.`struct_col`.`fld1`.`fld2`.`fld3` AS `col` FROM `t` AS `t`" # Databricks sql = _parse_and_optimize("SELECT col:A.a, col:a.A FROM t", dialect="databricks") assert sql == "SELECT `t`.`col`:A.a AS `a`, `t`.`col`:a.A AS `A` FROM `t` AS `t`" # Clickhouse sql = _parse_and_optimize("SELECT col.A.a, col.a.A FROM t", dialect="clickhouse") assert sql == 'SELECT "t"."col"."A"."a" AS "a", "t"."col"."a"."A" AS "A" FROM "t" AS "t"' # DuckDB sql = _parse_and_optimize("SELECT col.A.a, col.a.A FROM t", dialect="duckdb") assert sql == 'SELECT "t"."col"."A"."a" AS "a", "t"."col"."a"."A" AS "a" FROM "t" AS "t"' # Snowflake sql = _parse_and_optimize("SELECT col:A.a, col:a.A FROM t", dialect="snowflake") assert ( sql == '''SELECT GET_PATH("T"."COL", 'A.a') AS "a", GET_PATH("T"."COL", 'a.A') AS "A" FROM "T" AS "T"''' ) query = parse_one( "SELECT JSON_VALUE(item.id) FROM 
UNNEST(JSON_QUERY_ARRAY(PARSE_JSON('[{\"id\": 1}]'))) AS item", dialect="bigquery", ) optimized = optimizer.optimize(query, dialect="bigquery") for i in optimized.find_all(exp.Identifier): self.assertNotIsInstance(i.this, exp.Identifier) assert ( optimized.sql("bigquery") == "SELECT JSON_VALUE(`item`.`id`, '$') AS `_col_0` FROM UNNEST(JSON_QUERY_ARRAY(PARSE_JSON('[{\"id\": 1}]'), '$')) AS `item`" ) def test_deep_ast_type_annotation(self): union_sql = "SELECT 1 UNION ALL " * 2000 + "SELECT 1" annotated = annotate_types(parse_one(union_sql)) self.assertEqual(annotated.sql(), union_sql) self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.INT) binary_sql = "SELECT " + "t.a + " * 2000 + "t.a FROM t" annotated = annotate_types(parse_one(binary_sql), schema={"t": {"a": "INT"}}) self.assertEqual(annotated.sql(), binary_sql) self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.INT) def test_null_coerce_annotation(self): null_sql = "SELECT t.foo FROM (SELECT CAST(1 AS BIGDECIMAL) AS foo UNION ALL SELECT NULL AS foo) AS t" annotated = parse_and_optimize(annotate_types, null_sql, "bigquery", dialect="bigquery") self.assertEqual(annotated.sql(), null_sql) self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.BIGDECIMAL) null_sql = "SELECT t.foo FROM (SELECT NULL AS foo UNION ALL SELECT CAST(1 AS BIGDECIMAL) AS foo) AS t" annotated = parse_and_optimize(annotate_types, null_sql, "bigquery", dialect="bigquery") self.assertEqual(annotated.sql(), null_sql) self.assertEqual(annotated.selects[0].type.this, exp.DataType.Type.BIGDECIMAL) def test_correlated_subqueries_annotation(self): correlated_sql = "SELECT (SELECT col) FROM t" query = parse_one(correlated_sql, dialect="bigquery") qualified = optimizer.qualify.qualify( query, dialect="bigquery", schema={"t": {"col": "BIGNUMERIC"}} ) annotated = optimizer.annotate_types.annotate_types( qualified, dialect="bigquery", schema={"t": {"col": "BIGNUMERIC"}} ) self.assertEqual( 
annotated.sql("bigquery"), "SELECT (SELECT `t`.`col` AS `col`) AS `_col_0` FROM `t` AS `t`", ) assert annotated.selects[0].type == exp.DataType.build("BIGNUMERIC", dialect="bigquery") correlated_sql = """ SELECT ( SELECT MAX(u_x) FROM UNNEST([1, d_x]) AS u_x WHERE u_x < d_z ) AS c_i FROM ( SELECT CAST(20 AS BIGNUMERIC) AS d_x, 30 AS d_z ) AS d_t """ query = parse_one(correlated_sql, dialect="bigquery") qualified = optimizer.qualify.qualify( query, dialect="bigquery", schema={"d_t": {"d_x": "STRING"}} ) annotated = optimizer.annotate_types.annotate_types( qualified, dialect="bigquery", schema={"d_t": {"d_x": "STRING"}} ) self.assertEqual( annotated.sql("bigquery"), "SELECT (SELECT MAX(`u_x`) AS `_col_0` FROM UNNEST([1, `d_t`.`d_x`]) AS `u_x` WHERE `u_x` < `d_t`.`d_z`) AS `c_i` FROM (SELECT CAST(20 AS BIGNUMERIC) AS `d_x`, 30 AS `d_z`) AS `d_t`", ) assert annotated.selects[0].type == exp.DataType.build("BIGNUMERIC", dialect="bigquery") correlated_sql = "SELECT (SELECT col FROM t) as u FROM (SELECT 1 AS col) AS t" query = parse_one(correlated_sql) qualified = optimizer.qualify.qualify(query, schema={"t": {"col": "TEXT"}}) annotated = optimizer.annotate_types.annotate_types( qualified, schema={"t": {"col": "TEXT"}} ) self.assertEqual( annotated.sql(), 'SELECT (SELECT "t"."col" AS "col" FROM "t" AS "t") AS "u" FROM (SELECT 1 AS "col") AS "t"', ) assert annotated.selects[0].type == exp.DataType.build("TEXT") ================================================ FILE: tests/test_parser.py ================================================ import time import unittest from unittest.mock import patch from sqlglot import Parser, exp, parse, parse_one from sqlglot.errors import ErrorLevel, ParseError from sqlglot.parser import logger as parser_logger from tests.helpers import assert_logger_contains class TestParser(unittest.TestCase): def test_parse_empty(self): with self.assertRaises(ParseError): parse_one("") def test_parse_into(self): self.assertIsInstance(parse_one("(1)", 
into=exp.Tuple), exp.Tuple) self.assertIsInstance(parse_one("(1,)", into=exp.Tuple), exp.Tuple) self.assertIsInstance(parse_one("(x=1)", into=exp.Tuple), exp.Tuple) self.assertIsInstance(parse_one("select * from t", into=exp.Select), exp.Select) self.assertIsInstance(parse_one("select * from t limit 5", into=exp.Select), exp.Select) self.assertIsInstance(parse_one("left join foo", into=exp.Join), exp.Join) self.assertIsInstance(parse_one("int", into=exp.DataType), exp.DataType) self.assertIsInstance(parse_one("array", into=exp.DataType), exp.DataType) self.assertIsInstance(parse_one("foo", into=exp.Table), exp.Table) self.assertIsInstance( parse_one( "WHEN MATCHED THEN UPDATE SET target.salary = COALESCE(source.salary, target.salary)", into=exp.Whens, ), exp.Whens, ) with self.assertRaises(ParseError) as ctx: parse_one("SELECT * FROM tbl", into=exp.Table) self.assertEqual( str(ctx.exception), "Failed to parse 'SELECT * FROM tbl' into ", ) self.assertIsInstance(parse_one("foo INT NOT NULL", into=exp.ColumnDef), exp.ColumnDef) def test_parse_into_error(self): expected_message = ( "Failed to parse 'SELECT 1;' into []" ) expected_errors = [ { "description": "Invalid expression / Unexpected token", "line": 1, "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", "into_expression": exp.From, } ] with self.assertRaises(ParseError) as ctx: parse_one("SELECT 1;", read="sqlite", into=[exp.From]) self.assertEqual(str(ctx.exception), expected_message) self.assertEqual(ctx.exception.errors, expected_errors) def test_parse_into_errors(self): expected_message = "Failed to parse 'SELECT 1;' into [, ]" expected_errors = [ { "description": "Invalid expression / Unexpected token", "line": 1, "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", "into_expression": exp.From, }, { "description": "Invalid expression / Unexpected token", "line": 1, "col": 6, "start_context": "", "highlight": "SELECT", "end_context": " 1;", "into_expression": 
exp.Join, }, ] with self.assertRaises(ParseError) as ctx: parse_one("SELECT 1;", "sqlite", into=[exp.From, exp.Join]) self.assertEqual(str(ctx.exception), expected_message) self.assertEqual(ctx.exception.errors, expected_errors) def test_column(self): columns = parse_one("select a, ARRAY[1] b, case when 1 then 1 end").find_all(exp.Column) assert len(list(columns)) == 1 self.assertIsNotNone(parse_one("date").find(exp.Column)) def test_tuple(self): parse_one("(a,)").assert_is(exp.Tuple) def test_structs(self): cast = parse_one("cast(x as struct)") self.assertIsInstance(cast.to.expressions[0], exp.DataType) self.assertEqual(cast.sql(), "CAST(x AS STRUCT)") cast = parse_one("cast(x as struct)") self.assertIsInstance(cast.to.expressions[0], exp.DataType) self.assertEqual(cast.sql(), "CAST(x AS STRUCT)") def test_float(self): self.assertEqual(parse_one(".2"), parse_one("0.2")) def test_unnest(self): unnest_sql = "UNNEST(foo)" expr = parse_one(unnest_sql) self.assertIsInstance(expr, exp.Unnest) self.assertIsInstance(expr.expressions, list) self.assertEqual(expr.sql(), unnest_sql) def test_unnest_projection(self): expr = parse_one("SELECT foo IN UNNEST(bla) AS bar") self.assertIsInstance(expr.selects[0], exp.Alias) self.assertEqual(expr.selects[0].output_name, "bar") self.assertIsNotNone(parse_one("select unnest(x)").find(exp.Unnest)) def test_unary_plus(self): self.assertEqual(parse_one("+15"), exp.Literal.number(15)) def test_table(self): tables = [t.sql() for t in parse_one("select * from a, b.c, .d").find_all(exp.Table)] self.assertEqual(set(tables), {"a", "b.c", "d"}) def test_union(self): self.assertIsInstance(parse_one("SELECT * FROM (SELECT 1) UNION SELECT 2"), exp.Union) self.assertIsInstance( parse_one("SELECT x FROM y HAVING x > (SELECT 1) UNION SELECT 2"), exp.Union ) # Check that modifiers are attached to the topmost union node and not the rightmost query single_union = "SELECT x FROM t1 UNION ALL SELECT x FROM t2 LIMIT 1" expr = parse_one(single_union) limit 
= expr.assert_is(exp.Union).args.get("limit") self.assertIsInstance(limit, exp.Limit) self.assertEqual(expr.sql(), single_union) two_unions = ( "SELECT x FROM t1 UNION ALL SELECT x FROM t2 UNION ALL SELECT x FROM t3 LIMIT 1" ) expr = parse_one(two_unions) limit = expr.assert_is(exp.Union).args.get("limit") self.assertIsInstance(limit, exp.Limit) self.assertEqual(expr.sql(), two_unions) expr = parse_one(single_union, read="clickhouse") self.assertIsNone(expr.args.get("limit")) self.assertEqual(expr.sql(dialect="clickhouse"), single_union) def test_select(self): self.assertIsNotNone(parse_one("select 1 natural")) self.assertIsNotNone(parse_one("select * from (select 1) x order by x.y").args["order"]) self.assertIsNotNone( parse_one("select * from x where a = (select 1) order by x.y").args["order"] ) self.assertEqual(len(parse_one("select * from (select 1) x cross join y").args["joins"]), 1) self.assertEqual( parse_one("""SELECT * FROM x CROSS JOIN y, z LATERAL VIEW EXPLODE(y)""").sql(), """SELECT * FROM x CROSS JOIN y, z LATERAL VIEW EXPLODE(y)""", ) self.assertIsNone( parse_one("create table a as (select b from c) index").find(exp.TableAlias) ) def test_command(self): with self.assertLogs(parser_logger) as cm: expressions = parse("SET x = 1; ADD JAR s3://a; SELECT 1", read="hive") self.assertEqual(len(expressions), 3) self.assertEqual(expressions[0].sql(), "SET x = 1") self.assertEqual(expressions[1].sql(), "ADD JAR s3://a") self.assertEqual(expressions[2].sql(), "SELECT 1") assert "'ADD JAR s3://a'" in cm.output[0] def test_lambda_struct(self): expression = parse_one("FILTER(a.b, x -> x.id = id)") lambda_expr = expression.expression self.assertIsInstance(lambda_expr.this.this, exp.Dot) self.assertEqual(lambda_expr.sql(), "x -> x.id = id") self.assertIsNone(parse_one("FILTER([], x -> x)").find(exp.Column)) def test_transactions(self): expression = parse_one("BEGIN TRANSACTION") self.assertIsNone(expression.this) self.assertEqual(expression.args["modes"], []) 
self.assertEqual(expression.sql(), "BEGIN") expression = parse_one("START TRANSACTION", read="mysql") self.assertIsNone(expression.this) self.assertEqual(expression.args["modes"], []) self.assertEqual(expression.sql(), "BEGIN") expression = parse_one("BEGIN DEFERRED TRANSACTION") self.assertEqual(expression.this, "DEFERRED") self.assertEqual(expression.args["modes"], []) self.assertEqual(expression.sql(), "BEGIN") expression = parse_one( "START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE", read="presto" ) self.assertIsNone(expression.this) self.assertEqual(expression.args["modes"][0], "READ WRITE") self.assertEqual(expression.args["modes"][1], "ISOLATION LEVEL SERIALIZABLE") self.assertEqual(expression.sql(), "BEGIN READ WRITE, ISOLATION LEVEL SERIALIZABLE") expression = parse_one("BEGIN", read="bigquery") self.assertNotIsInstance(expression, exp.Transaction) self.assertIsNone(expression.expression) self.assertEqual(expression.sql(), "BEGIN") def test_identify(self): expression = parse_one( """ SELECT a, "b", c AS c, d AS "D", e AS "y|z'" FROM y."z" """ ) assert expression.expressions[0].name == "a" assert expression.expressions[1].name == "b" assert expression.expressions[2].alias == "c" assert expression.expressions[3].alias == "D" assert expression.expressions[4].alias == "y|z'" table = expression.args["from_"].this assert table.name == "z" assert table.args["db"].name == "y" def test_multi(self): expressions = parse( """ SELECT * FROM a; SELECT * FROM b; """ ) assert len(expressions) == 2 assert expressions[0].args["from_"].name == "a" assert expressions[1].args["from_"].name == "b" expressions = parse("SELECT 1; ; SELECT 2") assert len(expressions) == 3 assert expressions[1] is None def test_expression(self): ignore = Parser(error_level=ErrorLevel.IGNORE) self.assertIsInstance(ignore.expression(exp.Hint(expressions=[])), exp.Hint) self.assertIsInstance(ignore.expression(exp.Hint(y="")), exp.Hint) self.assertIsInstance(ignore.expression(exp.Hint()), 
exp.Hint) default = Parser(error_level=ErrorLevel.RAISE) with self.assertRaises(TypeError): default.expression(exp.Hint(y="")) self.assertIsInstance(default.expression(exp.Hint(expressions=[])), exp.Hint) default.expression(exp.Hint()) self.assertEqual(len(default.errors), 2) warn = Parser(error_level=ErrorLevel.WARN) warn.expression(exp.Hint()) self.assertEqual(len(warn.errors), 1) def test_parse_errors(self): with self.assertRaises(ParseError): parse_one("IF(a > 0, a, b, c)") with self.assertRaises(ParseError): parse_one("IF(a > 0)") with self.assertRaises(ParseError): parse_one("SELECT CASE FROM x") with self.assertRaises(ParseError): parse_one("WITH cte AS (SELECT * FROM x)") with self.assertRaises(ParseError): parse_one("SELECT foo( FROM bar") self.assertEqual( parse_one( "CREATE TABLE t (i UInt8) ENGINE = AggregatingMergeTree() ORDER BY tuple()", read="clickhouse", error_level=ErrorLevel.RAISE, ).sql(dialect="clickhouse"), "CREATE TABLE t (i UInt8) ENGINE=AggregatingMergeTree() ORDER BY tuple()", ) with self.assertRaises(ParseError): parse_one("SELECT A[:") self.assertEqual(parse_one("as as", error_level=ErrorLevel.IGNORE).sql(), "AS as") def test_space(self): self.assertEqual( parse_one("SELECT ROW() OVER(PARTITION BY x) FROM x GROUP BY y").sql(), "SELECT ROW() OVER (PARTITION BY x) FROM x GROUP BY y", ) self.assertEqual( parse_one( """SELECT * FROM x GROUP BY y""" ).sql(), "SELECT * FROM x GROUP BY y", ) def test_missing_by(self): with self.assertRaises(ParseError): parse_one("SELECT FROM x ORDER BY") def test_parameter(self): self.assertEqual(parse_one("SELECT @x, @@x, @1").sql(), "SELECT @x, @@x, @1") def test_var(self): self.assertIsInstance(parse_one("INTERVAL '1' DAY").args["unit"], exp.Var) self.assertEqual(parse_one("SELECT @JOIN, @'foo'").sql(), "SELECT @JOIN, @'foo'") def test_comments_select(self): expression = parse_one( """ --comment1.1 --comment1.2 SELECT /*comment1.3*/ a, --comment2 b as B, --comment3:testing "test--annotation", c, --comment4 
--foo e, -- f -- space FROM foo """ ) self.assertEqual(expression.comments, ["comment1.1", "comment1.2", "comment1.3"]) self.assertEqual(expression.expressions[0].comments, ["comment2"]) self.assertEqual(expression.expressions[1].comments, ["comment3:testing"]) self.assertEqual(expression.expressions[2].comments, None) self.assertEqual(expression.expressions[3].comments, ["comment4 --foo"]) self.assertEqual(expression.expressions[4].comments, [""]) self.assertEqual(expression.expressions[5].comments, [" space"]) expression = parse_one( """ SELECT a.column_name --# Comment 1 ,b.column_name2, --# Comment 2 b.column_name3 AS NAME3 --# Comment 3 FROM table_name a JOIN table_name2 b ON a.column_name = b.column_name """ ) self.assertEqual(expression.expressions[0].comments, ["# Comment 1"]) self.assertEqual(expression.expressions[1].comments, ["# Comment 2"]) self.assertEqual(expression.expressions[2].comments, ["# Comment 3"]) def test_comments_select_cte(self): expression = parse_one( """ /*comment1.1*/ /*comment1.2*/ WITH a AS (SELECT 1) SELECT /*comment2*/ a.* FROM /*comment3*/ a """ ) self.assertEqual(expression.comments, ["comment2"]) self.assertEqual(expression.args.get("from_").comments, ["comment3"]) self.assertEqual(expression.args.get("with_").comments, ["comment1.1", "comment1.2"]) def test_comments_insert(self): expression = parse_one( """ --comment1.1 --comment1.2 INSERT INTO /*comment1.3*/ x /*comment2*/ VALUES /*comment3*/ (1, 'a', 2.0) """ ) self.assertEqual(expression.comments, ["comment1.1", "comment1.2", "comment1.3"]) self.assertEqual(expression.this.comments, ["comment2"]) def test_comments_insert_cte(self): expression = parse_one( """ /*comment1.1*/ /*comment1.2*/ WITH a AS (SELECT 1) INSERT INTO /*comment2*/ b /*comment3*/ SELECT * FROM a """ ) self.assertEqual(expression.comments, ["comment2"]) self.assertEqual(expression.this.comments, ["comment3"]) self.assertEqual(expression.args.get("with_").comments, ["comment1.1", "comment1.2"]) def 
test_comments_update(self): expression = parse_one( """ --comment1.1 --comment1.2 UPDATE /*comment1.3*/ tbl /*comment2*/ SET /*comment3*/ x = 2 WHERE /*comment4*/ x <> 2 """ ) self.assertEqual(expression.comments, ["comment1.1", "comment1.2", "comment1.3"]) self.assertEqual(expression.this.comments, ["comment2"]) self.assertEqual(expression.args.get("where").comments, ["comment4"]) def test_comments_update_cte(self): expression = parse_one( """ /*comment1.1*/ /*comment1.2*/ WITH a AS (SELECT * FROM b) UPDATE /*comment2*/ a /*comment3*/ SET col = 1 """ ) self.assertEqual(expression.comments, ["comment2"]) self.assertEqual(expression.this.comments, ["comment3"]) self.assertEqual(expression.args.get("with_").comments, ["comment1.1", "comment1.2"]) def test_comments_delete(self): expression = parse_one( """ --comment1.1 --comment1.2 DELETE /*comment1.3*/ FROM /*comment2*/ x /*comment3*/ WHERE /*comment4*/ y > 1 """ ) self.assertEqual(expression.comments, ["comment1.1", "comment1.2", "comment1.3"]) self.assertEqual(expression.this.comments, ["comment3"]) self.assertEqual(expression.args.get("where").comments, ["comment4"]) def test_comments_delete_cte(self): expression = parse_one( """ /*comment1.1*/ /*comment1.2*/ WITH a AS (SELECT * FROM b) --comment2 DELETE FROM a /*comment3*/ """ ) self.assertEqual(expression.comments, ["comment2"]) self.assertEqual(expression.this.comments, ["comment3"]) self.assertEqual(expression.args["with_"].comments, ["comment1.1", "comment1.2"]) def test_type_literals(self): self.assertEqual(parse_one("int 1"), parse_one("CAST(1 AS INT)")) self.assertEqual(parse_one("int.5"), parse_one("CAST(0.5 AS INT)")) self.assertEqual( parse_one("TIMESTAMP '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMP)" ) self.assertEqual( parse_one("TIMESTAMP(1) '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMP(1))" ) self.assertEqual( parse_one("TIMESTAMP WITH TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMPTZ)", ) self.assertEqual( 
parse_one("TIMESTAMP WITH LOCAL TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMPLTZ)", ) self.assertEqual( parse_one("TIMESTAMP WITHOUT TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMP)", ) self.assertEqual( parse_one("TIMESTAMP(1) WITH TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMPTZ(1))", ) self.assertEqual( parse_one("TIMESTAMP(1) WITH LOCAL TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMPLTZ(1))", ) self.assertEqual( parse_one("TIMESTAMP(1) WITHOUT TIME ZONE '2022-01-01'").sql(), "CAST('2022-01-01' AS TIMESTAMP(1))", ) self.assertEqual(parse_one("TIMESTAMP(1) WITH TIME ZONE").sql(), "TIMESTAMPTZ(1)") self.assertEqual(parse_one("TIMESTAMP(1) WITH LOCAL TIME ZONE").sql(), "TIMESTAMPLTZ(1)") self.assertEqual(parse_one("TIMESTAMP(1) WITHOUT TIME ZONE").sql(), "TIMESTAMP(1)") self.assertEqual(parse_one("""JSON '{"x":"y"}'""").sql(), """PARSE_JSON('{"x":"y"}')""") self.assertIsInstance(parse_one("TIMESTAMP(1)"), exp.Func) self.assertIsInstance(parse_one("TIMESTAMP('2022-01-01')"), exp.Func) self.assertIsInstance(parse_one("TIMESTAMP()"), exp.Func) self.assertIsInstance(parse_one("map.x"), exp.Column) self.assertIsInstance(parse_one("CAST(x AS CHAR(5))").to.expressions[0], exp.DataTypeParam) self.assertEqual(parse_one("1::int64", dialect="bigquery"), parse_one("CAST(1 AS BIGINT)")) def test_set_expression(self): set_ = parse_one("SET") self.assertEqual(set_.sql(), "SET") self.assertIsInstance(set_, exp.Set) set_session = parse_one("SET SESSION x = 1") self.assertEqual(set_session.sql(), "SET SESSION x = 1") self.assertIsInstance(set_session, exp.Set) set_item = set_session.expressions[0] self.assertIsInstance(set_item, exp.SetItem) self.assertIsInstance(set_item.this, exp.EQ) self.assertIsInstance(set_item.this.this, exp.Column) self.assertIsInstance(set_item.this.expression, exp.Literal) self.assertEqual(set_item.args.get("kind"), "SESSION") set_to = parse_one("SET x TO 1") self.assertEqual(set_to.sql(), 
"SET x = 1") self.assertIsInstance(set_to, exp.Set) with self.assertLogs(parser_logger) as cm: set_as_command = parse_one("SET DEFAULT ROLE ALL TO USER") assert "'SET DEFAULT ROLE ALL TO USER'" in cm.output[0] self.assertEqual(set_as_command.sql(), "SET DEFAULT ROLE ALL TO USER") self.assertIsInstance(set_as_command, exp.Command) self.assertEqual(set_as_command.this, "SET") self.assertEqual(set_as_command.expression, " DEFAULT ROLE ALL TO USER") def test_pretty_config_override(self): self.assertEqual(parse_one("SELECT col FROM x").sql(), "SELECT col FROM x") with patch("sqlglot.pretty", True): self.assertEqual(parse_one("SELECT col FROM x").sql(), "SELECT\n col\nFROM x") self.assertEqual(parse_one("SELECT col FROM x").sql(pretty=True), "SELECT\n col\nFROM x") @patch("sqlglot.parser.logger") def test_comment_error_n(self, logger): parse_one( """SUM ( -- test )""", error_level=ErrorLevel.WARN, ) assert_logger_contains( "Required keyword: 'this' missing for . Line 4, Col: 1.", logger, ) @patch("sqlglot.parser.logger") def test_comment_error_r(self, logger): parse_one( """SUM(-- test\r)""", error_level=ErrorLevel.WARN, ) assert_logger_contains( "Required keyword: 'this' missing for . 
Line 2, Col: 1.", logger, ) @patch("sqlglot.parser.logger") def test_create_table_error(self, logger): parse_one( """CREATE TABLE SELECT""", error_level=ErrorLevel.WARN, ) assert_logger_contains( "Expected table name", logger, ) def test_pivot_columns(self): nothing_aliased = """ SELECT * FROM ( SELECT partname, price FROM part ) PIVOT (AVG(price) FOR partname IN ('prop', 'rudder')) """ everything_aliased = """ SELECT * FROM ( SELECT partname, price FROM part ) PIVOT (AVG(price) AS avg_price FOR partname IN ('prop' AS prop1, 'rudder' AS rudder1)) """ only_pivot_columns_aliased = """ SELECT * FROM ( SELECT partname, price FROM part ) PIVOT (AVG(price) FOR partname IN ('prop' AS prop1, 'rudder' AS rudder1)) """ columns_partially_aliased = """ SELECT * FROM ( SELECT partname, price FROM part ) PIVOT (AVG(price) FOR partname IN ('prop' AS prop1, 'rudder')) """ multiple_aggregates_aliased = """ SELECT * FROM ( SELECT partname, price, quality FROM part ) PIVOT (AVG(price) AS p, MAX(quality) AS q FOR partname IN ('prop' AS prop1, 'rudder')) """ multiple_aggregates_not_aliased = """ SELECT * FROM ( SELECT partname, price, quality FROM part ) PIVOT (AVG(price), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder')) """ multiple_aggregates_not_aliased_with_quoted_identifier_spark = """ SELECT * FROM ( SELECT partname, price, quality FROM part ) PIVOT (AVG(`PrIcE`), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder')) """ multiple_aggregates_not_aliased_with_quoted_identifier_duckdb = """ SELECT * FROM ( SELECT partname, price, quality FROM part ) PIVOT (AVG("PrIcE"), MAX(quality) FOR partname IN ('prop' AS prop1, 'rudder')) """ two_in_clauses_duckdb = """ SELECT * FROM cities PIVOT ( sum(population) AS total, count(population) AS count FOR year IN (2000, 2010) country IN ('NL', 'US') ) """ three_in_clauses_duckdb = """ SELECT * FROM cities PIVOT ( sum(population) AS total, count(population) AS count FOR year IN (2000, 2010) country IN ('NL', 'US') name IN 
('Amsterdam', 'Seattle') ) """ query_to_column_names = { nothing_aliased: { "bigquery": ["prop", "rudder"], "duckdb": ["prop", "rudder"], "redshift": ["prop", "rudder"], "snowflake": ['''"'prop'"''', '''"'rudder'"'''], "spark": ["prop", "rudder"], }, everything_aliased: { "bigquery": ["avg_price_prop1", "avg_price_rudder1"], "duckdb": ["prop1_avg_price", "rudder1_avg_price"], "redshift": ["prop1_avg_price", "rudder1_avg_price"], "spark": ["prop1", "rudder1"], }, only_pivot_columns_aliased: { "bigquery": ["prop1", "rudder1"], "duckdb": ["prop1", "rudder1"], "redshift": ["prop1", "rudder1"], "spark": ["prop1", "rudder1"], }, columns_partially_aliased: { "bigquery": ["prop1", "rudder"], "duckdb": ["prop1", "rudder"], "redshift": ["prop1", "rudder"], "spark": ["prop1", "rudder"], }, multiple_aggregates_aliased: { "bigquery": ["p_prop1", "q_prop1", "p_rudder", "q_rudder"], "duckdb": ["prop1_p", "prop1_q", "rudder_p", "rudder_q"], "spark": ["prop1_p", "prop1_q", "rudder_p", "rudder_q"], }, multiple_aggregates_not_aliased: { "duckdb": [ '"prop1_avg(price)"', '"prop1_max(quality)"', '"rudder_avg(price)"', '"rudder_max(quality)"', ], "spark": [ "`prop1_avg(price)`", "`prop1_max(quality)`", "`rudder_avg(price)`", "`rudder_max(quality)`", ], }, multiple_aggregates_not_aliased_with_quoted_identifier_spark: { "spark": [ "`prop1_avg(PrIcE)`", "`prop1_max(quality)`", "`rudder_avg(PrIcE)`", "`rudder_max(quality)`", ], }, multiple_aggregates_not_aliased_with_quoted_identifier_duckdb: { "duckdb": [ '"prop1_avg(PrIcE)"', '"prop1_max(quality)"', '"rudder_avg(PrIcE)"', '"rudder_max(quality)"', ], }, two_in_clauses_duckdb: { "duckdb": [ '"2000_NL_total"', '"2000_NL_count"', '"2000_US_total"', '"2000_US_count"', '"2010_NL_total"', '"2010_NL_count"', '"2010_US_total"', '"2010_US_count"', ], }, three_in_clauses_duckdb: { "duckdb": [ '"2000_NL_Amsterdam_total"', '"2000_NL_Amsterdam_count"', '"2000_NL_Seattle_total"', '"2000_NL_Seattle_count"', '"2000_US_Amsterdam_total"', 
def test_parse_nested(self):
    # Smoke-test parsing of long chains of repeated join-like clauses. A slow
    # parse is reported through the parser logger rather than failing the test.
    def warn_over_threshold(query: str, max_threshold: float = 0.2):
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        ast = parse_one(query)
        elapsed = time.perf_counter() - started

        self.assertIsNotNone(ast)
        if elapsed >= max_threshold:
            parser_logger.warning(
                f"Query {query[:100]}... surpassed the time threshold of {max_threshold} seconds"
            )

    warn_over_threshold("SELECT * FROM a " + ("LEFT JOIN b ON a.id = b.id " * 38))
    warn_over_threshold("SELECT * FROM a " + ("LEFT JOIN UNNEST(ARRAY[]) " * 15))
    warn_over_threshold("SELECT * FROM a " + ("OUTER APPLY (SELECT * FROM b) " * 30))
    warn_over_threshold("SELECT * FROM a " + ("NATURAL FULL OUTER JOIN x " * 30))
def test_parse_concat_ws(self):
    concat = parse_one("CONCAT_WS(' ', 'John', 'Doe')")
    self.assertEqual(concat.sql(), "CONCAT_WS(' ', 'John', 'Doe')")

    # The separator and both values are checked by position in `expressions`.
    for position, rendered in enumerate(("' '", "'John'", "'Doe'")):
        self.assertEqual(concat.expressions[position].sql(), rendered)

    # Ensure we can parse without argument when error level is ignore
    statements = parse("CONCAT_WS()", error_level=ErrorLevel.IGNORE)
    self.assertEqual(statements[0].sql(), "CONCAT_WS()")
def test_alter_set(self):
    # Each ALTER TABLE ... SET statement must come back unchanged when it is
    # parsed and generated with the same dialect.
    statements = [
        "ALTER TABLE tbl SET TBLPROPERTIES ('x'='1', 'Z'='2')",
        "ALTER TABLE tbl SET SERDE 'test' WITH SERDEPROPERTIES ('k'='v', 'kay'='vee')",
        "ALTER TABLE tbl SET SERDEPROPERTIES ('k'='v', 'kay'='vee')",
        "ALTER TABLE tbl SET LOCATION 'new_location'",
        "ALTER TABLE tbl SET FILEFORMAT file_format",
        "ALTER TABLE tbl SET TAGS ('tag1' = 't1', 'tag2' = 't2')",
    ]
    dialects = ("hive", "spark2", "spark", "databricks")

    # Dialect-major order: every statement is checked for one dialect before
    # moving on to the next dialect.
    combinations = [(dialect, sql) for dialect in dialects for sql in statements]
    for dialect, sql in combinations:
        with self.subTest(f"Testing query '{sql}' for dialect {dialect}"):
            round_tripped = parse_one(sql, dialect=dialect).sql(dialect=dialect)
            self.assertEqual(round_tripped, sql)
def test_udf_meta(self):
    # Pairs of (SQL with a trailing meta comment, expected root node type).
    expectations = (
        # The exact meta comment yields an Anonymous node
        ("YEAR(a) /* sqlglot.anonymous */", exp.Anonymous),
        # Meta flag is case sensitive
        ("YEAR(a) /* sqlglot.anONymous */", exp.Year),
        # Incomplete or incorrect anonymous meta comments are not registered
        ("YEAR(a) /* sqlglot.anon */", exp.Year),
    )
    for sql, node_type in expectations:
        self.assertIsInstance(parse_one(sql), node_type)
def test_qualified_function(self):
    # A dotted path ending in a call must contain no Column node and exactly
    # seven Dot nodes.
    sql = "a.b.c.d.e.f.g.foo()"
    ast = parse_one(sql)

    # unittest assertions are used instead of bare `assert` statements so the
    # checks still run under `python -O` and report a useful failure message.
    self.assertFalse(any(isinstance(node, exp.Column) for node in ast.walk()))
    self.assertEqual(len(list(ast.find_all(exp.Dot))), 7)
def assert_column_names_raises(self, schema, *tables):
    # Assert that asking `schema` for the column names of each given table
    # raises SchemaError; every table runs as its own sub-test.
    for table_name in tables:
        with self.subTest(table_name), self.assertRaises(SchemaError):
            schema.column_names(to_table(table_name))
def test_schema_add_table_with_and_without_mapping(self):
    schema = MappingSchema()

    # Each step re-adds the same table, optionally with a column mapping, and
    # gives the column names expected right afterwards. The final step checks
    # that re-adding without a mapping keeps the columns already registered.
    steps = (
        ((), []),
        (({"x": "string"},), ["x"]),
        (({"x": "string", "y": "int"},), ["x", "y"]),
        ((), ["x", "y"]),
    )
    for mapping_args, expected_columns in steps:
        schema.add_table("test", *mapping_args)
        self.assertEqual(schema.column_names("test"), expected_columns)
MappingSchema({"a": {"b": {"c": "varchar"}}}) self.assertEqual( schema.get_column_type(exp.table_("b", db="a"), exp.column("c")).this, exp.DataType.Type.VARCHAR, ) self.assertEqual( schema.get_column_type(exp.table_("b", db="a"), "c").this, exp.DataType.Type.VARCHAR ) schema = MappingSchema({"a": {"b": {"c": {"d": "varchar"}}}}) self.assertEqual( schema.get_column_type(exp.table_("c", db="b", catalog="a"), exp.column("d")).this, exp.DataType.Type.VARCHAR, ) self.assertEqual( schema.get_column_type(exp.table_("c", db="b", catalog="a"), "d").this, exp.DataType.Type.VARCHAR, ) schema = MappingSchema({"foo": {"bar": parse_one("INT", into=exp.DataType)}}) self.assertEqual(schema.get_column_type("foo", "bar").this, exp.DataType.Type.INT) def test_schema_normalization(self): schema = MappingSchema( schema={"x": {"`y`": {"Z": {"a": "INT", "`B`": "VARCHAR"}, "w": {"C": "INT"}}}}, dialect="clickhouse", ) table_z = exp.table_("Z", db="y", catalog="x") table_w = exp.table_("w", db="y", catalog="x") self.assertEqual(schema.column_names(table_z), ["a", "B"]) self.assertEqual(schema.column_names(table_w), ["C"]) schema = MappingSchema(schema={"x": {"`y`": "INT"}}, dialect="clickhouse") self.assertEqual(schema.column_names(exp.table_("x")), ["y"]) # Check that add_table normalizes both the table and the column names to be added / updated schema = MappingSchema() schema.add_table("Foo", {"SomeColumn": "INT", '"SomeColumn"': "DOUBLE"}) self.assertEqual(schema.column_names(exp.table_("fOO")), ["somecolumn", "SomeColumn"]) # Check that names are normalized to uppercase for Snowflake schema = MappingSchema(schema={"x": {"foo": "int", '"bLa"': "int"}}, dialect="snowflake") self.assertEqual(schema.column_names(exp.table_("x")), ["FOO", "bLa"]) # Check that switching off the normalization logic works as expected schema = MappingSchema(schema={"x": {"foo": "int"}}, normalize=False, dialect="snowflake") self.assertEqual(schema.column_names(exp.table_("x")), ["foo"]) # Check that the correct 
dialect is used when calling schema methods # Note: T-SQL is case-insensitive by default, so `fo` in clickhouse will match the normalized table name schema = MappingSchema(schema={"[Fo]": {"x": "int"}}, dialect="tsql") self.assertEqual( schema.column_names("[Fo]"), schema.column_names("`fo`", dialect="clickhouse") ) # Check that all column identifiers are normalized to lowercase for BigQuery, even quoted # ones. Also, ensure that tables aren't normalized, since they're case-sensitive by default. schema = MappingSchema(schema={"Foo": {"`BaR`": "int"}}, dialect="bigquery") self.assertEqual(schema.column_names("Foo"), ["bar"]) self.assertEqual(schema.column_names("foo"), []) # Check that the schema's normalization setting can be overridden schema = MappingSchema(schema={"X": {"y": "int"}}, normalize=False, dialect="snowflake") self.assertEqual(schema.column_names("x", normalize=True), ["y"]) # Check that identifiers in nested data types are normalized # BigQuery: STRUCT field names should be lowercased schema = MappingSchema({"t": {"col": "STRUCT"}}, dialect="bigquery") col_type = schema.get_column_type("t", "col") self.assertEqual(col_type.expressions[0].name, "foobar") # Snowflake: STRUCT field names should be uppercased schema = MappingSchema({"t": {"col": "STRUCT"}}, dialect="snowflake") col_type = schema.get_column_type("T", "COL") self.assertEqual(col_type.expressions[0].name, "FOOBAR") # ClickHouse: STRUCT field names should preserve case (case-sensitive) schema = MappingSchema({"t": {"col": "STRUCT"}}, dialect="clickhouse") col_type = schema.get_column_type("t", "col") self.assertEqual(col_type.expressions[0].name, "FooBar") # Nested STRUCT field names should also be normalized schema = MappingSchema( {"t": {"col": "STRUCT>"}}, dialect="bigquery" ) col_type = schema.get_column_type("t", "col") self.assertEqual(col_type.expressions[0].name, "outer") self.assertEqual(col_type.expressions[0].kind.expressions[0].name, "inner") # ARRAY of STRUCT field names should 
def test_find(self):
    schema = MappingSchema({"x": {"c": "int"}})

    # By default the lookup is expected to equal the mapping as it was given.
    raw_columns = schema.find(exp.to_table("x"))
    self.assertEqual(raw_columns, {"c": "int"})

    # With ensure_data_types=True the column type is compared against a built
    # DataType instead of the plain string.
    typed_columns = schema.find(exp.to_table("x"), ensure_data_types=True)
    self.assertEqual(typed_columns, {"c": exp.DataType.build("int")})
def test_recursion(self):
    # A chain of 5000 UNION ALL branches must generate the same SQL after the
    # dump_load round trip and after a pickle round trip.
    union_chain = "SELECT 1" + " UNION ALL SELECT 1" * 5000
    expr = parse_one(union_chain)
    expected_sql = expr.sql()

    reloaded = self.dump_load(expr)
    self.assertEqual(expected_sql, reloaded.sql())

    unpickled = pickle.loads(pickle.dumps(expr))
    self.assertEqual(expected_sql, unpickled.sql())
def test_space_keywords(self):
    # Each case is (input text, number of tokens the tokenizer should produce);
    # in every case the first token's text must contain GROUP.
    for string, length in (
        ("group bys", 2),
        (" group bys", 2),
        (" group bys ", 2),
        ("group by)", 2),
        ("group bys)", 3),
        ("group \r", 1),
    ):
        # One sub-test per input so a failure names the offending string and
        # the remaining inputs are still checked.
        with self.subTest(string=string):
            tokens = Tokenizer().tokenize(string)
            # assertIn reports both operands on failure, unlike assertTrue(... in ...).
            self.assertIn("GROUP", tokens[0].text.upper())
            self.assertEqual(len(tokens), length)
def test_crlf(self):
    def tokenize_pairs(sql):
        # Reduce the token stream to comparable (token_type, text) pairs.
        return [(token.token_type, token.text) for token in Tokenizer().tokenize(sql)]

    # A CRLF between two clauses must not produce a token of its own.
    self.assertEqual(
        tokenize_pairs("SELECT a\r\nFROM b"),
        [
            (TokenType.SELECT, "SELECT"),
            (TokenType.VAR, "a"),
            (TokenType.FROM, "FROM"),
            (TokenType.VAR, "b"),
        ],
    )

    # The same holds for a trailing and for a leading CRLF.
    expected_select_one = [
        (TokenType.SELECT, "SELECT"),
        (TokenType.NUMBER, "1"),
    ]
    for simple_query in ("SELECT 1\r\n", "\r\nSELECT 1"):
        self.assertEqual(tokenize_pairs(simple_query), expected_select_one)
Tokenizer().tokenize("SHOW;") self.assertEqual(tokens[0].token_type, TokenType.SHOW) self.assertEqual(tokens[1].token_type, TokenType.SEMICOLON) tokens = Tokenizer().tokenize("EXECUTE") self.assertEqual(tokens[0].token_type, TokenType.EXECUTE) self.assertEqual(len(tokens), 1) tokens = Tokenizer().tokenize("FETCH;SHOW;") self.assertEqual(tokens[0].token_type, TokenType.FETCH) self.assertEqual(tokens[1].token_type, TokenType.SEMICOLON) self.assertEqual(tokens[2].token_type, TokenType.SHOW) self.assertEqual(tokens[3].token_type, TokenType.SEMICOLON) def test_error_msg(self): with self.assertRaisesRegex(TokenError, "Error tokenizing 'select /'"): Tokenizer().tokenize("select /*") def test_jinja(self): # Check that {#, #} are treated as token delimiters, even though BigQuery overrides COMMENTS tokenizer = BigQuery.Tokenizer() tokens = tokenizer.tokenize( """ SELECT {{ x }}, {{- x -}}, {# it's a comment #} {% for x in y -%} a {{+ b }} {% endfor %}; """ ) tokens = [(token.token_type, token.text) for token in tokens] self.assertEqual( tokens, [ (TokenType.SELECT, "SELECT"), (TokenType.L_BRACE, "{"), (TokenType.L_BRACE, "{"), (TokenType.VAR, "x"), (TokenType.R_BRACE, "}"), (TokenType.R_BRACE, "}"), (TokenType.COMMA, ","), (TokenType.BLOCK_START, "{{-"), (TokenType.VAR, "x"), (TokenType.BLOCK_END, "-}}"), (TokenType.COMMA, ","), (TokenType.BLOCK_START, "{%"), (TokenType.FOR, "for"), (TokenType.VAR, "x"), (TokenType.IN, "in"), (TokenType.VAR, "y"), (TokenType.BLOCK_END, "-%}"), (TokenType.VAR, "a"), (TokenType.BLOCK_START, "{{+"), (TokenType.VAR, "b"), (TokenType.R_BRACE, "}"), (TokenType.R_BRACE, "}"), (TokenType.BLOCK_START, "{%"), (TokenType.VAR, "endfor"), (TokenType.BLOCK_END, "%}"), (TokenType.SEMICOLON, ";"), ], ) tokens = tokenizer.tokenize("""'{{ var('x') }}'""") tokens = [(token.token_type, token.text) for token in tokens] self.assertEqual( tokens, [ (TokenType.STRING, "{{ var("), (TokenType.VAR, "x"), (TokenType.STRING, ") }}"), ], ) def 
def test_partial_token_list(self):
    tokenizer = Tokenizer()

    # The unbalanced string quote is expected to make tokenization fail.
    # assertRaises fails the test when no TokenError is raised, whereas a
    # plain try/except would silently skip the message check in that case.
    with self.assertRaises(TokenError) as ctx:
        tokenizer.tokenize("foo 'bar")
    self.assertIn("Error tokenizing 'foo 'ba'", str(ctx.exception))

    # The token read before the failure must still be available on the tokenizer.
    partial_tokens = tokenizer.tokens
    self.assertEqual(len(partial_tokens), 1)
    self.assertEqual(partial_tokens[0].token_type, TokenType.VAR)
    self.assertEqual(partial_tokens[0].text, "foo")
c DESC", "SELECT a, b FROM (SELECT a AS a, b AS b, ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT a, b FROM x ORDER BY c DESC", "SELECT DISTINCT a, b FROM x ORDER BY c DESC", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT ON (_row_number) _row_number FROM x ORDER BY c DESC", "SELECT _row_number FROM (SELECT _row_number AS _row_number, ROW_NUMBER() OVER (PARTITION BY _row_number ORDER BY c DESC) AS _row_number_2 FROM x) AS _t WHERE _row_number_2 = 1", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT ON (x.a, x.b) x.a, x.b FROM x ORDER BY c DESC", "SELECT a, b FROM (SELECT x.a AS a, x.b AS b, ROW_NUMBER() OVER (PARTITION BY x.a, x.b ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT ON (a) x.a, y.a FROM x CROSS JOIN y ORDER BY c DESC", "SELECT a, a_2 FROM (SELECT x.a AS a, y.a AS a_2, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x CROSS JOIN y) AS _t WHERE _row_number = 1", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT ON (a) a, a + b FROM x ORDER BY c DESC", "SELECT a, _col FROM (SELECT a AS a, a + b AS _col, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", ) self.validate( eliminate_distinct_on, "SELECT DISTINCT ON (a) * FROM x ORDER BY c DESC", "SELECT * FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1", ) self.validate( eliminate_distinct_on, 'SELECT DISTINCT ON (a) a AS "A", b FROM x ORDER BY c DESC', 'SELECT "A", b FROM (SELECT a AS "A", b AS b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1', ) self.validate( eliminate_distinct_on, 'SELECT DISTINCT ON (a) "A", b FROM x ORDER BY c DESC', 'SELECT "A", b FROM (SELECT "A" AS "A", b AS b, 
ROW_NUMBER() OVER (PARTITION BY a ORDER BY c DESC) AS _row_number FROM x) AS _t WHERE _row_number = 1', ) def test_eliminate_qualify(self): self.validate( eliminate_qualify, "SELECT i, a + 1 FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p) = 1", "SELECT i, _c FROM (SELECT i, a + 1 AS _c, ROW_NUMBER() OVER (PARTITION BY p) AS _w, p FROM qt) AS _t WHERE _w = 1", ) self.validate( eliminate_qualify, "SELECT i FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1 AND p = 0", "SELECT i FROM (SELECT i, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS _w, p, o FROM qt) AS _t WHERE _w = 1 AND p = 0", ) self.validate( eliminate_qualify, "SELECT i, p, o FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1", "SELECT i, p, o FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS _w FROM qt) AS _t WHERE _w = 1", ) self.validate( eliminate_qualify, "SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS row_num FROM qt QUALIFY row_num = 1", "SELECT i, p, o, row_num FROM (SELECT i, p, o, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS row_num FROM qt) AS _t WHERE row_num = 1", ) self.validate( eliminate_qualify, "SELECT * FROM qt QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1", "SELECT * FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) AS _w FROM qt) AS _t WHERE _w = 1", ) self.validate( eliminate_qualify, "SELECT c2, SUM(c3) OVER (PARTITION BY c2) AS r FROM t1 WHERE c3 < 4 GROUP BY c2, c3 HAVING SUM(c1) > 3 QUALIFY r IN (SELECT MIN(c1) FROM test GROUP BY c2 HAVING MIN(c1) > 3)", "SELECT c2, r FROM (SELECT c2, SUM(c3) OVER (PARTITION BY c2) AS r, c1 FROM t1 WHERE c3 < 4 GROUP BY c2, c3 HAVING SUM(c1) > 3) AS _t WHERE r IN (SELECT MIN(c1) FROM test GROUP BY c2 HAVING MIN(c1) > 3)", ) self.validate( eliminate_qualify, "SELECT x FROM y QUALIFY ROW_NUMBER() OVER (PARTITION BY p)", "SELECT x FROM (SELECT x, ROW_NUMBER() OVER (PARTITION BY p) AS _w, p FROM y) AS _t WHERE _w", ) self.validate( 
def test_remove_precision_parameterized_types(self):
    """Check that type parameters are dropped from the cast target types."""
    source_sql = "SELECT CAST(1 AS DECIMAL(10, 2)), CAST('13' AS VARCHAR(10))"
    expected_sql = "SELECT CAST(1 AS DECIMAL), CAST('13' AS VARCHAR)"
    self.validate(remove_precision_parameterized_types, source_sql, expected_sql)
5", "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y > 5", dialect, ) self.validate( eliminate_join_marks, "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x (+) = T2.x and T2.y > 5", "SELECT T1.d, T2.c FROM T2 LEFT JOIN T1 ON T1.x = T2.x WHERE T2.y > 5", dialect, ) self.validate( eliminate_join_marks, "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y (+) IS NULL", "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x AND T2.y IS NULL", dialect, ) self.validate( eliminate_join_marks, "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T2.y IS NULL", "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T2.y IS NULL", dialect, ) self.validate( eliminate_join_marks, "SELECT T1.d, T2.c FROM T1, T2 WHERE T1.x = T2.x (+) and T1.Z > 4", "SELECT T1.d, T2.c FROM T1 LEFT JOIN T2 ON T1.x = T2.x WHERE T1.Z > 4", dialect, ) self.validate( eliminate_join_marks, "SELECT * FROM table1, table2 WHERE table1.col = table2.col(+)", "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col", dialect, ) self.validate( eliminate_join_marks, "SELECT * FROM table1, table2, table3, table4 WHERE table1.col = table2.col(+) and table2.col >= table3.col(+) and table1.col = table4.col(+)", "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col LEFT JOIN table3 ON table2.col >= table3.col LEFT JOIN table4 ON table1.col = table4.col", dialect, ) self.validate( eliminate_join_marks, "SELECT * FROM table1, table2, table3 WHERE table1.col = table2.col(+) and table2.col >= table3.col(+)", "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col LEFT JOIN table3 ON table2.col >= table3.col", dialect, ) # 2 join marks on one side of predicate self.validate( eliminate_join_marks, "SELECT * FROM table1, table2 WHERE table1.col = table2.col1(+) + table2.col2(+)", "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col1 + table2.col2", dialect, ) # join mark and expression self.validate( eliminate_join_marks, "SELECT * FROM table1, table2 
WHERE table1.col = table2.col1(+) + 25", "SELECT * FROM table1 LEFT JOIN table2 ON table1.col = table2.col1 + 25", dialect, ) # eliminate join mark while preserving non-participating joins self.validate( eliminate_join_marks, "SELECT * FROM a, b, c WHERE a.id = b.id AND b.id(+) = c.id", "SELECT * FROM a LEFT JOIN b ON b.id = c.id CROSS JOIN c WHERE a.id = b.id", dialect, ) alias = "AS " if dialect != "oracle" else "" self.validate( eliminate_join_marks, "SELECT table1.id, table2.cloumn1, table3.id FROM table1, table2, (SELECT tableInner1.id FROM tableInner1, tableInner2 WHERE tableInner1.id = tableInner2.id(+)) AS table3 WHERE table1.id = table2.id(+) and table1.id = table3.id(+)", f"SELECT table1.id, table2.cloumn1, table3.id FROM table1 LEFT JOIN table2 ON table1.id = table2.id LEFT JOIN (SELECT tableInner1.id FROM tableInner1 LEFT JOIN tableInner2 ON tableInner1.id = tableInner2.id) {alias}table3 ON table1.id = table3.id", dialect, ) # if multiple conditions, we check that after transformations the tree remains consistent s = "select a.id from a, b where a.id = b.id (+) AND b.d (+) = const" tree = eliminate_join_marks(parse_one(s, dialect=dialect)) assert all(type(t.parent_select) is exp.Select for t in tree.find_all(exp.Table)) assert ( tree.sql(dialect=dialect) == "SELECT a.id FROM a LEFT JOIN b ON a.id = b.id AND b.d = const" ) # validate parens self.validate( eliminate_join_marks, "select t1.a, t2.b from t1, t2 where (1 = 1) and (t1.id = t2.id1 (+))", "SELECT t1.a, t2.b FROM t1 LEFT JOIN t2 ON t1.id = t2.id1 WHERE (1 = 1)", dialect, ) # validate a CASE self.validate( eliminate_join_marks, "select t1.a, t2.b from t1, t2 where t1.id = case when t2.id (+) = 'n/a' then null else t2.id (+) end", "SELECT t1.a, t2.b FROM t1 LEFT JOIN t2 ON t1.id = CASE WHEN t2.id = 'n/a' THEN NULL ELSE t2.id END", dialect, ) # validate OR self.validate( eliminate_join_marks, "select t1.a, t2.b from t1, t2 where t1.id = t2.id1 (+) or t1.id = t2.id2 (+)", "SELECT t1.a, t2.b FROM t1 
def test_eliminate_window_clause(self):
    """Check that named windows are inlined and the WINDOW clause is removed."""
    cases = (
        # Chained named windows (d -> c -> b -> a) end up as one inline spec.
        (
            "SELECT purchases, LAST_VALUE(item) OVER (d) AS most_popular FROM Produce WINDOW a AS (PARTITION BY purchases), b AS (a ORDER BY purchases), c AS (b ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING), d AS (c)",
            "SELECT purchases, LAST_VALUE(item) OVER (PARTITION BY purchases ORDER BY purchases ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS most_popular FROM Produce",
        ),
        # The outer query and the subquery both define a window called "a";
        # each reference is expected to pick up its own query's definition.
        (
            "SELECT LAST_VALUE(c) OVER (a) AS c2 FROM (SELECT LAST_VALUE(i) OVER (a) AS c FROM p WINDOW a AS (PARTITION BY x)) AS q(c) WINDOW a AS (PARTITION BY y)",
            "SELECT LAST_VALUE(c) OVER (PARTITION BY y) AS c2 FROM (SELECT LAST_VALUE(i) OVER (PARTITION BY x) AS c FROM p) AS q(c)",
        ),
    )
    for sql, expected in cases:
        self.validate(eliminate_window_clause, sql, expected)
_parse_and_set_struct_name_inheritance("SELECT ARRAY()"), "SELECT ARRAY()", ) # First struct has no field names: no inheritance self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance( "SELECT ARRAY(STRUCT('Alice', 85), STRUCT('Bob', 92))" ), "SELECT ARRAY(STRUCT('Alice', 85), STRUCT('Bob', 92))", ) # Mismatched field counts: skip inheritance self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance( "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob'))" ), "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob'))", ) # Struct already has field names: don't override self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance( "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS fullname, 92 AS points))" ), "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS fullname, 92 AS points))", ) # Mixed: some structs inherit, some already have names self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance( "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92), STRUCT('Carol' AS name, 88 AS score), STRUCT('Diana', 95))" ), "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score), STRUCT('Carol' AS name, 88 AS score), STRUCT('Diana' AS name, 95 AS score))", ) # Non-struct elements: no change self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance("SELECT ARRAY(1, 2, 3)"), "SELECT ARRAY(1, 2, 3)", ) # Multiple arrays: each processed independently self.validate( inherit_struct_field_names, _parse_and_set_struct_name_inheritance( "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)), ARRAY(STRUCT('X' AS col), STRUCT('Y'))" ), "SELECT ARRAY(STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score)), ARRAY(STRUCT('X' AS col), STRUCT('Y' AS col))", ) # Partial field names in first struct: inherit only the named ones self.validate( 
def validate(self, sql, target, **kwargs):
    """Assert that the first statement returned by ``transpile`` equals ``target``.

    Any extra keyword arguments are forwarded to ``transpile`` unchanged.
    """
    statements = transpile(sql, **kwargs)
    self.assertEqual(statements[0], target)
def test_paren(self):
    """Check that each statement with an unclosed parenthesis raises ``ParseError``.

    Every input gets its own ``assertRaises`` context. With both calls under
    a single context, the first call raises and leaves the block, so the
    second input would never be transpiled or checked.
    """
    for sql in ("1 + (2 + 3", "select f("):
        with self.subTest(sql):
            with self.assertRaises(ParseError):
                transpile(sql)
SELECT * FROM a UNION SELECT * FROM b", ) self.validate( "SELECT * FROM t1\n/*x*/\nINTERSECT ALL SELECT * FROM t2", "SELECT * FROM t1 /* x */ INTERSECT ALL SELECT * FROM t2", ) self.validate( "SELECT\n foo\n/* comments */\n;", "SELECT foo /* comments */", ) self.validate( "SELECT * FROM a INNER /* comments */ JOIN b", "SELECT * FROM a /* comments */ INNER JOIN b", ) self.validate( "SELECT * FROM a LEFT /* comment 1 */ OUTER /* comment 2 */ JOIN b", "SELECT * FROM a /* comment 1 */ /* comment 2 */ LEFT OUTER JOIN b", ) self.validate( "SELECT CASE /* test */ WHEN a THEN b ELSE c END", "SELECT CASE WHEN a THEN b ELSE c END /* test */", ) self.validate("SELECT 1 /*/2 */", "SELECT 1 /* /2 */") self.validate("SELECT */*comment*/", "SELECT * /* comment */") self.validate( "SELECT * FROM table /*comment 1*/ /*comment 2*/", "SELECT * FROM table /* comment 1 */ /* comment 2 */", ) self.validate("SELECT 1 FROM foo -- comment", "SELECT 1 FROM foo /* comment */") self.validate( "SELECT * FROM\n/* comment */\ndb.schema1.tbl PIVOT (SUM(a) FOR b IN ('x', 'y'))", "SELECT * FROM db.schema1.tbl PIVOT(SUM(a) FOR b IN ('x', 'y')) /* comment */", ) self.validate("SELECT --+5\nx FROM foo", "/* +5 */ SELECT x FROM foo") self.validate("SELECT --!5\nx FROM foo", "/* !5 */ SELECT x FROM foo") self.validate( "SELECT 1 /* inline */ FROM foo -- comment", "SELECT 1 /* inline */ FROM foo /* comment */", ) self.validate( "SELECT FUN(x) /*x*/, [1,2,3] /*y*/", "SELECT FUN(x) /* x */, ARRAY(1, 2, 3) /* y */" ) self.validate( """ SELECT 1 -- comment FROM foo -- comment """, "SELECT 1 /* comment */ FROM foo /* comment */", ) self.validate( """ SELECT 1 /* big comment like this */ FROM foo -- comment """, """SELECT 1 /* big comment like this */ FROM foo /* comment */""", ) self.validate( "select x from foo -- x", "SELECT x FROM foo /* x */", ) self.validate( """select x, -- from foo""", "SELECT x FROM foo", ) self.validate( """ -- comment 1 -- comment 2 -- comment 3 SELECT * FROM foo """, "/* comment 1 
*/ /* comment 2 */ /* comment 3 */ SELECT * FROM foo", ) self.validate( """ -- comment 1 -- comment 2 -- comment 3 SELECT * FROM foo""", """/* comment 1 */ /* comment 2 */ /* comment 3 */ SELECT * FROM foo""", pretty=True, ) self.validate( """ SELECT * FROM tbl /*line1 line2 line3*/ /*another comment*/ where 1=1 -- comment at the end""", """SELECT * FROM tbl /* line1 line2 line3 */ /* another comment */ WHERE 1 = 1 /* comment at the end */""", ) self.validate( """ SELECT * FROM tbl /*line1 line2 line3*/ /*another comment*/ where 1=1 -- comment at the end""", """SELECT * FROM tbl /* line1 line2 line3 */ /* another comment */ WHERE 1 = 1 /* comment at the end */""", pretty=True, ) self.validate( """ /* multi line comment */ SELECT tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */, CAST(x AS CHAR), # comment 3 y -- comment 4 FROM bar /* comment 5 */, tbl # comment 6 """, """/* multi line comment */ SELECT tbl.cola /* comment 1 */ + tbl.colb /* comment 2 */, CAST(x AS CHAR), /* comment 3 */ y /* comment 4 */ FROM bar /* comment 5 */, tbl /* comment 6 */""", read="mysql", pretty=True, ) self.validate( """ SELECT a FROM b WHERE foo -- comment 1 AND bar -- comment 2 AND bla -- comment 3 LIMIT 10 ; """, "SELECT a FROM b WHERE foo AND /* comment 1 */ bar AND /* comment 2 */ bla LIMIT 10 /* comment 3 */", ) self.validate( """ SELECT a FROM b WHERE foo -- comment 1 """, "SELECT a FROM b WHERE foo /* comment 1 */", ) self.validate( """ select a -- from from b -- where where foo -- comment 1 and bar -- comment 2 and bla """, """SELECT a /* from */ FROM b /* where */ WHERE foo AND /* comment 1 */ bar AND /* comment 2 */ bla""", pretty=True, ) self.validate( """ -- test WITH v AS ( SELECT 1 AS literal ) SELECT * FROM v """, """/* test */ WITH v AS ( SELECT 1 AS literal ) SELECT * FROM v""", pretty=True, ) self.validate( "(/* 1 */ 1 ) /* 2 */", "(1) /* 1 */ /* 2 */", ) self.validate( "select * from t where not a in (23) /*test*/ and b in (14)", "SELECT * FROM t WHERE NOT a IN 
(23) /* test */ AND b IN (14)", ) self.validate( "select * from t where a in (23) /*test*/ and b in (14)", "SELECT * FROM t WHERE a IN (23) /* test */ AND b IN (14)", ) self.validate( "select * from t where ((condition = 1)/*test*/)", "SELECT * FROM t WHERE ((condition = 1) /* test */)", ) self.validate( "SELECT 1 // hi this is a comment", "SELECT 1 /* hi this is a comment */", read="snowflake", ) self.validate( "-- comment\nDROP TABLE IF EXISTS foo", "/* comment */ DROP TABLE IF EXISTS foo", ) self.validate( """ -- comment1 -- comment2 -- comment3 DROP TABLE IF EXISTS db.tba """, """/* comment1 */ /* comment2 */ /* comment3 */ DROP TABLE IF EXISTS db.tba""", pretty=True, ) self.validate( """ -- comment4 CREATE TABLE db.tba AS SELECT a, b, c FROM tb_01 WHERE -- comment5 a = 1 AND b = 2 --comment6 -- and c = 1 -- comment7 ; """, """/* comment4 */ CREATE TABLE db.tba AS SELECT a, b, c FROM tb_01 WHERE a /* comment5 */ = 1 AND b = 2 /* comment6 */ /* and c = 1 */ /* comment7 */""", pretty=True, ) self.validate( """ SELECT -- This is testing comments col, -- 2nd testing comments CASE WHEN a THEN b ELSE c END as d FROM t """, """SELECT col, /* This is testing comments */ CASE WHEN a THEN b ELSE c END AS d /* 2nd testing comments */ FROM t""", pretty=True, ) self.validate( """ SELECT * FROM a -- comments INNER JOIN b """, """SELECT * FROM a /* comments */ INNER JOIN b""", pretty=True, ) self.validate( "SELECT * FROM a LEFT /* comment 1 */ OUTER /* comment 2 */ JOIN b", """SELECT * FROM a /* comment 1 */ /* comment 2 */ LEFT OUTER JOIN b""", pretty=True, ) self.validate( "SELECT\n a /* sqlglot.meta case_sensitive */ -- noqa\nFROM tbl", """SELECT a /* sqlglot.meta case_sensitive */ /* noqa */ FROM tbl""", pretty=True, ) self.validate( """ SELECT 'hotel1' AS hotel, * FROM dw_1_dw_1_1.exactonline_1.transactionlines /* UNION ALL SELECT 'Thon Partner Hotel Jølster' AS hotel, name, date, CAST(identifier AS VARCHAR) AS identifier, value FROM 
d2o_889_oupjr_1348.public.accountvalues_forecast */ UNION ALL SELECT 'hotel2' AS hotel, * FROM dw_1_dw_1_1.exactonline_2.transactionlines""", """SELECT 'hotel1' AS hotel, * FROM dw_1_dw_1_1.exactonline_1.transactionlines /* UNION ALL SELECT 'Thon Partner Hotel Jølster' AS hotel, name, date, CAST(identifier AS VARCHAR) AS identifier, value FROM d2o_889_oupjr_1348.public.accountvalues_forecast */ UNION ALL SELECT 'hotel2' AS hotel, * FROM dw_1_dw_1_1.exactonline_2.transactionlines""", pretty=True, ) self.validate( """/* The result of some calculations */ with base as ( select sum(sb.hep_amount) as hep_amount, -- I AM REMOVED sum(sb.hep_budget) /* Budget defined in sharepoint */ as blub , 1 as bla from gold.data_budget sb group by all ) select * from base """, """/* The result of some calculations */ WITH base AS ( SELECT SUM(sb.hep_amount) AS hep_amount, SUM(sb.hep_budget) /* I AM REMOVED */ AS blub, /* Budget defined in sharepoint */ 1 AS bla FROM gold.data_budget AS sb GROUP BY ALL ) SELECT * FROM base""", pretty=True, ) self.validate( """-- comment SOME_FUNC(arg IGNORE NULLS) OVER (PARTITION BY foo ORDER BY bla) AS col""", "SOME_FUNC(arg IGNORE NULLS) OVER (PARTITION BY foo ORDER BY bla) AS col /* comment */", pretty=True, ) self.validate( """ SELECT * FROM x INNER JOIN y -- inner join z LEFT JOIN z using (id) using (id) """, """SELECT * FROM x INNER JOIN y /* inner join z */ LEFT JOIN z USING (id) USING (id)""", pretty=True, ) self.validate( """with x as ( SELECT * /* NOTE: LEFT JOIN because blah blah blah */ FROM a ) select * from x""", """WITH x AS ( SELECT * /* NOTE: LEFT JOIN because blah blah blah */ FROM a ) SELECT * FROM x""", pretty=True, ) self.validate( """SELECT X FROM catalog.db.table WHERE Y -- AND Z""", """SELECT X FROM catalog.db.table WHERE Y AND Z""", ) self.validate( """with a as /* comment */ ( select * from b) select * from a""", """WITH a /* comment */ AS (SELECT * FROM b) SELECT * FROM a""", ) self.validate( """ -- comment at the top WITH -- 
comment for tbl1 tbl1 AS (SELECT 1) -- comment for tbl2 , tbl2 AS (SELECT 2) -- comment for tbl3 , tbl3 AS (SELECT 3) -- comment for final select SELECT * FROM tbl1""", """/* comment at the top */ WITH tbl1 /* comment for tbl1 */ AS ( SELECT 1 ), tbl2 /* comment for tbl2 */ AS ( SELECT 2 ), tbl3 /* comment for tbl3 */ AS ( SELECT 3 ) /* comment for final select */ SELECT * FROM tbl1""", pretty=True, ) self.validate( """ WITH x /* a */ AS ( SELECT 2 AS n /* b */ FROM (/* c */ SELECT /* c2 */ a /* d */ FROM t) AS x ) SELECT * FROM x /* e */ WHERE n >= (/* f */ SELECT MAX(x) FROM t) ORDER BY n /* g */ -- h """, """WITH x /* a */ AS ( SELECT 2 AS n /* b */ FROM ( /* c */ /* c2 */ SELECT a /* d */ FROM t ) AS x ) SELECT * FROM x /* e */ WHERE n >= ( SELECT MAX(x) FROM t ) /* f */ ORDER BY n /* g */ /* h */""", pretty=True, ) def test_comment_single_line_with_block_close(self): # Single-line comments containing */ must be escaped when converted to block comments, # otherwise the */ prematurely closes the block comment and turns comment text into SQL. 
def test_types(self):
    """Check that type-prefixed literals and ``::`` casts are emitted as ``CAST``."""
    cases = (
        ("INT 1", "CAST(1 AS INT)"),
        ("VARCHAR 'x' y", "CAST('x' AS VARCHAR) AS y"),
        ("STRING 'x' y", "CAST('x' AS TEXT) AS y"),
        ("x::INT", "CAST(x AS INT)"),
        ("x::INTEGER", "CAST(x AS INT)"),
        ("x::INT y", "CAST(x AS INT) AS y"),
        ("x::INT AS y", "CAST(x AS INT) AS y"),
        ("x::INT::BOOLEAN", "CAST(CAST(x AS INT) AS BOOLEAN)"),
        ("interval::int", "CAST(interval AS INT)"),
        ("x::user_defined_type", "CAST(x AS user_defined_type)"),
        ("CAST(x::INT AS BOOLEAN)", "CAST(CAST(x AS INT) AS BOOLEAN)"),
        ("CAST(x AS INT)::BOOLEAN", "CAST(CAST(x AS INT) AS BOOLEAN)"),
    )
    for sql, expected in cases:
        self.validate(sql, expected)

    # Reading "x::z" as ClickHouse is expected to fail with a ParseError.
    with self.assertRaises(ParseError):
        transpile("x::z", read="clickhouse")
def test_if(self):
    """Check that ``IF(...)`` and ``IF ... THEN ... END`` are emitted as ``CASE WHEN``."""
    cases = (
        (
            "SELECT IF(a > 1, 1, 0) FROM foo",
            "SELECT CASE WHEN a > 1 THEN 1 ELSE 0 END FROM foo",
        ),
        (
            "SELECT IF a > 1 THEN b END",
            "SELECT CASE WHEN a > 1 THEN b END",
        ),
        (
            "SELECT IF a > 1 THEN b ELSE c END",
            "SELECT CASE WHEN a > 1 THEN b ELSE c END",
        ),
        (
            "SELECT IF(a > 1, 1) FROM foo",
            "SELECT CASE WHEN a > 1 THEN 1 END FROM foo",
        ),
    )
    for sql, expected in cases:
        self.validate(sql, expected)
day", "INTERVAL '1' DAY") self.validate("INTERVAL 2 months", "INTERVAL '2' MONTHS") self.validate("TIMESTAMP '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMP)") self.validate("TIMESTAMP WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ)") self.validate( "TIMESTAMP(9) WITH TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMPTZ(9))", ) self.validate( "TIMESTAMP WITHOUT TIME ZONE '2020-01-01'", "CAST('2020-01-01' AS TIMESTAMP)", ) self.validate("'2020-01-01'::TIMESTAMP", "CAST('2020-01-01' AS TIMESTAMP)") self.validate( "'2020-01-01'::TIMESTAMP WITHOUT TIME ZONE", "CAST('2020-01-01' AS TIMESTAMP)", ) self.validate( "'2020-01-01'::TIMESTAMP WITH TIME ZONE", "CAST('2020-01-01' AS TIMESTAMPTZ)", ) self.validate( "timestamp with time zone '2025-11-20 00:00:00+00' AT TIME ZONE 'Africa/Cairo'", "CAST('2025-11-20 00:00:00+00' AS TIMESTAMPTZ) AT TIME ZONE 'Africa/Cairo'", ) self.validate("DATE '2020-01-01'", "CAST('2020-01-01' AS DATE)") self.validate("'2020-01-01'::DATE", "CAST('2020-01-01' AS DATE)") self.validate("STR_TO_TIME('x', 'y')", "STRPTIME('x', 'y')", write="duckdb") self.validate("STR_TO_UNIX('x', 'y')", "EPOCH(STRPTIME('x', 'y'))", write="duckdb") self.validate("TIME_TO_STR(x, 'y')", "STRFTIME(x, 'y')", write="duckdb") self.validate("TIME_TO_UNIX(x)", "EPOCH(x)", write="duckdb") self.validate( "UNIX_TO_STR(123, 'y')", "STRFTIME(TO_TIMESTAMP(123), 'y')", write="duckdb", ) self.validate( "UNIX_TO_TIME(123)", "TO_TIMESTAMP(123)", write="duckdb", ) self.validate( "STR_TO_TIME(x, 'y')", "CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, 'y')) AS TIMESTAMP)", write="hive", ) self.validate( "STR_TO_TIME(x, 'yyyy-MM-dd HH:mm:ss')", "CAST(x AS TIMESTAMP)", write="hive", ) self.validate( "STR_TO_TIME(x, 'yyyy-MM-dd')", "CAST(x AS TIMESTAMP)", write="hive", ) self.validate( "STR_TO_UNIX('x', 'y')", "UNIX_TIMESTAMP('x', 'y')", write="hive", ) self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="hive") self.validate("TIME_STR_TO_TIME(x)", 
"TIME_STR_TO_TIME(x)", write=None) self.validate( "TIME_STR_TO_TIME(x, 'America/Los_Angeles')", "TIME_STR_TO_TIME(x, 'America/Los_Angeles')", write=None, ) self.validate("TIME_STR_TO_UNIX(x)", "TIME_STR_TO_UNIX(x)", write=None) self.validate("TIME_TO_TIME_STR(x)", "CAST(x AS TEXT)", write=None) self.validate("TIME_TO_STR(x, 'y')", "TIME_TO_STR(x, 'y')", write=None) self.validate("TIME_TO_UNIX(x)", "TIME_TO_UNIX(x)", write=None) self.validate("UNIX_TO_STR(x, 'y')", "UNIX_TO_STR(x, 'y')", write=None) self.validate("UNIX_TO_TIME(x)", "UNIX_TO_TIME(x)", write=None) self.validate("UNIX_TO_TIME_STR(x)", "UNIX_TO_TIME_STR(x)", write=None) self.validate("TIME_STR_TO_DATE(x)", "TIME_STR_TO_DATE(x)", write=None) self.validate("TIME_STR_TO_DATE(x)", "TO_DATE(x)", write="hive") self.validate("UNIX_TO_STR(x, 'yyyy-MM-dd HH:mm:ss')", "FROM_UNIXTIME(x)", write="hive") self.validate("STR_TO_UNIX(x, 'yyyy-MM-dd HH:mm:ss')", "UNIX_TIMESTAMP(x)", write="hive") self.validate("IF(x > 1, x + 1)", "IF(x > 1, x + 1)", write="presto") self.validate("IF(x > 1, 1 + 1)", "IF(x > 1, 1 + 1)", write="hive") self.validate("IF(x > 1, 1, 0)", "IF(x > 1, 1, 0)", write="hive") self.validate( "TIME_TO_UNIX(x)", "UNIX_TIMESTAMP(x)", write="hive", ) self.validate("UNIX_TO_STR(123, 'y')", "FROM_UNIXTIME(123, 'y')", write="hive") self.validate( "UNIX_TO_TIME(123)", "FROM_UNIXTIME(123)", write="hive", ) self.validate("STR_TO_TIME('x', 'y')", "DATE_PARSE('x', 'y')", write="presto") self.validate( "STR_TO_UNIX('x', 'y')", "TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST('x' AS VARCHAR), 'y')), PARSE_DATETIME(DATE_FORMAT(CAST('x' AS TIMESTAMP), 'y'), 'y')))", write="presto", ) self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="presto") self.validate("TIME_TO_UNIX(x)", "TO_UNIXTIME(x)", write="presto") self.validate( "UNIX_TO_STR(123, 'y')", "DATE_FORMAT(FROM_UNIXTIME(123), 'y')", write="presto", ) self.validate("UNIX_TO_TIME(123)", "FROM_UNIXTIME(123)", write="presto") self.validate("STR_TO_TIME('x', 
'y')", "TO_TIMESTAMP('x', 'y')", write="spark") self.validate("STR_TO_UNIX('x', 'y')", "UNIX_TIMESTAMP('x', 'y')", write="spark") self.validate("TIME_TO_STR(x, 'y')", "DATE_FORMAT(x, 'y')", write="spark") self.validate( "TIME_TO_UNIX(x)", "UNIX_TIMESTAMP(x)", write="spark", ) self.validate("UNIX_TO_STR(123, 'y')", "FROM_UNIXTIME(123, 'y')", write="spark") self.validate( "UNIX_TO_TIME(123)", "CAST(FROM_UNIXTIME(123) AS TIMESTAMP)", write="spark", ) self.validate( "CREATE TEMPORARY TABLE test AS SELECT 1", "CREATE TEMPORARY VIEW test AS SELECT 1", write="spark2", ) def test_index_offset(self): with self.assertLogs(helper_logger) as cm: self.validate("x[0]", "x[1]", write="presto", identity=False) self.validate("x[1]", "x[0]", read="presto", identity=False) self.validate("x[x - 1]", "x[x - 1]", write="presto", identity=False) self.validate( "x[array_size(y) - 1]", "x[(CARDINALITY(y) - 1) + 1]", write="presto", identity=False, ) self.validate("x[3 - 1]", "x[3]", write="presto", identity=False) self.validate("MAP(a, b)[0]", "MAP(a, b)[0]", write="presto", identity=False) self.assertEqual( cm.output, [ "INFO:sqlglot:Applying array index offset (1)", "INFO:sqlglot:Applying array index offset (-1)", "INFO:sqlglot:Applying array index offset (1)", "INFO:sqlglot:Applying array index offset (1)", ], ) def test_identify_lambda(self): self.validate("x(y -> y)", 'X("y" -> "y")', identify=True) def test_identity(self): self.assertEqual(transpile("")[0], "") for sql in load_sql_fixtures("identity.sql"): with self.subTest(sql): self.assertEqual(transpile(sql)[0], sql.strip()) def test_command_identity(self): for sql in ( "ALTER AGGREGATE bla(foo) OWNER TO CURRENT_USER", "ALTER DOMAIN foo VALIDATE CONSTRAINT bla", "ALTER ROLE CURRENT_USER WITH REPLICATION", "ALTER RULE foo ON bla RENAME TO baz", "ALTER SEQUENCE IF EXISTS baz RESTART WITH boo", "ALTER TABLE integers DROP PRIMARY KEY", "ALTER TABLE table1 MODIFY COLUMN name1 SET TAG foo='bar'", "ALTER TABLE table1 RENAME COLUMN c1 AS 
c2", "ALTER TABLE table1 RENAME COLUMN c1 TO c2, c2 TO c3", "ALTER TABLE table1 RENAME COLUMN c1 c2", "ALTER TYPE electronic_mail RENAME TO email", "ALTER schema doo", "CALL catalog.system.iceberg_procedure_name(named_arg_1 => 'arg_1', named_arg_2 => 'arg_2')", "COMMENT ON ACCESS METHOD gin IS 'GIN index access method'", "CREATE OR REPLACE STAGE", "EXECUTE statement", "EXPLAIN SELECT * FROM x", "LOAD foo", "OPTIMIZE TABLE y", "PREPARE statement", "SET -v", "SET @user OFF", "SHOW TABLES", "VACUUM FREEZE my_table", ): with self.subTest(sql): with self.assertLogs(parser_logger) as cm: self.assertEqual(transpile(sql)[0], sql) assert f"'{sql[:100]}' contains unsupported syntax" in cm.output[0] def test_normalize_name(self): self.assertEqual( transpile("cardinality(x)", read="presto", write="presto", normalize_functions="lower")[ 0 ], "cardinality(x)", ) def test_partial(self): for sql in load_sql_fixtures("partial.sql"): with self.subTest(sql): self.assertEqual(transpile(sql, error_level=ErrorLevel.IGNORE)[0], sql.strip()) def test_pretty(self): for _, sql, pretty in load_sql_fixture_pairs("pretty.sql"): with self.subTest(sql[:100]): generated = transpile(sql, pretty=True)[0] self.assertEqual(generated, pretty) self.assertEqual(parse_one(sql), parse_one(pretty)) def test_pretty_line_breaks(self): self.assertEqual(transpile("SELECT '1\n2'", pretty=True)[0], "SELECT\n '1\n2'") self.assertEqual( transpile("SELECT '1\n2'", pretty=True, unsupported_level=ErrorLevel.IGNORE)[0], "SELECT\n '1\n2'", ) def test_sql_security(self): sqlglot_sql = "CREATE VIEW v SQL SECURITY INVOKER AS SELECT 1" for dialect, sql in [ ("clickhouse", "CREATE VIEW v SQL SECURITY INVOKER AS SELECT 1"), ("trino", "CREATE VIEW v SECURITY INVOKER AS SELECT 1"), ("presto", "CREATE VIEW v SECURITY INVOKER AS SELECT 1"), ("starrocks", "CREATE VIEW v SECURITY INVOKER AS SELECT 1"), ("mysql", "CREATE SQL SECURITY INVOKER VIEW v AS SELECT 1"), ]: with self.subTest(dialect): self.validate(sql, read=dialect, 
identity=False, target=sqlglot_sql) self.validate(sql, write=dialect, identity=False, target=sql) @mock.patch("sqlglot.parser.logger") def test_error_level(self, logger): invalid = "x + 1. (" expected_messages = [ "Required keyword: 'expressions' missing for . Line 1, Col: 8.\n x + 1. \033[4m(\033[0m", "Expecting ). Line 1, Col: 8.\n x + 1. \033[4m(\033[0m", ] expected_errors = [ { "description": "Required keyword: 'expressions' missing for ", "line": 1, "col": 8, "start_context": "x + 1. ", "highlight": "(", "end_context": "", "into_expression": None, }, { "description": "Expecting )", "line": 1, "col": 8, "start_context": "x + 1. ", "highlight": "(", "end_context": "", "into_expression": None, }, ] transpile(invalid, error_level=ErrorLevel.WARN) for error in expected_messages: assert_logger_contains(error, logger) with self.assertRaises(ParseError) as ctx: transpile(invalid, error_level=ErrorLevel.IMMEDIATE) self.assertEqual(str(ctx.exception), expected_messages[0]) self.assertEqual(ctx.exception.errors[0], expected_errors[0]) with self.assertRaises(ParseError) as ctx: transpile(invalid, error_level=ErrorLevel.RAISE) self.assertEqual(str(ctx.exception), "\n\n".join(expected_messages)) self.assertEqual(ctx.exception.errors, expected_errors) more_than_max_errors = "((((" expected_messages = ( "Required keyword: 'this' missing for . Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n" "Expecting ). Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n" "Expecting ). Line 1, Col: 4.\n (((\033[4m(\033[0m\n\n" "... 
and 2 more" ) expected_errors = [ { "description": "Required keyword: 'this' missing for ", "line": 1, "col": 4, "start_context": "(((", "highlight": "(", "end_context": "", "into_expression": None, }, { "description": "Expecting )", "line": 1, "col": 4, "start_context": "(((", "highlight": "(", "end_context": "", "into_expression": None, }, ] # Also expect three trailing structured errors that match the first expected_errors += [expected_errors[1]] * 3 with self.assertRaises(ParseError) as ctx: transpile(more_than_max_errors, error_level=ErrorLevel.RAISE) self.assertEqual(str(ctx.exception), expected_messages) self.assertEqual(ctx.exception.errors, expected_errors) @mock.patch("sqlglot.generator.logger") def test_unsupported_level(self, logger): def unsupported(level): transpile( "SELECT MAP(a, b), MAP(a, b), MAP(a, b), MAP(a, b)", read="presto", write="hive", unsupported_level=level, ) error = "Cannot convert array columns into map." unsupported(ErrorLevel.WARN) assert_logger_contains("\n".join([error] * 4), logger, level="warning") with self.assertRaises(UnsupportedError) as ctx: unsupported(ErrorLevel.RAISE) self.assertEqual(str(ctx.exception).count(error), 3) with self.assertRaises(UnsupportedError) as ctx: unsupported(ErrorLevel.IMMEDIATE) self.assertEqual(str(ctx.exception).count(error), 1) def test_recursion(self): sql = "1 AND 2 OR 3 AND " * 1000 sql += "4" self.assertEqual(len(parse_one(sql).sql()), 17001)