Repository: dbcli/athenacli Branch: main Commit: b1b18c4f1652 Files: 61 Total size: 169.2 KB Directory structure: gitextract_6pfg9wew/ ├── .github/ │ └── pull_request_template.md ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── Dockerfile ├── LICENSE.txt ├── README.md ├── athenacli/ │ ├── __init__.py │ ├── athenaclirc │ ├── clibuffer.py │ ├── clistyle.py │ ├── clitoolbar.py │ ├── compat.py │ ├── completer.py │ ├── completion_refresher.py │ ├── config.py │ ├── key_bindings.py │ ├── lexer.py │ ├── main.py │ ├── packages/ │ │ ├── __init__.py │ │ ├── completion_engine.py │ │ ├── filepaths.py │ │ ├── format_utils.py │ │ ├── literals/ │ │ │ ├── __init__.py │ │ │ ├── literals.json │ │ │ └── main.py │ │ ├── parseutils.py │ │ ├── prompt_utils.py │ │ ├── special/ │ │ │ ├── __init__.py │ │ │ ├── dbcommands.py │ │ │ ├── favoritequeries.py │ │ │ ├── iocommands.py │ │ │ ├── main.py │ │ │ └── utils.py │ │ └── tabular_output/ │ │ ├── __init__.py │ │ └── sql_format.py │ ├── sqlexecute.py │ └── style.py ├── changelog.md ├── docs/ │ ├── Makefile │ ├── awsconfig.rst │ ├── conf.py │ ├── develop.rst │ ├── faq.rst │ ├── features.rst │ ├── index.rst │ ├── install.rst │ └── usage.rst ├── examples/ │ └── create_table.sql ├── release.py ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── test/ │ ├── test_completion_engine.py │ ├── test_completion_refresher.py │ ├── test_dbspecial.py │ ├── test_format_utils.py │ ├── test_naive_completion.py │ ├── test_parseutils.py │ └── test_prompt_utils.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/pull_request_template.md ================================================ ## Description ## Checklist - [ ] I've added this contribution to the `changelog.md`. - [ ] I've added my name to the `AUTHORS` file (or it's already there). 
================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .cache nosetests.xml coverage.xml .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ # vscode .vscode/ .venv/ .python-version # Jetbrains Editors .idea/ ================================================ FILE: .travis.yml ================================================ language: python python: - "3.6" - "3.7" - "3.8" install: - pip install -r requirements-dev.txt - pip install -e . 
script: - pytest test notifications: webhooks: urls: - YOUR_WEBHOOK_URL on_success: change # options: [always|never|change] default: always on_failure: always # options: [always|never|change] default: always ================================================ FILE: AUTHORS.rst ================================================ Project Lead: ------------- * Zhaolong Zhu Core Developers: ---------------- * Amjith Ramanujam * Irina Truong * Zhaolong Zhu * Jash Gala Contributors: ------------- * Joe Block * Jash Gala * Hourann * Paul Gross * Aaron Brager * Patrick Park * Jan Katins * Rui Chen * Scott Morgan * Deepu Mohan Puthrote * Toska Chin * Pete Sheridan * Anthony Ross * Li Ben Yuan * Alex Gaynor * Branch Vincent * Jacob Williams * Markus Tauchnitz * David Gayda Creator: -------- Zhaolong Zhu ================================================ FILE: Dockerfile ================================================ FROM python:3.7 RUN pip install athenacli RUN apt-get update && apt-get install -y vim RUN useradd -ms /bin/bash athena USER athena WORKDIR /home/athena CMD athenacli ================================================ FILE: LICENSE.txt ================================================ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ [![Build Status](https://travis-ci.org/dbcli/athenacli.svg?branch=master)](https://travis-ci.org/dbcli/athenacli) [![PyPI](https://img.shields.io/pypi/v/athenacli.svg)](https://pypi.python.org/pypi/athenacli) [![Downloads](https://pepy.tech/badge/athenacli)](https://pepy.tech/project/athenacli) [![image](https://img.shields.io/pypi/l/athenacli.svg)](https://pypi.org/project/athenacli/) [![image](https://img.shields.io/pypi/pyversions/athenacli.svg)](https://pypi.org/project/athenacli/) [![Join the chat at https://gitter.im/dbcli/athenacli](https://badges.gitter.im/dbcli/athenacli.svg)](https://gitter.im/dbcli/athenacli?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) # Introduction AthenaCLI is a command line interface (CLI) for the [Athena](https://aws.amazon.com/athena/) service that can do auto-completion and syntax highlighting, and is a proud member of the dbcli community. 
![](./docs/_static/gif/athenacli.gif) # Quick Start ## Install ### Install via `pip` If you already know how to install python packages, then you can simply do: ``` bash $ pip install athenacli ``` ### Install via `brew` [Homebrew](https://brew.sh/) users can install by: ```sh $ brew install athenacli ``` If you don't know how to install python packages, please check the [Install](./docs/install.rst) page for more options (e.g docker) ## Config A config file is automatically created at `~/.athenacli/athenaclirc` at first launch (run athenacli). See the file itself for a description of all available options. Below 4 variables are required. If you are a user of aws cli, you can refer to [awsconfig](./docs/awsconfig.rst) file to see how to reuse credentials configuration of aws cli. ``` text # AWS credentials aws_access_key_id = '' aws_secret_access_key = '' aws_session_token = '' region = '' # e.g us-west-2, us-east-1 # Amazon S3 staging directory where query results are stored. # NOTE: S3 should in the same region as specified above. # The format is 's3://' s3_staging_dir = '' # Name of athena workgroup that you want to use work_group = '' # e.g. primary ``` or you can also use environment variables: ``` bash $ export AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY_ID $ export AWS_SECRET_ACCESS_KEY=YOUR_SECRET_ACCESS_KEY $ export AWS_SESSION_TOKEN=YOUR_SESSION_TOKEN $ export AWS_DEFAULT_REGION=us-west-2 $ export AWS_ATHENA_S3_STAGING_DIR=s3://YOUR_S3_BUCKET/path/to/ $ export AWS_ATHENA_WORK_GROUP=YOUR_ATHENA_WORK_GROUP ``` ## Create a table ``` bash $ athenacli -e examples/create_table.sql ``` You can find `examples/create_table.sql` [here](./examples/create_table.sql). ## Run a query ``` bash $ athenacli -e 'select elb_name, request_ip from elb_logs LIMIT 10' ``` ## REPL ``` bash $ athenacli [] ``` # Features - Auto-completes as you type for SQL keywords as well as tables and columns in the database. - Syntax highlighting. 
- Smart-completion will suggest context-sensitive completion. - `SELECT * FROM ` will only show table names. - `SELECT * FROM users WHERE ` will only show column names. - Pretty prints tabular data and various table formats. - Some special commands. e.g. Favorite queries. - Alias support. Column completions will work even when table names are aliased. Please refer to the [Features](./docs/features.rst) page for the screenshots of above features. # Usages ```bash $ athenacli --help Usage: main.py [OPTIONS] [CATALOG.][DATABASE] A Athena terminal client with auto-completion and syntax highlighting. Examples: - athenacli - athenacli my_database Options: -e, --execute TEXT Execute a command (or a file) and quit. -r, --region TEXT AWS region. --aws-access-key-id TEXT AWS access key id. --aws-secret-access-key TEXT AWS secretaccess key. --aws-session-token TEXT AWS session token. --s3-staging-dir TEXT Amazon S3 staging directory where query results are stored. --work-group TEXT Amazon Athena workgroup in which query is run, default is primary --athenaclirc PATH Location of athenaclirc file. --help Show this message and exit. ``` Please go to the [Usages](https://athenacli.readthedocs.io/en/latest/usage.html) for detailed information on how to use AthenaCLI. # Contributions If you're interested in contributing to this project, first of all I would like to extend my heartfelt gratitude. I've written a small [doc](https://athenacli.readthedocs.io/en/latest/develop.html) to describe how to get this running in a development setup. Please feel free to reach out to me if you need help. My email: zhuzhaolong0 AT gmail com # FAQs Please refer to the [FAQs](https://athenacli.readthedocs.io/en/latest/faq.html) for other information, e.g. "How can I get support for athenacli?". # Credits A special thanks to [Amjith Ramanujam](https://github.com/amjith) for creating pgcli and mycli, which inspired me to create this AthenaCLI, and AthenaCLI is created based on a clone of mycli. 
Thanks to [Jonathan Slenders](https://github.com/jonathanslenders) for creating the [Python Prompt Toolkit](https://github.com/jonathanslenders/python-prompt-toolkit), which leads me to pgcli and mycli. It's a lot of fun playing with this library. Thanks to [PyAthena](https://github.com/laughingman7743/PyAthena) for a pure python adapter to Athena database. Last but not least, thanks my team and manager encourage me to work on this hobby project. # Similar projects - [satterly/athena-cli](https://github.com/satterly/athena-cli): Presto-like CLI tool for AWS Athena. - [pengwynn/athena-cli](https://github.com/pengwynn/athena-cli): CLI for Amazon Athena, powered by JRuby. ================================================ FILE: athenacli/__init__.py ================================================ __version__ = '1.7.0' ================================================ FILE: athenacli/athenaclirc ================================================ [aws_profile default] # If you are a user of aws cli, you might want to use some configurations of aws cli, # please refer to https://athenacli.readthedocs.io/en/latest/awsconfig.html for more infomation. aws_access_key_id = '' aws_secret_access_key = '' aws_session_token = '' region = '' # e.g us-west-2, us-east-1 # Add role_arn to assume a role required for Athena access role_arn = '' # e.g. arn:aws:iam::987654321000:role/MyAthenaAccessRole # Amazon S3 staging directory where query results are stored # NOTE: S3 should in the same region as specified above. # The format is 's3://' s3_staging_dir = '' # Name of athena workgroup that you want to use work_group = '' # e.g. primary [main] # log_file location. log_file = ~/.athenacli/app.log # history_file location. history_file = '~/.athenacli/history' # Multi-line mode allows breaking up the sql statements into multiple lines. If # this is set to True, then the end of the statements must have a semi-colon. 
# If this is set to False then sql statements can't be split into multiple # lines. End of line (return) is considered as the end of the statement. multi_line = True # Destructive warning mode will alert you before executing a sql statement # that may cause harm to the database such as "drop table", "drop database" # or "shutdown". destructive_warning = True # Default log level. Possible values: "CRITICAL", "ERROR", "WARNING", "INFO" # and "DEBUG". "NONE" disables logging. log_level = INFO # Timing of sql statments and table rendering. timing = True # Table format. Possible values: ascii, double, github, # psql, plain, simple, grid, fancy_grid, pipe, orgtbl, rst, mediawiki, html, # latex, latex_booktabs, textile, moinmoin, jira, vertical, tsv, csv. # Recommended: ascii table_format = ascii # Syntax coloring style. Possible values (many support the "-dark" suffix): # manni, igor, xcode, vim, autumn, vs, rrt, native, perldoc, borland, tango, emacs, # friendly, monokai, paraiso, colorful, murphy, bw, pastie, paraiso, trac, default, # fruity. # Screenshots at http://mycli.net/syntax syntax_style = default # Keybindings: Possible values: emacs, vi. # Emacs mode: Ctrl-A is home, Ctrl-E is end. All emacs keybindings are available in the REPL. # When Vi mode is enabled you can use modal editing features offered by Vi in the REPL. key_bindings = emacs # Athena prompt # \d - Database name # \r - Region # \D - The full current date # \m - Minutes of the current time # \n - Newline # \P - AM/PM # \R - The current time, in 24-hour military time (0–23) # \s - Seconds of the current time prompt = '\r:\d> ' prompt_continuation = '-> ' # enable pager on startup enable_pager = True # Custom colors for the completion menu, toolbar, etc. 
[colors] completion-menu.completion.current = 'bg:#ffffff #000000' completion-menu.completion = 'bg:#008888 #ffffff' completion-menu.meta.completion.current = 'bg:#44aaaa #000000' completion-menu.meta.completion = 'bg:#448888 #ffffff' completion-menu.multi-column-meta = 'bg:#aaffff #000000' scrollbar.arrow = 'bg:#003333' scrollbar = 'bg:#00aaaa' selected = '#ffffff bg:#6666aa' search = '#ffffff bg:#4444aa' search.current = '#ffffff bg:#44aa44' bottom-toolbar = 'bg:#222222 #aaaaaa' bottom-toolbar.off = 'bg:#222222 #888888' bottom-toolbar.on = 'bg:#222222 #ffffff' search-toolbar = 'noinherit bold' search-toolbar.text = 'nobold' system-toolbar = 'noinherit bold' arg-toolbar = 'noinherit bold' arg-toolbar.text = 'nobold' bottom-toolbar.transaction.valid = 'bg:#222222 #00ff5f bold' bottom-toolbar.transaction.failed = 'bg:#222222 #ff005f bold' # style classes for colored table output output.header = "#00ff5f bold" output.odd-row = "" output.even-row = "" # Favorite queries. [favorite_queries] ================================================ FILE: athenacli/clibuffer.py ================================================ from prompt_toolkit.enums import DEFAULT_BUFFER from prompt_toolkit.filters import Condition from prompt_toolkit.application import get_app def cli_is_multiline(cli): @Condition def cond(): doc = get_app().layout.get_buffer_by_name(DEFAULT_BUFFER).document if not cli.multi_line: return False else: return not _multiline_exception(doc.text) return cond def _multiline_exception(text): orig = text text = text.strip() # Multi-statement favorite query is a special case. Because there will # be a semicolon separating statements, we can't consider semicolon an # EOL. Let's consider an empty line an EOL instead. 
# if text.startswith('\\fs'): # return orig.endswith('\n') return (text.startswith('\\') or # Special Command text.endswith(';') or # Ended with a semi-colon text.endswith('\\g') or # Ended with \g text.endswith('\\G') or # Ended with \G (text == 'exit') or # Exit doesn't need semi-colon (text == 'quit') or # Quit doesn't need semi-colon (text == ':q') or # To all the vim fans out there (text == '') # Just a plain enter without any text ) ================================================ FILE: athenacli/clistyle.py ================================================ import logging import pygments.styles from pygments.token import string_to_tokentype, Token from pygments.style import Style as PygmentsStyle from pygments.util import ClassNotFound from prompt_toolkit.styles.pygments import style_from_pygments_cls from prompt_toolkit.styles import merge_styles, Style logger = logging.getLogger(__name__) # map Pygments tokens (ptk 1.0) to class names (ptk 2.0). TOKEN_TO_PROMPT_STYLE = { Token.Menu.Completions.Completion.Current: 'completion-menu.completion.current', Token.Menu.Completions.Completion: 'completion-menu.completion', Token.Menu.Completions.Meta.Current: 'completion-menu.meta.completion.current', Token.Menu.Completions.Meta: 'completion-menu.meta.completion', Token.Menu.Completions.MultiColumnMeta: 'completion-menu.multi-column-meta', Token.Menu.Completions.ProgressButton: 'scrollbar.arrow', # best guess Token.Menu.Completions.ProgressBar: 'scrollbar', # best guess Token.SelectedText: 'selected', Token.SearchMatch: 'search', Token.SearchMatch.Current: 'search.current', Token.Toolbar: 'bottom-toolbar', Token.Toolbar.Off: 'bottom-toolbar.off', Token.Toolbar.On: 'bottom-toolbar.on', Token.Toolbar.Search: 'search-toolbar', Token.Toolbar.Search.Text: 'search-toolbar.text', Token.Toolbar.System: 'system-toolbar', Token.Toolbar.Arg: 'arg-toolbar', Token.Toolbar.Arg.Text: 'arg-toolbar.text', Token.Toolbar.Transaction.Valid: 'bottom-toolbar.transaction.valid', 
def parse_pygments_style(token_name, style_object, style_dict):
    """Parse token type and style string.

    :param token_name: str name of Pygments token. Example: "Token.String"
    :param style_object: pygments.style.Style instance to use as base
    :param style_dict: dict of token names and their styles, customized to this cli
    """
    token_type = string_to_tokentype(token_name)
    # The configured value may itself name another token whose style is
    # defined by the base Pygments style; otherwise use the raw string.
    try:
        referenced = string_to_tokentype(style_dict[token_name])
        return token_type, style_object.styles[referenced]
    except AttributeError:
        return token_type, style_dict[token_name]
def style_factory_output(name, cli_style):
    """Build a Pygments style class used for rendering query output tables.

    Starts from the named Pygments style (falling back to 'native' when the
    name is unknown) and overlays the user's customizations from the
    [colors] config section.
    """
    try:
        style = pygments.styles.get_style_by_name(name).styles
    except ClassNotFound:
        style = pygments.styles.get_style_by_name('native').styles

    for key in cli_style:
        if key.startswith('Token.'):
            # Legacy pygments-token style name (prompt_toolkit 1.x era).
            token_type, style_value = parse_pygments_style(key, style, cli_style)
            style.update({token_type: style_value})
        elif key in PROMPT_STYLE_TO_TOKEN:
            # New-style class name: translate back to a pygments token,
            # because cli_helpers still expects tokens.
            style.update({PROMPT_STYLE_TO_TOKEN[key]: cli_style[key]})
        else:
            # TODO: cli helpers will have to switch to ptk.Style
            logger.error('Unhandled style / class name: %s', key)

    class OutputStyle(PygmentsStyle):
        default_style = ""
        styles = style

    return OutputStyle
def _get_vi_mode():
    """Get the current vi mode for display."""
    # One-letter abbreviations shown in the bottom toolbar.
    abbreviations = {
        InputMode.INSERT: 'I',
        InputMode.NAVIGATION: 'N',
        InputMode.REPLACE: 'R',
        InputMode.INSERT_MULTIPLE: 'M',
    }
    return abbreviations[get_app().vi_state.input_mode]
def escape_name(self, name, char='`'):
    """Quote *name* with *char* when it is not a plain identifier.

    A name needs quoting when it does not match the identifier pattern,
    or collides with a reserved word or a builtin function name.
    """
    if not name:
        return name
    needs_quoting = (
        not self.name_pattern.match(name)
        or name.upper() in self.reserved_words
        or name.upper() in self.functions
    )
    if needs_quoting:
        name = '%s%s%s' % (char, name, char)
    return name
def reset_completions(self):
    """Forget all learned database metadata; keep the static vocabulary."""
    # Keywords and functions are always completable, even with no schema.
    self.all_completions = set(self.keywords + self.functions)
    self.dbmetadata = {'tables': {}, 'views': {}, 'functions': {}}
    self.dbname = ''
    self.databases = []
@staticmethod
def find_matches(text, collection, start_only=False, fuzzy=True, casing=None):
    """Find completion matches for the given text.

    Matches the last word of *text* against each candidate in *collection*.
    With ``fuzzy`` the word's characters may appear scattered through a
    candidate; otherwise a plain substring search is used, anchored at the
    start of the candidate when ``start_only`` is set.

    Yields prompt_toolkit Completion instances, best-ranked first,
    optionally re-cased per ``casing`` ('upper' / 'lower' / 'auto').
    """
    last = last_word(text, include='most_punctuations')
    needle = last.lower()

    # Each entry is (match length, match position, candidate); sorting
    # these tuples ranks tighter and earlier matches first.
    ranked = []
    if fuzzy:
        # A lazy '.*?'-joined pattern lets the needle's characters be
        # interleaved with other characters in the candidate.
        pat = compile('(%s)' % '.*?'.join(map(escape, needle)))
        for item in sorted(collection):
            found = pat.search(item.lower())
            if found:
                ranked.append((len(found.group()), found.start(), item))
    else:
        limit = len(needle) if start_only else None
        for item in sorted(collection):
            pos = item.lower().find(needle, 0, limit)
            if pos >= 0:
                ranked.append((len(needle), pos, item))

    if casing == 'auto':
        # Mirror the user's own casing: lowercase input begets lowercase.
        casing = 'lower' if last and last[-1].islower() else 'upper'

    def recase(word):
        return word.upper() if casing == 'upper' else word.lower()

    return [
        Completion(cand if casing is None else recase(cand), -len(needle))
        for _, _, cand in sorted(ranked)
    ]
def get_column_matches(self, suggestion, word_before_cursor):
    """Complete column names for the tables currently in scope."""
    scope = suggestion.tables
    logging.getLogger(__name__).debug('Completion column scope: %r', scope)
    candidates = self.populate_scoped_cols(scope)
    if suggestion.drop_unique:
        # drop_unique handles 'tb11 JOIN tbl2 USING (...', where only
        # columns appearing in more than one table make sense, and '*'
        # never does.
        counts = Counter(candidates)
        candidates = [col for col, n in counts.items() if n > 1 and col != '*']
    return self.find_matches(word_before_cursor, candidates)
def get_alias_matches(self, suggestion, word_before_cursor):
    """Complete table aliases visible in the current statement."""
    return self.find_matches(word_before_cursor, suggestion.aliases)
def get_special_matches(self, _, word_before_cursor):
    """Complete special (backslash) command names."""
    return self.find_matches(
        word_before_cursor,
        self.special_commands,
        start_only=True,
        fuzzy=True,
    )
            schema = tbl[0] or self.dbname
            relname = tbl[1]
            escaped_relname = self.escape_name(tbl[1])

            # We don't know if schema.relname is a table or view. Since
            # tables and views cannot share the same name, we can check one
            # at a time
            try:
                columns.extend(meta['tables'][schema][relname])

                # Table exists, so don't bother checking for a view
                continue
            except KeyError:
                try:
                    columns.extend(meta['tables'][schema][escaped_relname])

                    # Table exists, so don't bother checking for a view
                    continue
                except KeyError:
                    pass

            try:
                columns.extend(meta['views'][schema][relname])
            except KeyError:
                pass

        return columns

    def populate_schema_objects(self, schema, obj_type):
        """Returns list of tables or functions for a (optional) schema

        :param schema: schema name, or None to fall back to the current
            database name.
        :param obj_type: metadata key (e.g. 'tables' or 'views').
        """
        metadata = self.dbmetadata[obj_type]
        schema = schema or self.dbname

        try:
            objects = metadata[schema].keys()
        except KeyError:
            # schema doesn't exist
            objects = []

        return objects


================================================
FILE: athenacli/completion_refresher.py
================================================
import threading
from collections import OrderedDict

from athenacli.completer import AthenaCompleter
from athenacli.sqlexecute import SQLExecute
from athenacli.packages.special.main import COMMANDS

import logging

LOGGER = logging.getLogger(__name__)


class CompletionRefresher(object):
    """Refreshes completion metadata on a background thread."""

    # Registry of refresh callables, populated by the @refresher decorator
    # below; OrderedDict so refreshers run in registration order.
    refreshers = OrderedDict()

    def __init__(self):
        self._completer_thread = None
        # Event used to ask an in-flight refresh to start over.
        self._restart_refresh = threading.Event()

    def refresh(self, executor, callbacks, completer_options=None):
        """Creates a SQLCompleter object and populates it with the relevant
        completion suggestions in a background thread.

        executor - SQLExecute object, used to extract the credentials
        to connect to the database.
        callbacks - A function or a list of functions to call after
        the thread has completed the refresh. The newly created completion
        object will be passed in as an argument to each callback.
        completer_options - dict of options to pass to SQLCompleter.
""" if completer_options is None: completer_options = {} if self.is_refreshing(): self._restart_refresh.set() return [(None, None, None, 'Auto-completion refresh restarted.')] else: self._completer_thread = threading.Thread( target=self._bg_refresh, args=(executor, callbacks, completer_options), name='completion_refresh') self._completer_thread.setDaemon(True) self._completer_thread.start() return [(None, None, None, 'Auto-completion refresh started in the background.')] def is_refreshing(self): return self._completer_thread and self._completer_thread.is_alive() def _bg_refresh(self, sqlexecute, callbacks, completer_options): completer = AthenaCompleter(**completer_options) # Create a new pgexecute method to popoulate the completions. e = sqlexecute executor = SQLExecute( aws_access_key_id = e.aws_access_key_id, aws_secret_access_key = e.aws_secret_access_key, aws_session_token = e.aws_session_token, region_name = e.region_name, s3_staging_dir = e.s3_staging_dir, work_group = e.work_group, role_arn = e.role_arn, database = e.database ) # If callbacks is a single function then push it into a list. if callable(callbacks): callbacks = [callbacks] while 1: for refresher in self.refreshers.values(): refresher(completer, executor) if self._restart_refresh.is_set(): self._restart_refresh.clear() break else: # Break out of while loop if the for loop finishes natually # without hitting the break statement. break # Start over the refresh from the beginning if the for loop hit the # break statement. continue for callback in callbacks: callback(completer) def refresher(name, refreshers=CompletionRefresher.refreshers): """Decorator to add the decorated function to the dictionary of refreshers. 
    Any function decorated with a @refresher will be executed as part of the
    completion refresh routine."""
    def wrapper(wrapped):
        refreshers[name] = wrapped
        return wrapped
    return wrapper


@refresher('databases')
def refresh_databases(completer, executor):
    completer.extend_database_names(executor.databases())


@refresher('schemata')
def refresh_schemata(completer, executor):
    # schemata will be the name of the current database.
    completer.extend_schemata(executor.database)
    completer.set_dbname(executor.database)


@refresher('tables')
def refresh_tables(completer, executor):
    completer.extend_relations(executor.tables(), kind='tables')
    completer.extend_columns(executor.table_columns(), kind='tables')


@refresher('special_commands')
def refresh_special(completer, executor):
    completer.extend_special_commands(COMMANDS.keys())


================================================
FILE: athenacli/config.py
================================================
import shutil
import logging
import os
import sys
import errno

import boto3
from configobj import ConfigObj, ConfigObjError
from collections import defaultdict

try:
    basestring
except NameError:
    # Python 3 has no basestring; alias it so isinstance checks below work.
    basestring = str

LOGGER = logging.getLogger(__name__)


class AWSConfig(object):
    """Resolves AWS connection settings, preferring explicit arguments over
    the athenaclirc profile section over the ambient AWS environment."""

    def __init__(self, aws_access_key_id, aws_secret_access_key,
                 aws_session_token, region, s3_staging_dir, work_group,
                 profile, config):
        key = 'aws_profile %s' % profile
        try:
            _cfg = config[key]
        # NOTE(review): bare except -- narrow to KeyError if possible.
        except:
            # this assumes that the profile is only known in the regular AWS
            # config -> the boto lib will get it from there.
            # This is especially important if we have some kind of additional
            # temporary session keys for which the login fails if we set
            # aws_access_key_id/aws_secret_access_key here
            _cfg = defaultdict(lambda: None)

        self.aws_access_key_id = self.get_val(
            aws_access_key_id, _cfg['aws_access_key_id'])
        self.aws_secret_access_key = self.get_val(
            aws_secret_access_key, _cfg['aws_secret_access_key'])
        self.aws_session_token = self.get_val(
            aws_session_token, _cfg['aws_session_token'])
        self.region = self.get_val(region, _cfg['region'], self.get_region())
        self.s3_staging_dir = self.get_val(
            s3_staging_dir, _cfg['s3_staging_dir'])
        self.work_group = self.get_val(work_group, _cfg['work_group'])
        # enable connection to assume role
        self.role_arn = self.get_val(_cfg.get('role_arn'))

    def get_val(self, *vals):
        """Return the first True value in `vals` list, otherwise return None."""
        for v in vals:
            if v:
                return v

    def get_region(self):
        """Try to get region name from aws credentials/config files or
        environment variables"""
        return boto3.session.Session().region_name


def log(logger, level, message):
    """Logs message to stderr if logging isn't initialized."""
    if logger.parent.name != 'root':
        logger.log(level, message)
    else:
        print(message, file=sys.stderr)


def read_config_file(f):
    """Read a config file.

    Returns the parsed ConfigObj; on a parse error returns the partially
    parsed values, and on a permission error returns None.
    """
    if isinstance(f, basestring):
        f = os.path.expanduser(f)

    try:
        config = ConfigObj(f, interpolation=False, encoding='utf8')
    except ConfigObjError as e:
        log(LOGGER, logging.ERROR, "Unable to parse line {0} of config file "
            "'{1}'.".format(e.line_number, f))
        log(LOGGER, logging.ERROR, "Using successfully parsed config values.")
        return e.config
    except (IOError, OSError) as e:
        log(LOGGER, logging.WARNING, "You don't have permission to read "
            "config file '{0}'.".format(e.filename))
        return None

    return config


def read_config_files(files):
    """Read and merge a list of config files."""
    config = ConfigObj()

    for _file in files:
        _config = read_config_file(_file)
        # Skip files that failed to parse or were empty.
        if bool(_config) is True:
            config.merge(_config)
            config.filename = _config.filename

    return config


def write_default_config(source, destination, overwrite=False):
    """Copy the bundled default config file to *destination*.

    Creates the destination directory if needed; an existing file is left
    untouched unless *overwrite* is True.
    """
    destination = os.path.expanduser(destination)
    dirname = os.path.dirname(destination)
    if not os.path.exists(dirname):
        mkdir_p(dirname)
    if not overwrite and os.path.exists(destination):
        return
    shutil.copyfile(source, destination)


def mkdir_p(path):
    "like `mkdir -p`"
    try:
        os.makedirs(path)
    except OSError as exc:
        # An already-existing directory is fine; re-raise anything else.
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise


================================================
FILE: athenacli/key_bindings.py
================================================
import logging

from prompt_toolkit.enums import EditingMode
from prompt_toolkit.filters import completion_is_selected
from prompt_toolkit.key_binding import KeyBindings

_logger = logging.getLogger(__name__)


def cli_bindings(cli):
    """
    Custom key bindings for cli.
    """
    kb = KeyBindings()

    @kb.add('f2')
    def _(event):
        """
        Enable/Disable SmartCompletion Mode.
        """
        _logger.debug('Detected F2 key.')
        cli.completer.start_completion = not cli.completer.start_completion

    @kb.add('f3')
    def _(event):
        """
        Enable/Disable Multiline Mode.
        """
        _logger.debug('Detected F3 key.')
        cli.multi_line = not cli.multi_line

    @kb.add('f4')
    def _(event):
        """
        Toggle between Vi and Emacs mode.
        """
        _logger.debug('Detected F4 key.')
        if cli.key_bindings == 'vi':
            event.app.editing_mode = EditingMode.EMACS
            cli.key_bindings = 'emacs'
        else:
            event.app.editing_mode = EditingMode.VI
            cli.key_bindings = 'vi'

    @kb.add('tab')
    def _(event):
        """
        Force autocompletion at cursor.
        """
        _logger.debug('Detected key.')
        b = event.app.current_buffer
        if b.complete_state:
            b.complete_next()
        else:
            b.start_completion(select_first=True)

    @kb.add('c-space')
    def _(event):
        """
        Initialize autocompletion at cursor.

        If the autocompletion menu is not showing, display it with the
        appropriate completions for the context.

        If the menu is showing, select the next completion.
""" _logger.debug('Detected key.') b = event.app.current_buffer if b.complete_state: b.complete_next() else: b.start_completion(select_first=False) @kb.add('enter', filter=completion_is_selected) def _(event): """ Makes the enter key work as the tab key only when showing the menu. """ _logger.debug('Detected enter key.') event.current_buffer.complete_state = None b = event.app.current_buffer b.complete_state = None return kb ================================================ FILE: athenacli/lexer.py ================================================ from pygments.lexer import inherit from pygments.lexers.sql import MySqlLexer from pygments.token import Keyword class Lexer(MySqlLexer): """Extends MySQL lexer to add keywords.""" tokens = { 'root': [ (r'\brepair\b', Keyword), (r'\boffset\b', Keyword), inherit ], } ================================================ FILE: athenacli/main.py ================================================ # -*- coding: utf-8 -*- import os import shutil import sys import select import click import threading import logging import itertools import sqlparse import traceback from time import time from datetime import datetime from random import choice from collections import namedtuple from prompt_toolkit.completion import DynamicCompleter from prompt_toolkit.shortcuts import PromptSession, CompleteStyle from prompt_toolkit.styles.pygments import style_from_pygments_cls from prompt_toolkit.layout.menus import CompletionsMenu from prompt_toolkit.history import FileHistory from prompt_toolkit.document import Document from prompt_toolkit.layout.processors import ( HighlightMatchingBracketProcessor, ConditionalProcessor) from prompt_toolkit.lexers import PygmentsLexer from prompt_toolkit.filters import HasFocus, IsDone from prompt_toolkit.enums import DEFAULT_BUFFER, EditingMode from prompt_toolkit.auto_suggest import AutoSuggestFromHistory from cli_helpers.tabular_output import TabularOutputFormatter from cli_helpers.tabular_output import 
preprocessors from pyathena.error import OperationalError import athenacli.packages.special as special from athenacli.sqlexecute import SQLExecute from athenacli.completer import AthenaCompleter from athenacli.style import AthenaStyle from athenacli.completion_refresher import CompletionRefresher from athenacli.packages.tabular_output import sql_format from athenacli.clistyle import style_factory, style_factory_output from athenacli.packages.prompt_utils import confirm, confirm_destructive_query from athenacli.key_bindings import cli_bindings from athenacli.clitoolbar import create_toolbar_tokens_func from athenacli.lexer import Lexer from athenacli.clibuffer import cli_is_multiline from athenacli.sqlexecute import SQLExecute from athenacli.config import read_config_files, write_default_config, mkdir_p, AWSConfig # Query tuples are used for maintaining history Query = namedtuple('Query', ['query', 'successful', 'mutating']) LOGGER = logging.getLogger(__name__) PACKAGE_ROOT = os.path.abspath(os.path.dirname(__file__)) ATHENACLIRC = '~/.athenacli/athenaclirc' DEFAULT_CONFIG_FILE = os.path.join(PACKAGE_ROOT, 'athenaclirc') class AthenaCli(object): DEFAULT_PROMPT = '\\d@\\r> ' MAX_LEN_PROMPT = 45 def __init__(self, region, aws_access_key_id, aws_secret_access_key, aws_session_token, s3_staging_dir, work_group, athenaclirc, profile, database): config_files = [DEFAULT_CONFIG_FILE] if os.path.exists(os.path.expanduser(athenaclirc)): config_files.append(athenaclirc) _cfg = self.config = read_config_files(config_files) self.init_logging(_cfg['main']['log_file'], _cfg['main']['log_level']) aws_config = AWSConfig( aws_access_key_id, aws_secret_access_key, aws_session_token, region, s3_staging_dir, work_group, profile, _cfg ) try: self.connect(aws_config, database) except Exception as e: self.echo(str(e), err=True, fg='red') err_msg = ''' There was an error while connecting to AWS Athena. It could be caused due to missing/incomplete configuration. 
Please verify the configuration in %s and run athenacli again. For more details about the error, you can check the log file: %s''' % (athenaclirc, _cfg['main']['log_file']) self.echo(err_msg) LOGGER.exception('error: %r', e) sys.exit(1) special.set_timing_enabled(_cfg['main'].as_bool('timing')) self.multi_line = _cfg['main'].as_bool('multi_line') self.key_bindings = _cfg['main']['key_bindings'] self.prompt = _cfg['main']['prompt'] or self.DEFAULT_PROMPT self.destructive_warning = _cfg['main']['destructive_warning'] self.syntax_style = _cfg['main']['syntax_style'] self.prompt_continuation_format = _cfg['main']['prompt_continuation'] self.formatter = TabularOutputFormatter(_cfg['main']['table_format']) self.formatter.cli = self sql_format.register_new_formatter(self.formatter) self.cli_style = _cfg['colors'] self.output_style = style_factory_output(self.syntax_style, self.cli_style) self.completer = AthenaCompleter() self._completer_lock = threading.Lock() self.completion_refresher = CompletionRefresher() self.prompt_app = None self.query_history = [] # Register custom special commands. 
self.register_special_commands() def init_logging(self, log_file, log_level_str): file_path = os.path.expanduser(log_file) if not os.path.exists(file_path): mkdir_p(os.path.dirname(file_path)) handler = logging.FileHandler(os.path.expanduser(log_file)) log_level_map = { 'CRITICAL': logging.CRITICAL, 'ERROR': logging.ERROR, 'WARNING': logging.WARNING, 'INFO': logging.INFO, 'DEBUG': logging.DEBUG, } log_level = log_level_map[log_level_str.upper()] formatter = logging.Formatter( '%(asctime)s (%(process)d/%(threadName)s) ' '%(name)s %(levelname)s - %(message)s') handler.setFormatter(formatter) LOGGER.addHandler(handler) LOGGER.setLevel(log_level) root_logger = logging.getLogger('athenacli') root_logger.addHandler(handler) root_logger.setLevel(log_level) root_logger.debug('Initializing athenacli logging.') root_logger.debug('Log file %r.', log_file) pgspecial_logger = logging.getLogger('special') pgspecial_logger.addHandler(handler) pgspecial_logger.setLevel(log_level) def register_special_commands(self): special.register_special_command( self.change_db, 'use', '\\u', 'Change to a new database.', aliases=('\\u',)) special.register_special_command( self.change_prompt_format, 'prompt', '\\R', 'Change prompt format.', aliases=('\\R',), case_sensitive=True) special.register_special_command( self.change_table_format, 'tableformat', '\\T', 'Change the table format used to output results.', aliases=('\\T',), case_sensitive=True) def change_table_format(self, arg, **_): try: self.formatter.format_name = arg yield (None, None, None, 'Changed table format to {}'.format(arg)) except ValueError: msg = 'Table format {} not recognized. 
Allowed formats:'.format( arg) for table_type in self.formatter.supported_formats: msg += "\n\t{}".format(table_type) yield (None, None, None, msg) def change_db(self, arg, **_): if arg is None: self.sqlexecute.connect() else: self.sqlexecute.connect(database=arg) yield (None, None, None, 'You are now connected to database "%s"' % self.sqlexecute.database) def change_prompt_format(self, arg, **_): """ Change the prompt format. """ if not arg: message = 'Missing required argument, format.' return [(None, None, None, message)] self.prompt = self.get_prompt(arg) return [(None, None, None, "Changed prompt format to %s" % arg)] def connect(self, aws_config, database): self.sqlexecute = SQLExecute( aws_access_key_id = aws_config.aws_access_key_id, aws_secret_access_key = aws_config.aws_secret_access_key, aws_session_token = aws_config.aws_session_token, region_name = aws_config.region, s3_staging_dir = aws_config.s3_staging_dir, work_group = aws_config.work_group, role_arn = aws_config.role_arn, database = database ) def handle_editor_command(self, text): r""" Editor command is any query that is prefixed or suffixed by a '\e'. The reason for a while loop is because a user might edit a query multiple times. For eg: "select * from \e" to edit it in vim, then come back to the prompt with the edited query "select * from blah where q = 'abc'\e" to edit it again. :param text: str :return: Document """ while special.editor_command(text): filename = special.get_filename(text) query = (special.get_editor_query(text) or self.get_last_query()) sql, message = special.open_external_editor(filename, sql=query) if message: # Something went wrong. Raise an exception and bail. raise RuntimeError(message) while True: try: text = self.prompt_app.prompt(default=sql) break except KeyboardInterrupt: sql = '' continue return text def run_query(self, query, new_line=True): """Runs *query*.""" if (self.destructive_warning and confirm_destructive_query(query) is False): message = 'Wise choice. 
Command execution stopped.' click.echo(message) return results = self.sqlexecute.run(query) for result in results: title, rows, headers, _ = result self.formatter.query = query output = self.format_output(title, rows, headers) for line in output: click.echo(line, nl=new_line) def run_cli(self): self.iterations = 0 self.configure_pager() self.refresh_completions() history_file = os.path.expanduser(self.config['main']['history_file']) history = FileHistory(history_file) self._build_prompt_app(history) def one_iteration(): try: text = self.prompt_app.prompt() except KeyboardInterrupt: return special.set_expanded_output(False) try: text = self.handle_editor_command(text) except RuntimeError as e: LOGGER.error("sql: %r, error: %r", text, e) LOGGER.error("traceback: %r", traceback.format_exc()) self.echo(str(e), err=True, fg='red') return if not text.strip(): return if self.destructive_warning: destroy = confirm_destructive_query(text) if destroy is None: pass # Query was not destructive. Nothing to do here. 
elif destroy is True: self.echo('Your call!') else: self.echo('Wise choice!') return mutating = False try: LOGGER.debug('sql: %r', text) special.write_tee(self.get_prompt(self.prompt) + text) successful = False start = time() res = self.sqlexecute.run(text) successful = True threshold = 1000 result_count = 0 for title, rows, headers, status in res: if rows and len(rows) > threshold: self.echo( 'The result set has more than {} rows.'.format(threshold), fg='red' ) if not confirm('Do you want to continue?'): self.echo('Aborted!', err=True, fg='red') break formatted = self.format_output( title, rows, headers, special.is_expanded_output(), None ) t = time() - start try: if result_count > 0: self.echo('') try: self.output(formatted, status) except KeyboardInterrupt: pass if special.is_timing_enabled(): self.echo('Time: %0.03fs' % t) except KeyboardInterrupt: pass start = time() result_count += 1 mutating = mutating or is_mutating(status) special.unset_once_if_written() except EOFError as e: raise e except KeyboardInterrupt: pass except NotImplementedError: self.echo('Not Yet Implemented.', fg="yellow") except OperationalError as e: LOGGER.debug("Exception: %r", e) LOGGER.error("sql: %r, error: %r", text, e) LOGGER.error("traceback: %r", traceback.format_exc()) self.echo(str(e), err=True, fg='red') except Exception as e: LOGGER.error("sql: %r, error: %r", text, e) LOGGER.error("traceback: %r", traceback.format_exc()) self.echo(str(e), err=True, fg='red') else: # Refresh the table names and column names if necessary. 
if need_completion_refresh(text): self.refresh_completions() query = Query(text, successful, mutating) self.query_history.append(query) try: while True: one_iteration() self.iterations += 1 except EOFError: special.close_tee() def get_output_margin(self, status=None): """Get the output margin (number of rows for the prompt, footer and timing message.""" margin = self.get_reserved_space() + self.get_prompt(self.prompt).count('\n') + 1 if special.is_timing_enabled(): margin += 1 if status: margin += 1 + status.count('\n') return margin def output(self, output, status=None): """Output text to stdout or a pager command. The status text is not outputted to pager or files. The message will be logged in the audit log, if enabled. The message will be written to the tee file, if enabled. The message will be written to the output file, if enabled. """ if output: size = self.prompt_app.output.get_size() margin = self.get_output_margin(status) fits = True buf = [] output_via_pager = self.explicit_pager and special.is_pager_enabled() for i, line in enumerate(output, 1): special.write_tee(line) special.write_once(line) if fits or output_via_pager: # buffering buf.append(line) if len(line) > size.columns or i > (size.rows - margin): fits = False if not self.explicit_pager and special.is_pager_enabled(): # doesn't fit, use pager output_via_pager = True if not output_via_pager: # doesn't fit, flush buffer for line in buf: click.secho(line) buf = [] else: click.secho(line) if buf: if output_via_pager: # sadly click.echo_via_pager doesn't accept generators click.echo_via_pager("\n".join(buf)) else: for line in buf: click.secho(line) if status: click.secho(status) def configure_pager(self): self.explicit_pager = False if not self.config['main'].as_bool('enable_pager'): special.disable_pager() def format_output(self, title, cur, headers, expanded=False, max_width=None): expanded = expanded or self.formatter.format_name == 'vertical' output = [] output_kwargs = { 'disable_numparse': 
True, 'preserve_whitespace': True, 'preprocessors': (preprocessors.align_decimals, ), 'style': self.output_style } if title: # Only print the title if it's not None. output = itertools.chain(output, [title]) if cur: column_types = None if hasattr(cur, 'description'): column_types = [str for col in cur.description] if max_width is not None: cur = list(cur) formatted = self.formatter.format_output( cur, headers, format_name='vertical' if expanded else None, column_types=column_types, **output_kwargs) if isinstance(formatted, str): formatted = formatted.splitlines() formatted = iter(formatted) first_line = next(formatted) formatted = itertools.chain([first_line], formatted) if (not expanded and max_width and headers and cur and len(first_line) > max_width): formatted = self.formatter.format_output( cur, headers, format_name='vertical', column_types=column_types, **output_kwargs) if isinstance(formatted, str): formatted = iter(formatted.splitlines()) output = itertools.chain(output, formatted) return output def echo(self, s, **kwargs): """Print a message to stdout. The message will be logged in the audit log, if enabled. All keyword arguments are passed to click.echo(). """ click.secho(s, **kwargs) def refresh_completions(self): with self._completer_lock: self.completer.reset_completions() completer_options = { 'smart_completion': True, 'supported_formats': self.formatter.supported_formats, 'keyword_casing': self.completer.keyword_casing } self.completion_refresher.refresh( self.sqlexecute, self._on_completions_refreshed, completer_options ) def _on_completions_refreshed(self, new_completer): """Swap the completer object in cli with the newly created completer. """ with self._completer_lock: self.completer = new_completer if self.prompt_app: # After refreshing, redraw the CLI to clear the statusbar # "Refreshing completions..." 
indicator self.prompt_app.app.invalidate() def _build_prompt_app(self, history): key_bindings = cli_bindings(self) def get_message(): prompt = self.get_prompt(self.prompt) if len(prompt) > self.MAX_LEN_PROMPT: prompt = self.get_prompt('\\r:\\d> ') return [('class:prompt', prompt)] def get_continuation(width, line_number, is_soft_wrap): continuation = ' ' * (width -1) + ' ' return [('class:continuation', continuation)] def show_suggestion_tip(): return self.iterations < 2 get_toolbar_tokens = create_toolbar_tokens_func( self, show_suggestion_tip) with self._completer_lock: if self.key_bindings == 'vi': editing_mode = EditingMode.VI else: editing_mode = EditingMode.EMACS self.prompt_app = PromptSession( lexer=PygmentsLexer(Lexer), reserve_space_for_menu=self.get_reserved_space(), message=get_message, prompt_continuation=get_continuation, bottom_toolbar=get_toolbar_tokens, complete_style=CompleteStyle.COLUMN, input_processors=[ConditionalProcessor( processor=HighlightMatchingBracketProcessor( chars='[](){}'), filter=HasFocus(DEFAULT_BUFFER) & ~IsDone() )], tempfile_suffix='.sql', completer=DynamicCompleter(lambda: self.completer), history=history, auto_suggest=AutoSuggestFromHistory(), complete_while_typing=True, multiline=cli_is_multiline(self), style=style_factory(self.syntax_style, self.cli_style), include_default_pygments_style=False, key_bindings=key_bindings, enable_open_in_editor=True, enable_system_prompt=True, editing_mode=editing_mode, search_ignore_case=True ) def get_prompt(self, string): sqlexecute = self.sqlexecute now = datetime.now() string = string.replace('\\r', sqlexecute.region_name or '(none)') string = string.replace('\\d', sqlexecute.database or '(none)') string = string.replace('\\n', "\n") string = string.replace('\\D', now.strftime('%a %b %d %H:%M:%S %Y')) string = string.replace('\\m', now.strftime('%M')) string = string.replace('\\P', now.strftime('%p')) string = string.replace('\\R', now.strftime('%H')) string = string.replace('\\s', 
now.strftime('%S')) return string def get_reserved_space(self): """Get the number of lines to reserve for the completion menu.""" reserved_space_ratio = .45 max_reserved_space = 8 _, height = shutil.get_terminal_size() return min(int(round(height * reserved_space_ratio)), max_reserved_space) def get_last_query(self): """Get the last query executed or None.""" return self.query_history[-1][0] if self.query_history else None def need_completion_refresh(queries): """Determines if the completion needs a refresh by checking if the sql statement is an alter, create, drop or change db.""" tokens = { 'use', '\\u', 'create', 'drop' } for query in sqlparse.split(queries): try: first_token = query.split()[0] if first_token.lower() in tokens: return True except Exception: return False def is_mutating(status): """Determines if the statement is mutating based on the status.""" if not status: return False mutating = set(['insert', 'update', 'delete', 'alter', 'create', 'drop', 'replace', 'truncate', 'load']) return status.split(None, 1)[0].lower() in mutating @click.command() @click.option('-e', '--execute', type=str, help='Execute a command (or a file) and quit.') @click.option('-r', '--region', type=str, help="AWS region.") @click.option('--aws-access-key-id', type=str, help="AWS access key id.") @click.option('--aws-secret-access-key', type=str, help="AWS secretaccess key.") @click.option('--aws-session-token', type=str, help="AWS session token.") @click.option('--s3-staging-dir', type=str, help="Amazon S3 staging directory where query results are stored.") @click.option('--work_group', type=str, help="Amazon Athena workgroup in which query is run, default is primary") @click.option('--athenaclirc', default=ATHENACLIRC, type=click.Path(dir_okay=False), help="Location of athenaclirc file.") @click.option('--profile', type=str, default=os.environ.get('AWS_PROFILE', 'default'), help='AWS profile') @click.option('--table-format', type=str, default='csv', help='Table format used 
with -e option.') @click.argument('database', default='default', nargs=1) def cli(execute, region, aws_access_key_id, aws_secret_access_key, aws_session_token, s3_staging_dir, work_group, athenaclirc, profile, table_format, database): '''A Athena terminal client with auto-completion and syntax highlighting. \b Examples: - athenacli - athenacli my_database ''' if (athenaclirc == ATHENACLIRC) and (not os.path.exists(os.path.expanduser(athenaclirc))): err_msg = ''' Welcome to athenacli! It seems this is your first time to run athenacli, we generated a default config file for you %s Please change it accordingly, and run athenacli again. ''' % athenaclirc print(err_msg) write_default_config(DEFAULT_CONFIG_FILE, athenaclirc) sys.exit(1) # Only set AWS_PROFILE if it was explicitly provided via CLI and differs from environment if profile != 'default' and profile != os.environ.get('AWS_PROFILE'): os.environ['AWS_PROFILE'] = profile athenacli = AthenaCli( region=region, aws_access_key_id=aws_access_key_id, aws_secret_access_key= aws_secret_access_key, aws_session_token=aws_session_token, s3_staging_dir=s3_staging_dir, work_group=work_group, athenaclirc=athenaclirc, profile=profile, database=database ) # --execute argument if execute: if execute == '-': if select.select([sys.stdin, ], [], [], 0.0)[0]: query = sys.stdin.read() else: raise RuntimeError("No query to execute on stdin") elif os.path.exists(execute): with open(execute) as f: query = f.read() else: query = execute try: athenacli.formatter.format_name = table_format athenacli.run_query(query) exit(0) except Exception as e: click.secho(str(e), err=True, fg='red') exit(1) athenacli.run_cli() if __name__ == '__main__': cli() ================================================ FILE: athenacli/packages/__init__.py ================================================ ================================================ FILE: athenacli/packages/completion_engine.py ================================================ import os import sys 
def suggest_type(full_text, text_before_cursor):
    """Takes the full_text that is typed so far and also the text before the
    cursor to suggest completion type and scope.

    Returns a tuple with a type of entity ('table', 'column' etc) and a scope.
    A scope for a column category will be a list of tables.

    :param full_text: str, the whole statement typed so far
    :param text_before_cursor: str, the portion of full_text up to the cursor
    :return: tuple of suggestion namedtuples (Keyword, Column, Table, ...)
    """
    word_before_cursor = last_word(text_before_cursor, include='many_punctuations')

    # Populated below when the partially typed word parses as a (possibly
    # schema-qualified) identifier; passed through to
    # suggest_based_on_last_token for scope resolution.
    identifier = None

    # here should be removed once sqlparse has been fixed
    try:
        # If we've partially typed a word then word_before_cursor won't be an empty
        # string. In that case we want to remove the partially typed string before
        # sending it to the sqlparser. Otherwise the last token will always be the
        # partially typed string which renders the smart completion useless because
        # it will always return the list of keywords as completion.
        if word_before_cursor:
            if word_before_cursor.endswith(
                    '(') or word_before_cursor.startswith('\\'):
                # An open paren or a special (backslash) command: keep the
                # text intact so the last token is meaningful.
                parsed = sqlparse.parse(text_before_cursor)
            else:
                parsed = sqlparse.parse(
                    text_before_cursor[:-len(word_before_cursor)])

                # word_before_cursor may include a schema qualification, like
                # "schema_name.partial_name" or "schema_name.", so parse it
                # separately
                p = sqlparse.parse(word_before_cursor)[0]

                if p.tokens and isinstance(p.tokens[0], Identifier):
                    identifier = p.tokens[0]
        else:
            parsed = sqlparse.parse(text_before_cursor)
    except (TypeError, AttributeError):
        # sqlparse choked on the fragment; fall back to plain keywords.
        return (Keyword(),)

    if len(parsed) > 1:
        # Multiple statements being edited -- isolate the current one by
        # cumulatively summing statement lengths to find the one that bounds the
        # current position
        current_pos = len(text_before_cursor)
        stmt_start, stmt_end = 0, 0

        for statement in parsed:
            stmt_len = len(str(statement))
            stmt_start, stmt_end = stmt_end, stmt_end + stmt_len

            if stmt_end >= current_pos:
                text_before_cursor = full_text[stmt_start:current_pos]
                full_text = full_text[stmt_start:]
                break
    elif parsed:
        # A single statement
        statement = parsed[0]
    else:
        # The empty string
        statement = None

    # Check for special commands and handle those separately
    if statement:
        # Be careful here because trivial whitespace is parsed as a statement,
        # but the statement won't have a first token
        tok1 = statement.token_first()
        if tok1 and tok1.value.startswith("\\"):
            return suggest_special(text_before_cursor)

    # token_prev returns (idx, token); [1] picks the token, '' when there is
    # no statement at all.
    last_token = statement and statement.token_prev(len(statement.tokens))[1] or ''

    return suggest_based_on_last_token(last_token, text_before_cursor, full_text, identifier)
def suggest_based_on_last_token(token, text_before_cursor, full_text, identifier):
    """Map the last significant token before the cursor to completion suggestions.

    :param token: either a plain keyword string or a sqlparse token
        (Comparison, Where, ...) -- the last token of the statement
    :param text_before_cursor: str, statement text up to the cursor
    :param full_text: str, the whole statement being edited
    :param identifier: optional sqlparse Identifier for the partially typed,
        possibly schema-qualified word under the cursor
    :return: an iterable (tuple or list) of suggestion namedtuples
        (Column, Table, View, Function, Keyword, ...)
    """
    if isinstance(token, str):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
        return suggest_based_on_last_token(prev_keyword, text_before_cursor,
                                           full_text, identifier)
    else:
        token_v = token.value.lower()

    is_operand = lambda x: x and any([x.endswith(op) for op in ['+', '-', '*', '/']])

    if not token:
        return (Keyword(), Special())
    elif token_v.endswith('('):
        p = sqlparse.parse(text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #        Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #        Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #        Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #        Suggest columns/functions AND keywords. (If we wanted to
            #        be really fancy, we could suggest only array-typed
            #        columns)
            column_suggestions = suggest_based_on_last_token(
                'where', text_before_cursor, full_text, identifier)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            idx, prev_tok = where.token_prev(len(where.tokens) - 1)

            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == 'exists':
                return (Keyword(),)
            else:
                return column_suggestions

        # Get the token before the parens
        idx, prev_tok = p.token_prev(len(p.tokens) - 1)
        if prev_tok and prev_tok.value and prev_tok.value.lower() == 'using':
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = extract_tables(full_text)
            # Suggest columns that are present in more than one table.
            # BUG FIX: this was `return (Column(...))` -- a bare namedtuple,
            # not a 1-tuple; iterating it yields the namedtuple's *fields*
            # instead of a Column suggestion, which breaks the completer.
            return (Column(tables=tables, drop_unique=True),)
        elif p.token_first().value.lower() == 'select':
            # If the lparen is preceeded by a space chances are we're about to
            # do a sub-select.
            if last_word(text_before_cursor, 'all_punctuations').startswith('('):
                return (Keyword(),)
        # We're probably in a function argument list
        return (Column(tables=extract_tables(full_text)),)
    elif token_v in ('set', 'by', 'distinct'):
        return (Column(tables=extract_tables(full_text)),)
    elif token_v == 'as':
        # Don't suggest anything for an alias
        return tuple()
    elif token_v in ('select', 'where', 'having'):
        # Check for a table alias or schema qualification
        parent = (identifier and identifier.get_parent_name()) or []
        tables = extract_tables(full_text)
        if parent:
            tables = [t for t in tables if identifies(parent, *t)]
            return (
                Column(tables=tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            )
        else:
            aliases = [alias or table for (schema, table, alias) in tables]
            return (
                Column(tables=tables),
                Function(schema=None),
                Alias(aliases=aliases),
                Keyword(token_v.upper()),
            )
    elif (token_v.endswith('join') and token.is_keyword) or (token_v in
            ('copy', 'from', 'update', 'into', 'describe', 'truncate',
             'desc', 'explain', 'partitions')):
        schema = (identifier and identifier.get_parent_name()) or None

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = [Table(schema=schema)]

        if not schema:
            # Suggest schemas
            suggest.insert(0, Schema())

        # Only tables can be TRUNCATED, otherwise suggest views
        if token_v != 'truncate':
            suggest.append(View(schema=schema))

        return suggest
    elif token_v in ('table', 'view', 'function', 'tblproperties'):
        # E.g. 'DROP FUNCTION <funcname>', 'ALTER TABLE <tablname>'
        rel_type = {
            'table': Table,
            'view': View,
            'function': Function,
            'tblproperties': Table,
        }[token_v]
        schema = (identifier and identifier.get_parent_name()) or None
        if schema:
            # BUG FIX: was `return (rel_type(schema=schema))` -- a bare
            # namedtuple, not a tuple of suggestions (missing trailing comma).
            return (rel_type(schema=schema),)
        else:
            return (Schema(), rel_type(schema=schema))
    elif token_v == 'on':
        tables = extract_tables(full_text)  # [(schema, table, alias), ...]
        parent = (identifier and identifier.get_parent_name()) or None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            tables = tuple(t for t in tables if identifies(parent, *t))
            return (
                Column(tables=tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent)
            )
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = tuple(alias or table for (schema, table, alias) in tables)
            suggest = [Alias(aliases=aliases)]
            # The lists of 'aliases' could be empty if we're trying to complete
            # a GRANT query. eg: GRANT SELECT, INSERT ON <tab>
            # In that case we just suggest all tables.
            if not aliases:
                suggest.append(Table(schema=parent))
            return suggest
    elif token_v in ('use', 'database', 'template', 'connect'):
        # "\c <db", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return (Database(),)
    elif token_v == 'tableformat':
        return (TableFormat(),)
    elif token_v.endswith(',') or is_operand(token_v) or token_v in ['=', 'and', 'or']:
        # Mid-expression: back up to the governing keyword and suggest for it.
        prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
        if prev_keyword:
            return suggest_based_on_last_token(
                prev_keyword, text_before_cursor, full_text, identifier)
        else:
            return tuple()
    else:
        # 'alter', 'create', 'drop', 'show' and any other keyword all fall
        # through to suggesting follow-up keywords.  (The previously separate
        # `elif token_v in {'alter', 'create', 'drop', 'show'}` branch was
        # byte-identical to this else branch and has been merged.)
        return (Keyword(token_v.upper()),)
def format_status(rows_length=None, cursor=None):
    """Build the status line shown after a query: row count plus statistics."""
    return rows_status(rows_length) + statistics(cursor)


def rows_status(rows_length):
    """Return a human-readable row-count summary.

    A falsy count (None or 0) is reported as a plain 'Query OK'.
    """
    if not rows_length:
        return 'Query OK'
    plural = '' if rows_length == 1 else 's'
    return '%d row%s in set' % (rows_length, plural)


def statistics(cursor):
    """Return execution time, data scanned and estimated cost for *cursor*.

    Returns '' when no cursor is available (e.g. a special command).
    """
    if not cursor:
        return ''
    # Most regions are $5 per TB: https://aws.amazon.com/athena/pricing/
    approx_cost = cursor.data_scanned_in_bytes / (1024 ** 4) * 5
    return '\nExecution time: %d ms, Data scanned: %s, Approximate cost: $%.2f' % (
        cursor.engine_execution_time_in_millis,
        humanize_size(cursor.data_scanned_in_bytes),
        approx_cost)


def humanize_size(num_bytes):
    """Format a byte count with a binary-scaled unit suffix (B ... TB).

    Trailing zeros of the two-decimal value are trimmed ('1.50' -> '1.5',
    '1.00' -> '1'); values of 1024 TB or more stay expressed in TB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = num_bytes
    chosen = units[0]
    for position, chosen in enumerate(units):
        # Stop scaling once the value fits under 1024 or we run out of units.
        if value < 1024 or position == len(units) - 1:
            break
        value /= 1024.0
    trimmed = ('%.2f' % value).rstrip('0').rstrip('.')
    return '%s %s' % (trimmed, chosen)
athenacli/packages/literals/literals.json ================================================ { "keywords": { "ALTER": [ "DATABASE", "SCHEMA", "TABLE" ], "CREATE": [ "DATABASE", "EXTERNAL", "TABLE", "VIEW" ], "EXTERNAL": ["TABLE"], "DESCRIBE": [ "TABLE", "VIEW" ], "DROP": [ "DATABASE", "TABLE", "VIEW" ], "MSCK": [ "REPAIR TABLE" ], "SHOW": [ "COLUMNS IN", "CREATE TABLE", "CREATE VIEW", "DATABASES", "SCHEMAS", "PARTITIONS", "TABLES", "TBLPROPERTIES", "VIEWS" ], "REPLACE": ["VIEW"], "WITH": [], "SELECT": [], "ALL": [], "DISTINCT": [], "FROM": [], "WHERE": [], "INNER": ["JOIN"], "OUTER": ["JOIN"], "CROSS": ["JOIN"], "LEFT": ["JOIN", "OUTER JOIN"], "RIGHT": ["JOIN", "OUTER JOIN"], "FULL": ["JOIN", "OUTER JOIN"], "JOIN": [], "ON": [], "USING": [], "GROUP BY": [], "HAVING": [], "UNION": [], "ORDER BY": [], "ASC": [], "DESC": [], "NULLS FIRST": [], "NULLS LAST": [], "LIMIT": [], "AND": [], "OR": [], "NOT": [], "CAST": [], "CASE": [], "WHEN": [], "THEN": [], "ELSE": [], "END": [], "JSON": [], "IF NOT EXISTS": [] }, "functions": [ "AVG", "CONCAT", "COUNT", "EVERY", "FIRST", "FORMAT", "LAST", "LCASE", "LEN", "MAX", "MIN", "MID", "NOW", "ROUND", "SUM", "TOP", "UCASE", "IF", "COALESCE", "NULLIF", "TRY", "CAST", "TRY_CAST", "TYPEOF", "ABS", "CEIL", "FLOOR", "LOG", "POW", "CONCAT", "LENGTH", "LOWER", "REPLACE", "UPPER", "TRIM", "SUBSTR", "NOW", "DAY", "YEAR", "WEEK", "REGEXP_EXTRACT_ALL", "REGEXP_EXTRACT", "REGEXP_LIKE", "REGEXP_REPLACE", "REGEXP_SPLIT", "URL_EXTRACT_PATH", "URL_EXTRACT_HOST", "URL_EXTRACT_PARAMETER", "URL_EXTRACT_QUERY", "MAP", "REDUCE", "FILTER", "TRANSFORM", "ZIP_WITH", "INDEX" ], "datatypes": [ "TINYINT", "SMALLINT", "INT", "BIGINT", "BINARY", "BOOLEAN", "DOUBLE", "FLOAT", "STRING", "TIMESTAMP", "DECIMAL", "DATE", "CHAR", "VARCHAR", "ARRAY", "MAP", "STRUCT" ] } ================================================ FILE: athenacli/packages/literals/main.py ================================================ import os import json ROOT = os.path.dirname(__file__) 
# End-anchored patterns used to pull the trailing "word" out of a fragment of
# input.  They differ only in which punctuation characters may be part of the
# word.
cleanup_regex = {
    # This matches only alphanumerics and underscores.
    'alphanum_underscore': re.compile(r'(\w+)$'),
    # This matches everything except spaces, parens, colon, and comma
    'many_punctuations': re.compile(r'([^():,\s]+)$'),
    # This matches everything except spaces, parens, colon, comma, and period
    'most_punctuations': re.compile(r'([^\.():,\s]+)$'),
    # This matches everything except a space.
    'all_punctuations': re.compile(r'([^\s]+)$'),
}


def last_word(text, include='alphanum_underscore'):
    """Return the last word of *text*, or '' if there is none.

    :param text: the input fragment to inspect
    :param include: key into ``cleanup_regex`` selecting which characters
        count as part of a word
    :return: the trailing word; '' for empty input, input ending in
        whitespace, or input whose tail matches no word character
    """
    # Empty input or a trailing space means the word is finished -- nothing
    # to complete.
    if not text or text[-1].isspace():
        return ''
    found = cleanup_regex[include].search(text)
    return found.group(0) if found else ''
def is_subselect(parsed):
    """Return True if *parsed* is a grouped token containing a DML keyword
    (SELECT/INSERT/UPDATE/CREATE/DELETE), i.e. a sub-statement."""
    if not parsed.is_group:
        return False
    for item in parsed.tokens:
        if item.ttype is DML and item.value.upper() in ('SELECT', 'INSERT',
                                                        'UPDATE', 'CREATE', 'DELETE'):
            return True
    return False


def extract_from_part(parsed, stop_at_punctuation=True):
    """Yield the tokens that make up the FROM/INTO/... part of *parsed*.

    Walks the statement's tokens, starts yielding once a table-introducing
    keyword (COPY/FROM/INTO/UPDATE/TABLE/JOIN) is seen, and recurses into
    sub-selects.

    :param parsed: a sqlparse Statement (or grouped token)
    :param stop_at_punctuation: stop at the first punctuation token after the
        table prefix (needed for INSERT column lists)
    """
    tbl_prefix_seen = False
    for item in parsed.tokens:
        if tbl_prefix_seen:
            if is_subselect(item):
                for x in extract_from_part(item, stop_at_punctuation):
                    yield x
            elif stop_at_punctuation and item.ttype is Punctuation:
                return
            # An incomplete nested select won't be recognized correctly as a
            # sub-select. eg: 'SELECT * FROM (SELECT id FROM user'. This causes
            # the second FROM to trigger this elif condition resulting in a
            # StopIteration. So we need to ignore the keyword if the keyword
            # FROM.
            # Also 'SELECT * FROM abc JOIN def' will trigger this elif
            # condition. So we need to ignore the keyword JOIN and its variants
            # INNER JOIN, FULL OUTER JOIN, etc.
            elif item.ttype is Keyword and (
                    not item.value.upper() == 'FROM') and (
                    not item.value.upper().endswith('JOIN')):
                return
            else:
                yield item
        elif ((item.ttype is Keyword or item.ttype is Keyword.DML) and
                item.value.upper() in ('COPY', 'FROM', 'INTO', 'UPDATE', 'TABLE', 'JOIN',)):
            tbl_prefix_seen = True
        # 'SELECT a, FROM abc' will detect FROM as part of the column list.
        # So this check here is necessary.
        elif isinstance(item, IdentifierList):
            for identifier in item.get_identifiers():
                if (identifier.ttype is Keyword and
                        identifier.value.upper() == 'FROM'):
                    tbl_prefix_seen = True
                    break


def extract_table_identifiers(token_stream):
    """yields tuples of (schema_name, table_name, table_alias)

    Consumes the token stream produced by extract_from_part and turns each
    identifier (or identifier list / function call) into a name triple.
    """
    for item in token_stream:
        if isinstance(item, IdentifierList):
            for identifier in item.get_identifiers():
                # Sometimes Keywords (such as FROM ) are classified as
                # identifiers which don't have the get_real_name() method.
                try:
                    schema_name = identifier.get_parent_name()
                    real_name = identifier.get_real_name()
                except AttributeError:
                    continue
                if real_name:
                    yield (schema_name, real_name, identifier.get_alias())
        elif isinstance(item, Identifier):
            real_name = item.get_real_name()
            schema_name = item.get_parent_name()
            if real_name:
                yield (schema_name, real_name, item.get_alias())
            else:
                # No parseable "real" name -- fall back to the raw name and
                # reuse it as its own alias.
                name = item.get_name()
                yield (None, name, item.get_alias() or name)
        elif isinstance(item, Function):
            # A function call in FROM position (e.g. table-generating
            # functions): use the function name for both table and alias.
            yield (None, item.get_name(), item.get_name())
def find_prev_keyword(sql):
    """ Find the last sql keyword in an SQL statement
    Returns the value of the last keyword, and the text of the query with
    everything after the last keyword stripped

    Logical operators (AND/OR/NOT/BETWEEN) are skipped because they do not
    change what kind of completion is appropriate; an open paren counts as a
    keyword boundary.
    """
    if not sql.strip():
        return None, ''

    parsed = sqlparse.parse(sql)[0]
    flattened = list(parsed.flatten())

    logical_operators = ('AND', 'OR', 'NOT', 'BETWEEN')

    for t in reversed(flattened):
        if t.value == '(' or (t.is_keyword and (
                t.value.upper() not in logical_operators)):
            # Find the location of token t in the original parsed statement
            # We can't use parsed.token_index(t) because t may be a child token
            # inside a TokenList, in which case token_index thows an error
            # Minimal example:
            #   p = sqlparse.parse('select * from foo where bar')
            #   t = list(p.flatten())[-3]  # The "Where" token
            #   p.token_index(t)  # Throws ValueError: not in list
            idx = flattened.index(t)

            # Combine the string values of all tokens in the original list
            # up to and including the target keyword token t, to produce a
            # query string with everything after the keyword token removed
            text = ''.join(tok.value for tok in flattened[:idx+1])
            return t, text

    return None, ''


def query_starts_with(query, prefixes):
    """Check if the query starts with any item from *prefixes*."""
    prefixes = [prefix.lower() for prefix in prefixes]
    formatted_sql = sqlparse.format(query.lower(), strip_comments=True)
    # NOTE(review): if formatted_sql were non-empty but all whitespace,
    # .split()[0] would raise IndexError -- presumably sqlparse.format never
    # yields that here; confirm.
    return bool(formatted_sql) and formatted_sql.split()[0] in prefixes


def queries_start_with(queries, prefixes):
    """Check if any queries start with any item from *prefixes*."""
    # sqlparse.split breaks a multi-statement string into individual queries.
    for query in sqlparse.split(queries):
        if query and query_starts_with(query, prefixes) is True:
            return True
    return False
def confirm_destructive_query(queries):
    """Check if the query is destructive and prompts the user to confirm.
    Returns:
    * None if the query is non-destructive or we can't prompt the user.
    * True if the query is destructive and the user wants to proceed.
    * False if the query is destructive and the user doesn't want to proceed.

    :param queries: str, one or more SQL statements
    """
    prompt_text = ("You're about to run a destructive command.\n"
                   "Do you want to proceed? (y/n)")
    # Only prompt when stdin is a TTY; in non-interactive use (piped input)
    # fall through and return None so the caller can decide what to do.
    if is_destructive(queries) and sys.stdin.isatty():
        return prompt(prompt_text, type=bool)


def confirm(*args, **kwargs):
    """Prompt for confirmation (yes/no) and handle any abort exceptions.

    Thin wrapper around click.confirm that maps Ctrl-C/Ctrl-D to False.
    """
    try:
        return click.confirm(*args, **kwargs)
    except click.Abort:
        return False


def prompt(*args, **kwargs):
    """Prompt the user for input and handle any abort exceptions.

    Thin wrapper around click.prompt that maps Ctrl-C/Ctrl-D to False.
    """
    try:
        return click.prompt(*args, **kwargs)
    except click.Abort:
        return False
class FavoriteQueries(object):
    """Store and retrieve named queries in the user's config file."""

    # Section of the config file that holds the saved queries.
    section_name = 'favorite_queries'

    usage = '''
Favorite Queries are a way to save frequently used queries
with a short name.
Examples:

    # Save a new favorite query.
    > \\fs simple select * from abc where a is not Null;

    # List all favorite queries.
    > \\f
    ╒════════╤═══════════════════════════════════════╕
    │ Name   │ Query                                 │
    ╞════════╪═══════════════════════════════════════╡
    │ simple │ SELECT * FROM abc where a is not NULL │
    ╘════════╧═══════════════════════════════════════╛

    # Run a favorite query.
    > \\f simple
    ╒════════╤════════╕
    │ a      │ b      │
    ╞════════╪════════╡
    │ 日本語 │ 日本語 │
    ╘════════╧════════╛

    # Delete a favorite query.
    > \\fd simple
    simple: Deleted
'''

    def __init__(self, config):
        # config is a dict-like object that also supports .write()
        # (presumably a ConfigObj instance -- see read_config_file).
        self.config = config

    def list(self):
        """Return the saved query names (the section's keys), or [] if none."""
        return self.config.get(self.section_name, [])

    def get(self, name):
        """Return the query saved under *name*, or None when it is unknown."""
        return self.config.get(self.section_name, {}).get(name, None)

    def save(self, name, query):
        """Persist *query* under *name* and write the config back to disk."""
        if self.section_name not in self.config:
            self.config[self.section_name] = {}
        self.config[self.section_name][name] = query
        self.config.write()

    def delete(self, name):
        """Remove the query saved under *name*; return a status message."""
        section = self.config.get(self.section_name, {})
        if name not in section:
            return '%s: Not Found.' % name
        del section[name]
        self.config.write()
        return '%s: Deleted' % name
@export
def editor_command(command):
    """
    Is this an external editor command?
    :param command: string
    :return: bool, True when the input starts or ends with ``\\e``
    """
    # It is possible to have `\e filename` or `SELECT * FROM \e`. So we check
    # for both conditions.
    return command.strip().endswith('\\e') or command.strip().startswith('\\e')


@export
def get_filename(sql):
    """Return the filename argument of a leading ``\\e`` command.

    :param sql: string typed by the user
    :return: the filename after ``\\e``, or None when no filename was given;
        implicitly returns None when *sql* is not an editor command at all.
    """
    if sql.strip().startswith('\\e'):
        command, _, filename = sql.partition(' ')
        return filename.strip() or None
@special_command('\\f', '\\f [name [args..]]', 'List or execute favorite queries.', arg_type=PARSED_QUERY, case_sensitive=True)
def execute_favorite_query(cur, arg, **_):
    """List favorite queries (no argument) or execute one by name.

    Yields (title, rows, headers, status) tuples.

    :param cur: database cursor used to run the favorite's SQL
    :param arg: '' to list favorites, otherwise 'name [args..]' where args are
        substituted for $1..$N placeholders in the saved query
    """
    if arg == '':
        for result in list_favorite_queries():
            yield result
        # BUG FIX: without this return the function fell through, partitioned
        # the empty arg and yielded a spurious "No favorite query: " message
        # right after the listing.
        return

    # Parse out favorite name and optional substitution parameters.
    # (Previously a bare string literal here -- a no-op expression statement,
    # not a docstring -- converted to a real comment.)
    name, _, arg_str = arg.partition(' ')
    args = shlex.split(arg_str)

    query = favoritequeries.get(name)
    if query is None:
        message = "No favorite query: %s" % (name)
        yield (None, None, None, message)
    else:
        query, arg_error = subst_favorite_query_args(query, args)
        if arg_error:
            yield (None, None, None, arg_error)
        else:
            # A favorite may contain several statements; run them one by one.
            for sql in sqlparse.split(query):
                _logger.debug("query is [%s]", sql)
                sql = sql.rstrip(';')
                title = '> %s' % (sql)
                cur.execute(sql)
                if cur.description:
                    headers = [x[0] for x in cur.description]
                    yield (title, cur.fetchall(), headers, None)
                else:
                    yield (title, None, None, None)
def subst_favorite_query_args(query, args):
    """replace positional parameters ($1...$N) in query.

    :param query: str, saved query possibly containing $1..$N placeholders
    :param args: list of str values to substitute, in order
    :return: two-element list -- [substituted_query, None] on success, or
        [None, error_message] when an argument has no matching placeholder or
        a placeholder is left unsubstituted.
    """
    for position, value in enumerate(args, start=1):
        placeholder = '$' + str(position)
        if placeholder not in query:
            # More arguments were supplied than the query has placeholders.
            return [None, 'query does not have substitution parameter ' + placeholder + ':\n ' + query]
        query = query.replace(placeholder, value)

    # Any $N still present means too few arguments were supplied.
    leftover = re.search(r'\$\d+', query)
    if leftover:
        return [None, 'missing substitution for ' + leftover.group(0) + ' in query:\n ' + query]

    return [query, None]
@special_command('\\fd', '\\fd [name]', 'Delete a favorite query.')
def delete_favorite_query(arg, **_):
    """Delete an existing favorite query.

    Returns a single (title, rows, headers, status) tuple whose status
    carries either the usage text or the delete result.
    """
    usage = 'Syntax: \\fd name.\n\n' + favoritequeries.usage
    if not arg:
        # No name given -- show how to call this command.
        return [(None, None, None, usage)]

    return [(None, None, None, favoritequeries.delete(arg))]
def parseargfile(arg):
    """Parse a tee/once argument into keyword arguments for open().

    A leading '-o ' selects overwrite mode ('w'); otherwise the file is
    opened for appending ('a').  Raises TypeError when no filename remains.
    """
    if arg.startswith('-o '):
        mode, filename = 'w', arg[3:]
    else:
        mode, filename = 'a', arg

    if not filename:
        raise TypeError('You must provide a filename.')

    return {'file': os.path.expanduser(filename), 'mode': mode}
def watch_query(arg, **kwargs):
    """Repeatedly execute a query, yielding its results each iteration.

    Parses optional leading arguments (a float interval and/or -c) before
    the statement proper, then loops forever until Ctrl-C.
    Yields (title, rows, headers, status) tuples.
    """
    usage = """Syntax: watch [seconds] [-c] query.
    * seconds: The interval at the query will be repeated, in seconds.
               By default 5.
    * -c: Clears the screen between every iteration.
"""
    if not arg:
        yield (None, None, None, usage)
        return

    seconds = 5
    clear_screen = False
    statement = None

    # Consume leading words that are options; the first non-option word
    # starts the statement and takes the rest of the string with it.
    while statement is None:
        arg = arg.strip()
        if not arg:
            # Oops, we parsed all the arguments without finding a statement
            yield (None, None, None, usage)
            return
        (current_arg, _, arg) = arg.partition(' ')
        try:
            seconds = float(current_arg)
            continue
        except ValueError:
            pass
        if current_arg == '-c':
            clear_screen = True
            continue
        statement = '{0!s} {1!s}'.format(current_arg, arg)

    # Guard against accidentally looping a destructive statement.
    destructive_prompt = confirm_destructive_query(statement)
    if destructive_prompt is False:
        click.secho("Wise choice!")
        return
    elif destructive_prompt is True:
        click.secho("Your call!")

    cur = kwargs['cur']
    # Pre-split into (sql, title) pairs so each iteration is cheap.
    sql_list = [
        (sql.rstrip(';'), "> {0!s}".format(sql))
        for sql in sqlparse.split(statement)
    ]

    old_pager_enabled = is_pager_enabled()
    while True:
        if clear_screen:
            click.clear()
        try:
            # Somewhere in the code the pager its activated after every yield,
            # so we disable it in every iteration
            set_pager_enabled(False)
            for (sql, title) in sql_list:
                cur.execute(sql)
                if cur.description:
                    headers = [x[0] for x in cur.description]
                    yield (title, cur.fetchall(), headers, None)
                else:
                    yield (title, None, None, None)
            sleep(seconds)
        except KeyboardInterrupt:
            # This prints the Ctrl-C character in its own line, which prevents
            # to print a line with the cursor positioned behind the prompt
            click.secho("", nl=True)
            return
        finally:
            # Restore whatever pager setting was active before `watch`.
            set_pager_enabled(old_pager_enabled)
@export
def parse_special_command(sql):
    """Split *sql* into (command, verbose, argument).

    The command is the first word; a '+' anywhere in it marks verbose
    mode and is stripped from the returned name.
    """
    head, _, tail = sql.partition(' ')
    is_verbose = '+' in head
    name = head.strip().replace('+', '')
    return (name, is_verbose, tail.strip())
@special_command('help', '\\?', 'Show this help.', arg_type=NO_QUERY, aliases=('\\?', '?'))
def show_help():
    # All the parameters are ignored.
    """Return a table describing every non-hidden special command.

    Returns a single (title, rows, headers, status) tuple.
    """
    headers = ['Command', 'Shortcut', 'Description']
    rows = [
        (cmd.command, cmd.shortcut, cmd.description)
        for _, cmd in sorted(COMMANDS.items())
        if not cmd.hidden
    ]
    return [(None, rows, headers, None)]
def format_uptime(uptime_in_seconds):
    """Format number of seconds into human-readable string.

    :param uptime_in_seconds: The server uptime in seconds.
    :returns: A human-readable string representing the uptime.

    >>> uptime = format_uptime('56892')
    >>> print(uptime)
    15 hours 48 min 12 sec
    """
    minutes, seconds = divmod(int(uptime_in_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)

    parts = []
    for amount, unit in ((days, 'days'), (hours, 'hours'),
                         (minutes, 'min'), (seconds, 'sec')):
        if amount == 0 and not parts:
            # Skip leading zero-valued units so we never emit
            # "0 days 0 hours 1 min 30 sec".
            continue
        if amount == 1 and unit.endswith('s'):
            # Use the singular form: "1 day", not "1 days".
            unit = unit[:-1]
        parts.append('{0} {1}'.format(amount, unit))

    return ' '.join(parts)
class SQLExecute(object):
    """Owns the PyAthena connection and runs SQL for the REPL.

    Provides `run` for executing user statements and the
    databases/tables/table_columns helpers used by the auto-completer.
    """

    # Queries issued to populate the completer's metadata.
    DATABASES_QUERY = 'SHOW DATABASES'
    TABLES_QUERY = 'SHOW TABLES'
    TABLE_COLUMNS_QUERY = '''
        SELECT table_name, column_name FROM information_schema.columns
        WHERE table_schema = '%s'
        ORDER BY table_name, ordinal_position
    '''

    def __init__(self, aws_access_key_id, aws_secret_access_key,
                 aws_session_token, region_name, s3_staging_dir, work_group,
                 role_arn, database):
        """Store connection settings and open the initial connection.

        :param database: database name, optionally given as
            'catalog.database' to also select an Athena data catalog.
        """
        # Handle database parameter that may contain catalog.database format
        if database and '.' in database:
            catalog_name, database = database.split('.', 1)
        else:
            catalog_name = None

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.aws_session_token = aws_session_token
        self.region_name = region_name
        self.s3_staging_dir = s3_staging_dir
        self.work_group = work_group
        self.role_arn = role_arn
        self.database = database
        # 'AwsDataCatalog' is used when no catalog prefix was supplied.
        self.catalog_name = catalog_name or 'AwsDataCatalog'
        self.connect()

    def connect(self, database=None):
        """(Re)connect, optionally switching to *database*.

        :param database: database name, optionally 'catalog.database';
            falls back to the currently stored database/catalog.
        """
        # Handle database parameter that may contain catalog.database format
        if database and '.' in database:
            catalog_name, database = database.split('.', 1)
        else:
            catalog_name = None

        conn = pyathena.connect(
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            aws_session_token=self.aws_session_token,
            region_name=self.region_name,
            s3_staging_dir=self.s3_staging_dir,
            work_group=self.work_group,
            schema_name=database or self.database,
            role_arn=self.role_arn,
            poll_interval=0.2,  # 200ms
            catalog_name=catalog_name or self.catalog_name
        )
        self.database = database or self.database
        # NOTE(review): self.catalog_name is not updated when a new catalog
        # prefix is passed here, so later reconnects reuse the original
        # catalog -- confirm this is intended.
        if hasattr(self, 'conn'):
            # Close the previous connection; absent on the very first call
            # from __init__.
            self.conn.close()
        self.conn = conn

    def run(self, statement):
        '''Execute the sql in the database and return the results.

        The results are a list of tuples. Each tuple has 4 values
        (title, rows, headers, status).
        '''
        # Remove spaces and EOL
        statement = statement.strip()
        if not statement:  # Empty string
            yield (None, None, None, None)

        # Split the sql into separate queries and run each one.
        components = sqlparse.split(statement)

        for sql in components:
            # Remove spaces, eol and semi-colons.
            sql = sql.rstrip(';')

            # \G is treated specially since we have to set the expanded output.
            if sql.endswith('\\G'):
                special.set_expanded_output(True)
                sql = sql[:-2].strip()

            cur = self.conn.cursor()
            try:
                # Special commands (\f, tee, watch, ...) are tried first.
                for result in special.execute(cur, sql):
                    yield result
            except special.CommandNotFound:
                # Regular SQL
                cur.execute(sql)
                yield self.get_result(cur)

    def get_result(self, cursor):
        '''Get the current result's data from the cursor.'''
        title = headers = None

        # Record the S3 output location so the `download` special command
        # can fetch the result CSV of the last query.
        special.set_output_location(cursor.output_location)

        # cursor.description is not None for queries that return result sets,
        # e.g. SELECT or SHOW.
        if cursor.description is not None:
            headers = [x[0] for x in cursor.description]
            rows = cursor.fetchall()
            status = format_status(rows_length=len(rows), cursor=cursor)
        else:
            logger.debug('No rows in result.')
            rows = None
            status = format_status(rows_length=None, cursor=cursor)

        return (title, rows, headers, status)

    def tables(self):
        '''Yields table names.'''
        with self.conn.cursor() as cur:
            cur.execute(self.TABLES_QUERY)
            for row in cur:
                yield row

    def table_columns(self):
        '''Yields column names.'''
        with self.conn.cursor() as cur:
            cur.execute(self.TABLE_COLUMNS_QUERY % self.database)
            for row in cur:
                yield row

    def databases(self):
        # Returns the list of database names in the current catalog.
        with self.conn.cursor() as cur:
            cur.execute(self.DATABASES_QUERY)
            return [x[0] for x in cur.fetchall()]
* update cursor.execution_time_in_millis to cursor.engine_execution_time_in_millis as the library PyAthena removed execution_time_in_millis
(Thanks: @pdpark) Internal: ---------- * deprecate python versions 2.7, 3.4, 3.5 (Thanks: @zzl0) 1.3.0 ======== Features ---------- * Show query execution statistics, such as the amount of data scanned and the approximate cost. (Thanks: @pgr0ss) 1.2.0 ======== Features ---------- * Add a download command to fetch query results to a local CSV. (Thanks: @pgr0ss) 1.1.3 ======== Features ---------- * Add auto-complete support for `JOIN` and related keywords. (Thanks: @getaaron) Bugfix ---------- * Fix bug when completing `ON parent.` clauses. (Thanks: @pgr0ss) 1.1.2 ======== Internal ----------- * Require prompt_toolkit>=2.0.6. (Thanks: @zzl0) 0.1.4 ======== Bugfix ---------- * `distinct` keyword cause an unexpected exception. (Thanks: @zzl0) 0.1.3 ======== Features ---------- * Add error message for missing configuration (Thanks: @jashgala) * Add colors and pager to config file (Thanks: @zzl0) Internal ---------- * Updated docs (Thanks: @jashgala) * Add support for pipenv (Thanks: @Hourann) * Set poll_interval of PyAthena to 0.2s, this will reduce the response time (Thanks: @zzl0) * Add developer guide (Thanks: @zzl0) 0.1.2 ======== Features ---------- * Support default credentials and configurations of aws cli (Thanks: [Zhaolong Zhu]) * Support multiple named profiles in addition to a default profile of AWS configurations (Thanks: [Zhaolong Zhu]) * Note: this feature changes the format of athenaclirc, it's incompatible with the old one. Internal ---------- * Add link of `python-prompt-toolkit` and fix some sentences (Thanks: [Joe Block]) 0.1.1 ======== First public release! ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". 
* **Note:** *Whether or not a particular value will be used from a given option above depends on the truthiness of the values. e.g. if the `aws_access_key_id` field is present in the AthenaCLI config file, but its value is empty, it will not be considered (since the truthiness of an empty string is False) and the program will try to resolve to the next available option.*
For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # import os # import sys # sys.path.insert(0, os.path.abspath('.')) # -- Project information ----------------------------------------------------- project = 'AthenaCLI' copyright = '2018, Zhaolong Zhu' author = 'Zhaolong Zhu' # The short X.Y version version = '' # The full version, including alpha/beta/rc tags release = '' # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'AthenaCLIdoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'AthenaCLI.tex', 'AthenaCLI Documentation', 'Zhaolong Zhu', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. 
List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'athenacli', 'AthenaCLI Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'AthenaCLI', 'AthenaCLI Documentation', author, 'AthenaCLI', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] ================================================ FILE: docs/develop.rst ================================================ Development Guide =================== This is a guide for developers who would like to contribute to this project. Fork this project ------------------- Firstly, You need to fork this project and clone your fork into your computer. .. code-block:: bash $ git clone Local setup -------------- The installation instructions in the README file are intended for users of athenacli. If you're developing athenacli, you'll need to install it in a slightly different way so you can see the effects of your changes right away without having to go through the install cycle everytime you change the code. It is highly recommended to use virtualenv for development. If you don't know what a virtualenv is, `this guide `_ will help you get started. Create a virtualenv (let's call it athenacli-dev): .. code-block:: bash $ virtualenv athenacli-dev Activate it: .. 
code-block:: bash $ source ./athenacli-dev/bin/activate Once the virtualenv is activated, cd into the local clone of athenacli folder and install athenacli using pip as follows: .. code-block:: bash $ pip install -e . This will install the necessary dependencies as well as install athenacli from the working folder into a virtualenv. Athenacli is installed in an editable way, so any changes made to the code is immediately available in the installed version of athenacli. This makes it easy to change something in the code, launch athenacli and check the effects of your change. Running the tests ------------------ Currently we don't have enough tests for athenacli, because we haven't found an easy way to test AWS Athena locally, we have an `issue `_ track this problem. But we do have some unit tests for other parts, below are the steps to run them. First, install the requirements for testing: .. code-block:: bash $ pip install -r requirements-dev.txt After that, tests can be run with: .. code-block:: bash $ pytest Create a pull request ------------------------ After making the changes and creating the commits in your local machine. Then push those changes to your fork. Then click on the pull request icon on github and create a new pull request. Add a description about the change and send it along. I promise to review the pull request in a reasonable window of time and get back to you. In order to keep your fork up to date with any changes from mainline, add a new git remote to your local copy called 'upstream' and point it to the main athenacli repo. .. code-block:: bash $ git remote add upstream https://github.com/dbcli/athenacli.git Once the 'upstream' end point is added you can then periodically do a `git rebase `_ to update your local copy. ================================================ FILE: docs/faq.rst ================================================ FAQs ====== How can I get support for athenacli? 
--------------------------------------- There is `Gitter chat `_. We also track our bugs and feature requests in Github Issues for this project. I found a bug, what do I do? --------------------------------------- I'm sorry you encountered a bug. Please file a bug on Github Issues for this project. I'll fix it ASAP. I have a feature request, what do I do? ----------------------------------------- Sweet! Open a new item on Github Issues. Alternatively, you can take a stab at implementing the new feature yourself. If you'd like some guidance, I'm just an email away. Don't hesitate to contact me. ================================================ FILE: docs/features.rst ================================================ Features ============ Auto completion ----------------- Simple completions such as keywords and sql-functions. .. figure:: _static/screenshots/simple_auto_completion.png :align: center Smart completion ----------------- Smart completion will suggest context-sensitive completion. .. figure:: _static/screenshots/smart_auto_completion.png :align: center Alias support --------------- Column completions will work even when table names are aliased. .. figure:: _static/screenshots/alias.png :align: center Syntax highlighting --------------------- Syntax highlighting for sql. .. figure:: _static/screenshots/syntax_highlight.png :align: center Multiline queries -------------------- Support for multiline queries. .. figure:: _static/screenshots/multiline.png :align: center Pager ---------- Output of a SQL command is automatically piped through the less command. .. figure:: _static/screenshots/pager.png :align: center Favorite queries -------------------- Save a query using \fs alias query and execute it with \f alias whenever you need. .. figure:: _static/screenshots/favorite_query.png :align: center Various table formats ---------------------- Supports various table formats, e.g. ascii, csv, html, etc. ..
figure:: _static/screenshots/table_format.png :align: center ================================================ FILE: docs/index.rst ================================================ AthenaCLI ========== AthenaCLI is a command line interface (CLI) for `Athena `_ service that can do auto-completion and syntax highlighting, and is a proud member of the dbcli community. .. figure:: _static/gif/athenacli.gif :align: center * Source: https://github.com/dbcli/athenacli Quick Start ============= Install ------------- .. code-block:: bash $ pip install athenacli You can refer to the :doc:`../install` page for more options. Config ------------ A config file is automatically created at ~/.athenacli/athenaclirc at first launch (run `athenacli`). See the file itself for a description of all available options. The 4 variables below are required. .. code-block:: text # If you are a user of aws cli, you might want to use some configurations of aws cli, # please refer to https://athenacli.readthedocs.io/en/latest/awsconfig.html for more information. aws_access_key_id = '' aws_secret_access_key = '' region = '' # e.g us-west-2, us-east-1 # Amazon S3 staging directory where query results are stored. # NOTE: S3 should be in the same region as specified above. # The format is 's3://' s3_staging_dir = '' # Name of athena workgroup that you want to use work_group = '' # e.g. primary Create a table --------------- .. code-block:: bash $ athenacli -e examples/create_table.sql You can find `examples/create_table.sql` `here `_. Run a query -------------- .. code-block:: bash $ athenacli -e 'select elb_name, request_ip from elb_logs LIMIT 10' REPL ------------- .. code-block:: bash $ athenacli [] Table of Contents ----------------- ..
toctree:: features install awsconfig usage develop faq ================================================ FILE: docs/install.rst ================================================ Install ============ Pip ---------------- If you already know how to install python packages, then you can do: .. code-block:: bash $ pip install athenacli You might need sudo, or you can install it in a virtualenv. Docker --------- If you already know how to use docker, then you can do: .. code-block:: bash $ docker run --rm -ti -v $(pwd):/home/athena zzl0/athenacli athenacli Note: we map the home directory (`/home/athena`) of the docker container to the current directory, `athenacli` will create a config file in it (`.athenacli/athenaclirc`), you might need to change some variables (please refer to `quick start` section of :doc:`index` page). MacOS --------- For MacOS users, you can also use Homebrew to install it: .. code-block:: bash $ brew install athenacli ================================================ FILE: docs/usage.rst ================================================ Usages ================ Options ------------- .. code-block:: bash $ athenacli --help Usage: athenacli [OPTIONS] [DATABASE] An Athena terminal client with auto-completion and syntax highlighting. Examples: - athenacli - athenacli my_database Options: -e, --execute TEXT Execute a command (or a file) and quit. -r, --region TEXT AWS region. --aws-access-key-id TEXT AWS access key id. --aws-secret-access-key TEXT AWS secret access key. --aws-session-token TEXT AWS session token. --s3-staging-dir TEXT Amazon S3 staging directory where query results are stored. --work_group TEXT Amazon Athena workgroup in which query is run, default is primary --athenaclirc FILE Location of athenaclirc file. --profile TEXT AWS profile --table-format TEXT Table format used with -e option. --help Show this message and exit.
Connect to a database ------------------------ Connect to a specific database with AWS credentials, region name and S3 staging directory or work group. AWS credentials, region name and S3 staging directory are optional. You can set those variables in the `athenaclirc` config file, and then run the command below. .. code-block:: bash $ athenacli ddbtablestats Exit athenacli ------------------ Press `ctrl+d` or type `quit` or `exit`. Special Commands -------------------- Save 'SELECT user_id, tweet_id from twitterfeed LIMIT 2' as a favorite query called 'q1': .. code-block:: bash > \fs q1 SELECT user_id, tweet_id from twitterfeed LIMIT 2 Run the named query: .. code-block:: bash > \f q1 Execute a command (or a file) --------------------------------- Execute a command and quit: .. code-block:: bash $ athenacli -e 'show databases' Execute a file and quit: .. code-block:: bash $ athenacli -e examples/create_table.sql ================================================ FILE: examples/create_table.sql ================================================ CREATE EXTERNAL TABLE `elb_logs`( `request_timestamp` string COMMENT '', `elb_name` string COMMENT '', `request_ip` string COMMENT '', `request_port` int COMMENT '', `backend_ip` string COMMENT '', `backend_port` int COMMENT '', `request_processing_time` double COMMENT '', `backend_processing_time` double COMMENT '', `client_response_time` double COMMENT '', `elb_response_code` string COMMENT '', `backend_response_code` string COMMENT '', `received_bytes` bigint COMMENT '', `sent_bytes` bigint COMMENT '', `request_verb` string COMMENT '', `url` string COMMENT '', `protocol` string COMMENT '', `user_agent` string COMMENT '', `ssl_cipher` string COMMENT '', `ssl_protocol` string COMMENT '') ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' WITH SERDEPROPERTIES ( 'input.regex'='([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\"
(\"[^\"]*\") ([A-Z0-9-]+) ([A-Za-z0-9.-]*)$') STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 's3://athena-examples-us-west-2/elb/plaintext' TBLPROPERTIES ( 'transient_lastDdlTime'='1528512629') ================================================ FILE: release.py ================================================ #!/usr/bin/env python """A script to publish a release of athenacli to PyPI.""" from optparse import OptionParser import re import subprocess import sys import click DEBUG = False CONFIRM_STEPS = False DRY_RUN = False def skip_step(): """ Asks for user's response whether to run a step. Default is yes. :return: boolean """ global CONFIRM_STEPS if CONFIRM_STEPS: return not click.confirm('--- Run this step?', default=True) return False def run_step(*args, prompt=None): """ Prints out the command and asks if it should be run. If yes (default), runs it. :param args: list of strings (command and args) """ global DRY_RUN cmd = args print(' '.join(cmd)) if skip_step(): print('--- Skipping...') elif DRY_RUN: print('--- Pretending to run...') else: if prompt: print(prompt) subprocess.check_output(cmd) def version(version_file): _version_re = re.compile( r'__version__\s+=\s+(?P[\'"])(?P.*)(?P=quote)') with open(version_file) as f: ver = _version_re.search(f.read()).group('version') return ver def commit_for_release(version_file, ver): run_step('git', 'add', version_file) run_step('git', 'commit', '--message', 'Releasing version {}'.format(ver)) def create_git_tag(tag_name): run_step('git', 'tag', tag_name) def clear_distribution_files(): run_step('rm', '-r', 'dist') def create_distribution_files(): run_step('python', 'setup.py', 'sdist', 'bdist_wheel') def upload_distribution_files(): run_step('twine', 'upload', 'dist/*', prompt="Enter your username:") def push_to_github(): run_step('git', 'push', 'origin', 'main') def push_tags_to_github(): run_step('git', 'push', 
'--tags', 'origin') def checklist(questions): for question in questions: if not click.confirm('--- {}'.format(question), default=False): sys.exit(1) if __name__ == '__main__': if DEBUG: subprocess.check_output = lambda x: x parser = OptionParser() parser.add_option( "-c", "--confirm-steps", action="store_true", dest="confirm_steps", default=False, help=("Confirm every step. If the step is not " "confirmed, it will be skipped.") ) parser.add_option( "-d", "--dry-run", action="store_true", dest="dry_run", default=False, help="Print out, but not actually run any steps." ) popts, pargs = parser.parse_args() CONFIRM_STEPS = popts.confirm_steps DRY_RUN = popts.dry_run checks = [ 'Have you updated the AUTHORS file?', 'Have you updated the `Usage` section of the README?', ] checklist(checks) ver = version('athenacli/__init__.py') print('Releasing Version:', ver) if not click.confirm('Are you sure?', default=False): sys.exit(1) commit_for_release('athenacli/__init__.py', ver) create_git_tag('v{}'.format(ver)) clear_distribution_files() create_distribution_files() push_to_github() push_tags_to_github() upload_distribution_files() ================================================ FILE: requirements-dev.txt ================================================ mock >= 1.0.1 pytest >= 4.6.5 tox >= 2.7.0 twine >= 1.11.0 click >= 4.1 sphinx wheel ================================================ FILE: setup.cfg ================================================ [bdist_wheel] universal = 1 ================================================ FILE: setup.py ================================================ #!/usr/bin/env python import re import ast from setuptools import setup, find_packages _version_re = re.compile(r'__version__\s+=\s+(.*)') with open('athenacli/__init__.py') as f: version = ast.literal_eval( _version_re.search(f.read()).group(1) ) description = 'CLI for Athena Database. With auto-completion and syntax highlighting.' 
with open("README.md", "r") as fh: long_description = fh.read() install_requirements = [ 'click>=7.0', 'Pygments>=1.6,<=2.11.1', "prompt_toolkit>=3.0.3,<4.0.0", 'sqlparse>=0.3.0,<0.5.0', 'configobj>=5.0.5', 'cli_helpers[styles]>=1.1.0', 'botocore>=1.5.52', 'boto3>=1.4.4', 'PyAthena>=1.2.2', ] setup( name='athenacli', author='athenacli Core Team', author_email="athenacli@googlegroups.com", version=version, packages=find_packages(), package_data={ 'athenacli': [ 'athenaclirc', 'packages/literals/literals.json' ] }, description=description, long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/dbcli/athenacli", install_requires=install_requirements, entry_points={ 'console_scripts': ['athenacli = athenacli.main:cli'], }, classifiers=[ 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Operating System :: Unix', 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: SQL', 'Topic :: Database', 'Topic :: Database :: Front-Ends', 'Topic :: Software Development', 'Topic :: Software Development :: Libraries :: Python Modules', ], ) ================================================ FILE: test/test_completion_engine.py ================================================ import os import pytest from athenacli.packages.completion_engine import ( suggest_type, Column, Function, Alias, Keyword, Table, View ) def sorted_dicts(dicts): """input is a list of dicts.""" return sorted(tuple(x.items()) for x in dicts) def test_select_suggests_cols_with_visible_table_scope(): suggestions = suggest_type('SELECT FROM tabl', 'SELECT ') assert suggestions == ( Column(tables=[(None, 'tabl', None)], drop_unique=None), Function(schema=None, filter=None), Alias(aliases=['tabl']), Keyword(last_token='SELECT')) def 
test_select_suggests_cols_with_qualified_table_scope(): suggestions = suggest_type('SELECT FROM sch.tabl', 'SELECT ') assert suggestions == ( Column(tables=[('sch', 'tabl', None)], drop_unique=None), Function(schema=None, filter=None), Alias(aliases=['tabl']), Keyword(last_token='SELECT')) def test_join_suggests_cols_with_qualified_table_scope(): expression = 'SELECT * FROM tabl a JOIN tabl b on a.' suggestions = suggest_type(expression, expression) assert suggestions == ( Column(tables=((None, 'tabl', 'a'),), drop_unique=None), Table(schema='a'), View(schema='a'), Function(schema='a', filter=None)) @pytest.mark.parametrize('expression', [ 'SELECT * FROM tabl WHERE ', 'SELECT * FROM tabl WHERE (', 'SELECT * FROM tabl WHERE foo = ', 'SELECT * FROM tabl WHERE bar OR ', 'SELECT * FROM tabl WHERE foo = 1 AND ', 'SELECT * FROM tabl WHERE (bar > 10 AND ', 'SELECT * FROM tabl WHERE (bar AND (baz OR (qux AND (', 'SELECT * FROM tabl WHERE 10 < ', 'SELECT * FROM tabl WHERE foo BETWEEN ', 'SELECT * FROM tabl WHERE foo BETWEEN foo AND ', ]) def test_where_suggests_columns_functions(expression): suggestions = suggest_type(expression, expression) assert suggestions == ( Column(tables=[(None, 'tabl', None)], drop_unique=None), Function(schema=None, filter=None), Alias(aliases=['tabl']), Keyword(last_token='WHERE')) ================================================ FILE: test/test_completion_refresher.py ================================================ import time import pytest from mock import Mock, patch @pytest.fixture def refresher(): from athenacli.completion_refresher import CompletionRefresher return CompletionRefresher() def test_ctor(refresher): """Refresher object should contain a few handlers. 
:param refresher: :return: """ assert len(refresher.refreshers) > 0 actual_handlers = list(refresher.refreshers.keys()) expected_handlers = ['databases', 'schemata', 'tables', 'special_commands'] assert expected_handlers == actual_handlers def test_refresh_called_once(refresher): """ :param refresher: :return: """ callbacks = Mock() sqlexecute = Mock() with patch.object(refresher, '_bg_refresh') as bg_refresh: actual = refresher.refresh(sqlexecute, callbacks) time.sleep(1) # Wait for the thread to work. assert len(actual) == 1 assert len(actual[0]) == 4 assert actual[0][3] == 'Auto-completion refresh started in the background.' bg_refresh.assert_called_with(sqlexecute, callbacks, {}) def test_refresh_called_twice(refresher): """If refresh is called a second time, it should be restarted. :param refresher: :return: """ callbacks = Mock() sqlexecute = Mock() def dummy_bg_refresh(*args): time.sleep(3) # seconds refresher._bg_refresh = dummy_bg_refresh actual1 = refresher.refresh(sqlexecute, callbacks) time.sleep(1) # Wait for the thread to work. assert len(actual1) == 1 assert len(actual1[0]) == 4 assert actual1[0][3] == 'Auto-completion refresh started in the background.' actual2 = refresher.refresh(sqlexecute, callbacks) time.sleep(1) # Wait for the thread to work. assert len(actual2) == 1 assert len(actual2[0]) == 4 assert actual2[0][3] == 'Auto-completion refresh restarted.' def test_refresh_with_callbacks(refresher): """Callbacks must be called. :param refresher: """ callbacks = [Mock()] sqlexecute_class = Mock() sqlexecute = Mock() with patch('athenacli.completion_refresher.SQLExecute', sqlexecute_class): # Set refreshers to 0: we're not testing refresh logic here refresher.refreshers = {} refresher.refresh(sqlexecute, callbacks) time.sleep(1) # Wait for the thread to work. 
assert (callbacks[0].call_count == 1) ================================================ FILE: test/test_dbspecial.py ================================================ from athenacli.packages.special.utils import format_uptime from athenacli.packages.completion_engine import ( suggest_type, Database, View, Schema, Table ) def test_u_suggests_databases(): suggestions = suggest_type('\\u ', '\\u ') assert suggestions == (Database(),) def test_describe_table(): suggestions = suggest_type('\\dt', '\\dt ') assert suggestions == (Table(schema=None), View(schema=None), Schema()) def test_list_or_show_create_tables(): suggestions = suggest_type('\\dt+', '\\dt+ ') assert suggestions == (Table(schema=None), View(schema=None), Schema()) def test_format_uptime(): seconds = 59 assert '59 sec' == format_uptime(seconds) seconds = 120 assert '2 min 0 sec' == format_uptime(seconds) seconds = 54890 assert '15 hours 14 min 50 sec' == format_uptime(seconds) seconds = 598244 assert '6 days 22 hours 10 min 44 sec' == format_uptime(seconds) seconds = 522600 assert '6 days 1 hour 10 min 0 sec' == format_uptime(seconds) ================================================ FILE: test/test_format_utils.py ================================================ # -*- coding: utf-8 -*- from collections import namedtuple from athenacli.packages.format_utils import format_status, humanize_size def test_format_status_plural(): assert format_status(rows_length=1) == "1 row in set" assert format_status(rows_length=2) == "2 rows in set" def test_format_status_no_results(): assert format_status(rows_length=None) == "Query OK" def test_format_status_with_stats(): FakeCursor = namedtuple("FakeCursor", ["engine_execution_time_in_millis", "data_scanned_in_bytes"]) assert format_status(rows_length=1, cursor=FakeCursor(10, 12345678900)) == "1 row in set\nExecution time: 10 ms, Data scanned: 11.5 GB, Approximate cost: $0.06" assert format_status(rows_length=2, cursor=FakeCursor(1000, 1234)) == "2 rows in set\nExecution 
time: 1000 ms, Data scanned: 1.21 KB, Approximate cost: $0.00" def test_humanize_size(): assert humanize_size(20) == "20 B" assert humanize_size(2000) == "1.95 KB" assert humanize_size(200000) == "195.31 KB" assert humanize_size(20000000) == "19.07 MB" assert humanize_size(200000000000) == "186.26 GB" ================================================ FILE: test/test_naive_completion.py ================================================ import pytest from prompt_toolkit.completion import Completion from prompt_toolkit.document import Document @pytest.fixture def completer(): import athenacli.completer as sqlcompleter return sqlcompleter.AthenaCompleter(smart_completion=False) @pytest.fixture def complete_event(): from mock import Mock return Mock() def test_empty_string_completion(completer, complete_event): text = '' position = 0 result = completer.get_completions( Document(text=text, cursor_position=position), complete_event) assert result == list(map(Completion, sorted(completer.all_completions))) def test_select_keyword_completion(completer, complete_event): text = 'SEL' position = len('SEL') result = completer.get_completions( Document(text=text, cursor_position=position), complete_event) assert result == list([Completion(text='SELECT', start_position=-3)]) def test_function_name_completion(completer, complete_event): text = 'SELECT MA' position = len('SELECT MA') result = completer.get_completions( Document(text=text, cursor_position=position), complete_event) assert result == [ Completion(text='MAP', start_position=-2), Completion(text='MAX', start_position=-2)] def test_column_name_completion(completer, complete_event): text = 'SELECT FROM users' position = len('SELECT ') result = completer.get_completions( Document(text=text, cursor_position=position), complete_event) assert result == list(map(Completion, sorted(completer.all_completions))) def test_various_join_completions(completer, complete_event): for join_type in ['INNER', 'OUTER', 'CROSS', 'LEFT', 
'RIGHT', 'FULL']: text = 'SELECT foo FROM bar ' + join_type + ' ' position = len(text) result = completer.get_completions( Document(text=text, cursor_position=position), complete_event, smart_completion=True) assert Completion(text='JOIN') in result def test_outer_join_completion(completer, complete_event): for join_type in ['LEFT', 'RIGHT', 'FULL']: text = 'SELECT foo FROM bar ' + join_type + ' ' position = len(text) result = completer.get_completions( Document(text=text, cursor_position=position), complete_event, smart_completion=True) assert Completion(text='OUTER JOIN') in result ================================================ FILE: test/test_parseutils.py ================================================ import pytest from athenacli.packages.parseutils import ( extract_tables, query_starts_with, queries_start_with, is_destructive ) def test_empty_string(): tables = extract_tables('') assert tables == [] def test_simple_select_single_table(): tables = extract_tables('select * from abc') assert tables == [(None, 'abc', None)] def test_simple_select_single_table_schema_qualified(): tables = extract_tables('select * from abc.def') assert tables == [('abc', 'def', None)] def test_simple_select_multiple_tables(): tables = extract_tables('select * from abc, def') assert sorted(tables) == [(None, 'abc', None), (None, 'def', None)] def test_simple_select_multiple_tables_schema_qualified(): tables = extract_tables('select * from abc.def, ghi.jkl') assert sorted(tables) == [('abc', 'def', None), ('ghi', 'jkl', None)] def test_simple_select_with_cols_single_table(): tables = extract_tables('select a,b from abc') assert tables == [(None, 'abc', None)] def test_simple_select_with_cols_single_table_schema_qualified(): tables = extract_tables('select a,b from abc.def') assert tables == [('abc', 'def', None)] def test_simple_select_with_cols_multiple_tables(): tables = extract_tables('select a,b from abc, def') assert sorted(tables) == [(None, 'abc', None), (None, 'def', 
None)] def test_simple_select_with_cols_multiple_tables_with_schema(): tables = extract_tables('select a,b from abc.def, def.ghi') assert sorted(tables) == [('abc', 'def', None), ('def', 'ghi', None)] def test_select_with_hanging_comma_single_table(): tables = extract_tables('select a, from abc') assert tables == [(None, 'abc', None)] def test_select_with_hanging_comma_multiple_tables(): tables = extract_tables('select a, from abc, def') assert sorted(tables) == [(None, 'abc', None), (None, 'def', None)] def test_select_with_hanging_period_multiple_tables(): tables = extract_tables('SELECT t1. FROM tabl1 t1, tabl2 t2') assert sorted(tables) == [(None, 'tabl1', 't1'), (None, 'tabl2', 't2')] def test_simple_insert_single_table(): tables = extract_tables('insert into abc (id, name) values (1, "def")') # sqlparse mistakenly assigns an alias to the table # assert tables == [(None, 'abc', None)] assert tables == [(None, 'abc', 'abc')] @pytest.mark.xfail def test_simple_insert_single_table_schema_qualified(): tables = extract_tables('insert into abc.def (id, name) values (1, "def")') assert tables == [('abc', 'def', None)] def test_simple_update_table(): tables = extract_tables('update abc set id = 1') assert tables == [(None, 'abc', None)] def test_simple_update_table_with_schema(): tables = extract_tables('update abc.def set id = 1') assert tables == [('abc', 'def', None)] def test_join_table(): tables = extract_tables('SELECT * FROM abc a JOIN def d ON a.id = d.num') assert sorted(tables) == [(None, 'abc', 'a'), (None, 'def', 'd')] def test_join_table_schema_qualified(): tables = extract_tables( 'SELECT * FROM abc.def x JOIN ghi.jkl y ON x.id = y.num') assert tables == [('abc', 'def', 'x'), ('ghi', 'jkl', 'y')] def test_join_as_table(): tables = extract_tables('SELECT * FROM my_table AS m WHERE m.a > 5') assert tables == [(None, 'my_table', 'm')] def test_query_starts_with(): query = 'USE test;' assert query_starts_with(query, ('use', )) is True query = 'DROP DATABASE 
test;' assert query_starts_with(query, ('use', )) is False def test_query_starts_with_comment(): query = '# comment\nUSE test;' assert query_starts_with(query, ('use', )) is True def test_queries_start_with(): sql = ( '# comment\n' 'show databases;' 'use foo;' ) assert queries_start_with(sql, ('show', 'select')) is True assert queries_start_with(sql, ('use', 'drop')) is True assert queries_start_with(sql, ('delete', 'update')) is False def test_is_destructive(): sql = ( 'use test;\n' 'show databases;\n' 'drop database foo;' ) assert is_destructive(sql) is True ================================================ FILE: test/test_prompt_utils.py ================================================ # -*- coding: utf-8 -*- import click from athenacli.packages.prompt_utils import confirm_destructive_query def test_confirm_destructive_query_notty(): stdin = click.get_text_stream('stdin') assert stdin.isatty() is False sql = 'drop database foo;' assert confirm_destructive_query(sql) is None ================================================ FILE: tox.ini ================================================ [tox] envlist = py36, py37, py38 [testenv] deps = pytest mock commands = pytest