Repository: llllllllll/codetransformer Branch: master Commit: c5f551e915df Files: 56 Total size: 341.3 KB Directory structure: gitextract_cgcyv2vr/ ├── .coveragerc ├── .gitattributes ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── codetransformer/ │ ├── __init__.py │ ├── _version.py │ ├── code.py │ ├── core.py │ ├── decompiler/ │ │ ├── _343.py │ │ └── __init__.py │ ├── instructions.py │ ├── patterns.py │ ├── tests/ │ │ ├── __init__.py │ │ ├── test_code.py │ │ ├── test_core.py │ │ ├── test_decompiler.py │ │ └── test_instructions.py │ ├── transformers/ │ │ ├── __init__.py │ │ ├── add2mul.py │ │ ├── constants.py │ │ ├── interpolated_strings.py │ │ ├── literals.py │ │ ├── pattern_matched_exceptions.py │ │ ├── precomputed_slices.py │ │ └── tests/ │ │ ├── __init__.py │ │ ├── test_add2mul.py │ │ ├── test_constants.py │ │ ├── test_exc_patterns.py │ │ ├── test_interpolated_strings.py │ │ ├── test_literals.py │ │ └── test_precomputed_slices.py │ └── utils/ │ ├── __init__.py │ ├── functional.py │ ├── immutable.py │ ├── instance.py │ ├── no_default.py │ ├── pretty.py │ └── tests/ │ ├── __init__.py │ ├── test_immutable.py │ └── test_pretty.py ├── docs/ │ ├── .dir-locals.el │ ├── Makefile │ └── source/ │ ├── appendix.rst │ ├── code-objects.rst │ ├── conf.py │ ├── index.rst │ ├── magics.rst │ └── patterns.rst ├── requirements_doc.txt ├── setup.cfg ├── setup.py ├── tox.ini └── versioneer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveragerc ================================================ [run] omit = codetransformer/_version.py ================================================ FILE: .gitattributes ================================================ codetransformer/_version.py export-subst ================================================ FILE: .gitignore ================================================ .bundle db/*.sqlite3 
log/*.log *.log tmp/**/* tmp/* *.swp *~ #mac autosaving file .DS_Store *.py[co] # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox test.log .noseids *.xlsx # Compiled python files *.py[co] # Packages *.egg *.egg-info dist build eggs cover parts bin var sdist develop-eggs .installed.cfg coverage.xml nosetests.xml # C Extensions *.o *.so *.out # Vim *.swp *.swo # Built documentation docs/_build/* # database of vbench benchmarks.db # Vagrant temp folder .vagrant # pypi MANIFEST # pytest .cache htmlcov ================================================ FILE: .travis.yml ================================================ language: python sudo: false python: - 3.4.3 - 3.4 - 3.5 - 3.6 install: - pip install -e .[dev] script: - py.test codetransformer - flake8 codetransformer notifications: email: false ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ================================================ FILE: MANIFEST.in ================================================ include versioneer.py include codetransformer/_version.py ================================================ FILE: README.rst ================================================ ``codetransformer`` =================== |build status| |documentation| Bytecode transformers for CPython inspired by the ``ast`` module's ``NodeTransformer``. What is ``codetransformer``? ---------------------------- ``codetransformer`` is a library that allows us to work with CPython's bytecode representation at runtime. 
``codetransformer`` provides a level of abstraction between the programmer and the raw bytes read by the eval loop so that we can more easily inspect and modify bytecode. ``codetransformer`` is motivated by the need to override parts of the python language that are not already hooked into through data model methods. For example: * Override the ``is`` and ``not`` operators. * Custom data structure literals. * Syntax features that cannot be represented with valid python AST or source. * Run without a modified CPython interpreter. ``codetransformer`` was originally developed as part of lazy_ to implement the transformations needed to override the code objects at runtime. Example Uses ------------ Overloading Literals ~~~~~~~~~~~~~~~~~~~~ While this can be done as an AST transformation, we will often need to execute the constructor for the literal multiple times. Also, we need to be sure that any additional names required to run our code are provided when we run. With ``codetransformer``, we can pre compute our new literals and emit code that is as fast as loading our unmodified literals without requiring any additional names be available implicitly. In the following block we demonstrate overloading dictionary syntax to result in ``collections.OrderedDict`` objects. ``OrderedDict`` is like a ``dict``; however, the order of the keys is preserved. .. code-block:: python >>> from codetransformer.transformers.literals import ordereddict_literals >>> @ordereddict_literals ... def f(): ... return {'a': 1, 'b': 2, 'c': 3} >>> f() OrderedDict([('a', 1), ('b', 2), ('c', 3)]) This also supports dictionary comprehensions: .. code-block:: python >>> @ordereddict_literals ... def f(): ... return {k: v for k, v in zip('abc', (1, 2, 3))} >>> f() OrderedDict([('a', 1), ('b', 2), ('c', 3)]) The next block overrides ``float`` literals with ``decimal.Decimal`` objects. These objects support arbitrary precision arithmetic. .. 
code-block:: python >>> from codetransformer.transformers.literals import decimal_literals >>> @decimal_literals ... def f(): ... return 1.5 >>> f() Decimal('1.5') Pattern Matched Exceptions ~~~~~~~~~~~~~~~~~~~~~~~~~~ Pattern matched exceptions are a good example of a ``CodeTransformer`` that would be very complicated to implement at the AST level. This transformation extends the ``try/except`` syntax to accept instances of ``BaseException`` as well as subclasses of ``BaseException``. When excepting an instance, the ``args`` of the exception will be compared for equality to determine which exception handler should be invoked. For example: .. code-block:: python >>> @pattern_matched_exceptions() ... def foo(): ... try: ... raise ValueError('bar') ... except ValueError('buzz'): ... return 'buzz' ... except ValueError('bar'): ... return 'bar' >>> foo() 'bar' This function raises an instance of ``ValueError`` and attempts to catch it. The first check looks for instances of ``ValueError`` that were constructed with an argument of ``'buzz'``. Because our custom exception is raised with ``'bar'``, these are not equal and we do not enter this handler. The next handler looks for ``ValueError('bar')`` which does match the exception we raised. We then enter this block and normal python rules take over. We may also pass our own exception matching function: .. code-block:: python >>> def match_greater(match_expr, exc_type, exc_value, exc_traceback): ... return match_expr > exc_value.args[0] >>> @pattern_matched_exceptions(match_greater) ... def foo(): ... try: ... raise ValueError(5) ... except 4: ... return 4 ... except 5: ... return 5 ... except 6: ... return 6 >>> foo() 6 This matches when the match expression is greater in value than the first argument of any exception type that is raised. This particular behavior would be very hard to mimic through AST level transformations. Core Abstractions ----------------- The three core abstractions of ``codetransformer`` are: 1. 
The ``Instruction`` object which represents an opcode_ which may be paired with some argument. 2. The ``Code`` object which represents a collection of ``Instruction``\s. 3. The ``CodeTransformer`` object which represents a set of rules for manipulating ``Code`` objects. Instructions ~~~~~~~~~~~~ The ``Instruction`` object represents an atomic operation that can be performed by the CPython virtual machine. These are things like ``LOAD_NAME`` which loads a name onto the stack, or ``ROT_TWO`` which rotates the top two stack elements. Some instructions accept an argument, for example ``LOAD_NAME``, which modifies the behavior of the instruction. This is much like a function call where some functions accept arguments. Because the bytecode is always packed as raw bytes, the argument must be some integer (CPython stores all arguments in two bytes). This means that things that need a richer argument system (like ``LOAD_NAME`` which needs the actual name to look up) must carry around the actual arguments in some table and use the integer as an offset into this array. One of the key abstractions of the ``Instruction`` object is that the argument is always some python object that represents the actual argument. Any lookup table management is handled for the user. This is helpful because some arguments share this table so we don't want to add extra entries or forget to add them at all. Another annoyance is that the instructions that handle control flow use their argument to say what bytecode offset to jump to. Some jumps use the absolute index, others use a relative index. This also makes it hard if you want to add or remove instructions because all of the offsets must be recomputed. In ``codetransformer``, the jump instructions all accept another ``Instruction`` as the argument so that the assembler can manage this for the user. 
We also provide an easy way for new instructions to "steal" jumps that targeted another instruction so that we can manage altering the bytecode around jump targets. Code ~~~~ ``Code`` objects are a nice abstraction over python's ``types.CodeType``. Quoting the ``CodeType`` constructor docstring: :: code(argcount, kwonlyargcount, nlocals, stacksize, flags, codestring, constants, names, varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]]) Create a code object. Not for the faint of heart. The ``codetransformer`` abstraction is designed to make it easy to dynamically construct and inspect these objects. This allows us to easily set things like the argument names, and manipulate the line number mappings. The ``Code`` object provides methods for converting to and from Python's code representation: 1. ``from_pycode`` 2. ``to_pycode``. This allows us to take an existing function, parse the meaning from it, modify it, and then assemble this back into a new python code object. .. note:: ``Code`` objects are immutable. When we say "modify", we mean create a copy with different values. CodeTransformers ---------------- This is the set of rules that are used to actually modify the ``Code`` objects. These rules are defined as a set of ``patterns`` which are a DSL used to define a DFA for matching against sequences of ``Instruction`` objects. Once we have matched a segment, we yield new instructions to replace what we have matched. A simple codetransformer looks like: .. code-block:: python from codetransformer import CodeTransformer, instructions class FoldNames(CodeTransformer): @pattern( instructions.LOAD_GLOBAL, instructions.LOAD_GLOBAL, instructions.BINARY_ADD, ) def _load_fast(self, a, b, add): yield instructions.LOAD_FAST(a.arg + b.arg).steal(a) This ``CodeTransformer`` uses the ``+`` operator to implement something like ``CPP``\s token pasting for local variables. 
We read this pattern as a sequence of two ``LOAD_GLOBAL`` (global name lookups) followed by a ``BINARY_ADD`` instruction (``+`` operator call). This will then call the function with the three instructions passed positionally. This handler replaces this sequence with a single instruction that emits a ``LOAD_FAST`` (local name lookup) that is the result of adding the two names together. We then steal any jumps that used to target the first ``LOAD_GLOBAL``. We can execute this transformer by calling an instance of it on a function object, or using it like a decorator. For example: .. code-block:: python >>> @FoldNames() ... def f(): ... ab = 3 ... return a + b >>> f() 3 License ------- ``codetransformer`` is free software, licensed under the GNU General Public License, version 2. For more information see the ``LICENSE`` file. Source ------ Source code is hosted on github at https://github.com/llllllllll/codetransformer. .. _lazy: https://github.com/llllllllll/lazy_python .. _opcode: https://docs.python.org/3.5/library/dis.html#opcode-NOP .. |build status| image:: https://travis-ci.org/llllllllll/codetransformer.svg?branch=master :target: https://travis-ci.org/llllllllll/codetransformer .. |documentation| image:: https://readthedocs.org/projects/codetransformer/badge/?version=stable :target: http://codetransformer.readthedocs.io/en/stable/?badge=stable :alt: Documentation Status ================================================ FILE: codetransformer/__init__.py ================================================ from .code import Code, Flag from .core import CodeTransformer from . patterns import ( matchany, not_, option, or_, pattern, plus, seq, var, ) from . import instructions from . 
import transformers from .utils.pretty import a, d, display, pprint_ast, pformat_ast from ._version import get_versions __version__ = get_versions()['version'] del get_versions def load_ipython_extension(ipython): # pragma: no cover def dis_magic(line, cell=None): if cell is None: return d(line) return d(cell) ipython.register_magic_function(dis_magic, 'line_cell', 'dis') def ast_magic(line, cell=None): if cell is None: return a(line) return a(cell) ipython.register_magic_function(ast_magic, 'line_cell', 'ast') __all__ = [ 'a', 'd', 'display', 'Code', 'CodeTransformer', 'Flag', 'instructions', 'matchany', 'not_', 'option', 'or_', 'pattern', 'pattern', 'plus', 'pformat_ast', 'pprint_ast', 'seq', 'var', 'transformers', ] ================================================ FILE: codetransformer/_version.py ================================================ # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.15 (https://github.com/warner/python-versioneer) import errno import os import re import subprocess import sys def get_keywords(): # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "$Format:%d$" git_full = "$Format:%H$" keywords = {"refnames": git_refnames, "full": git_full} return keywords class VersioneerConfig: pass def get_config(): # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "codetransformer-" cfg.versionfile_source = "codetransformer/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): pass LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator def decorate(f): if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) return None return stdout def versions_from_parentdir(parentdir_prefix, root, verbose): # Source tarballs conventionally unpack into a directory that includes # both the project name and a version string. 
dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%s', but '%s' doesn't start with " "prefix '%s'" % (root, dirname, parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None} @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): if not keywords: raise NotThisMethod("no keywords at all, weird") refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. 
The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags"} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # this runs 'git' from the root of the source tree. This only gets called # if the git-archive 'subst' keywords were *not* expanded, and # _version.py hasn't already been rewritten with a short version string, # meaning we're inside a checked out source tree. 
if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag, this yields TAG-NUM-gHEX[-dirty] # if there are no tags, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces def plus_or_dot(pieces): if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): # now build up version string, with post-release "local version # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty # exceptions: # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): # TAG[.post.devDISTANCE] . No -dirty # exceptions: # 1: no tags. 0.post.devDISTANCE if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. 
Note that # .dev0 sorts backwards (a dirty tree will appear "older" than the # corresponding clean one), but you shouldn't be releasing software with # -dirty anyways. # exceptions: # 1: no tags. 0.postDISTANCE[.dev0] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. # exceptions: # 1: no tags. 0.postDISTANCE[.dev0] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty # --always' # exceptions: # 1: no tags. HEX[-dirty] (note: no 'g' prefix) if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty # --always -long'. The distance/hash is unconditional. # exceptions: # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"]} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} def get_versions(): # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree"} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version"} ================================================ FILE: codetransformer/code.py ================================================ from collections import OrderedDict from dis import Bytecode, dis, findlinestarts from enum import IntEnum, unique from functools import reduce from itertools import repeat import operator as op import sys from types import CodeType from .instructions import ( Instruction, LOAD_CONST, YIELD_FROM, YIELD_VALUE, _RawArg, ) from .utils.functional import scanl, reverse_dict, ffill from .utils.immutable import lazyval from .utils.instance import instance WORDCODE = sys.version_info >= (3, 6) if WORDCODE: argsize = 1 max_lnotab_increment = 127 def _sparse_args(instrs): for instr in instrs: yield instr yield None else: argsize = 2 max_lnotab_increment = 255 def _sparse_args(instrs): for instr in instrs: yield instr if instr.have_arg: yield None yield None _sparse_args.__doc__ = """\ Makes the arguments sparse so that instructions live at the correct index for the jump resolution step. This pads the instruction set with None to mark the bytes occupied by arguments. Parameters ---------- instrs : iterable of Instruction The dense instruction set. Yields ------ sparse : Instruction or None Yields the instructions, with objects marking the bytes that are used for arguments. """ @unique class Flag(IntEnum): """ An enum describing the bitmask of flags that can be set on a code object. 
""" # These enum values and comments are taken from CPython. CO_OPTIMIZED = 0x0001 CO_NEWLOCALS = 0x0002 CO_VARARGS = 0x0004 CO_VARKEYWORDS = 0x0008 CO_NESTED = 0x0010 CO_GENERATOR = 0x0020 # The CO_NOFREE flag is set if there are no free or cell variables. # This information is redundant, but it allows a single flag test # to determine whether there is any extra work to be done when the # call frame it setup. CO_NOFREE = 0x0040 # The CO_COROUTINE flag is set for coroutines creates with the # types.coroutine decorator. This converts old-style coroutines into # python3.5 style coroutines. CO_COROUTINE = 0x0080 CO_ITERABLE_COROUTINE = 0x0100 # Old values: CO_FUTURE_DIVISION = 0x2000 CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # Do absolute imports by default. CO_FUTURE_WITH_STATEMENT = 0x8000 CO_FUTURE_PRINT_FUNCTION = 0x10000 CO_FUTURE_UNICODE_LITERALS = 0x20000 CO_FUTURE_BARRY_AS_BDFL = 0x40000 CO_FUTURE_GENERATOR_STOP = 0x80000 @instance class max: """The largest bitmask that represents a valid flag. """ def __get__(self, instance, owner): return owner.pack(**dict(zip(owner.__members__, repeat(True)))) def __set__(self, instance, value): raise AttributeError("can't set 'max' attribute") @classmethod def pack(cls, *, CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS, CO_NESTED, CO_GENERATOR, CO_NOFREE, CO_COROUTINE, CO_ITERABLE_COROUTINE, CO_FUTURE_DIVISION, CO_FUTURE_ABSOLUTE_IMPORT, CO_FUTURE_WITH_STATEMENT, CO_FUTURE_PRINT_FUNCTION, CO_FUTURE_UNICODE_LITERALS, CO_FUTURE_BARRY_AS_BDFL, CO_FUTURE_GENERATOR_STOP): """Pack a flags into a bitmask. I hope you like kwonly args. 
Parameters ---------- CO_OPTIMIZED : bool CO_NEWLOCALS : bool CO_VARARGS : bool CO_VARKEYWORDS : bool CO_NESTED : bool CO_GENERATOR : bool CO_NOFREE : bool CO_COROUTINE : bool CO_ITERABLE_COROUTINE : bool CO_FUTURE_DIVISION : bool CO_FUTURE_ABSOLUTE_IMPORT : bool CO_FUTURE_WITH_STATEMENT : bool CO_FUTURE_PRINT_FUNCTION : bool CO_FUTURE_UNICODE_LITERALS : bool CO_FUTURE_BARRY_AS_BDFL : bool CO_FUTURE_GENERATOR_STOP : bool Returns ------- mask : int See Also -------- codetransformer.code.Flag.unpack """ ls = locals() return reduce( op.or_, (v for k, v in cls.__members__.items() if ls[k]), 0, ) @classmethod def unpack(cls, mask): """Unpack a bitmask into a map of flag to bool. Parameters ---------- mask : int A bitmask Returns ------- mapping : OrderedDict[str -> bool] The mapping of flag name to flag status. See Also -------- codetransformer.code.Flag.pack """ if mask > cls.max: raise ValueError('Invalid mask, too large: %d' % mask) return OrderedDict( (k, bool(mask & getattr(cls, k))) for k, v in cls.__members__.items() ) def _freevar_argname(arg, cellvars, freevars): """ Get the name of the variable manipulated by a 'uses_free' instruction. Parameters ---------- arg : int The raw argument to a uses_free instruction that we want to resolve to a name. cellvars : list[str] The co_cellvars of the function for which we want to resolve `arg`. freevars : list[str] The co_freevars of the function for which we want to resolve `arg`. Notes ----- From https://docs.python.org/3.5/library/dis.html#opcode-LOAD_CLOSURE: The name of the variable is co_cellvars[i] if i is less than the length of co_cellvars. 
Otherwise it is co_freevars[i - len(co_cellvars)] """ len_cellvars = len(cellvars) if arg < len_cellvars: return cellvars[arg] return freevars[arg - len_cellvars] def pycode(argcount, kwonlyargcount, nlocals, stacksize, flags, codestring, constants, names, varnames, filename, name, firstlineno, lnotab, freevars=(), cellvars=()): """types.CodeType constructor that accepts keyword arguments. See Also -------- types.CodeType """ return CodeType( argcount, kwonlyargcount, nlocals, stacksize, flags, codestring, constants, names, varnames, filename, name, firstlineno, lnotab, freevars, cellvars, ) class Code: """A higher abstraction over python's CodeType. See Include/code.h for more information. Parameters ---------- instrs : iterable of Instruction A sequence of codetransformer Instruction objects. argnames : iterable of str, optional The names of the arguments to the code object. name : str, optional The name of this code object. filename : str, optional The file that this code object came from. firstlineno : int, optional The first line number of the code in this code object. lnotab : dict[Instruction -> int], optional The mapping from instruction to the line that it starts. flags : dict[str -> bool], optional Any flags to set. This updates the default flag set. Attributes ---------- argcount argnames cellvars constructs_new_locals consts filename flags freevars instrs is_coroutine is_generator is_iterable_coroutine is_nested kwonlyargcount lnotab name names py_lnotab sparse_instrs stacksize varnames """ __slots__ = ( '_instrs', '_argnames', '_argcount', '_kwonlyargcount', '_cellvars', '_freevars', '_name', '_filename', '_firstlineno', '_lnotab', '_flags', '__weakref__', ) def __init__(self, instrs, argnames=(), *, cellvars=(), freevars=(), name='', filename='', firstlineno=1, lnotab=None, flags=None): instrs = tuple(instrs) # strictly evaluate any generators. 
# The starting varnames (the names of the arguments to the function) argcount = [0] kwonlyargcount = [0] argcounter = argcount # Which set of args are we currently counting. _argnames = [] append_argname = _argnames.append varg = kwarg = None for argname in argnames: if argname.startswith('**'): if kwarg is not None: raise ValueError('cannot specify **kwargs more than once') kwarg = argname[2:] continue elif argname.startswith('*'): if varg is not None: raise ValueError('cannot specify *args more than once') varg = argname[1:] argcounter = kwonlyargcount # all following args are kwonly. continue argcounter[0] += 1 append_argname(argname) if varg is not None: append_argname(varg) if kwarg is not None: append_argname(kwarg) cellvar_names = set(cellvars) freevar_names = set(freevars) for instr in filter(op.attrgetter('uses_free'), instrs): if instr.arg in cellvar_names: instr._vartype = 'cell' elif instr.arg in freevar_names: instr._vartype = 'free' else: raise ValueError( "Argument to %r is not in cellvars or freevars." 
% instr ) for instr in filter(op.attrgetter('is_jmp'), instrs): instr.arg._target_of.add(instr) self._instrs = instrs self._argnames = tuple(_argnames) self._argcount = argcount[0] self._kwonlyargcount = kwonlyargcount[0] self._cellvars = cellvars self._freevars = freevars self._name = name self._filename = filename self._firstlineno = firstlineno self._lnotab = lnotab or {} self._flags = Flag.pack(**dict( dict( CO_OPTIMIZED=True, CO_NEWLOCALS=True, CO_VARARGS=varg is not None, CO_VARKEYWORDS=kwarg is not None, CO_NESTED=False, CO_GENERATOR=any( isinstance(instr, (YIELD_VALUE, YIELD_FROM)) for instr in instrs ), CO_NOFREE=not any(map(op.attrgetter('uses_free'), instrs)), CO_COROUTINE=False, CO_ITERABLE_COROUTINE=False, CO_FUTURE_DIVISION=False, CO_FUTURE_ABSOLUTE_IMPORT=False, CO_FUTURE_WITH_STATEMENT=False, CO_FUTURE_PRINT_FUNCTION=False, CO_FUTURE_UNICODE_LITERALS=False, CO_FUTURE_BARRY_AS_BDFL=False, CO_FUTURE_GENERATOR_STOP=False, ), **flags or {} )) @classmethod def from_pyfunc(cls, f): """Create a Code object from a python function object. Parameters ---------- f : function The function from which to construct a code object. Returns ------- code : Code A Code object representing f.__code__. """ return cls.from_pycode(f.__code__) @classmethod def from_pycode(cls, co): """Create a Code object from a python code object. Parameters ---------- co : CodeType The python code object. Returns ------- code : Code The codetransformer Code object. 
""" # Make it sparse to instrs[n] is the instruction at bytecode[n] sparse_instrs = tuple( _sparse_args( Instruction.from_opcode( b.opcode, Instruction._no_arg if b.arg is None else _RawArg(b.arg), ) for b in Bytecode(co) ), ) for idx, instr in enumerate(sparse_instrs): if instr is None: # The sparse value continue if instr.absjmp: instr.arg = sparse_instrs[instr.arg] elif instr.reljmp: instr.arg = sparse_instrs[instr.arg + idx + argsize + 1] elif isinstance(instr, LOAD_CONST): instr.arg = co.co_consts[instr.arg] elif instr.uses_name: instr.arg = co.co_names[instr.arg] elif instr.uses_varname: instr.arg = co.co_varnames[instr.arg] elif instr.uses_free: instr.arg = _freevar_argname( instr.arg, co.co_freevars, co.co_cellvars, ) elif instr.have_arg and isinstance(instr.arg, _RawArg): instr.arg = int(instr.arg) flags = Flag.unpack(co.co_flags) has_vargs = flags['CO_VARARGS'] has_kwargs = flags['CO_VARKEYWORDS'] # Here we convert the varnames format into our argnames format. paramnames = co.co_varnames[ :(co.co_argcount + co.co_kwonlyargcount + has_vargs + has_kwargs) ] # We start with the positional arguments. new_paramnames = list(paramnames[:co.co_argcount]) # Add *args next. if has_vargs: new_paramnames.append('*' + paramnames[-1 - has_kwargs]) # Add positional only arguments next. new_paramnames.extend(paramnames[ co.co_argcount:co.co_argcount + co.co_kwonlyargcount ]) # Add **kwargs last. if has_kwargs: new_paramnames.append('**' + paramnames[-1]) return cls( filter(bool, sparse_instrs), argnames=new_paramnames, cellvars=co.co_cellvars, freevars=co.co_freevars, name=co.co_name, filename=co.co_filename, firstlineno=co.co_firstlineno, lnotab={ lno: sparse_instrs[off] for off, lno in findlinestarts(co) }, flags=flags, ) def to_pycode(self): """Create a python code object from the more abstract codetransfomer.Code object. Returns ------- co : CodeType The python code object. 
""" consts = self.consts names = self.names varnames = self.varnames freevars = self.freevars cellvars = self.cellvars bc = bytearray() for instr in self.instrs: bc.append(instr.opcode) # Write the opcode byte. if isinstance(instr, LOAD_CONST): # Resolve the constant index. bc.extend(consts.index(instr.arg).to_bytes(argsize, 'little')) elif instr.uses_name: # Resolve the name index. bc.extend(names.index(instr.arg).to_bytes(argsize, 'little')) elif instr.uses_varname: # Resolve the local variable index. bc.extend( varnames.index(instr.arg).to_bytes(argsize, 'little'), ) elif instr.uses_free: # uses_free is really "uses freevars **or** cellvars". try: # look for the name in cellvars bc.extend( cellvars.index(instr.arg).to_bytes(argsize, 'little'), ) except ValueError: # fall back to freevars, incrementing the length of # cellvars. bc.extend( (freevars.index(instr.arg) + len(cellvars)).to_bytes( argsize, 'little', ) ) elif instr.absjmp: # Resolve the absolute jump target. bc.extend( self.bytecode_offset(instr.arg).to_bytes( argsize, 'little', ), ) elif instr.reljmp: # Resolve the relative jump target. # We do this by subtracting the curren't instructions's # sparse index from the sparse index of the argument. # We then subtract argsize - 1 to account for the bytes the # current instruction takes up. bytecode_offset = self.bytecode_offset bc.extend(( bytecode_offset(instr.arg) - bytecode_offset(instr) - argsize - 1 ).to_bytes(argsize, 'little',)) elif instr.have_arg: # Write any other arg here. bc.extend(instr.arg.to_bytes(argsize, 'little')) elif WORDCODE: # with wordcode, all instructions are padded to 2 bytes bc.append(0) return CodeType( self.argcount, self.kwonlyargcount, len(varnames), self.stacksize, self.py_flags, bytes(bc), consts, names, varnames, self.filename, self.name, self.firstlineno, self.py_lnotab, freevars, cellvars, ) @property def instrs(self): """The instructions in this code object. 
""" return self._instrs @property def sparse_instrs(self): """The instructions where the index of an instruction is the bytecode offset of that instruction. None indicates that no instruction is at that offset. """ return tuple(_sparse_args(self.instrs)) @property def argcount(self): """The number of arguments this code object accepts. This does not include varargs (\*args). """ return self._argcount @property def kwonlyargcount(self): """The number of keyword only arguments this code object accepts. This does not include varkwargs (\*\*kwargs). """ return self._kwonlyargcount @property def consts(self): """The constants referenced in this code object. """ # We cannot use a set comprehension because consts do not need # to be hashable. consts = [] append_const = consts.append for instr in self.instrs: if isinstance(instr, LOAD_CONST) and instr.arg not in consts: append_const(instr.arg) return tuple(consts) @property def names(self): """The names referenced in this code object. Names come from instructions like LOAD_GLOBAL or STORE_ATTR where the name of the global or attribute is needed at runtime. """ # We must sort to preserve the order between calls. # The set comprehension is to drop the duplicates. return tuple(sorted({ instr.arg for instr in self.instrs if instr.uses_name })) @property def argnames(self): """The names of the arguments to this code object. The format is: [args] [vararg] [kwonlyargs] [varkwarg] where each group is optional. """ return self._argnames @property def varnames(self): """The names of all of the local variables in this code object. """ # We must sort to preserve the order between calls. # The set comprehension is to drop the duplicates. return self._argnames + tuple(sorted({ instr.arg for instr in self.instrs if instr.uses_varname and instr.arg not in self._argnames })) @property def cellvars(self): """The names of the variables closed over by inner code objects. 
""" return self._cellvars @property def freevars(self): """The names of the variables this code object has closed over. """ return self._freevars @property def flags(self): """The flags of this code object represented as a mapping from flag name to boolean status. Notes ----- This is a copy of the underlying flags. Mutations will not affect the code object. """ return Flag.unpack(self._flags) @property def py_flags(self): """The flags of this code object represented as a bitmask. """ return self._flags @property def is_nested(self): """Is this a nested code object? """ return bool(self._flags & Flag.CO_NESTED) @property def is_generator(self): """Is this a generator? """ return bool(self._flags & Flag.CO_GENERATOR) @property def is_coroutine(self): """Is this a coroutine defined with async def? This is 3.5 and greater. """ return bool(self._flags & Flag.CO_COROUTINE) @property def is_iterable_coroutine(self): """Is this an async generator defined with types.coroutine? This is 3.5 and greater. """ return bool(self._flags & Flag.CO_ITERABLE_COROUTINE) @property def constructs_new_locals(self): """Does this code object construct new locals? This is True for things like functions where executing the code needs a new locals dict each time; however, something like a module does not normally need new locals. """ return bool(self._flags & Flag.CO_NEWLOCALS) @property def filename(self): """The filename of this code object. """ return self._filename @property def name(self): """The name of this code object. """ return self._name @property def firstlineno(self): """The first source line from self.filename that this code object represents. """ return self._firstlineno @property def lnotab(self): """The mapping of line number to the first instruction on that line. 
""" return self._lnotab @lazyval def lno_of_instr(self): instrs = self.instrs lnos = [None] * len(instrs) reverse_lnotab = reverse_dict(self.lnotab) for n, instr in enumerate(instrs): lnos[n] = reverse_lnotab.get(instr) return dict(zip(instrs, ffill(lnos))) @property def py_lnotab(self): """The encoded lnotab that python uses to compute when lines start. Note ---- See Objects/lnotab_notes.txt in the cpython source for more details. """ reverse_lnotab = reverse_dict(self.lnotab) py_lnotab = [] prev_instr = 0 prev_lno = self.firstlineno for addr, instr in enumerate(_sparse_args(self.instrs)): lno = reverse_lnotab.get(instr) if lno is None: continue delta = lno - prev_lno py_lnotab.append(addr - prev_instr) py_lnotab.append(min(delta, max_lnotab_increment)) delta -= max_lnotab_increment while delta > 0: py_lnotab.append(0) py_lnotab.append(min(delta, max_lnotab_increment)) delta -= max_lnotab_increment prev_lno = lno prev_instr = addr return bytes(py_lnotab) @property def stacksize(self): """The maximum amount of stack space used by this code object. """ return max(scanl( op.add, 0, map(op.attrgetter('stack_effect'), self.instrs), )) def index(self, instr): """Returns the index of instr. Parameters ---------- instr : Instruction The instruction the check the index of. Returns ------- idx : int The index of instr in this code object. """ return self.instrs.index(instr) def bytecode_offset(self, instr): """Returns the offset of instr in the bytecode representation. Parameters ---------- instr : Instruction The instruction the check the index of. Returns ------- idx : int The index of instr in this code object in the sparse instructions. """ return self.sparse_instrs.index(instr) def __getitem__(self, key): return self.instrs[key] def __iter__(self): return iter(self.instrs) def __len__(self): return len(self.instrs) def __contains__(self, instr): return instr in self.instrs def dis(self, file=None): """ Print self via the stdlib ``dis`` module. 
def _new_lnotab(instrs, lnotab):
    """The updated lnotab after the instructions have been transformed.

    Parameters
    ----------
    instrs : iterable[Instruction]
        The new instructions.  (Unused; retained for interface stability.)
    lnotab : dict[Instruction -> int]
        The lnotab for the old code object.

    Returns
    -------
    new_lnotab : dict[Instruction -> int]
        The post transform lnotab.
    """
    updated = {}
    for lno, instr in lnotab.items():
        stolen = instr._stolen_by
        # If a transformer "stole" this instruction, the line entry now
        # belongs to its replacement; otherwise keep the original.
        updated[lno] = instr if stolen is None else stolen
    return updated
Patterns are created when a method is decorated with ``codetransformer.pattern.pattern`` """ def __new__(mcls, name, bases, dict_): dict_['patterndispatcher'] = patterndispatcher(*chain( (v for v in dict_.values() if isinstance(v, boundpattern)), *( d and d.patterns for d in ( getattr(b, 'patterndispatcher', ()) for b in bases ) ) )) return super().__new__(mcls, name, bases, dict_) def __prepare__(self, bases): return OrderedDict() class CodeTransformer(metaclass=CodeTransformerMeta): """A code object transformer, similar to the NodeTransformer from the ast module. Attributes ---------- code """ __slots__ = '__weakref__', def transform_consts(self, consts): """transformer for the co_consts field. Override this method to transform the `co_consts` of the code object. Parameters ---------- consts : tuple The co_consts Returns ------- new_consts : tuple The new constants. """ return tuple( self.transform(Code.from_pycode(const)).to_pycode() if isinstance(const, CodeType) else const for const in consts ) def _id(self, obj): """Identity function. Parameters ---------- obj : any The object to return Returns ------- obj : any The input unchanged """ return obj transform_name = _id transform_names = _id transform_varnames = _id transform_freevars = _id transform_cellvars = _id transform_defaults = _id del _id def transform(self, code, *, name=None, filename=None): """Transform a codetransformer.Code object applying the transforms. Parameters ---------- code : Code The code object to transform. name : str, optional The new name for this code object. filename : str, optional The new filename for this code object. Returns ------- new_code : Code The transformed code object. """ # reverse lookups from for constants and names. 
reversed_consts = {} reversed_names = {} reversed_varnames = {} for instr in code: if isinstance(instr, LOAD_CONST): reversed_consts[instr] = instr.arg if instr.uses_name: reversed_names[instr] = instr.arg if isinstance(instr, (STORE_FAST, LOAD_FAST)): reversed_varnames[instr] = instr.arg instrs, consts = tuple(zip(*reversed_consts.items())) or ((), ()) for instr, const in zip(instrs, self.transform_consts(consts)): instr.arg = const instrs, names = tuple(zip(*reversed_names.items())) or ((), ()) for instr, name_ in zip(instrs, self.transform_names(names)): instr.arg = name_ instrs, varnames = tuple(zip(*reversed_varnames.items())) or ((), ()) for instr, varname in zip(instrs, self.transform_varnames(varnames)): instr.arg = varname with self._new_context(code): post_transform = self.patterndispatcher(code) return Code( post_transform, code.argnames, cellvars=self.transform_cellvars(code.cellvars), freevars=self.transform_freevars(code.freevars), name=name if name is not None else code.name, filename=filename if filename is not None else code.filename, firstlineno=code.firstlineno, lnotab=_new_lnotab(post_transform, code.lnotab), flags=code.flags, ) def __call__(self, f, *, globals_=None, name=None, defaults=None, closure=None): # Callable so that we can use CodeTransformers as decorators. if closure is not None: closure = tuple(map(_cell_new, closure)) else: closure = f.__closure__ return FunctionType( self.transform(Code.from_pycode(f.__code__)).to_pycode(), _a_if_not_none(globals_, f.__globals__), _a_if_not_none(name, f.__name__), _a_if_not_none(defaults, f.__defaults__), closure, ) @instance class _context_stack(threading.local): """Thread safe transformation context stack. Each thread will get it's own ``WeakKeyDictionary`` that maps instances to a stack of ``Context`` objects. When this descriptor is looked up we first try to get the weakkeydict off of the thread local storage. If it doesn't exist we make a new map. Then we lookup our instance in this map. 
    @contextmanager
    def _new_context(self, code):
        # Push a fresh Context for ``code`` onto this instance's
        # (thread-local) context stack for the duration of the with-block.
        # The pop in ``finally`` keeps the stack balanced even when the
        # transformation raises, which is what makes recursive/reentrant
        # use of a single transformer instance safe.
        self._context_stack.append(Context(code))
        try:
            yield
        finally:
            self._context_stack.pop()
def decompile(f):
    """
    Decompile a function.

    Parameters
    ----------
    f : function
        The function to decompile.

    Returns
    -------
    ast : ast.FunctionDef or ast.Lambda
        A node that compiles to f.
    """
    co = f.__code__
    args, kwonly, varargs, varkwargs = paramnames(co)
    annotations = f.__annotations__ or {}
    defaults = list(f.__defaults__ or ())
    kw_defaults = f.__kwdefaults__ or {}

    # Lambdas are compiled with the reserved name '<lambda>'; a def
    # statement can never produce that name, so this reliably
    # distinguishes the two cases.  (Comparing against '' would never
    # match, sending every lambda down the FunctionDef path.)
    if f.__name__ == '<lambda>':
        node = ast.Lambda
        body = pycode_to_body(co, DecompilationContext(in_lambda=True))[0]
        extra_kwargs = {}
    else:
        node = ast.FunctionDef
        body = pycode_to_body(
            co,
            DecompilationContext(in_function_block=True),
        )
        extra_kwargs = {
            'decorator_list': [],
            'returns': annotations.get('return'),
        }

    return node(
        name=f.__name__,
        args=make_function_arguments(
            args=args,
            kwonly=kwonly,
            varargs=varargs,
            varkwargs=varkwargs,
            defaults=defaults,
            kw_defaults=kw_defaults,
            annotations=annotations,
        ),
        body=body,
        **extra_kwargs
    )
def process_instrs(queue, stack, body, context):
    """
    Process instructions from the instruction queue.

    A handler may return a new DecompilationContext to replace ``context``
    for all subsequent instructions; returning None keeps the current one.
    """
    while queue:
        instr = queue.popleft()
        maybe_new_context = _process_instr(instr, queue, stack, body, context)
        if maybe_new_context is not None:
            context = maybe_new_context
""" test_expr = make_expr(stack) if isinstance(instr, instrs.POP_JUMP_IF_TRUE): test_expr = ast.UnaryOp(op=ast.Not(), operand=test_expr) first_block = popwhile(op.is_not(instr.arg), queue, side='left') if isinstance(first_block[-1], instrs.RETURN_VALUE): body = instrs_to_body(first_block, context) return ast.If(test=test_expr, body=body, orelse=[]) jump_to_end = expect( first_block.pop(), instrs.JUMP_FORWARD, "at end of if-block" ) body = instrs_to_body(first_block, context) # First instruction after the whole if-block. end = jump_to_end.arg if instr.arg is jump_to_end.arg: orelse = [] else: orelse = instrs_to_body( popwhile(op.is_not(end), queue, side='left'), context, ) return ast.If(test=test_expr, body=body, orelse=orelse) @_process_instr.register(instrs.EXTENDED_ARG) def _process_instr_extended_arg(instr, queue, stack, body, context): """We account for EXTENDED_ARG when constructing Code objects.""" pass @_process_instr.register(instrs.UNPACK_SEQUENCE) def _process_instr_unpack_sequence(instr, queue, stack, body, context): body.append(make_assignment(instr, queue, stack)) @_process_instr.register(instrs.IMPORT_NAME) def _process_instr_import_name(instr, queue, stack, body, context): """ Process an IMPORT_NAME instruction. Side Effects ------------ Pops two instuctions from `stack` Consumes instructions from `queue` to the end of the import statement. Appends an ast.Import or ast.ImportFrom node to `body`. """ # If this is "import module", fromlist is None. # If this this is "from module import a, b fromlist will be ('a', 'b'). fromlist = stack.pop().arg # level argument to __import__. Should be 0, 1, or 2. level = stack.pop().arg module = instr.arg if fromlist is None: # Regular import. 
def _pop_import_LOAD_ATTRs(module_name, queue):
    """
    Pop LOAD_ATTR instructions for an import of the form::

        import a.b.c.d

    which should generate bytecode like this::

        1  0 LOAD_CONST   0 (0)
           3 LOAD_CONST   1 (None)
           6 IMPORT_NAME  0 (a.b.c.d)
           9 LOAD_ATTR    1 (b)
          12 LOAD_ATTR    2 (c)
          15 LOAD_ATTR    3 (d)
          18 STORE_NAME   3 (d)
    """
    # Take every leading LOAD_ATTR off the queue; for a plain
    # ``import a`` there are none and we return an empty sequence.
    popped = popwhile(is_a(instrs.LOAD_ATTR), queue, side='left')
    if popped:
        # The attribute chain must spell out everything after the first
        # dot of the dotted module name ('b.c.d' above).
        # NOTE(review): assumes module_name contains a dot whenever
        # LOAD_ATTRs are present -- split(...)[1] would raise IndexError
        # otherwise; confirm against the compiler's import bytecode.
        expected = module_name.split('.', maxsplit=1)[1]
        actual = '.'.join(map(op.attrgetter('arg'), popped))
        if expected != actual:
            raise DecompilationError(
                "Decompiling import of module %s, but LOAD_ATTRS imply %s" % (
                    expected, actual,
                )
            )
    return popped
""" import_from, store = queue.popleft(), queue.popleft() expect(import_from, instrs.IMPORT_FROM, "after IMPORT_NAME") if not import_from.arg == name: raise DecompilationError( "IMPORT_FROM name mismatch. Expected %r, but got %s." % ( name, import_from, ) ) return ast.alias( name=name, asname=store.arg if store.arg != name else None, ) @_process_instr.register(instrs.COMPARE_OP) @_process_instr.register(instrs.UNARY_NOT) @_process_instr.register(instrs.BINARY_SUBSCR) @_process_instr.register(instrs.LOAD_ATTR) @_process_instr.register(instrs.LOAD_GLOBAL) @_process_instr.register(instrs.LOAD_CONST) @_process_instr.register(instrs.LOAD_FAST) @_process_instr.register(instrs.LOAD_NAME) @_process_instr.register(instrs.LOAD_DEREF) @_process_instr.register(instrs.LOAD_CLOSURE) @_process_instr.register(instrs.BUILD_TUPLE) @_process_instr.register(instrs.BUILD_SET) @_process_instr.register(instrs.BUILD_LIST) @_process_instr.register(instrs.BUILD_MAP) @_process_instr.register(instrs.STORE_MAP) @_process_instr.register(instrs.CALL_FUNCTION) @_process_instr.register(instrs.CALL_FUNCTION_VAR) @_process_instr.register(instrs.CALL_FUNCTION_KW) @_process_instr.register(instrs.CALL_FUNCTION_VAR_KW) @_process_instr.register(instrs.BUILD_SLICE) @_process_instr.register(instrs.JUMP_IF_TRUE_OR_POP) @_process_instr.register(instrs.JUMP_IF_FALSE_OR_POP) def _push(instr, queue, stack, body, context): """ Just push these instructions onto the stack for further processing downstream. """ stack.append(instr) @_process_instr.register(instrs.MAKE_FUNCTION) @_process_instr.register(instrs.MAKE_CLOSURE) def _make_function(instr, queue, stack, body, context): """ Set a make_function_context, then push onto the stack. """ assert stack, "Empty stack before MAKE_FUNCTION." 
def make_assignment(instr, queue, stack):
    """
    Make an ast.Assign node.

    For chained assignments like ``a = b = c`` each left-hand side except
    the last is preceded by a DUP_TOP instruction, so targets are consumed
    while the current instruction is a DUP_TOP, plus one final target.
    """
    rhs = make_expr(stack)

    lhs_targets = []
    current = instr
    while isinstance(current, instrs.DUP_TOP):
        lhs_targets.append(make_assign_target(queue.popleft(), queue, stack))
        current = queue.popleft()
    lhs_targets.append(make_assign_target(current, queue, stack))

    return ast.Assign(targets=lhs_targets, value=rhs)
@make_assign_target.register(instrs.UNPACK_SEQUENCE)
def make_assign_target_unpack(instr, queue, stack):
    """Build a tuple-unpacking assignment target.

    UNPACK_SEQUENCE's arg is the number of targets; each one is built
    from the instructions that follow in the queue, in order.
    """
    elements = []
    for _ in range(instr.arg):
        elements.append(make_assign_target(queue.popleft(), queue, stack))
    return ast.Tuple(elts=elements, ctx=ast.Store())
@_process_instr.register(instrs.RETURN_VALUE)
def _return(instr, queue, stack, body, context):
    """Handle a RETURN_VALUE according to the kind of scope being built."""
    if context.in_function_block:
        # Ordinary function body: emit an explicit ``return <expr>``.
        body.append(ast.Return(value=make_expr(stack)))
        return

    if context.in_lambda:
        if body:
            raise DecompilationError("Non-empty body in lambda: %s" % body)
        # Just append the raw expr.  We'll extract the raw value in
        # `make_lambda`.
        body.append(make_expr(stack))
        return

    # Module scope: the compiler appends an implicit ``return None``.
    _check_stack_for_module_return(stack)
    # Pop dummy LOAD_CONST(None) at the end of a module.
    stack.pop()
def pop_with_body_instrs(setup_with_instr, queue):
    """
    Pop instructions from `queue` that form the body of a with block.

    Parameters
    ----------
    setup_with_instr : instructions.SETUP_WITH
        The instruction that opened the with block.  Its ``arg`` is the
        first instruction after the block body.
    queue : deque
        Queue of unprocessed instructions.  Consumed up to and including
        the WITH_CLEANUP/END_FINALLY pair that closes the block.
    """
    body_instrs = popwhile(op.is_not(setup_with_instr.arg), queue, side='left')

    # Last two instructions should always be POP_BLOCK, LOAD_CONST(None).
    # These don't correspond to anything in the AST, so remove them here.
    # NB: pops happen from the right, so LOAD_CONST comes off first.
    load_none = body_instrs.pop()
    expect(load_none, instrs.LOAD_CONST, "at end of with-block")
    pop_block = body_instrs.pop()
    expect(pop_block, instrs.POP_BLOCK, "at end of with-block")
    if load_none.arg is not None:
        raise DecompilationError(
            "Expected LOAD_CONST(None), but got "
            "%r instead" % (load_none)
        )

    # Target of the setup_with should be a WITH_CLEANUP instruction followed
    # by an END_FINALLY. Neither of these correspond to anything in the AST.
    with_cleanup = queue.popleft()
    expect(with_cleanup, instrs.WITH_CLEANUP, "at end of with-block")
    end_finally = queue.popleft()
    expect(end_finally, instrs.END_FINALLY, "at end of with-block")

    return body_instrs
def make_for_loop(loop_body_instrs, else_body_instrs, context):
    """
    Make an ast.For node.

    Parameters
    ----------
    loop_body_instrs : deque
        Instructions forming the iterator expression, target, and body.
        Consumed in order; the pop order below is load-bearing.
    else_body_instrs : deque
        Instructions forming the else-block (empty deque if none).
    context : DecompilationContext
    """
    # Instructions from start until GET_ITER are the builders for the
    # iterator expression.
    iterator_expr = make_expr(
        popwhile(not_a(instrs.GET_ITER), loop_body_instrs, side='left')
    )

    # Next is the GET_ITER instruction, which we don't need.
    loop_body_instrs.popleft()

    # Next is FOR_ITER, which is the jump target for Continue nodes.
    top_of_loop = loop_body_instrs.popleft()

    # This can be a STORE_* or an UNPACK_SEQUENCE followed by some number of
    # stores.  An empty stack is passed because the target expression
    # builders all come from the queue, not the surrounding stack.
    target = make_assign_target(
        loop_body_instrs.popleft(), loop_body_instrs, stack=[],
    )

    body, orelse_body = make_loop_body_and_orelse(
        top_of_loop, loop_body_instrs, else_body_instrs, context
    )

    return ast.For(
        target=target,
        iter=iterator_expr,
        body=body,
        orelse=orelse_body,
    )
def make_while_loop_test_expr(loop_body_instrs):
    """
    Make an expression in the context of a while-loop test.

    Code of the form::

        while <expr>:

    generates a POP_JUMP_IF_FALSE for the loop test, while code of the
    form::

        while not <expr>:

    generates a POP_JUMP_IF_TRUE for the loop test.

    Code of the form::

        while True:

    generates no jumps at all.
    """
    bottom_of_loop = loop_body_instrs[-1]
    # True when an instruction's jump target is the bottom of the loop.
    is_jump_to_bottom = compose(op.is_(bottom_of_loop), op.attrgetter('arg'))

    # Consume instructions until we find a jump to the bottom of the loop.
    test_builders = deque(
        popwhile(complement(is_jump_to_bottom), loop_body_instrs, side='left')
    )
    # If we consumed the entire loop body without finding a jump, assume this
    # is a while True loop.  Return the rest of the instructions as the loop
    # body.
    if not loop_body_instrs:
        return ast.NameConstant(value=True), test_builders

    # Top of the body is either a POP_JUMP_IF_TRUE or POP_JUMP_IF_FALSE.
    jump = loop_body_instrs.popleft()
    expr = make_expr(test_builders)
    if isinstance(jump, instrs.POP_JUMP_IF_TRUE):
        # ``while not <expr>``: re-apply the negation that the compiler
        # folded into the jump direction.
        return ast.UnaryOp(op=ast.Not(), operand=expr), loop_body_instrs
    else:
        return expr, loop_body_instrs
body = popwhile(op.is_not(setup_loop_instr.arg), queue, side='left') # Anything after the last POP_BLOCK instruction is the else-block. else_body = popwhile(not_a(instrs.POP_BLOCK), body, side='right') jump_to_top, pop_block = body[-2], body[-1] if not isinstance(jump_to_top, instrs.JUMP_ABSOLUTE): raise DecompilationError( "Penultimate instruction of loop body is " "%s, not JUMP_ABSOLUTE." % jump_to_top, ) if not isinstance(pop_block, instrs.POP_BLOCK): raise DecompilationError( "Last instruction of loop body is " "%s, not pop_block." % pop_block, ) loop_expr = jump_to_top.arg if isinstance(loop_expr, instrs.FOR_ITER): return 'for', body, else_body return 'while', body, else_body def make_expr(stack_builders): """ Convert a sequence of instructions into AST expressions. """ return _make_expr(stack_builders.pop(), stack_builders) _BOOLOP_JUMP_TO_AST_OP = { instrs.JUMP_IF_TRUE_OR_POP: ast.Or, instrs.JUMP_IF_FALSE_OR_POP: ast.And, } _BOOLOP_JUMP_TYPES = tuple(_BOOLOP_JUMP_TO_AST_OP) def _make_expr(toplevel, stack_builders): """ Override the single-dispatched make_expr with wrapper logic for handling short-circuiting expressions. """ base_expr = _make_expr_internal(toplevel, stack_builders) if not toplevel._next_target_of: return base_expr subexprs = deque([base_expr]) ops = deque([]) while stack_builders and stack_builders[-1] in toplevel._next_target_of: jump = stack_builders.pop() if not isinstance(jump, _BOOLOP_JUMP_TYPES): raise DecompilationError( "Don't know how to decompile %s inside expression." % jump, ) subexprs.appendleft(make_expr(stack_builders)) ops.appendleft(_BOOLOP_JUMP_TO_AST_OP[type(jump)]()) if len(subexprs) <= 1: raise DecompilationError( "Expected at least one JUMP instruction before expression." 
def make_boolop(exprs, op_types):
    """
    Build a right-nested ast.BoolOp from parallel deques of operands and
    operators.

    Parameters
    ----------
    exprs : deque
        Operand expressions; always one entry longer than ``op_types``.
    op_types : deque[{ast.And, ast.Or}]
        Operator instances joining consecutive operands.
    """
    operator = op_types.popleft()
    if not op_types:
        # Base case: a single operator joins the final two operands.
        assert len(exprs) == 2
        return ast.BoolOp(op=operator, values=list(exprs))

    # Recursive case: the first operand pairs with the BoolOp built from
    # everything that remains.
    head = exprs.popleft()
    return ast.BoolOp(
        op=operator,
        values=[head, make_boolop(exprs, op_types)],
    )
_closure_cells = make_closure_cells(stack_builders) # noqa body = pycode_to_body(co, DecompilationContext(in_lambda=True)) if len(body) != 1: raise DecompilationError( "Got multiple expresssions for lambda: %s" % body, ) body = body[0] return ast.Lambda( args=make_function_arguments( args, kwonly, varargs, varkwargs, defaults, kw_defaults, annotations, ), body=body, ) @_make_expr_internal.register(instrs.UNARY_NOT) def _make_expr_unary_not(toplevel, stack_builders): return ast.UnaryOp( op=ast.Not(), operand=make_expr(stack_builders), ) @_make_expr_internal.register(instrs.CALL_FUNCTION) def _make_expr_call_function(toplevel, stack_builders): keywords = make_call_keywords(stack_builders, toplevel.keyword) positionals = make_call_positionals(stack_builders, toplevel.positional) return ast.Call( func=make_expr(stack_builders), args=positionals, keywords=keywords, starargs=None, kwargs=None, ) @_make_expr_internal.register(instrs.CALL_FUNCTION_VAR) def _make_expr_call_function_var(toplevel, stack_builders): starargs = make_expr(stack_builders) keywords = make_call_keywords(stack_builders, toplevel.keyword) positionals = make_call_positionals(stack_builders, toplevel.positional) return ast.Call( func=make_expr(stack_builders), args=positionals, keywords=keywords, starargs=starargs, kwargs=None, ) @_make_expr_internal.register(instrs.CALL_FUNCTION_KW) def _make_expr_call_function_kw(toplevel, stack_builders): kwargs = make_expr(stack_builders) keywords = make_call_keywords(stack_builders, toplevel.keyword) positionals = make_call_positionals(stack_builders, toplevel.positional) return ast.Call( func=make_expr(stack_builders), args=positionals, keywords=keywords, starargs=None, kwargs=kwargs, ) @_make_expr_internal.register(instrs.CALL_FUNCTION_VAR_KW) def _make_expr_call_function_var_kw(toplevel, stack_builders): kwargs = make_expr(stack_builders) starargs = make_expr(stack_builders) keywords = make_call_keywords(stack_builders, toplevel.keyword) positionals = 
make_call_positionals(stack_builders, toplevel.positional)
    return ast.Call(
        func=make_expr(stack_builders),
        args=positionals,
        keywords=keywords,
        starargs=starargs,
        kwargs=kwargs,
    )


def make_call_keywords(stack_builders, count):
    """
    Make the keywords entry for an ast.Call node.
    """
    out = []
    for _ in range(count):
        value = make_expr(stack_builders)
        load_kwname = stack_builders.pop()
        if not isinstance(load_kwname, instrs.LOAD_CONST):
            raise DecompilationError(
                "Expected a LOAD_CONST, but got %r" % load_kwname
            )
        if not isinstance(load_kwname.arg, str):
            raise DecompilationError(
                "Expected LOAD_CONST of a str, but got %r." % load_kwname,
            )
        out.append(ast.keyword(arg=load_kwname.arg, value=value))
    # Keywords are popped last-to-first; restore source order.
    out.reverse()
    return out


def make_call_positionals(stack_builders, count):
    """
    Make the args entry for an ast.Call node.
    """
    out = [make_expr(stack_builders) for _ in range(count)]
    out.reverse()
    return out


@_make_expr_internal.register(instrs.BUILD_TUPLE)
def _make_expr_tuple(toplevel, stack_builders):
    return ast.Tuple(
        ctx=ast.Load(),
        elts=make_exprs(stack_builders, toplevel.arg),
    )


@_make_expr_internal.register(instrs.BUILD_SET)
def _make_expr_set(toplevel, stack_builders):
    return ast.Set(
        ctx=ast.Load(),
        elts=make_exprs(stack_builders, toplevel.arg),
    )


@_make_expr_internal.register(instrs.BUILD_LIST)
def _make_expr_list(toplevel, stack_builders):
    return ast.List(
        ctx=ast.Load(),
        elts=make_exprs(stack_builders, toplevel.arg),
    )


def make_exprs(stack_builders, count):
    """
    Make elements of set/list/tuple literal.
    """
    exprs = [make_expr(stack_builders) for _ in range(count)]
    # Elements are on the stack from right to left, but we want them from
    # left to right.
    exprs.reverse()
    return exprs


@_make_expr_internal.register(instrs.BUILD_MAP)
def _make_expr_empty_dict(toplevel, stack_builders):
    """
    This should only be hit for empty dicts.  Anything else should hit the
    STORE_MAP handler instead.
    """
    if toplevel.arg:
        raise DecompilationError(
            "make_expr() called with nonzero BUILD_MAP arg %d" % toplevel.arg
        )
    if stack_builders:
        raise DecompilationError(
            "Unexpected stack_builders for BUILD_MAP(0): %s" % stack_builders
        )
    return ast.Dict(keys=[], values=[])


@_make_expr_internal.register(instrs.STORE_MAP)
def _make_expr_dict(toplevel, stack_builders):
    # Push toplevel back onto the stack so that it gets correctly consumed by
    # `_make_dict_elems`.
    stack_builders.append(toplevel)

    build_map = find_build_map(stack_builders)
    dict_builders = popwhile(
        op.is_not(build_map),
        stack_builders,
        side='right'
    )

    # Consume the BUILD_MAP instruction.
    _build_map = stack_builders.pop()
    assert _build_map is build_map

    keys, values = _make_dict_elems(build_map, dict_builders)
    return ast.Dict(keys=keys, values=values)


def find_build_map(stack_builders):
    """
    Find the BUILD_MAP instruction for which the last element of
    ``stack_builders`` is a store.
    """
    assert isinstance(stack_builders[-1], instrs.STORE_MAP)

    # Walk backwards counting stores until a BUILD_MAP accounts for them all.
    to_consume = 0
    for instr in reversed(stack_builders):
        if isinstance(instr, instrs.STORE_MAP):
            # NOTE: This branch should always be hit on the first iteration.
            to_consume += 1
        elif isinstance(instr, instrs.BUILD_MAP):
            to_consume -= instr.arg
            if to_consume <= 0:
                return instr
    else:
        raise DecompilationError(
            "Couldn't find BUILD_MAP for last element of %s." % stack_builders
        )


def _make_dict_elems(build_instr, builders):
    """
    Return a list of keys and a list of values for the dictionary literal
    generated by ``build_instr``.
    """
    keys = []
    values = []
    for _ in range(build_instr.arg):
        popped = builders.pop()
        if not isinstance(popped, instrs.STORE_MAP):
            raise DecompilationError(
                "Expected a STORE_MAP but got %s" % popped
            )

        keys.append(make_expr(builders))
        values.append(make_expr(builders))

    # Keys and values are emitted in reverse order of how they appear in the
    # AST.
    keys.reverse()
    values.reverse()
    return keys, values


@_make_expr_internal.register(instrs.LOAD_DEREF)
@_make_expr_internal.register(instrs.LOAD_NAME)
@_make_expr_internal.register(instrs.LOAD_CLOSURE)
@_make_expr_internal.register(instrs.LOAD_FAST)
@_make_expr_internal.register(instrs.LOAD_GLOBAL)
def _make_expr_name(toplevel, stack_builders):
    # All name-load instructions become a Name node in Load context.
    return ast.Name(id=toplevel.arg, ctx=ast.Load())


@_make_expr_internal.register(instrs.LOAD_ATTR)
def _make_expr_attr(toplevel, stack_builders):
    return ast.Attribute(
        value=make_expr(stack_builders),
        attr=toplevel.arg,
        ctx=ast.Load(),
    )


@_make_expr_internal.register(instrs.BINARY_SUBSCR)
def _make_expr_getitem(toplevel, stack_builders):
    slice_ = make_slice(stack_builders)
    value = make_expr(stack_builders)
    return ast.Subscript(slice=slice_, value=value, ctx=ast.Load())


def make_slice(stack_builders):
    """
    Make an expression in the context of a slice.

    This mostly delegates to _make_expr, but wraps nodes in `ast.Index` or
    `ast.Slice` as appropriate.
    """
    return _make_slice(stack_builders.pop(), stack_builders)


@singledispatch
def _make_slice(toplevel, stack_builders):
    # Default case: a plain expression used as a subscript becomes an Index.
    return ast.Index(_make_expr(toplevel, stack_builders))


@_make_slice.register(instrs.BUILD_SLICE)
def make_slice_build_slice(toplevel, stack_builders):
    return _make_expr(toplevel, stack_builders)


@_make_slice.register(instrs.BUILD_TUPLE)
def make_slice_tuple(toplevel, stack_builders):
    slice_ = _make_expr(toplevel, stack_builders)
    if isinstance(slice_, ast.Tuple):
        # a = b[c, d] generates Index(value=Tuple(...))
        # a = b[c:, d] generates ExtSlice(dims=[Slice(...), Index(...)])
        slice_ = normalize_tuple_slice(slice_)
    return slice_


def normalize_tuple_slice(node):
    """
    Normalize an ast.Tuple node representing the internals of a slice.

    Returns the node wrapped in an ast.Index.
    Returns an ExtSlice node built from the tuple elements if there are any
    slices.
    """
    if not any(isinstance(elt, ast.Slice) for elt in node.elts):
        return ast.Index(value=node)

    return ast.ExtSlice(
        [
            # Wrap non-Slice nodes in Index nodes.
            elt if isinstance(elt, ast.Slice) else ast.Index(value=elt)
            for elt in node.elts
        ]
    )


@_make_expr_internal.register(instrs.BUILD_SLICE)
def _make_expr_build_slice(toplevel, stack_builders):
    # Arg is always either 2 or 3.  If it's 3, then the first expression is
    # the step value.
    if toplevel.arg == 3:
        step = make_expr(stack_builders)
    else:
        step = None

    def normalize_empty_slice(node):
        """
        Convert LOAD_CONST(None) to just None.

        This normalizes slices of the form a[b:None] to just a[b:].
        """
        if isinstance(node, ast.NameConstant) and node.value is None:
            return None
        return node

    upper = normalize_empty_slice(make_expr(stack_builders))
    lower = normalize_empty_slice(make_expr(stack_builders))

    return ast.Slice(lower=lower, upper=upper, step=step)


@_make_expr_internal.register(instrs.LOAD_CONST)
def _make_expr_const(toplevel, stack_builders):
    return _make_const(toplevel.arg)


@singledispatch
def _make_const(const):
    raise DecompilationError(
        "Don't know how to make constant node for %r."
        % (const,)
    )


@_make_const.register(float)
@_make_const.register(complex)
@_make_const.register(int)
def _make_const_number(const):
    return ast.Num(n=const)


@_make_const.register(str)
def _make_const_str(const):
    return ast.Str(s=const)


@_make_const.register(bytes)
def _make_const_bytes(const):
    return ast.Bytes(s=const)


@_make_const.register(tuple)
def _make_const_tuple(const):
    return ast.Tuple(elts=list(map(_make_const, const)), ctx=ast.Load())


@_make_const.register(type(None))
def _make_const_none(none):
    return ast.NameConstant(value=None)


# (instruction type, AST operator node) pairs for all binary operators.
binops = frozenset([
    (instrs.BINARY_ADD, ast.Add),
    (instrs.BINARY_SUBTRACT, ast.Sub),
    (instrs.BINARY_MULTIPLY, ast.Mult),
    (instrs.BINARY_POWER, ast.Pow),
    (instrs.BINARY_TRUE_DIVIDE, ast.Div),
    (instrs.BINARY_FLOOR_DIVIDE, ast.FloorDiv),
    (instrs.BINARY_MODULO, ast.Mod),
    (instrs.BINARY_LSHIFT, ast.LShift),
    (instrs.BINARY_RSHIFT, ast.RShift),
    (instrs.BINARY_AND, ast.BitAnd),
    (instrs.BINARY_XOR, ast.BitXor),
    (instrs.BINARY_OR, ast.BitOr),
])


def _binop_handler(nodetype):
    """
    Factory function for binary operator handlers.
""" def _handler(toplevel, stack_builders): right = make_expr(stack_builders) left = make_expr(stack_builders) return ast.BinOp(left=left, op=nodetype(), right=right) return _handler for instrtype, nodetype in binops: _process_instr.register(instrtype)(_push) _make_expr_internal.register(instrtype)(_binop_handler(nodetype)) def make_function(function_builders, *, closure): """ Construct a FunctionDef AST node from a sequence of the form: LOAD_CLOSURE, N times (when handling MAKE_CLOSURE) BUILD_TUPLE(N) (when handling MAKE_CLOSURE) (optional) , (optional) (optional) LOAD_CONST() (optional) LOAD_CONST(code), LOAD_CONST(name), MAKE_FUNCTION | MAKE_CLOSURE (optional) """ decorator_calls = deque() while isinstance(function_builders[-1], instrs.CALL_FUNCTION): decorator_calls.appendleft(function_builders.pop()) *builders, load_code_instr, load_name_instr, make_function_instr = ( function_builders ) _check_make_function_instrs( load_code_instr, load_name_instr, make_function_instr, ) co = load_code_instr.arg name = load_name_instr.arg args, kwonly, varargs, varkwargs = paramnames(co) # Convert default and annotation builders to AST nodes. defaults, kw_defaults, annotations = make_defaults_and_annotations( make_function_instr, builders, ) # Convert decorator function builders. The stack is in reverse order. decorators = [make_expr(builders) for _ in decorator_calls] decorators.reverse() if closure: # There should be a tuple of closure cells still on the stack here. # These don't appear in the AST, but we need to consume them to ensure # correctness down the line. closure_cells = make_closure_cells(builders) # noqa # We should have consumed all our builders by this point. if builders: raise DecompilationError( "Unexpected leftover builders for %s: %s." 
% ( make_function_instr, builders ) ) return ast.FunctionDef( body_code=co, name=name.split('.')[-1], args=make_function_arguments( args, kwonly, varargs, varkwargs, defaults, kw_defaults, annotations, ), body=pycode_to_body(co, DecompilationContext(in_function_block=True)), decorator_list=decorators, returns=annotations.get('return'), ) def make_function_arguments(args, kwonly, varargs, varkwargs, defaults, kw_defaults, annotations): """ Make an ast.arguments from the args parsed out of a code object. """ return ast.arguments( args=[ast.arg(arg=a, annotation=annotations.get(a)) for a in args], kwonlyargs=[ ast.arg(arg=a, annotation=annotations.get(a)) for a in kwonly ], defaults=defaults, kw_defaults=list(map(kw_defaults.get, kwonly)), vararg=None if varargs is None else ast.arg( arg=varargs, annotation=annotations.get(varargs), ), kwarg=None if varkwargs is None else ast.arg( arg=varkwargs, annotation=annotations.get(varkwargs) ), ) def make_closure_cells(stack_builders): cells = make_expr(stack_builders) if not isinstance(cells, ast.Tuple): raise DecompilationError( "Expected an ast.Tuple of closure cells, " "but got %s" % cells, ) return cells def make_global_and_nonlocal_decls(code_instrs): """ Find all STORE_GLOBAL and STORE_DEREF instructions in `instrs` and convert them into a canonical list of `ast.Global` and `ast.Nonlocal` declarations. """ globals_ = sorted(set( i.arg for i in code_instrs if isinstance(i, instrs.STORE_GLOBAL) )) nonlocals = sorted(set( i.arg for i in code_instrs if isinstance(i, instrs.STORE_DEREF) and i.vartype == 'free' )) out = [] if globals_: out.append(ast.Global(names=globals_)) if nonlocals: out.append(ast.Nonlocal(names=nonlocals)) return out def make_defaults_and_annotations(make_function_instr, builders): """ Get the AST expressions corresponding to the defaults, kwonly defaults, and annotations for a function created by `make_function_instr`. """ # Integer counts. 
n_defaults, n_kwonlydefaults, n_annotations = unpack_make_function_arg( make_function_instr.arg ) if n_annotations: # TOS should be a tuple of annotation names. load_annotation_names = builders.pop() annotations = dict(zip( reversed(load_annotation_names.arg), (make_expr(builders) for _ in range(n_annotations - 1)) )) else: annotations = {} kwonlys = {} while n_kwonlydefaults: default_expr = make_expr(builders) key_instr = builders.pop() if not isinstance(key_instr, instrs.LOAD_CONST): raise DecompilationError( "kwonlydefault key is not a LOAD_CONST: %s" % key_instr ) if not isinstance(key_instr.arg, str): raise DecompilationError( "kwonlydefault key builder is not a " "'LOAD_CONST of a string: %s" % key_instr ) kwonlys[key_instr.arg] = default_expr n_kwonlydefaults -= 1 defaults = make_exprs(builders, n_defaults) return defaults, kwonlys, annotations def unpack_make_function_arg(arg): """ Unpack the argument to a MAKE_FUNCTION instruction. Parameters ---------- arg : int The argument to a MAKE_FUNCTION instruction. Returns ------- num_defaults, num_kwonly_default_pairs, num_annotations See Also -------- https://docs.python.org/3/library/dis.html#opcode-MAKE_FUNCTION """ return arg & 0xFF, (arg >> 8) & 0xFF, (arg >> 16) & 0x7FFF def _check_make_function_instrs(load_code_instr, load_name_instr, make_function_instr, *, expect_lambda=False): """ Validate the instructions passed to a make_function call. """ # Validate load_code_instr. 
if not isinstance(load_code_instr, instrs.LOAD_CONST): raise TypeError( "make_function expected 'load_code_instr` to be a " "LOAD_CONST, but got %s" % load_code_instr, ) if not isinstance(load_code_instr.arg, types.CodeType): raise TypeError( "make_function expected load_code_instr " "to load a code object, but got %s" % load_code_instr.arg, ) # Validate load_name_instr if not isinstance(load_name_instr, instrs.LOAD_CONST): raise TypeError( "make_function expected 'load_name_instr` to be a " "LOAD_CONST, but got %s" % load_code_instr, ) if not isinstance(load_name_instr.arg, str): raise TypeError( "make_function expected load_name_instr " "to load a string, but got %r instead" % load_name_instr.arg ) # This is an endswith rather than '==' because the arg is the # fully-qualified name. is_lambda = is_lambda_name(load_name_instr.arg) if expect_lambda and not is_lambda: raise ValueError( "Expected to make a function named , but " "got %r instead." % load_name_instr.arg ) if not expect_lambda and is_lambda: raise ValueError("Unexpectedly received lambda function.") # Validate make_function_instr if not isinstance(make_function_instr, (instrs.MAKE_FUNCTION, instrs.MAKE_CLOSURE)): raise TypeError( "make_function expected a MAKE_FUNCTION or MAKE_CLOSURE" "instruction, but got %s instead." % make_function_instr ) def pop_arguments(instr, stack): """ Pop instructions off `stack` until we pop all instructions that will produce values popped by `instr`. 
""" needed = instr.stack_effect if needed >= 0: raise DecompilationError( "%s is does not have a negative stack effect" % instr ) for popcount, to_pop in enumerate(reversed(stack), start=1): needed += to_pop.stack_effect if not needed: break else: raise DecompilationError( "Reached end of stack without finding inputs to %s" % instr, ) popped = stack[-popcount:] stack[:] = stack[:-popcount] return popped def _check_stack_for_module_return(stack): """ Verify that the stack is in the expected state before the dummy RETURN_VALUE instruction of a module or class. """ fail = ( len(stack) != 1 or not isinstance(stack[0], instrs.LOAD_CONST) or stack[0].arg is not None ) if fail: raise DecompilationError( "Reached end of non-function code " "block with unexpected stack: %s." % stack ) def expect(instr, expected, context): """ Check that an instruction is of the expected type. """ if not isinstance(instr, expected): raise DecompilationError( "Expected a {expected} instruction {context}. Got {instr}.".format( instr=instr, expected=expected, context=context, ) ) return instr def is_lambda_name(name): """ Check if `name` is the name of lambda function. """ return name.endswith('') def popwhile(cond, queue, *, side): """ Pop elements off a queue while `cond(nextelem)` is True. 
    Parameters
    ----------
    cond : predicate
    queue : deque
    side : {'left', 'right'}

    Returns
    -------
    popped : deque

    Examples
    --------
    >>> from collections import deque
    >>> d = deque([1, 2, 3, 2, 1])
    >>> popwhile(lambda x: x < 3, d, side='left')
    deque([1, 2])
    >>> d
    deque([3, 2, 1])
    >>> popwhile(lambda x: x < 3, d, side='right')
    deque([2, 1])
    >>> d
    deque([3])
    """
    if side not in ('left', 'right'):
        raise ValueError("`side` must be one of 'left' or 'right'")

    out = deque()

    # Select pop/push/peek behavior once, up front, based on `side`.
    if side == 'left':
        popnext = queue.popleft
        pushnext = out.append
        nextidx = 0
    else:
        popnext = queue.pop
        pushnext = out.appendleft
        nextidx = -1

    while queue:
        if not cond(queue[nextidx]):
            break
        pushnext(popnext())
    return out


def _current_test():
    """
    Get the string passed to the currently running call to
    `test_decompiler.check`.

    This is intended for use in debugging tests.  It should never be called
    in real code.
    """
    from codetransformer.tests.test_decompiler import _current_test as ct
    return ct


================================================
FILE: codetransformer/decompiler/__init__.py
================================================

import sys

from ..code import Flag


def paramnames(co):
    """
    Get the parameter names from a pycode object.

    Returns a 4-tuple of (args, kwonlyargs, varargs, varkwargs).
    varargs and varkwargs will be None if the function doesn't take *args or
    **kwargs, respectively.
    """
    flags = co.co_flags
    varnames = co.co_varnames

    argcount, kwonlyargcount = co.co_argcount, co.co_kwonlyargcount
    total = argcount + kwonlyargcount

    args = varnames[:argcount]
    kwonlyargs = varnames[argcount:total]
    varargs, varkwargs = None, None
    if flags & Flag.CO_VARARGS:
        varargs = varnames[total]
        total += 1
    if flags & Flag.CO_VARKEYWORDS:
        varkwargs = varnames[total]

    return args, kwonlyargs, varargs, varkwargs


# The CPython 3.4.3-specific decompiler implementation.
if sys.version_info[:3] == (3, 4, 3):
    from ._343 import *  # noqa


================================================
FILE: codetransformer/instructions.py
================================================

from abc import ABCMeta, abstractmethod
from dis import opname, opmap, hasjabs, hasjrel, HAVE_ARGUMENT, stack_effect
from enum import (
    IntEnum,
    unique,
)
from operator import attrgetter
from re import escape

from .patterns import matchable
from .utils.immutable import immutableattr
from .utils.no_default import no_default


__all__ = ['Instruction'] + sorted(list(opmap))

# The instructions that use the co_names tuple.
_uses_name = frozenset({
    'DELETE_ATTR',
    'DELETE_GLOBAL',
    'DELETE_NAME',
    'IMPORT_FROM',
    'IMPORT_NAME',
    'LOAD_ATTR',
    'LOAD_GLOBAL',
    'LOAD_NAME',
    'STORE_ATTR',
    'STORE_GLOBAL',
    'STORE_NAME',
})
# The instructions that use the co_varnames tuple.
_uses_varname = frozenset({
    'LOAD_FAST',
    'STORE_FAST',
    'DELETE_FAST',
})
# The instructions that use the co_freevars tuple.
_uses_free = frozenset({
    'DELETE_DEREF',
    'LOAD_CLASSDEREF',
    'LOAD_CLOSURE',
    'LOAD_DEREF',
    'STORE_DEREF',
})


def _notimplemented(name):
    # Abstract placeholder property for attributes the metaclass fills in on
    # concrete instruction types.
    @property
    @abstractmethod
    def _(self):
        raise NotImplementedError(name)
    return _


@property
def _vartype(self):
    # Installed under the key 'vartype' on instruction types that use
    # co_freevars; the `_vartype` instance attribute is assigned when
    # instructions are built from a real code object.
    try:
        return self._vartype
    except AttributeError:
        raise AttributeError(
            "vartype is not available on instructions "
            "constructed outside of a Code object."
        )


class InstructionMeta(ABCMeta, matchable):
    """
    Metaclass for instruction types.

    Interns one type per opcode in ``_type_cache`` and pre-computes immutable
    class attributes (opname, jump-ness, name-table usage, ...) from the
    ``dis`` tables.
    """
    _marker = object()  # sentinel
    _type_cache = {}

    def __init__(self, *args, opcode=None):
        return super().__init__(*args)

    def __new__(mcls, name, bases, dict_, *, opcode=None):
        # One class per opcode: reuse an existing type when possible.
        try:
            return mcls._type_cache[opcode]
        except KeyError:
            pass

        if len(bases) != 1:
            raise TypeError(
                '{} does not support multiple inheritance'.format(
                    mcls.__name__,
                ),
            )

        if bases[0] is mcls._marker:
            # This is the abstract `Instruction` base itself; give it
            # placeholder attributes and no opcode.
            dict_['_reprname'] = immutableattr(name)
            for attr in ('absjmp', 'have_arg', 'opcode', 'opname', 'reljmp'):
                dict_[attr] = _notimplemented(attr)
            return super().__new__(mcls, name, (object,), dict_)

        if opcode not in opmap.values():
            raise TypeError('Invalid opcode: {}'.format(opcode))

        opname_ = opname[opcode]
        dict_['opname'] = dict_['_reprname'] = immutableattr(opname_)
        dict_['opcode'] = immutableattr(opcode)

        absjmp = opcode in hasjabs
        reljmp = opcode in hasjrel
        dict_['absjmp'] = immutableattr(absjmp)
        dict_['reljmp'] = immutableattr(reljmp)
        dict_['is_jmp'] = immutableattr(absjmp or reljmp)

        dict_['uses_name'] = immutableattr(opname_ in _uses_name)
        dict_['uses_varname'] = immutableattr(opname_ in _uses_varname)
        dict_['uses_free'] = immutableattr(opname_ in _uses_free)
        if opname_ in _uses_free:
            dict_['vartype'] = _vartype
        dict_['have_arg'] = immutableattr(opcode >= HAVE_ARGUMENT)

        cls = mcls._type_cache[opcode] = super().__new__(
            mcls, opname[opcode], bases, dict_,
        )
        return cls

    def mcompile(self):
        # An instruction type matches exactly its own (escaped) opcode byte.
        return escape(bytes((self.opcode,)))

    def __repr__(self):
        return self._reprname
    __str__ = __repr__


class Instruction(InstructionMeta._marker, metaclass=InstructionMeta):
    """
    Base class for all instruction types.

    Parameters
    ----------
    arg : any, optional
        The argument for the instruction. This should be the actual value of
        the argument, for example, if this is a
        :class:`~codetransformer.instructions.LOAD_CONST`, use the constant
        value, not the index that would appear in the bytecode.
    """
    _no_arg = no_default

    def __init__(self, arg=_no_arg):
        if self.have_arg and arg is self._no_arg:
            raise TypeError(
                "{} missing 1 required argument: 'arg'".format(self.opname),
            )
        self.arg = self._normalize_arg(arg)
        self._target_of = set()
        self._stolen_by = None  # used for lnotab recalculation

    def __repr__(self):
        arg = self.arg
        return '{op}{arg}'.format(
            op=self.opname,
            arg='(%r)' % arg if self.arg is not self._no_arg else '',
        )

    @staticmethod
    def _normalize_arg(arg):
        # Default: store the argument as-is; subclasses override for jumps,
        # comparisons, etc.
        return arg

    def steal(self, instr):
        """Steal the jump index off of `instr`.

        This makes anything that would have jumped to `instr` jump to this
        Instruction instead.

        Parameters
        ----------
        instr : Instruction
            The instruction to steal the jump sources from.

        Returns
        -------
        self : Instruction
            The instruction that owns this method.

        Notes
        -----
        This mutates self and ``instr`` inplace.
        """
        instr._stolen_by = self
        for jmp in instr._target_of:
            jmp.arg = self
        self._target_of = instr._target_of
        instr._target_of = set()
        return self

    @classmethod
    def from_opcode(cls, opcode, arg=_no_arg):
        """
        Create an instruction from an opcode and raw argument.

        Parameters
        ----------
        opcode : int
            Opcode for the instruction to create.
        arg : int, optional
            The argument for the instruction.

        Returns
        -------
        instr : Instruction
            An instance of the instruction named by ``opcode``.
        """
        return type(cls)(opname[opcode], (cls,), {}, opcode=opcode)(arg)

    @property
    def stack_effect(self):
        """
        The net effect of executing this instruction on the interpreter stack.

        Instructions that pop values off the stack have negative stack effect
        equal to the number of popped values.

        Instructions that push values onto the stack have positive stack
        effect equal to the number of popped values.

        Examples
        --------
        - LOAD_{FAST,NAME,GLOBAL,DEREF} push one value onto the stack.
          They have a stack_effect of 1.
        - POP_JUMP_IF_{TRUE,FALSE} always pop one value off the stack.
          They have a stack effect of -1.
        - BINARY_* instructions pop two instructions off the stack, apply a
          binary operator, and push the resulting value onto the stack.  They
          have a stack effect of -1 (-2 values consumed + 1 value pushed).
        """
        if self.opcode == NOP.opcode:  # noqa
            # dis.stack_effect is broken here
            return 0

        return stack_effect(
            self.opcode,
            *((self.arg if isinstance(self.arg, int) else 0,)
              if self.have_arg else ())
        )

    def equiv(self, instr):
        """Check equivalence of instructions. This checks against the types
        and the arguments of the instructions

        Parameters
        ----------
        instr : Instruction
            The instruction to check against.

        Returns
        -------
        is_equiv : bool
            If the instructions are equivalent.

        Notes
        -----
        This is a separate concept from instruction identity. Two separate
        instructions can be equivalent without being the same exact instance.
        This means that two equivalent instructions can be at different points
        in the bytecode or be targeted by different jumps.
        """
        return type(self) == type(instr) and self.arg == instr.arg


class _RawArg(int):
    """A class to hold arguments that are not yet initialized so that they
    don't break subclass's type checking code.

    This is used in the first pass of instruction creating in
    Code.from_pycode.
    """


def _mk_call_init(class_):
    """Create an __init__ function for a call type instruction.

    Parameters
    ----------
    class_ : type
        The type to bind the function to.

    Returns
    -------
    __init__ : callable
        The __init__ method for the class.
""" def __init__(self, packed=no_default, *, positional=0, keyword=0): if packed is no_default: arg = int.from_bytes(bytes((positional, keyword)), 'little') elif not positional and not keyword: arg = packed else: raise TypeError('cannot specify packed and unpacked arguments') self.positional, self.keyword = arg.to_bytes(2, 'little') super(class_, self).__init__(arg) return __init__ def _call_repr(self): return '%s(positional=%d, keyword=%d)' % ( type(self).__name__, self.positional, self.keyword, ) def _check_jmp_arg(self, arg): if not isinstance(arg, (Instruction, _RawArg)): raise TypeError( 'argument to %s must be an instruction, got: %r' % ( type(self).__name__, arg, ), ) if isinstance(arg, Instruction): arg._target_of.add(self) return arg class CompareOpMeta(InstructionMeta): """ Special-case metaclass for the COMPARE_OP instruction type that provides default constructors for the various kinds of comparisons. These default constructors are implemented as descriptors so that we can write:: new_compare = COMPARE_OP.LT and have it be equivalent to:: new_compare = COMPARE_OP(COMPARE_OP.comparator.LT) """ @unique class comparator(IntEnum): LT = 0 LE = 1 EQ = 2 NE = 3 GT = 4 GE = 5 IN = 6 NOT_IN = 7 IS = 8 IS_NOT = 9 EXCEPTION_MATCH = 10 def __repr__(self): return '' % ( self.__class__.__name__, self._name_, self._value_, ) class ComparatorDescr: """ A descriptor on the **metaclass** of COMPARE_OP that constructs new instances of COMPARE_OP on attribute access. Parameters ---------- op : comparator The element of the `comparator` enum that this descriptor will forward to the COMPARE_OP constructor. """ def __init__(self, op): self._op = op def __get__(self, instance, owner): # Since this descriptor is added to the current metaclass, # ``instance`` here is the COMPARE_OP **class**. if instance is None: # If someone does `CompareOpMeta.LT`, give them back the # descriptor object itself. 
return self # If someone does `COMPARE_OP.LT`, return a **new instance** of # COMPARE_OP. # We create new instances so that consumers can take ownership # without worrying about other jumps targeting the new instruction. return instance(self._op) # Dynamically add an instance of ComparatorDescr for each comparator # opcode. # This is equivalent to doing: # LT = ComparatorDescr(comparator.LT) # GT = ComparatorDescr(comparator.GT) # ... for c in comparator: locals()[c._name_] = ComparatorDescr(c) del c del ComparatorDescr metamap = { 'COMPARE_OP': CompareOpMeta, } globals_ = globals() for name, opcode in opmap.items(): globals_[name] = class_ = metamap.get(name, InstructionMeta)( opname[opcode], (Instruction,), { '__module__': __name__, '__qualname__': '.'.join((__name__, name)), }, opcode=opcode, ) if name.startswith('CALL_FUNCTION'): class_.__init__ = _mk_call_init(class_) class_.__repr__ = _call_repr if name == 'COMPARE_OP': class_._normalize_arg = staticmethod(class_.comparator) if class_.is_jmp: class_._normalize_arg = _check_jmp_arg class_.__doc__ = ( """ See Also -------- dis.{name} """.format(name=name), ) del class_ # Clean up the namespace del name del globals_ del metamap del _check_jmp_arg del _call_repr del _mk_call_init # The instructions that use the co_names tuple. uses_name = frozenset( filter(attrgetter('uses_name'), Instruction.__subclasses__()), ) # The instructions that use the co_varnames tuple. uses_varname = frozenset( filter(attrgetter('uses_varname'), Instruction.__subclasses__()), ) # The instructions that use the co_freevars tuple. 
uses_free = frozenset(
    filter(attrgetter('uses_free'), Instruction.__subclasses__()),
)


================================================
FILE: codetransformer/patterns.py
================================================

from operator import methodcaller, index, attrgetter
import re
from types import MethodType

from .utils.instance import instance
from .utils.immutable import immutable


#: The default startcode for patterns.
DEFAULT_STARTCODE = 0


mcompile = methodcaller('mcompile')


def _prepr(m):
    # Parenthesize `or_` alternations so nested reprs stay unambiguous.
    if isinstance(m, or_):
        return '(%r)' % m
    return repr(m)


def coerce_ellipsis(p):
    """Convert ... into a matchany
    """
    if p is ...:
        return matchany
    return p


class matchable:
    """Mixin for defining the operators on patterns.
    """
    def __or__(self, other):
        other = coerce_ellipsis(other)
        if self is other:
            return self
        if not isinstance(other, matchable):
            return NotImplemented

        # Flatten nested or_ objects into a single alternation.
        patterns = []
        if isinstance(self, or_):
            patterns.extend(self.matchables)
        else:
            patterns.append(self)
        if isinstance(other, or_):
            patterns.extend(other.matchables)
        else:
            patterns.append(other)

        return or_(*patterns)

    def __ror__(self, other):
        # Flip the order on the or method
        if not isinstance(other, matchable):
            return NotImplemented
        return type(self).__or__(coerce_ellipsis(other), self)

    def __invert__(self):
        return not_(self)

    def __getitem__(self, key):
        # pattern[n] -> exactly/at-least n repeats (see matchrange).
        try:
            n = index(key)
        except TypeError:
            pass
        else:
            return matchrange(self, n)

        # pattern[n, m] -> bounded repeat range.
        if isinstance(key, tuple) and len(key) in (1, 2):
            return matchrange(self, *key)

        # pattern[var] / pattern[plus] / pattern[option].
        if isinstance(key, modifier):
            return postfix_modifier(self, key)

        raise TypeError('invalid modifier: {0}'.format(key))


class postfix_modifier(immutable, matchable):
    """A pattern with a modifier paired with it.
    """
    __slots__ = 'matchable', 'modifier'

    def mcompile(self):
        return self.matchable.mcompile() + self.modifier.mcompile()

    def __repr__(self):
        return '%r[%r]' % (self.matchable, self.modifier)
    __str__ = __repr__


class meta(matchable):
    """Class for meta patterns and pattern likes.

    for example: ``matchany``.
    """
    def mcompile(self):
        return self._token

    def __repr__(self):
        return self._token.decode('utf-8')
    __str__ = __repr__


class modifier(meta):
    """Marker class for modifier types.
    """
    pass


@instance
class var(modifier):
    """Modifier that matches zero or more of a pattern.
    """
    _token = b'*'


@instance
class plus(modifier):
    """Modifier that matches one or more of a pattern.
    """
    _token = b'+'


@instance
class option(modifier):
    """Modifier that matches zero or one of a pattern.
    """
    _token = b'?'


class matchrange(immutable, meta, defaults={'m': None}):
    # Repeat `matchable` between `n` and `m` times (m=None means unbounded).
    __slots__ = 'matchable', 'n', 'm'

    def mcompile(self):
        m = self.m
        # NOTE(review): when `m` is given this emits b'{n,' followed by
        # b', m', producing e.g. b'{2,, 5}', which `re` does not treat as a
        # repetition qualifier.  Looks like a latent double-comma bug --
        # confirm against the pattern tests before relying on bounded ranges.
        return (
            self.matchable.mcompile() +
            b'{' + bytes(str(self.n), 'utf-8') + b',' +
            (b'' if m is None else (b', ' + bytes(str(m), 'utf-8'))) +
            b'}'
        )

    def __repr__(self):
        return '{matchable}[{args}]'.format(
            matchable=_prepr(self.matchable),
            args=', '.join(map(str, filter(bool, (self.n, self.m)))),
        )


@instance
class matchany(meta):
    """Matchable that matches any instruction.
    """
    _token = b'.'

    def __repr__(self):
        return '...'


class seq(immutable, matchable):
    """A sequence of matchables to match in order.

    Parameters
    ----------
    \*matchables : iterable of matchable
        The matchables to match against.
    """
    __slots__ = 'matchables',

    def __new__(cls, *matchables):
        if not matchables:
            raise TypeError('cannot create an empty sequence')
        # A one-element sequence is just that element.
        if len(matchables) == 1:
            return coerce_ellipsis(matchables[0])
        return super().__new__(cls)

    def __init__(self, *matchables):
        self.matchables = tuple(map(coerce_ellipsis, matchables))

    def mcompile(self):
        return b''.join(map(mcompile, self.matchables))

    def __repr__(self):
        return '{cls}({args})'.format(
            cls=type(self).__name__,
            args=', '.join(map(_prepr, self.matchables))
        )


class or_(immutable, matchable):
    """Logical or of multiple matchables.

    Parameters
    ----------
    *matchables : iterable of matchable
        The matchables to or together.
    """
    __slots__ = '*matchables',

    def mcompile(self):
        # Compile to a regex alternation group: (a|b|c).
        return b'(' + b'|'.join(map(mcompile, self.matchables)) + b')'

    def __repr__(self):
        return ' | '.join(map(_prepr, self.matchables))


class not_(immutable, matchable):
    """Logical not of a matchable.
    """
    __slots__ = 'matchable',

    def mcompile(self):
        matchable = self.matchable
        if isinstance(matchable, (seq, or_, not_)):
            # Compound patterns cannot be negated with a character class;
            # use a negative lookahead repeated over single characters.
            return b'((?!(' + matchable.mcompile() + b')).)*'
        # A single opcode compiles to one byte, so a negated character
        # class suffices.
        return b'[^' + matchable.mcompile() + b']'

    def __repr__(self):
        return '~' + _prepr(self.matchable)


class pattern(immutable):
    """
    A pattern of instructions that can be matched against.

    This class is intended to be used as a decorator on methods of
    CodeTransformer subclasses. It is used to mark that a given method should
    be called on sequences of instructions that match the pattern described
    by the inputs.

    Parameters
    ----------
    \*matchables : iterable of matchable
        The type of instructions to match against.
    startcodes : container of any
        The startcodes where this pattern should be tried.

    Examples
    --------
    Match a single BINARY_ADD instruction::

        pattern(BINARY_ADD)

    Match a single BINARY_ADD followed by a RETURN_VALUE::

        pattern(BINARY_ADD, RETURN_VALUE)

    Match a single BINARY_ADD followed by any other single instruction::

        pattern(BINARY_ADD, matchany)

    Match a single BINARY_ADD followed by any number of instructions::

        pattern(BINARY_ADD, matchany[var])
    """
    __slots__ = 'matchable', 'startcodes', '_compiled'

    def __init__(self, *matchables, startcodes=(DEFAULT_STARTCODE,)):
        if not matchables:
            raise TypeError('expected at least one matchable')
        # The matchables are wrapped in a seq and compiled eagerly to a
        # regex over opcode bytes.
        self.matchable = matchable = seq(*matchables)
        self.startcodes = startcodes
        self._compiled = re.compile(matchable.mcompile())

    def __call__(self, f):
        # Decorator protocol: bind the compiled pattern to the method.
        return boundpattern(self._compiled, self.startcodes, f)

    def __repr__(self):
        return '{cls}(matchable={m!r}, startcodes={s})'.format(
            cls=type(self).__name__,
            m=self.matchable,
            s=self.startcodes,
        )


class boundpattern(immutable):
    """A pattern bound to a function.
""" __slots__ = '_compiled', '_startcodes', '_f' def __get__(self, instance, owner): if instance is None: return self return type(self)( self._compiled, self._startcodes, MethodType(self._f, instance) ) def __call__(self, compiled_instrs, instrs, startcode): if startcode not in self._startcodes: raise NoMatch(compiled_instrs, startcode) match = self._compiled.match(compiled_instrs) if match is None or match.end is 0: raise NoMatch(compiled_instrs, startcode) mend = match.end() return self._f(*instrs[:mend]), mend class NoMatch(Exception): """Indicates that there was no match found in this dispatcher. """ pass class patterndispatcher(immutable): """A set of patterns that can dispatch onto instrs. """ __slots__ = '*patterns', def __get__(self, instance, owner): if instance is None: return self return boundpatterndispatcher( instance, *map( methodcaller('__get__', instance, owner), self.patterns, ) ) class boundpatterndispatcher(immutable): """A set of patterns bound to a transformer. """ __slots__ = 'transformer', '*patterns' def _dispatch(self, compiled_instrs, instrs, startcode): for p in self.patterns: try: return p(compiled_instrs, instrs, startcode) except NoMatch: pass raise NoMatch(instrs, startcode) def __call__(self, instrs): opcodes = bytes(map(attrgetter('opcode'), instrs)) idx = 0 # The current index into the pre-transformed instrs. post_transform = [] # The instrs that have been transformed. 
transformer = self.transformer while idx < len(instrs): try: processed, nconsumed = self._dispatch( opcodes[idx:], instrs[idx:], # NOTE: do not remove this attribute access # self._dispatch can mutate the value of the startcode transformer.startcode, ) except NoMatch: post_transform.append(instrs[idx]) idx += 1 else: post_transform.extend(processed) idx += nconsumed return tuple(post_transform) ================================================ FILE: codetransformer/tests/__init__.py ================================================ ================================================ FILE: codetransformer/tests/test_code.py ================================================ from dis import dis from io import StringIO from itertools import product, chain import random import sys import pytest from codetransformer.code import Code, Flag, pycode from codetransformer.instructions import LOAD_CONST, LOAD_FAST, uses_free @pytest.fixture(scope='module') def sample_flags(request): random.seed(8025816322119661921) # ayy lmao nflags = len(Flag.__members__) return tuple( dict(zip(Flag.__members__.keys(), case)) for case in chain( random.sample(list(product((True, False), repeat=nflags)), 1000), [[True] * nflags], [[False] * nflags], ) ) def test_lnotab_roundtrip(): # DO NOT ADD EXTRA LINES HERE def f(): # pragma: no cover a = 1 b = 2 c = 3 d = 4 a, b, c, d start_line = test_lnotab_roundtrip.__code__.co_firstlineno + 3 lines = [start_line + n for n in range(5)] code = Code.from_pycode(f.__code__) lnotab = code.lnotab assert lnotab.keys() == set(lines) assert isinstance(lnotab[lines[0]], LOAD_CONST) assert lnotab[lines[0]].arg == 1 assert isinstance(lnotab[lines[1]], LOAD_CONST) assert lnotab[lines[1]].arg == 2 assert isinstance(lnotab[lines[2]], LOAD_CONST) assert lnotab[lines[2]].arg == 3 assert isinstance(lnotab[lines[3]], LOAD_CONST) assert lnotab[lines[3]].arg == 4 assert isinstance(lnotab[lines[4]], LOAD_FAST) assert lnotab[lines[4]].arg == 'a' assert f.__code__.co_lnotab == 
code.py_lnotab == code.to_pycode().co_lnotab def test_lnotab_really_dumb_whitespace(): ns = {} exec('def f():\n lol = True' + '\n' * 1024 + ' wut = True', ns) f = ns['f'] code = Code.from_pycode(f.__code__) lines = [2, 1026] lnotab = code.lnotab assert lnotab.keys() == set(lines) assert isinstance(lnotab[lines[0]], LOAD_CONST) assert lnotab[lines[0]].arg assert isinstance(lnotab[lines[1]], LOAD_CONST) assert lnotab[lines[1]].arg assert f.__code__.co_lnotab == code.py_lnotab == code.to_pycode().co_lnotab def test_flag_packing(sample_flags): for flags in sample_flags: assert Flag.unpack(Flag.pack(**flags)) == flags def test_flag_unpack_too_big(): assert all(Flag.unpack(Flag.max).values()) with pytest.raises(ValueError): Flag.unpack(Flag.max + 1) def test_flag_max(): assert Flag.pack( CO_OPTIMIZED=True, CO_NEWLOCALS=True, CO_VARARGS=True, CO_VARKEYWORDS=True, CO_NESTED=True, CO_GENERATOR=True, CO_NOFREE=True, CO_COROUTINE=True, CO_ITERABLE_COROUTINE=True, CO_FUTURE_DIVISION=True, CO_FUTURE_ABSOLUTE_IMPORT=True, CO_FUTURE_WITH_STATEMENT=True, CO_FUTURE_PRINT_FUNCTION=True, CO_FUTURE_UNICODE_LITERALS=True, CO_FUTURE_BARRY_AS_BDFL=True, CO_FUTURE_GENERATOR_STOP=True, ) == Flag.max def test_flag_max_immutable(): with pytest.raises(AttributeError): Flag.CO_OPTIMIZED.max = None def test_code_multiple_varargs(): with pytest.raises(ValueError) as e: Code( (), ( '*args', '*other', ), ) assert str(e.value) == 'cannot specify *args more than once' def test_code_multiple_kwargs(): with pytest.raises(ValueError) as e: Code( (), ( '**kwargs', '**kwargs', ), ) assert str(e.value) == 'cannot specify **kwargs more than once' @pytest.mark.parametrize('cls', uses_free) def test_dangling_var(cls): instr = cls('dangling') with pytest.raises(ValueError) as e: Code((instr,)) assert ( str(e.value) == "Argument to %r is not in cellvars or freevars." 
% instr ) def test_code_flags(sample_flags): attr_map = { 'CO_NESTED': 'is_nested', 'CO_GENERATOR': 'is_generator', 'CO_COROUTINE': 'is_coroutine', 'CO_ITERABLE_COROUTINE': 'is_iterable_coroutine', 'CO_NEWLOCALS': 'constructs_new_locals', } for flags in sample_flags: if sys.version_info < (3, 6): codestring = b'd\x00\x00S' # return None else: codestring = b'd\x00S' # return None code = Code.from_pycode(pycode( argcount=0, kwonlyargcount=0, nlocals=2, stacksize=0, flags=Flag.pack(**flags), codestring=codestring, constants=(None,), names=(), varnames=('a', 'b'), filename='', name='', firstlineno=0, lnotab=b'', )) assert code.flags == flags for flag, attr in attr_map.items(): if flags[flag]: assert getattr(code, attr) @pytest.fixture def abc_code(): a = LOAD_CONST('a') b = LOAD_CONST('b') c = LOAD_CONST('c') # not in instrs code = Code((a, b), argnames=()) return (a, b, c), code def test_instr_index(abc_code): (a, b, c), code = abc_code assert code.index(a) == 0 assert code.index(b) == 1 with pytest.raises(ValueError): code.index(c) def test_code_contains(abc_code): (a, b, c), code = abc_code assert a in code assert b in code assert c not in code def test_code_dis(capsys): @Code.from_pyfunc def code(): # pragma: no cover a = 1 b = 2 return a, b buf = StringIO() dis(code.to_pycode(), file=buf) expected = buf.getvalue() code.dis() out, err = capsys.readouterr() assert not err assert out == expected buf = StringIO() code.dis(file=buf) assert buf.getvalue() == expected ================================================ FILE: codetransformer/tests/test_core.py ================================================ import pytest import toolz.curried.operator as op from codetransformer import CodeTransformer, Code, pattern from codetransformer.core import Context, NoContext from codetransformer.instructions import Instruction from codetransformer.patterns import DEFAULT_STARTCODE from codetransformer.utils.instance import instance def test_inherit_patterns(): class 
C(CodeTransformer): matched = False @pattern(...) def _(self, instr): self.matched = True yield instr class D(C): pass d = D() assert not d.matched @d def f(): pass assert d.matched def test_override_patterns(): class C(CodeTransformer): matched_super = False matched_sub = False @pattern(...) def _(self, instr): self.matched_super = True yield instr class D(C): @pattern(...) def _(self, instr): self.matched_sub = True yield instr d = D() assert not d.matched_super assert not d.matched_sub @d def f(): pass assert d.matched_sub assert not d.matched_super def test_updates_lnotab(): @instance class c(CodeTransformer): @pattern(...) def _(self, instr): yield type(instr)(instr.arg).steal(instr) def f(): # pragma: no cover # this function has irregular whitespace for testing the lnotab a = 1 # intentional line b = 2 # intentional line c = 3 # intentional line return a, b, c original = Code.from_pyfunc(f) post_transform = c.transform(original) # check that something happened assert original.lnotab != post_transform.lnotab # check that we preserved the line numbers assert ( original.lnotab.keys() == post_transform.lnotab.keys() == set(map(op.add(original.firstlineno), (2, 4, 6, 8))) ) def sorted_instrs(lnotab): order = sorted(lnotab.keys()) for idx in order: yield lnotab[idx] # check that the instrs are correct assert all(map( Instruction.equiv, sorted_instrs(original.lnotab), sorted_instrs(post_transform.lnotab), )) # sanity check that the function is correct assert f() == c(f)() def test_context(): def f(): # pragma: no cover pass code = Code.from_pyfunc(f) c = Context(code) # check default attributes assert c.code is code assert c.startcode == DEFAULT_STARTCODE # check that the object acts like a namespace c.attr = 'test' assert c.attr == 'test' def test_no_context(): @instance class c(CodeTransformer): pass with pytest.raises(NoContext) as e: c.context assert str(e.value) == 'no active transformation context' ================================================ FILE: 
codetransformer/tests/test_decompiler.py ================================================ """ Tests for decompiler.py """ from ast import AST, iter_fields, Module, parse from functools import partial from itertools import product, zip_longest, combinations_with_replacement import sys from textwrap import dedent import pytest from toolz.curried.operator import add from codetransformer import a as show # noqa _343 = sys.version_info[:3] == (3, 4, 3) pytestmark = pytest.mark.skipif( not _343, reason='decompiler only runs on 3.4', ) if _343: from ..decompiler import ( DecompilationContext, decompile, paramnames, pycode_to_body, ) _current_test = None def make_indented_body(body_str): """ Helper for generating an indented string to use as the body of a function. """ return '\n'.join( map( add(" "), dedent(body_str).splitlines(), ) ) def compare(computed, expected): """ Assert that two AST nodes are the same. """ assert type(computed) == type(expected) if isinstance(computed, list): for cv, ev in zip_longest(computed, expected): compare(cv, ev) return if not isinstance(computed, AST): assert computed == expected return for (cn, cv), (en, ev) in zip_longest(*map(iter_fields, (computed, expected))): assert cn == en compare(cv, ev) def check(text, ast_text=None): """ Check that compiling and disassembling `text` produces the same AST tree as calling ast.parse on `ast_text`. If `ast_text` is not passed, use `text` for both. """ global _current_test _current_test = text if ast_text is None: ast_text = text ast = parse(ast_text) code = compile(text, '', 'exec') decompiled_ast = Module( body=pycode_to_body(code, DecompilationContext()), ) compare(decompiled_ast, ast) def check_formatted(text, ast_text=None, **fmt_kwargs): text = text.format(**fmt_kwargs) if ast_text is not None: ast_text = ast_text.format(**fmt_kwargs) check(text, ast_text) # Bodies for for/while loops. 
LOOP_BODIES = tuple(map( '\n'.join, combinations_with_replacement( [ "x = 1", "break", "continue", dedent( """\ while u + v: w = z """, ), dedent( """\ for u in v: w = z """, ), ], 3, ), )) # Bodies for for-else/while-else blocks. ORELSE_BODIES = ["", "x = 3"] # LHS of assignment, or bindings in a for-loop. NAME_BINDS = [ "a", "(a, b)", "(a,)", "a, ((b, c, d), (e, f))", ] def test_decompile(): def foo(a, b, *, c): return a + b + c decompiled = decompile(foo) # NOTE: We can't reliably match the ast for defaults and annotations, since # we can't tell how they were defined. s = dedent( """ def foo(a, b, *, c): return a + b + c """ ) compiled = parse(s) compare(decompiled, compiled.body[0]) def test_trivial_expr(): check("a") @pytest.mark.parametrize( 'lhs,rhs', product(NAME_BINDS, ['x', 'x.y() + z.w()']), ) def test_assign(lhs, rhs): check("{lhs} = {rhs}".format(lhs=lhs, rhs=rhs)) def test_unpack_to_attribute(): check("((a.b, c.d.e), f) = g") check("((a[b], c[d][e]), f) = g") check("((a[b].c, d.e[f]), g) = h") def test_chained_assign(): check("a = b = c = d") check("a.b = (c,) = d[e].f = g") check("a.b = (c, d[e].f) = g") def test_unary_not(): check("a = not b") check("a = not not b") check("a = not ((not a) + b)") @pytest.mark.parametrize( 'op', [ '+', '-', '*', '**', '/', '//', '%', '<<', '>>', '&', '^', '|', ] ) def test_binary_ops(op): check("a {op} b".format(op=op)) check("a = b {op} c".format(op=op)) check("a = (b {op} c) {op} d".format(op=op)) check("a = b {op} (c {op} d)".format(op=op)) def test_string_literal(): # A string literal as the first expression in a module generates a # STORE_NAME to __doc__. We can't tell the difference between this and an # actual assignment to __doc__. 
check("'a'", "__doc__ = 'a'") check("'abc'", "__doc__ = 'abc'") check("a = 'a'") check("a = u'a'") def test_bytes_literal(): check("b'a'") check("b'abc'") check("a = b'a'") def test_int_literal(): check("1", "") # This gets constant-folded out check("a = 1") check("a = 1 + b") check("a = b + 1") def test_float_literal(): check('1.0', "") # This gets constant-folded out check("a = 1.0") check("a = 1.0 + b") check("a = b + 1.0") def test_complex_literal(): check('1.0j', "") # This gets constant-folded out check("a = 1.0j") check("a = 1.0j + b") check("a = b + 1.0j") def test_tuple_literals(): check("()") check("(1,)") check("(a,)") check("(1, a)") check("(1, 'a')") check("((1,), a)") check("((1,(b,)), a)") def test_set_literals(): check("{1}") check("{1, 'a'}") check("a = {1, 'a'}") def test_list_literals(): check("[]") check("[1]") check("[a]") check("[[], [a, 1]]") def test_dict_literals(): check("{}") check("{a: b}") check("{a + a: b + b}") check("{a: b, c: d}") check("{1: 2, c: d}") check("{a: {b: c}, d: e}") check("{a: {b: {c: d}, e: {f: g}}}") check("{a: {b: [c, d, e]}}") check("a + {b: c}") def test_function_call(): check("f()") check("f(a, b, c=1, d=2)") check("f(*args)") check("f(a, b=1, *args)") check("f(**kwargs)") check("f(a, b=1, **kwargs)") check("f(*args, **kwargs)") check("f(a, b=1, *args, **kwargs)") check("(a + b)()") check("a().b.c.d()") def test_paramnames(): def foo(a, b): x = 1 return x args, kwonlyargs, varargs, varkwargs = paramnames(foo.__code__) assert args == ('a', 'b') assert kwonlyargs == () assert varargs is None assert varkwargs is None def bar(a, *, b): x = 1 return x args, kwonlyargs, varargs, varkwargs = paramnames(bar.__code__) assert args == ('a',) assert kwonlyargs == ('b',) assert varargs is None assert varkwargs is None def fizz(a, **kwargs): x = 1 return x args, kwonlyargs, varargs, varkwargs = paramnames(fizz.__code__) assert args == ('a',) assert kwonlyargs == () assert varargs is None assert varkwargs == 'kwargs' def buzz(a, 
b=1, *args, c, d=3, **kwargs): x = 1 return x args, kwonlyargs, varargs, varkwargs = paramnames(buzz.__code__) assert args == ('a', 'b') assert kwonlyargs == ('c', 'd') assert varargs == 'args' assert varkwargs == 'kwargs' @pytest.mark.parametrize( "signature,expr", product( [ "", "a", "a, b", "*a, b", "a, **b", "*a, **b", "a=1, b=2, c=3", "a, *, b=1, c=2, d=3", "a, b=1, c=2, *, d, e=3, f, g=4", "a, b=1, *args, c, d=2, **kwargs", "a, b=c + d, *, e=f + g", ], [ "a + b", "None", "lambda x: lambda y: lambda z: (x, y, z)", "[lambda x: a + b, 1]", "[(lambda y: a + b) + (lambda z: d + e), 1]", ], ), ) def test_lambda(signature, expr): check_formatted("lambda {sig}: {expr}", sig=signature, expr=expr) check_formatted("func = (lambda {sig}: {expr})", sig=signature, expr=expr) check_formatted( dedent( """ def foo(): return (lambda {sig}: {expr})() """ ), sig=signature, expr=expr, ) def test_simple_function(): check( dedent( """\ def foo(a, b): return a + b """ ) ) def test_annotations(): check( dedent( """\ def foo(a: b, c: d): return 3 """ ) ) check( dedent( """\ def foo(a: b, c=1, *args: d, e:f, g:h=i, **kwargs: j): return a + c """ ) ) check( dedent( """\ def foo(a: b * 3, c=1, *args: d, e:f, g:h=i, **kwargs: j) -> k: return a + c """ ) ) @pytest.mark.parametrize( "signature,body", product( [ "()", "(a)", "(a, b)", "(*a, b)", "(a, **b)", "(*a, **b)", "(a=1, b=2, c=3)", "(a, *, b=1, c=2, d=3)", "(a, b=1, c=2, *, d, e=3, f, g=4)", "(a, b=1, *args, c, d=2, **kwargs)", "(a, b=c + d, *, e=f + g)", ], [ """\ return a + b """, """\ x = 1 y = 2 return x + y """, """\ x = 3 def bar(m, n): global x x = 4 return m + n + x return None """, """\ def bar(): x = 3 def buzz(): nonlocal x x = 4 return x return x return None """ ], ), ) def test_function_signatures(signature, body): check( dedent( """\ def foo{signature}: {body} """ ).format(signature=signature, body=make_indented_body(body)) ) def test_decorators(): check( dedent( """ @decorator2 @decorator1() @decorator0.attr.attr def 
foo(a, b=1, *, c, d=2): @decorator3 def bar(c, d): x = 1 return None return None """ ) ) def test_store_twice_to_global(): check( dedent( """\ x = 3 def foo(): global x x = 4 x = 5 return None """ ) ) def test_store_twice_to_nonlocal(): check( dedent( """\ def foo(): x = 1 def bar(): nonlocal x x = 2 x = 3 return None return None """ ) ) def test_getattr(): check("a.b") check("a.b.c") check("a.b.c + a.b.c") check("(1).real") check("1..real") check("(a + b).c") check("a = b.c") def test_setattr(): check("a.b = c") check("a.b.c = d") check("a.b.c = d.e.f") check("(a + b).c = (d + e).f") def test_getitem(): check("a = b[c]") check("a = b[c:]") check("a = b[:c]") check("a = b[c::]") check("a = b[c:d]") check("a = b[c:d:e]") check("a = b[c, d]") check("a = b[c:, d]") check("a = b[c:d:e, f:g:h, i:j:k]") check("a = b[c + d][e]") def test_setitem(): check("a[b] = c") check("b[c:] = a") check("b[:c] = a") check("b[c::] = a") check("b[c:d] = a") check("b[c:d:e] = a") check("b[c, d] = a") check("b[c:, d] = a") check("b[c:d:e, f:g:h, i:j:k] = a") check("b[c + d][e] = a") @pytest.mark.parametrize( "loop,body,else_body", product( [ "for a in b:", "for a in b.c.d:", "for (a, (b, c), d) in e:" ], LOOP_BODIES, ORELSE_BODIES, ) ) def test_for(loop, body, else_body): check( dedent( """\ {loop} {body} {else_} {else_body} x = 4 """ ).format( loop=loop, body=make_indented_body(body), else_="else:" if else_body else "", else_body=make_indented_body(else_body) if else_body else "", ) ) @pytest.mark.parametrize( "condition,body,else_body", product( [ "a", "not a", "not not a", "a.b.c.d", "not a.b.c.d", "True", ], LOOP_BODIES, ORELSE_BODIES, ) ) def test_while(condition, body, else_body): check( dedent( """\ while {condition}: {body} {else_} {else_body} x = 4 """ ).format( condition=condition, body=make_indented_body(body), else_="else:" if else_body else "", else_body=make_indented_body(else_body) if else_body else "", ) ) def test_while_False(): # The peephole optimizer removes while 
blocks entirely. check( dedent( """\ while False: x = 1 y = 2 """ ), "" ) def test_import(): check("import a as b") check("import a.b as c") # These generate identical bytecode. check( "import a, b", dedent( """\ import a import b """ ) ) check("import a.b.c") # These generate identical bytecode. check( "import a.b.c as d, e.f.g as h", dedent( """ import a.b.c as d import e.f.g as h """ ) ) def test_import_from(): check("from a import b") check("from a import b, c as d, d") check("from a.b import c, d as e, f as g") def test_import_star(): check("from a import *") check("from a.b.c import *") def test_import_attribute_aliasing_module(): check("import a.b as a") def test_import_in_function(): check( dedent( """\ def foo(): import a.b.c as d from e.f import g return None """ ) ) check( dedent( """\ def foo(): global d, g import a.b.c as d from e.f import g return None """ ) ) check( dedent( """\ def foo(): d = None g = None def bar(): nonlocal d, g import a.b.c as d from e.f import g return None return None """ ) ) def test_with_block(): check( dedent( """ with a.b.c: c = d e = f() """ ) ) # Tests for various kinds of stores from the with assignment. check( dedent( """ with a as b: c = d """ ) ) check( dedent( """ def foo(): with a as b: c = d return None """ ) ) check( dedent( """ def foo(): global b with a as b: c = d return None """ ) ) check( dedent( """ def foo(): with a as b: def bar(): nonlocal b b = None return c return None """ ) ) def test_nested_with(): check( dedent( """ with a: with b: with c: x = 3 y = 4 z = 5 """ ) ) # This is indistinguishable in bytecode from: # with a: # with b: # with c as d: # e = f # We normalize the former to the latter. 
check( dedent( """ with a, b, c as d: e = f """ ), ) def test_simple_if(): check( dedent( """ if a: b = c x = "end" """ ) ) def test_if_return(): check( dedent( """ def f(): if a: return b return None """ ) ) def test_if_else(): check( dedent( """ if a: b = c else: b = d x = "end" """ ) ) @pytest.mark.parametrize( 'last_statement,prefix', product( ("", "x = 'end'"), ("not", "not not"), ), ) def test_if_elif(last_statement, prefix): check( dedent( """\ if {prefix} a: b = c elif d: e = f elif {prefix} g: h = i else: j = k {last_statement} """ ).format(prefix=prefix, last_statement=last_statement) ) check( dedent( """ if a: x = "before_b" if {prefix} b: x = "in_b" elif b: x = "in_elif_b" else: x = "else_b" w = "after_b" elif c: x = "in_c" else: x = "in_else" {last_statement} """ ).format(prefix=prefix, last_statement=last_statement) ) @pytest.mark.parametrize( 'op', ['and', 'or'], ) def test_boolops(op): check_ = partial(check_formatted, op=op) check_("a {op} b") check_("a {op} b {op} c") check_("a + (b {op} c)") check_("(a {op} b) + c") check_("(a + b) {op} (c + d)") check_("a + (b {op} c) + d") check_("a {op} (1 + (b {op} c))") @pytest.mark.parametrize( 'op', ['and', 'or'], ) def test_normalize_nested_boolops(op): check_ = partial(check_formatted, op=op) # These generate identical bytecode, but they're different at the AST # level. We normalize to minimally-nested form. 
check_("a {op} (b {op} c)", "a {op} b {op} c") check_("(a {op} b) {op} c", "a {op} b {op} c") check_("a {op} (b {op} (c {op} d))", "a {op} b {op} c {op} d") check_("((a {op} b) {op} c) {op} d", "a {op} b {op} c {op} d") check_("(a {op} b) {op} (c {op} d)", "a {op} b {op} c {op} d") check_("a {op} (b {op} c) {op} d", "a {op} b {op} c {op} d") def test_mixed_boolops(): check("a or b and c and d") ================================================ FILE: codetransformer/tests/test_instructions.py ================================================ from codetransformer.instructions import Instruction def test_repr_types(): assert repr(Instruction) == 'Instruction' for tp in Instruction.__subclasses__(): assert repr(tp) == tp.opname ================================================ FILE: codetransformer/transformers/__init__.py ================================================ from .constants import asconstants from .interpolated_strings import interpolated_strings from .pattern_matched_exceptions import pattern_matched_exceptions from .precomputed_slices import precomputed_slices from .literals import ( bytearray_literals, decimal_literals, haskell_strs, islice_literals, overloaded_complexes, overloaded_floats, overloaded_ints, overloaded_lists, overloaded_sets, overloaded_slices, overloaded_strs, overloaded_tuples, ) __all__ = [ 'asconstants', 'bytearray_literals', 'decimal_literals', 'haskell_strs', 'interpolated_strings', 'islice_literals', 'overloaded_complexes', 'overloaded_floats', 'overloaded_ints', 'overloaded_lists', 'overloaded_sets', 'overloaded_slices', 'overloaded_strs', 'overloaded_tuples', 'pattern_matched_exceptions', 'precomputed_slices', ] ================================================ FILE: codetransformer/transformers/add2mul.py ================================================ """ add2mul -------- A transformer that replaces BINARY_ADD instructions with BINARY_MULTIPLY instructions. This isn't useful, but it's good introductory example/tutorial material. 
"""
from codetransformer import CodeTransformer, pattern
from codetransformer.instructions import BINARY_ADD, BINARY_MULTIPLY


class add2mul(CodeTransformer):
    """Replace every BINARY_ADD with BINARY_MULTIPLY.

    Tutorial transformer; not useful in practice.
    """
    @pattern(BINARY_ADD)
    def _add2mul(self, add_instr):
        # ``steal`` transfers jump targets / line numbers from the old
        # instruction onto the replacement.
        yield BINARY_MULTIPLY().steal(add_instr)
================================================ FILE: codetransformer/transformers/constants.py ================================================
import builtins

from ..core import CodeTransformer
from ..instructions import (
    DELETE_DEREF,
    DELETE_FAST,
    DELETE_GLOBAL,
    DELETE_NAME,
    LOAD_CLASSDEREF,
    LOAD_CONST,
    LOAD_DEREF,
    LOAD_GLOBAL,
    LOAD_NAME,
    STORE_DEREF,
    STORE_FAST,
    STORE_GLOBAL,
    STORE_NAME,
)
from ..patterns import pattern


def _assign_or_del(type_):
    """Build a pattern handler that forbids writing to or deleting a
    frozen constant name.

    Parameters
    ----------
    type_ : str
        Either ``'assign to'`` or ``'delete'``; interpolated into the
        SyntaxError message.

    Returns
    -------
    handler : generator function
        Pattern handler for store/delete instructions.  Because it is a
        generator, the SyntaxError is raised when the transformer
        consumes it, not when the handler is created.
    """
    assert type_ in ('assign to', 'delete')

    def handler(self, instr):
        name = instr.arg
        if name not in self._constnames:
            # Not a frozen name: pass the instruction through untouched.
            yield instr
            return

        code = self.code
        filename = code.filename
        lno = code.lno_of_instr[instr]
        # Best-effort read of the offending source line so the
        # SyntaxError can show it; fall back to a placeholder when the
        # file is unavailable.
        try:
            with open(filename) as f:
                line = f.readlines()[lno - 1]
        except IOError:
            line = '???'

        raise SyntaxError(
            "can't %s constant name %r" % (type_, name),
            (filename, lno, len(line), line),
        )

    return handler


class asconstants(CodeTransformer):
    """
    A code transformer that inlines names as constants.

    - Positional arguments are interpreted as names of builtins (e.g. ``len``,
      ``print``) to freeze as constants in the decorated function's namespace.

    - Keyword arguments provide additional custom names to freeze as
      constants.

    - If invoked with no positional or keyword arguments, ``asconstants``
      inlines all names in ``builtins``.

    Parameters
    ----------
    \*builtin_names
        Names of builtins to freeze as constants.
    \*\*kwargs
        Additional key-value pairs to bind as constants.

    Examples
    --------
    Freezing Builtins:

    >>> from codetransformer.transformers import asconstants
    >>>
    >>> @asconstants('len')
    ... def with_asconstants(x):
    ...     return len(x) * 2
    ...
    >>> def without_asconstants(x):
    ...     return len(x) * 2
    ...
>>> len = lambda x: 0 >>> with_asconstants([1, 2, 3]) 6 >>> without_asconstants([1, 2, 3]) 0 Adding Custom Constants: >>> @asconstants(a=1) ... def f(): ... return a ... >>> f() 1 >>> a = 5 >>> f() 1 """ def __init__(self, *builtin_names, **kwargs): super().__init__() bltins = vars(builtins) if not (builtin_names or kwargs): self._constnames = bltins.copy() else: self._constnames = constnames = {} for arg in builtin_names: constnames[arg] = bltins[arg] overlap = constnames.keys() & kwargs.keys() if overlap: raise TypeError('Duplicate keys: {!r}'.format(overlap)) constnames.update(kwargs) def transform(self, code, **kwargs): overlap = self._constnames.keys() & set(code.argnames) if overlap: raise SyntaxError( 'argument names overlap with constant names: %r' % overlap, ) return super().transform(code, **kwargs) @pattern(LOAD_NAME | LOAD_GLOBAL | LOAD_DEREF | LOAD_CLASSDEREF) def _load_name(self, instr): name = instr.arg if name not in self._constnames: yield instr return yield LOAD_CONST(self._constnames[name]).steal(instr) _store = pattern( STORE_NAME | STORE_GLOBAL | STORE_DEREF | STORE_FAST, )(_assign_or_del('assign to')) _delete = pattern( DELETE_NAME | DELETE_GLOBAL | DELETE_DEREF | DELETE_FAST, )(_assign_or_del('delete')) ================================================ FILE: codetransformer/transformers/interpolated_strings.py ================================================ """ A transformer implementing ruby-style interpolated strings. """ import sys from codetransformer import pattern, CodeTransformer from codetransformer.instructions import ( BUILD_TUPLE, LOAD_CONST, LOAD_ATTR, CALL_FUNCTION, CALL_FUNCTION_KW, ROT_TWO, ) from codetransformer.utils.functional import flatten, is_a class interpolated_strings(CodeTransformer): """ A transformer that interpolates local variables into string literals. Parameters ---------- transform_bytes : bool, optional Whether to transform bytes literals to interpolated unicode strings. Default is True. 
transform_str : bool, optional Whether to interpolate values into unicode strings. Default is False. Example ------- >>> @interpolated_strings() # doctest: +SKIP ... def foo(a, b): ... c = a + b ... return b"{a} + {b} = {c}" ... >>> foo(1, 2) # doctest: +SKIP '1 + 2 = 3' """ if sys.version_info >= (3, 6): def __init__(self, *, transform_bytes=True, transform_str=False): raise NotImplementedError( '%s is not supported on 3.6 or newer, just use f-strings' % type(self).__name__, ) else: def __init__(self, *, transform_bytes=True, transform_str=False): super().__init__() self._transform_bytes = transform_bytes self._transform_str = transform_str @property def types(self): """ Tuple containing types transformed by this transformer. """ out = [] if self._transform_bytes: out.append(bytes) if self._transform_str: out.append(str) return tuple(out) @pattern(LOAD_CONST) def _load_const(self, instr): const = instr.arg if isinstance(const, (tuple, frozenset)): yield from self._transform_constant_sequence(const) return if isinstance(const, bytes) and self._transform_bytes: yield from self.transform_stringlike(const) elif isinstance(const, str) and self._transform_str: yield from self.transform_stringlike(const) else: yield instr def _transform_constant_sequence(self, seq): """ Transform a frozenset or tuple. """ should_transform = is_a(self.types) if not any(filter(should_transform, flatten(seq))): # Tuple doesn't contain any transformable strings. Ignore. 
yield LOAD_CONST(seq) return for const in seq: if should_transform(const): yield from self.transform_stringlike(const) elif isinstance(const, (tuple, frozenset)): yield from self._transform_constant_sequence(const) else: yield LOAD_CONST(const) if isinstance(seq, tuple): yield BUILD_TUPLE(len(seq)) else: assert isinstance(seq, frozenset) yield BUILD_TUPLE(len(seq)) yield LOAD_CONST(frozenset) yield ROT_TWO() yield CALL_FUNCTION(1) def transform_stringlike(self, const): """ Yield instructions to process a str or bytes constant. """ yield LOAD_CONST(const) if isinstance(const, bytes): yield from self.bytes_instrs elif isinstance(const, str): yield from self.str_instrs @property def bytes_instrs(self): """ Yield instructions to call TOS.decode('utf-8').format(**locals()). """ yield LOAD_ATTR('decode') yield LOAD_CONST('utf-8') yield CALL_FUNCTION(1) yield from self.str_instrs @property def str_instrs(self): """ Yield instructions to call TOS.format(**locals()). """ yield LOAD_ATTR('format') yield LOAD_CONST(locals) yield CALL_FUNCTION(0) yield CALL_FUNCTION_KW() ================================================ FILE: codetransformer/transformers/literals.py ================================================ from collections import OrderedDict from decimal import Decimal from itertools import islice import sys from textwrap import dedent from .. import instructions from ..core import CodeTransformer from ..patterns import pattern, matchany, var from ..utils.instance import instance IN_COMPREHENSION = 'in_comprehension' class overloaded_dicts(CodeTransformer): """Transformer that allows us to overload dictionary literals. This acts by creating an empty map and then inserting every key value pair in order. The code that is generated will turn something like:: {k_0: v_0, k_1: v_1, ..., k_n: v_n} into:: _tmp = astype() _tmp[k_0] = v_0 _tmp[k_1] = v_1 ... _tmp[k_n] = v_n _tmp # leaves the map on the stack. 
Parameters ---------- astype : callable The constructor for the type to create. Examples -------- >>> from collections import OrderedDict >>> ordereddict_literals = overloaded_dicts(OrderedDict) >>> @ordereddict_literals ... def f(): ... return {'a': 1, 'b': 2, 'c': 3} ... >>> f() OrderedDict([('a', 1), ('b', 2), ('c', 3)]) """ def __init__(self, astype): super().__init__() self.astype = astype @pattern(instructions.BUILD_MAP, matchany[var], instructions.MAP_ADD) def _start_comprehension(self, instr, *instrs): yield instructions.LOAD_CONST(self.astype).steal(instr) # TOS = self.astype yield instructions.CALL_FUNCTION(0) # TOS = m = self.astype() yield instructions.STORE_FAST('__map__') *body, map_add = instrs yield from self.patterndispatcher(body) # TOS = k # TOS1 = v yield instructions.LOAD_FAST('__map__').steal(map_add) # TOS = __map__ # TOS1 = k # TOS2 = v yield instructions.ROT_TWO() # TOS = k # TOS1 = __map__ # TOS2 = v yield instructions.STORE_SUBSCR() self.begin(IN_COMPREHENSION) @pattern(instructions.RETURN_VALUE, startcodes=(IN_COMPREHENSION,)) def _return_value(self, instr): yield instructions.LOAD_FAST('__map__').steal(instr) # TOS = __map__ yield instr if sys.version_info[:2] <= (3, 4): # Python 3.4 @pattern(instructions.BUILD_MAP) def _build_map(self, instr): yield instructions.LOAD_CONST(self.astype).steal(instr) # TOS = self.astype yield instructions.CALL_FUNCTION(0) # TOS = m = self.astype() yield from (instructions.DUP_TOP(),) * instr.arg # TOS = m # ... # TOS[instr.arg] = m @pattern(instructions.STORE_MAP) def _store_map(self, instr): # TOS = k # TOS1 = v # TOS2 = m # TOS3 = m yield instructions.ROT_THREE().steal(instr) # TOS = v # TOS1 = m # TOS2 = k # TOS3 = m yield instructions.ROT_THREE() # TOS = m # TOS1 = k # TOS2 = v # TOS3 = m yield instructions.ROT_TWO() # TOS = k # TOS1 = m # TOS2 = v # TOS3 = m yield instructions.STORE_SUBSCR() # TOS = m else: # Python 3.5 and beyond! 
def _construct_map(self, key_value_pairs): mapping = self.astype() for key, value in zip(key_value_pairs[::2], key_value_pairs[1::2]): mapping[key] = value return mapping @pattern(instructions.BUILD_MAP) def _build_map(self, instr): # TOS = vn # TOS1 = kn # ... # TOSN = v0 # TOSN + 1 = k0 # Construct a tuple of (k0, v0, k1, v1, ..., kn, vn) for # each of the key: value pairs in the dictionary. yield instructions.BUILD_TUPLE(instr.arg * 2).steal(instr) # TOS = (k0, v0, k1, v1, ..., kn, vn) yield instructions.LOAD_CONST(self._construct_map) # TOS = self._construct_map # TOS1 = (k0, v0, k1, v1, ..., kn, vn) yield instructions.ROT_TWO() # TOS = (k0, v0, k1, v1, ..., kn, vn) # TOS1 = self._construct_map yield instructions.CALL_FUNCTION(1) if sys.version_info >= (3, 6): def _construct_const_map(self, values, keys): mapping = self.astype() for key, value in zip(keys, values): mapping[key] = value return mapping @pattern(instructions.LOAD_CONST, instructions.BUILD_CONST_KEY_MAP) def _build_const_map(self, keys, instr): yield instructions.BUILD_TUPLE(len(keys.arg)).steal(keys) # TOS = (v0, v1, ..., vn) yield keys # TOS = (k0, k1, ..., kn) # TOS1 = (v0, v1, ..., vn) yield instructions.LOAD_CONST(self._construct_const_map) # TOS = self._construct_const_map # TOS1 = (k0, k1, ..., kn) # TOS2 = (v0, v1, ..., vn) yield instructions.ROT_THREE() # TOS = (k0, k1, ..., kn) # TOS1 = (v0, v1, ..., vn) # TOS2 = self._construct_const_map yield instructions.CALL_FUNCTION(2) ordereddict_literals = overloaded_dicts(OrderedDict) def _format_constant_docstring(type_): return dedent( """ Transformer that applies a callable to each {type_} constant in the transformed code object. Parameters ---------- xform : callable A callable to be applied to {type_} literals. 
See Also -------- codetransformer.transformers.literals.overloaded_strs """ ).format(type_=type_.__name__) class _ConstantTransformerBase(CodeTransformer): def __init__(self, xform): super().__init__() self.xform = xform def transform_consts(self, consts): # This is all one expression. return super().transform_consts( tuple( frozenset(self.transform_consts(tuple(const))) if isinstance(const, frozenset) else self.transform_consts(const) if isinstance(const, tuple) else self.xform(const) if isinstance(const, self._type) else const for const in consts ) ) def overloaded_constants(type_, __doc__=None): """A factory for transformers that apply functions to literals. Parameters ---------- type_ : type The type to overload. __doc__ : str, optional Docstring for the generated transformer. Returns ------- transformer : subclass of CodeTransformer A new code transformer class that will overload the provided literal types. """ typename = type_.__name__ if typename.endswith('x'): typename += 'es' elif not typename.endswith('s'): typename += 's' if __doc__ is None: __doc__ = _format_constant_docstring(type_) return type( "overloaded_" + typename, (_ConstantTransformerBase,), { '_type': type_, '__doc__': __doc__, }, ) overloaded_strs = overloaded_constants( str, __doc__=dedent( """ A transformer that overloads string literals. Rewrites all constants of the form:: "some string" as:: xform("some string") Parameters ---------- xform : callable Function to call on all string literals in the transformer target. Examples -------- >>> @overloaded_strs(lambda x: "ayy lmao ") ... def prepend_foo(s): ... return "foo" + s ... 
>>> prepend_foo("bar") 'ayy lmao bar' """ ) ) overloaded_bytes = overloaded_constants(bytes) overloaded_floats = overloaded_constants(float) overloaded_ints = overloaded_constants(int) overloaded_complexes = overloaded_constants(complex) haskell_strs = overloaded_strs(tuple) bytearray_literals = overloaded_bytes(bytearray) decimal_literals = overloaded_floats(Decimal) def _start_comprehension(self, *instrs): self.begin(IN_COMPREHENSION) yield from self.patterndispatcher(instrs) def _return_value(self, instr): # TOS = collection yield instructions.LOAD_CONST(self.xform).steal(instr) # TOS = self.xform # TOS1 = collection yield instructions.ROT_TWO() # TOS = collection # TOS1 = self.xform yield instructions.CALL_FUNCTION(1) # TOS = self.xform(collection) yield instr # Added as a method for overloaded_build def _build(self, instr): yield instr # TOS = new_list yield instructions.LOAD_CONST(self.xform) # TOS = astype # TOS1 = new_list yield instructions.ROT_TWO() # TOS = new_list # TOS1 = astype yield instructions.CALL_FUNCTION(1) # TOS = astype(new_list) def overloaded_build(type_, add_name=None): """Factory for constant transformers that apply to a given build instruction. Parameters ---------- type_ : type The object type to overload the construction of. This must be one of "buildable" types, or types with a "BUILD_*" instruction. add_name : str, optional The suffix of the instruction tha adds elements to the collection. For example: 'add' or 'append' Returns ------- transformer : subclass of CodeTransformer A new code transformer class that will overload the provided literal types. """ typename = type_.__name__ instrname = 'BUILD_' + typename.upper() dict_ = OrderedDict( __doc__=dedent( """ A CodeTransformer for overloading {name} instructions. 
""".format(name=instrname) ) ) try: build_instr = getattr(instructions, instrname) except AttributeError: raise TypeError("type %s is not buildable" % typename) if add_name is not None: try: add_instr = getattr( instructions, '_'.join((typename, add_name)).upper(), ) except AttributeError: TypeError("type %s is not addable" % typename) dict_['_start_comprehension'] = pattern( build_instr, matchany[var], add_instr, )(_start_comprehension) dict_['_return_value'] = pattern( instructions.RETURN_VALUE, startcodes=(IN_COMPREHENSION,), )(_return_value) else: add_instr = None dict_['_build'] = pattern(build_instr)(_build) if not typename.endswith('s'): typename = typename + 's' return type( 'overloaded_' + typename, (overloaded_constants(type_),), dict_, ) overloaded_slices = overloaded_build(slice) overloaded_lists = overloaded_build(list, 'append') overloaded_sets = overloaded_build(set, 'add') # Add a special method for set overloader. def transform_consts(self, consts): consts = super(overloaded_sets, self).transform_consts(consts) return tuple( # Always pass a thawed set so mutations can happen inplace. self.xform(set(const)) if isinstance(const, frozenset) else const for const in consts ) overloaded_sets.transform_consts = transform_consts del transform_consts frozenset_literals = overloaded_sets(frozenset) overloaded_tuples = overloaded_build(tuple) # Add a special method for the tuple overloader. def transform_consts(self, consts): consts = super(overloaded_tuples, self).transform_consts(consts) return tuple( self.xform(const) if isinstance(const, tuple) else const for const in consts ) overloaded_tuples.transform_consts = transform_consts del transform_consts @instance class islice_literals(CodeTransformer): """Transformer that turns slice indexing into an islice object. Examples -------- >>> from codetransformer.transformers.literals import islice_literals >>> @islice_literals ... def f(): ... return map(str, (1, 2, 3, 4))[:2] ... 
>>> f() >>> tuple(f()) ('1', '2') """ @pattern(instructions.BINARY_SUBSCR) def _binary_subscr(self, instr): yield instructions.LOAD_CONST(self._islicer).steal(instr) # TOS = self._islicer # TOS1 = k # TOS2 = m yield instructions.ROT_THREE() # TOS = k # TOS1 = m # TOS2 = self._islicer yield instructions.CALL_FUNCTION(2) # TOS = self._islicer(m, k) @staticmethod def _islicer(m, k): if isinstance(k, slice): return islice(m, k.start, k.stop, k.step) return m[k] ================================================ FILE: codetransformer/transformers/pattern_matched_exceptions.py ================================================ import sys from ..core import CodeTransformer from ..instructions import ( BUILD_TUPLE, CALL_FUNCTION, COMPARE_OP, LOAD_CONST, POP_TOP, ROT_TWO, ) from ..patterns import pattern def match(match_expr, exc_type, exc_value, exc_traceback): """ Called to determine whether or not an except block should be matched. True -> enter except block False -> don't enter except block """ # Emulate standard behavior when match_expr is an exception subclass. if isinstance(match_expr, type) and issubclass(match_expr, BaseException): return issubclass(exc_type, match_expr) # Match on type and args when match_expr is an exception instance. return ( issubclass(exc_type, type(match_expr)) and match_expr.args == exc_value.args ) class pattern_matched_exceptions(CodeTransformer): """ Allows usage of arbitrary expressions and matching functions in `except` blocks. When an exception is raised in an except block in a function decorated with `pattern_matched_exceptions`, a matching function will be called with the block's expression and the three values returned by sys.exc_info(). 
If the matching function returns `True`, we enter the corresponding except-block, otherwise we continue to the next block, or re-raise if there are no more blocks to check Parameters ---------- matcher : function, optional A function accepting an expression and the values of sys.exc_info, returning True if the exception info "matches" the expression. The default behavior is to emulate standard python when the match expression is a *subtype* of Exception, and to compare exc.type and exc.args when the match expression is an *instance* of Exception. Example ------- >>> @pattern_matched_exceptions() ... def foo(): ... try: ... raise ValueError('bar') ... except ValueError('buzz'): ... return 'buzz' ... except ValueError('bar'): ... return 'bar' >>> foo() 'bar' """ def __init__(self, matcher=match): super().__init__() self._matcher = matcher if sys.version_info < (3, 6): from ..instructions import CALL_FUNCTION_VAR def _match(self, instr, CALL_FUNCTION_VAR=CALL_FUNCTION_VAR): yield ROT_TWO().steal(instr) yield POP_TOP() yield LOAD_CONST(self._matcher) yield ROT_TWO() yield LOAD_CONST(sys.exc_info) yield CALL_FUNCTION(0) yield CALL_FUNCTION_VAR(1) del CALL_FUNCTION_VAR else: from ..instructions import ( CALL_FUNCTION_EX, BUILD_TUPLE_UNPACK_WITH_CALL, ) def _match(self, instr, CALL_FUNCTION_EX=CALL_FUNCTION_EX, BUILD_TUPLE_UNPACK_WITH_CALL=BUILD_TUPLE_UNPACK_WITH_CALL): yield ROT_TWO().steal(instr) yield POP_TOP() yield LOAD_CONST(self._matcher) yield ROT_TWO() yield BUILD_TUPLE(1) yield LOAD_CONST(sys.exc_info) yield CALL_FUNCTION(0) yield BUILD_TUPLE_UNPACK_WITH_CALL(2) yield CALL_FUNCTION_EX(0) del CALL_FUNCTION_EX del BUILD_TUPLE_UNPACK_WITH_CALL @pattern(COMPARE_OP) def _compare_op(self, instr): if instr.equiv(COMPARE_OP.EXCEPTION_MATCH): yield from self._match(instr) else: yield instr ================================================ FILE: codetransformer/transformers/precomputed_slices.py ================================================ from codetransformer.core 
import CodeTransformer from codetransformer.instructions import LOAD_CONST, BUILD_SLICE from codetransformer.patterns import pattern, plus class precomputed_slices(CodeTransformer): """ An optimizing transformer that precomputes and inlines slice literals. Example ------- >>> from dis import dis >>> def first_five(l): ... return l[:5] ... >>> dis(first_five) # doctest: +SKIP 2 0 LOAD_FAST 0 (l) 3 LOAD_CONST 0 (None) 6 LOAD_CONST 1 (5) 9 BUILD_SLICE 2 12 BINARY_SUBSCR 13 RETURN_VALUE >>> dis(precomputed_slices()(first_five)) # doctest: +SKIP 2 0 LOAD_FAST 0 (l) 3 LOAD_CONST 0 (slice(None, 5, None)) 6 BINARY_SUBSCR 7 RETURN_VALUE """ @pattern(LOAD_CONST[plus], BUILD_SLICE) def make_constant_slice(self, *instrs): *loads, build = instrs if build.arg != len(loads): # There are non-constant loads before the consts: # e.g. x[:1:2] yield from instrs slice_ = slice(*(instr.arg for instr in loads)) yield LOAD_CONST(slice_).steal(loads[0]) ================================================ FILE: codetransformer/transformers/tests/__init__.py ================================================ ================================================ FILE: codetransformer/transformers/tests/test_add2mul.py ================================================ from ..add2mul import add2mul def test_add2mul(): @add2mul() def foo(a, b): return (a + b + 2) - 1 assert foo(1, 2) == 3 assert foo(2, 2) == 7 ================================================ FILE: codetransformer/transformers/tests/test_constants.py ================================================ import os from sys import _getframe from types import CodeType import pytest from codetransformer.code import Code from ..constants import asconstants basename = os.path.basename(__file__) def test_global(): @asconstants(a=1) def f(): return a # noqa assert f() == 1 def test_name(): for const in compile( 'class C:\n b = a', '', 'exec').co_consts: if isinstance(const, CodeType): pre_transform = Code.from_pycode(const) code = 
asconstants(a=1).transform(pre_transform) break else: raise AssertionError('There should be a code object in there!') ns = {} exec(code.to_pycode(), ns) assert ns['b'] == 1 def test_closure(): def f(): a = 2 @asconstants(a=1) def g(): return a return g assert f()() == 1 def test_store(): with pytest.raises(SyntaxError) as e: @asconstants(a=1) def f(): a = 1 # noqa line = _getframe().f_lineno - 2 assert ( str(e.value) == "can't assign to constant name 'a' (%s, line %d)" % (basename, line) ) def test_delete(): with pytest.raises(SyntaxError) as e: @asconstants(a=1) def f(): del a # noqa line = _getframe().f_lineno - 2 assert ( str(e.value) == "can't delete constant name 'a' (%s, line %d)" % (basename, line) ) def test_argname_overlap(): with pytest.raises(SyntaxError) as e: @asconstants(a=1) def f(a): pass assert str(e.value) == "argument names overlap with constant names: {'a'}" ================================================ FILE: codetransformer/transformers/tests/test_exc_patterns.py ================================================ from pytest import raises from ..pattern_matched_exceptions import pattern_matched_exceptions def test_patterns(): @pattern_matched_exceptions() def foo(): try: raise ValueError("bar") except TypeError: raise except ValueError("foo"): raise except ValueError("bar"): return "bar" except ValueError("buzz"): raise assert foo() == "bar" def test_patterns_bind_name(): @pattern_matched_exceptions() def foo(): try: raise ValueError("bar") except ValueError("foo") as e: return e.args[0] except ValueError("bar") as e: return e.args[0] except ValueError("buzz") as e: return e.args[0] assert foo() == "bar" def test_patterns_reraise(): @pattern_matched_exceptions() def foo(): try: raise ValueError("bar") except ValueError("bar"): raise with raises(ValueError) as err: foo() assert err.type == ValueError assert err.value.args == ('bar',) def test_normal_exc_match(): @pattern_matched_exceptions() def foo(): try: raise ValueError("bar") except 
ValueError: return "matched" except ValueError("bar"): raise assert foo() == "matched" def test_exc_match_custom_func(): def match_greater(expr, exc_type, exc_value, exc_traceback): return expr > exc_value.args[0] @pattern_matched_exceptions(match_greater) def foo(): try: raise ValueError(5) except 4: return 4 except 5: return 5 except 6: return 6 assert foo() == 6 ================================================ FILE: codetransformer/transformers/tests/test_interpolated_strings.py ================================================ import sys import pytest from ..interpolated_strings import interpolated_strings pytestmark = pytest.mark.skipif( sys.version_info >= (3, 6), reason='interpolated_strings is deprecated, just use f-strings', ) def test_interpolated_bytes(): @interpolated_strings(transform_bytes=True) def enabled(a, b, c): return b"{a} {b!r} {c}" assert enabled(1, 2, 3) == "{a} {b!r} {c}".format(a=1, b=2, c=3) @interpolated_strings() def default(a, b, c): return b"{a} {b!r} {c}" assert default(1, 2, 3) == "{a} {b!r} {c}".format(a=1, b=2, c=3) @interpolated_strings(transform_bytes=False) def disabled(a, b, c): return b"{a} {b!r} {c}" assert disabled(1, 2, 3) == b"{a} {b!r} {c}" def test_interpolated_str(): @interpolated_strings(transform_str=True) def enabled(a, b, c): return "{a} {b!r} {c}" assert enabled(1, 2, 3) == "{a} {b!r} {c}".format(a=1, b=2, c=3) @interpolated_strings() def default(a, b, c): return "{a} {b!r} {c}" assert default(1, 2, 3) == "{a} {b!r} {c}" @interpolated_strings(transform_bytes=False) def disabled(a, b, c): return "{a} {b!r} {c}" assert disabled(1, 2, 3) == "{a} {b!r} {c}" def test_no_cross_pollination(): @interpolated_strings(transform_bytes=True) def ignore_str(a): u = "{a}" b = b"{a}" return u, b assert ignore_str(1) == ("{a}", "1") @interpolated_strings(transform_bytes=False, transform_str=True) def ignore_bytes(a): u = "{a}" b = b"{a}" return u, b assert ignore_bytes(1) == ("1", b"{a}") def test_string_in_nested_const(): 
@interpolated_strings(transform_str=True) def foo(a, b): return ("{a}", (("{b}",), "{a} {b}"), (1, 2)) assert foo(1, 2) == ("1", (("2",), "1 2"), (1, 2)) @interpolated_strings(transform_str=True) def bar(a): return "1" in {"{a}"} assert bar(1) assert not bar(2) ================================================ FILE: codetransformer/transformers/tests/test_literals.py ================================================ """ Tests for literal transformers """ from collections import OrderedDict from decimal import Decimal from itertools import islice from ..literals import ( islice_literals, overloaded_dicts, overloaded_bytes, overloaded_floats, overloaded_lists, overloaded_sets, overloaded_slices, overloaded_strs, overloaded_tuples, ) def test_overload_thing_with_thing_is_noop(): test_vals = [('a', 1), ('b', 2), ('c', 3)] for t in dict, set, list, tuple: expected = t(test_vals) f = eval("lambda: %s" % (expected,)) overloaded = eval(t.__name__.join(['overloaded_', 's']))(t)(f) assert f() == overloaded() == expected def test_overloaded_dicts(): @overloaded_dicts(OrderedDict) def literal(): return {'a': 1, 'b': 2, 'c': 3} assert literal() == OrderedDict((('a', 1), ('b', 2), ('c', 3))) @overloaded_dicts(OrderedDict) def comprehension(): return {k: n for n, k in enumerate('abc', 1)} assert comprehension() == OrderedDict((('a', 1), ('b', 2), ('c', 3))) def test_overloaded_bytes(): @overloaded_bytes(list) def bytes_to_list(): return ["unicode", b"bytes", 1, 2, 3] assert bytes_to_list() == ["unicode", list(b"bytes"), 1, 2, 3] @overloaded_bytes(list) def bytes_to_list_tuple(): return "unicode", b"bytes", 1, 2, 3 assert bytes_to_list_tuple() == ("unicode", list(b"bytes"), 1, 2, 3) @overloaded_bytes(int) def bytes_in_set(x): return x in {b'3'} assert not bytes_in_set(b'3') assert bytes_in_set(3) @overloaded_bytes(bytearray) def mutable_bytes(): return b'123' assert isinstance(mutable_bytes(), bytearray) def test_overloaded_floats(): @overloaded_floats(Decimal) def 
float_to_decimal(): return [2, 2.0, 3.5] assert float_to_decimal() == [2, Decimal(2.0), Decimal(3.5)] @overloaded_floats(Decimal) def float_to_decimal_tuple(): return (2, 2.0, 3.5) assert float_to_decimal_tuple() == (2, Decimal(2.0), Decimal(3.5)) @overloaded_floats(Decimal) def float_in_set(x): return x in {3.0} xformed_const = float_in_set.__code__.co_consts[0] assert isinstance(xformed_const, frozenset) assert len(xformed_const) == 1 assert isinstance(tuple(xformed_const)[0], Decimal) assert tuple(xformed_const)[0] == Decimal(3.0) def test_overloaded_lists(): @overloaded_lists(tuple) def frozen_list(): return [1, 2, 3] assert frozen_list() == (1, 2, 3) @overloaded_lists(tuple) def frozen_in_tuple(): return [1, 2, 3], [4, 5, 6] assert frozen_in_tuple() == ((1, 2, 3), (4, 5, 6)) @overloaded_lists(tuple) def frozen_in_set(): # lists are not hashable but tuple are. return [1, 2, 3] in {[1, 2, 3]} assert frozen_in_set() @overloaded_lists(tuple) def frozen_comprehension(): return [a for a in (1, 2, 3)] assert frozen_comprehension() == (1, 2, 3) def test_overloaded_strs(): @overloaded_strs(tuple) def haskell_strs(): return 'abc' assert haskell_strs() == ('a', 'b', 'c') @overloaded_strs(tuple) def cs_in_tuple(): return 'abc', 'def' assert cs_in_tuple() == (('a', 'b', 'c'), ('d', 'e', 'f')) def test_overloaded_sets(): @overloaded_sets(frozenset) def f(): return {'a', 'b', 'c'} assert isinstance(f(), frozenset) assert f() == frozenset({'a', 'b', 'c'}) class invertedset(set): def __contains__(self, e): return not super().__contains__(e) @overloaded_sets(invertedset) def containment_with_consts(): # This will create a frozenset FIRST and then we should pull it # into an invertedset return 'd' in {'e'} assert containment_with_consts() def frozen_comprehension(): return {a for a in 'abc'} assert frozen_comprehension() == frozenset('abc') def test_overloaded_tuples(): @overloaded_tuples(list) def nonconst(): a = 1 b = 2 c = 3 return (a, b, c) assert nonconst() == [1, 2, 3] 
@overloaded_tuples(list) def const(): return (1, 2, 3) assert const() == [1, 2, 3] def test_overloaded_slices(): def concrete_slice(slice_): return tuple(range(slice_.start, slice_.stop))[::slice_.step] class C: _idx = None def __getitem__(self, idx): self._idx = idx return idx c = C() @overloaded_slices(concrete_slice) def f(): return c[1:10:2] f() assert c._idx == (1, 3, 5, 7, 9) def test_islice_literals(): @islice_literals def islice_test(): return map(str, (1, 2, 3, 4))[:2] assert isinstance(islice_test(), islice) assert tuple(islice_test()) == ('1', '2') ================================================ FILE: codetransformer/transformers/tests/test_precomputed_slices.py ================================================ from codetransformer.code import Code from codetransformer.instructions import BUILD_SLICE, LOAD_CONST from ..precomputed_slices import precomputed_slices def test_precomputed_slices(): @precomputed_slices() def foo(a): return a[1:5] l = list(range(10)) assert foo(l) == l[1:5] assert slice(1, 5) in foo.__code__.co_consts instrs = Code.from_pyfunc(foo).instrs assert LOAD_CONST(slice(1, 5)).equiv(instrs[1]) assert BUILD_SLICE not in set(map(type, instrs)) def test_precomputed_slices_non_const(): transformer = precomputed_slices() def f(a, b): with_non_const = a[b] with_mixed = a[1, b] return with_non_const, with_mixed transformed = transformer(f) f_instrs = Code.from_pyfunc(f).instrs transformed_instrs = Code.from_pyfunc(transformed).instrs for orig, xformed in zip(f_instrs, transformed_instrs): assert orig.equiv(xformed) ================================================ FILE: codetransformer/utils/__init__.py ================================================ ================================================ FILE: codetransformer/utils/functional.py ================================================ """ codetransformer.utils.functional -------------------------------- Utilities for functional programming. 
""" from toolz import complement, flip def is_a(type_): """More curryable version of isinstance.""" return flip(isinstance, type_) def not_a(type_): """More curryable version of not isinstance.""" return complement(is_a(type_)) def scanl(f, n, ns): """Reduce ns by f starting with n yielding each intermediate value. tuple(scanl(f, n, ns))[-1] == reduce(f, ns, n) Parameters ---------- f : callable A binary function. n : any The starting value. ns : iterable of any The iterable to scan over. Yields ------ p : any The value of reduce(f, ns[:idx]) where idx is the current index. Examples -------- >>> import operator as op >>> tuple(scanl(op.add, 0, (1, 2, 3, 4))) (0, 1, 3, 6, 10) """ yield n for m in ns: n = f(n, m) yield n def reverse_dict(d): """Reverse a dictionary, replacing the keys and values. Parameters ---------- d : dict The dict to reverse. Returns ------- rd : dict The dict with the keys and values flipped. Examples -------- >>> d = {'a': 1, 'b': 2, 'c': 3} >>> e = reverse_dict(d) >>> e == {1: 'a', 2: 'b', 3: 'c'} True """ return {v: k for k, v in d.items()} def ffill(iterable): """Forward fill non None values in some iterable. Parameters ---------- iterable : iterable The iterable to forward fill. Yields ------ e : any The last non None value or None if there has not been a non None value. """ it = iter(iterable) previous = next(it) yield previous for e in it: if e is None: yield previous else: previous = e yield e def flatten(seq, *, recurse_types=(tuple, list, set, frozenset)): """ Convert a (possibly nested) iterator into a flattened iterator. Parameters ---------- seq : iterable The sequence to flatten. recurse_types, optional Types to recursively flatten. Defaults to (tuple, list, set, frozenset). 
>>> list(flatten((1, (2, 3), ((4,), 5)))) [1, 2, 3, 4, 5] >>> list(flatten(["abc", "def"], recurse_types=(str,))) ['a', 'b', 'c', 'd', 'e', 'f'] """ for elem in seq: if isinstance(elem, recurse_types): yield from flatten(elem) else: yield elem ================================================ FILE: codetransformer/utils/immutable.py ================================================ """ codetransformer.utils.immutable ------------------------------- Utilities for creating and working with immutable objects. """ from collections import ChainMap from inspect import getfullargspec from itertools import starmap, repeat from textwrap import dedent from weakref import WeakKeyDictionary class immutableattr: """An immutable attribute of a class. Parameters ---------- attr : any The attribute. """ def __init__(self, attr): self._attr = attr def __get__(self, instance, owner): return self._attr class lazyval: """A memoizing property. Parameters ---------- func : callable The function used to compute the value of the descriptor. """ def __init__(self, func): self._cache = WeakKeyDictionary() self._func = func def __get__(self, instance, owner): if instance is None: return self cache = self._cache try: return cache[instance] except KeyError: cache[instance] = val = self._func(instance) return val def _no_arg_init(self): pass object_setattr = object.__setattr__ def initialize_slot(obj, name, value): """Initalize an unitialized slot to a value. If there is already a value for this slot, this is a nop. Parameters ---------- obj : immutable An immutable object. name : str The name of the slot to initialize. value : any The value to initialize the slot to. """ if not hasattr(obj, name): object_setattr(obj, name, value) def _create_init(name, slots, defaults): """Create the __init__ function for an immutable object. Parameters ---------- name : str The name of the immutable class. slots : iterable of str The __slots__ field from the class. 
defaults : dict or None The default values for the arguments to __init__. Returns ------- init : callable The __init__ function for the new immutable class. """ if any(s.startswith('__') for s in slots): raise TypeError( "immutable classes may not have slots that start with '__'", ) # If we have no defaults, ignore all of this. kwdefaults = None if defaults is not None: hit_default = False _defaults = [] # positional defaults kwdefaults = {} # kwonly defaults kwdefs = False for s in slots: if s not in defaults and hit_default: raise SyntaxError( 'non-default argument follows default argument' ) if not kwdefs: try: # Try to grab the next default. # Pop so that we know they were all consumed when we # are done. _defaults.append(defaults.pop(s)) except KeyError: # Not in the dict, we haven't hit any defaults yet. pass else: # We are now consuming default arguments. hit_default = True if s.startswith('*'): if s in defaults: raise TypeError( 'cannot set default for var args or var kwargs', ) if not s.startswith('**'): kwdefs = True else: kwdefaults[s] = defaults.pop(s) if defaults: # We didn't consume all of the defaults. raise TypeError( 'default value for non-existent argument%s: %s' % ( 's' if len(defaults) > 1 else '', ', '.join(starmap('{0}={1!r}'.format, defaults.items())), ) ) # cast back to tuples defaults = tuple(_defaults) if not slots: return _no_arg_init, () ns = {'__initialize_slot': initialize_slot} # filter out lone star slotnames = tuple(filter(None, (s.strip('*') for s in slots))) # We are using exec here so that we can later inspect the call signature # of the __init__. This makes the positional vs keywords work as intended. # This is totally reasonable, no h8 m8! 
exec( 'def __init__(_{name}__self, {args}): \n {assign}'.format( name=name, args=', '.join(slots), assign='\n '.join( map( '__initialize_slot(_{1}__self, "{0}", {0})'.format, slotnames, repeat(name), ), ), ), ns, ) init = ns['__init__'] init.__defaults__ = defaults init.__kwdefaults__ = kwdefaults return init, slotnames def _wrapinit(init): """Wrap an existing initialize function by thawing self for the duration of the init. Parameters ---------- init : callable The user-provided init. Returns ------- wrapped : callable The wrapped init method. """ try: spec = getfullargspec(init) except TypeError: # we cannot preserve the type signature. def __init__(*args, **kwargs): self = args[0] __setattr__._initializing.add(self) init(*args, **kwargs) __setattr__._initializing.remove(self) _check_missing_slots(self) return __init__ args = spec.args varargs = spec.varargs if not (args or varargs): raise TypeError( "%r must accept at least one positional argument for 'self'" % getattr(init, '__qualname__', getattr(init, '__name__', init)), ) if not args: self = '%s[0]' % varargs forward = argspec = '*' + varargs else: self = args[0] forward = argspec = ', '.join(args) if args and varargs: forward = '%s, *%s' % (forward, spec.varargs) argspec = '%s, *%s' % (argspec, spec.varargs) if spec.kwonlyargs: forward = '%s, %s' % ( forward, ', '.join(map('{0}={0}'.format, spec.kwonlyargs)) ) argspec = '%s,%s%s' % ( argspec, '*, ' if not spec.varargs else '', ', '.join(spec.kwonlyargs), ) if spec.varkw: forward = '%s, **%s' % (forward, spec.varkw) argspec = '%s, **%s' % (argspec, spec.varkw) ns = { '__init': init, '__initializing': __setattr__._initializing, '__check_missing_slots': _check_missing_slots, } exec( dedent( """\ def __init__({argspec}): __initializing.add({self}) __init({forward}) __initializing.remove({self}) __check_missing_slots({self}) """.format( argspec=argspec, self=self, forward=forward, ), ), ns, ) __init__ = ns['__init__'] __init__.__defaults__ = spec.defaults 
__init__.__kwdefaults__ = spec.kwonlydefaults __init__.__annotations__ = spec.annotations return __init__ def _check_missing_slots(ob): """Check that all slots have been initialized when a custom __init__ method is provided. Parameters ---------- ob : immutable The instance that was just initialized. Raises ------ TypeError Raised when the instance has not set values that are named in the __slots__. """ missing_slots = tuple( filter(lambda s: not hasattr(ob, s), ob.__slots__), ) if missing_slots: raise TypeError( 'not all slots initialized in __init__, missing: {0}'.format( missing_slots, ), ) def __setattr__(self, name, value): if self not in __setattr__._initializing: raise AttributeError('cannot mutate immutable object') object_setattr(self, name, value) __setattr__._initializing = set() def __repr__(self): return '{cls}({args})'.format( cls=type(self).__name__, args=', '.join(starmap( '{0}={1!r}'.format, ((s, getattr(self, s)) for s in self.__slots__), )), ) class ImmutableMeta(type): """A metaclass for creating immutable objects. """ def __new__(mcls, name, bases, dict_, *, defaults=None): if '__slots__' not in dict_: raise TypeError('immutable classes must have a __slots__') if '__setattr__' in dict_: raise TypeError('immutable classes cannot have a __setattr__') try: dict_['__init__'] = _wrapinit(dict_['__init__']) except KeyError: dict_['__init__'], dict_['__slots__'] = _create_init( name, dict_['__slots__'], defaults, ) dict_['__setattr__'] = __setattr__ cls = super().__new__(mcls, name, bases, dict_) if cls.__repr__ is object.__repr__: # Put a namedtuple-like repr on this class if there is no custom # repr on the class. cls.__repr__ = __repr__ return cls def __init__(self, *args, defaults=None): # ignore the defaults kwarg. return super().__init__(*args) class immutable(metaclass=ImmutableMeta): """A base class for immutable objects. 
""" __slots__ = () def to_dict(self): return {s: getattr(self, s) for s in self.__slots__} def update(self, **updates): return type(self)(**ChainMap(updates, self.to_dict())) ================================================ FILE: codetransformer/utils/instance.py ================================================ def instance(cls): """Decorator for creating one of instances. Parameters ---------- cls : type A class. Returns ------- instance : cls A new instance of ``cls``. """ return cls() ================================================ FILE: codetransformer/utils/no_default.py ================================================ @object.__new__ class no_default: def __new__(cls): return no_default def __repr__(self): return 'no_default' __str__ = __repr__ def __reduce__(self): return 'no_default' def __deepcopy__(self): return self __copy__ = __deepcopy__ ================================================ FILE: codetransformer/utils/pretty.py ================================================ """ codetransformer.utils.pretty ---------------------------- Utilities for pretty-printing ASTs and code objects. """ from ast import iter_fields, AST, Name, Num, parse import dis from functools import partial, singledispatch from io import StringIO from itertools import chain from operator import attrgetter import sys from types import CodeType from codetransformer.code import Flag INCLUDE_ATTRIBUTES_DEFAULT = False INDENT_DEFAULT = ' ' __all__ = [ 'a', 'd', 'display', 'pformat_ast', 'pprint_ast', ] def pformat_ast(node, include_attributes=INCLUDE_ATTRIBUTES_DEFAULT, indent=INDENT_DEFAULT): """ Pretty-format an AST tree element Parameters ---------- node : ast.AST Top-level node to render. include_attributes : bool, optional Whether to include node attributes. Default False. indent : str, optional. Indentation string for nested expressions. Default is two spaces. 
""" def _fmt(node, prefix, level): def with_indent(*strs): return ''.join(((indent * level,) + strs)) with_prefix = partial(with_indent, prefix) if isinstance(node, Name): # Special Case: # Render Name nodes on a single line. yield with_prefix( type(node).__name__, '(id=', repr(node.id), ', ctx=', type(node.ctx).__name__, '()),', ) elif isinstance(node, Num): # Special Case: # Render Num nodes on a single line without names. yield with_prefix( type(node).__name__, '(%r),' % node.n, ) elif isinstance(node, AST): fields_attrs = list( chain( iter_fields(node), iter_attributes(node) if include_attributes else (), ) ) if not fields_attrs: # Special Case: # Render the whole expression on one line if there are no # attributes. yield with_prefix(type(node).__name__, '(),') return yield with_prefix(type(node).__name__, '(') for name, value in fields_attrs: yield from _fmt(value, name + '=', level + 1) # Put a trailing comma if we're not at the top level. yield with_indent(')', ',' if level > 0 else '') elif isinstance(node, list): if not node: # Special Case: # Render empty lists on one line. yield with_prefix('[],') return yield with_prefix('[') yield from chain.from_iterable( map(partial(_fmt, prefix='', level=level + 1), node) ) yield with_indent('],') else: yield with_prefix(repr(node), ',') return '\n'.join(_fmt(node, prefix='', level=0)) def _extend_name(prev, parent_co): return prev + ( '..' if parent_co.co_flags & Flag.CO_NEWLOCALS else '.' ) def pprint_ast(node, include_attributes=INCLUDE_ATTRIBUTES_DEFAULT, indent=INDENT_DEFAULT, file=None): """ Pretty-print an AST tree. Parameters ---------- node : ast.AST Top-level node to render. include_attributes : bool, optional Whether to include node attributes. Default False. indent : str, optional. Indentation string for nested expressions. Default is two spaces. file : None or file-like object, optional File to use to print output. If the default of `None` is passed, we use sys.stdout. 
""" if file is None: file = sys.stdout print( pformat_ast( node, include_attributes=include_attributes, indent=indent ), file=file, ) def walk_code(co, _prefix=''): """ Traverse a code object, finding all consts which are also code objects. Yields pairs of (name, code object). """ name = _prefix + co.co_name yield name, co yield from chain.from_iterable( walk_code(c, _prefix=_extend_name(name, co)) for c in co.co_consts if isinstance(c, CodeType) ) def iter_attributes(node): attrs = node._attributes if not attrs: return yield from zip(attrs, attrgetter(*attrs)(node)) def a(text, mode='exec', indent=' ', file=None): """ Interactive convenience for displaying the AST of a code string. Writes a pretty-formatted AST-tree to `file`. Parameters ---------- text : str Text of Python code to render as AST. mode : {'exec', 'eval'}, optional Mode for `ast.parse`. Default is 'exec'. indent : str, optional String to use for indenting nested expressions. Default is two spaces. file : None or file-like object, optional File to use to print output. If the default of `None` is passed, we use sys.stdout. """ pprint_ast(parse(text, mode=mode), indent=indent, file=file) def d(obj, mode='exec', file=None): """ Interactive convenience for displaying the disassembly of a function, module, or code string. Compiles `text` and recursively traverses the result looking for `code` objects to render with `dis.dis`. Parameters ---------- obj : str, CodeType, or object with __code__ attribute Object to disassemble. If `obj` is an instance of CodeType, we use it unchanged. If `obj` is a string, we compile it with `mode` and then disassemble. Otherwise, we look for a `__code__` attribute on `obj`. mode : {'exec', 'eval'}, optional Mode for `compile`. Default is 'exec'. file : None or file-like object, optional File to use to print output. If the default of `None` is passed, we use sys.stdout. 
""" if file is None: file = sys.stdout for name, co in walk_code(extract_code(obj, compile_mode=mode)): print(name, file=file) print('-' * len(name), file=file) dis.dis(co, file=file) print('', file=file) @singledispatch def extract_code(obj, compile_mode): """ Generic function for converting objects into instances of `CodeType`. """ try: code = obj.__code__ if isinstance(code, CodeType): return code raise ValueError( "{obj} has a `__code__` attribute, " "but it's an instance of {notcode!r}, not CodeType.".format( obj=obj, notcode=type(code).__name__, ) ) except AttributeError: raise ValueError("Don't know how to extract code from %s." % obj) @extract_code.register(CodeType) def _(obj, compile_mode): return obj @extract_code.register(str) # noqa def _(obj, compile_mode): return compile(obj, '', compile_mode) _DISPLAY_TEMPLATE = """\ ==== Text ==== {text} ==================== Abstract Syntax Tree ==================== {ast} =========== Disassembly =========== {code} """ def display(text, mode='exec', file=None): """ Show `text`, rendered as AST and as Bytecode. Parameters ---------- text : str Text of Python code to render. mode : {'exec', 'eval'}, optional Mode for `ast.parse` and `compile`. Default is 'exec'. file : None or file-like object, optional File to use to print output. If the default of `None` is passed, we use sys.stdout. 
""" if file is None: file = sys.stdout ast_section = StringIO() a(text, mode=mode, file=ast_section) code_section = StringIO() d(text, mode=mode, file=code_section) rendered = _DISPLAY_TEMPLATE.format( text=text, ast=ast_section.getvalue(), code=code_section.getvalue(), ) print(rendered, file=file) ================================================ FILE: codetransformer/utils/tests/__init__.py ================================================ ================================================ FILE: codetransformer/utils/tests/test_immutable.py ================================================ from inspect import getfullargspec import pytest from codetransformer.utils.immutable import immutable class a(immutable): __slots__ = 'a', def spec(__self, a): pass class b(immutable): __slots__ = 'a', 'b' def spec(__self, a, b): pass class c(immutable): __slots__ = 'a', 'b', '*c' def spec(__self, a, b, *c): pass class d(immutable): __slots__ = 'a', 'b', '**c' def spec(__self, a, b, **c): pass class e(immutable): __slots__ = 'a', 'b', '*', 'c' def spec(__self, a, b, *, c): pass class f(immutable): __slots__ = 'a', 'b', '*c', 'd' def spec(__self, a, b, *c, d): pass class g(immutable, defaults={'a': 1}): __slots__ = 'a', def spec(__self, a=1): pass class h(immutable, defaults={'b': 2}): __slots__ = 'a', 'b' def spec(__self, a, b=2): pass class i(immutable, defaults={'a': 1, 'b': 2}): __slots__ = 'a', 'b' def spec(__self, a=1, b=2): pass class j(immutable, defaults={'c': 3}): __slots__ = 'a', 'b', '*', 'c' def spec(__self, a, b, *, c=3): pass @pytest.mark.parametrize('cls', (a, b, c, d, e, f, g, h, i, j)) def test_created_signature_single(cls): assert getfullargspec(cls) == getfullargspec(cls.spec) class k(immutable): __slots__ = 'a', def __init__(self, a): pass class l(immutable): __slots__ = 'a', def __init__(self, *a): pass class m(immutable): __slots__ = 'a', def __init__(self, **a): pass class n(immutable): __slots__ = 'a', def __init__(self, *, a): pass class o(immutable): 
    __slots__ = 'a', 'b'

    def __init__(self, a, b=2):
        pass


class p(immutable):
    __slots__ = 'a', 'b'

    def __init__(self, a=1, b=2):
        pass


class q(immutable):
    __slots__ = 'a', 'b'

    def __init__(self, a, *b):
        pass


class r(immutable):
    __slots__ = 'a', 'b'

    def __init__(self, a=1, *b):
        pass


class s(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a=1, *b, c):
        pass


class t(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a, *b, c=3):
        pass


class u(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a=1, *b, c=3):
        pass


class v(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a, **b):
        pass


class w(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a, b, **c):
        pass


class x(immutable):
    __slots__ = 'a', 'b', 'c'

    def __init__(self, a, *b, **c):
        pass


class y(immutable):
    __slots__ = 'a', 'b', 'c', 'd'

    def __init__(self, a, *b, c, **d):
        pass


class z(immutable):
    __slots__ = 'a', 'b', 'c', 'd'

    def __init__(self, a, *b, c=1, **d):
        pass


@pytest.mark.parametrize('cls', (
    k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,
))
def test_preserve_custom_init_signature(cls):
    # Wrapping a user-provided __init__ must not change its
    # introspectable signature.
    assert getfullargspec(cls) == getfullargspec(cls.__init__)


================================================
FILE: codetransformer/utils/tests/test_pretty.py
================================================
from io import StringIO
from textwrap import dedent
from types import CodeType

from ..pretty import a, walk_code


def test_a(capsys):
    text = dedent(
        """
        def inc(a):
            b = a + 1
            return b
        """
    )
    # Expected pformat_ast output for ``text`` (two-space indent).
    expected = dedent(
        """\
        Module(
          body=[
            FunctionDef(
              name='inc',
              args=arguments(
                args=[
                  arg(
                    arg='a',
                    annotation=None,
                  ),
                ],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=[],
                kwarg=None,
                defaults=[],
              ),
              body=[
                Assign(
                  targets=[
                    Name(id='b', ctx=Store()),
                  ],
                  value=BinOp(
                    left=Name(id='a', ctx=Load()),
                    op=Add(),
                    right=Num(1),
                  ),
                ),
                Return(
                  value=Name(id='b', ctx=Load()),
                ),
              ],
              decorator_list=[],
              returns=None,
            ),
          ],
        )
        """
    )
    # First call: default file is stdout.
    a(text)
    stdout, stderr = capsys.readouterr()
    assert stdout == expected
    assert stderr == ''

    # Second call: explicit file must receive the output instead.
    file_ =
StringIO()
    a(text, file=file_)
    # Nothing should reach stdout/stderr when a file is supplied.
    assert capsys.readouterr() == ('', '')

    result = file_.getvalue()
    assert result == expected


def test_walk_code():
    module = dedent(
        """\
        class Foo:
            def bar(self):
                def buzz():
                    pass

                def bazz():
                    pass
                return buzz
        """
    )
    # NOTE(review): the empty filename literal looks like extraction
    # stripped an angle-bracketed name (e.g. '<test>') — confirm upstream.
    co = compile(module, '', 'exec')
    foo = [c for c in co.co_consts if isinstance(c, CodeType)][0]
    bar = [c for c in foo.co_consts if isinstance(c, CodeType)][0]
    buzz = [c for c in bar.co_consts
            if isinstance(c, CodeType) and c.co_name == 'buzz'][0]
    bazz = [c for c in bar.co_consts
            if isinstance(c, CodeType) and c.co_name == 'bazz'][0]

    result = list(walk_code(co))
    # Class bodies extend names with '.', functions (CO_NEWLOCALS) with
    # '..'.  NOTE(review): the leading '' prefixes below look like an
    # angle-bracketed token (likely '<module>') was stripped by
    # extraction — confirm against the upstream source.
    expected = [
        ('', co),
        ('.Foo', foo),
        ('.Foo.bar', bar),
        ('.Foo.bar..buzz', buzz),
        ('.Foo.bar..bazz', bazz),
    ]
    assert result == expected


================================================
FILE: docs/.dir-locals.el
================================================
;; Set compile-command for everything in this directory to
;; "make -C .. html"

;; This is an association list mapping directory prefixes (in this case nil,
;; meaning "all files"), to another association list mapping dir-local
;; variable names to values.  An equivalent Python structure would be
;; something like:
;; {None: {'compile-command': "make -C .. html"}}
((nil . ((compile-command . (concat "make -C .. html")))))


================================================
FILE: docs/Makefile
================================================
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH.
@echo "Please use \`make <target>' where <target> is one of"
$(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." livehtml: sphinx-autobuild -p 9999 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/codetransformer.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/codetransformer.qhc" applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @echo "N.B. You won't be able to view it unless you put it in" \ "~/Library/Documentation/Help or install it in your application" \ "bundle." devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." 
@echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/codetransformer" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/codetransformer" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." ================================================ FILE: docs/source/appendix.rst ================================================ API Reference ============= ``codetransformer.transformers`` -------------------------------- .. automodule:: codetransformer.transformers :members: .. autodata:: islice_literals :annotation: .. data:: bytearray_literals A transformer that converts :class:`bytes` literals to :class:`bytearray`. .. data:: decimal_literals A transformer that converts :class:`float` literals to :class:`~decimal.Decimal`. ``codetransformer.code`` ------------------------ .. autoclass:: codetransformer.code.Code :members: .. autoclass:: codetransformer.code.Flag :members: :undoc-members: ``codetransformer.core`` ------------------------ .. autoclass:: codetransformer.core.CodeTransformer :members: ``codetransformer.instructions`` -------------------------------- For details on particular instructions, see `the dis stdlib module docs.`_ .. 
automodule:: codetransformer.instructions :members: :undoc-members: ``codetransformer.patterns`` ---------------------------- .. autoclass:: codetransformer.patterns.pattern .. autodata:: codetransformer.patterns.DEFAULT_STARTCODE DSL Objects ~~~~~~~~~~~ .. autodata:: codetransformer.patterns.matchany .. autoclass:: codetransformer.patterns.seq .. autodata:: codetransformer.patterns.var .. autodata:: codetransformer.patterns.plus .. autodata:: codetransformer.patterns.option ``codetransformer.utils`` ------------------------- .. automodule:: codetransformer.utils.pretty :members: .. automodule:: codetransformer.utils.immutable :members: immutable, lazyval, immutableattr .. automodule:: codetransformer.utils.functional :members: ``codetransformer.decompiler`` ------------------------------ .. automodule:: codetransformer.decompiler :members: decompile, pycode_to_body, DecompilationContext, DecompilationError .. _`the dis stdlib module docs.` : https://docs.python.org/3.4/library/dis.html#python-bytecode-instructions ================================================ FILE: docs/source/code-objects.rst ================================================ =========================== Working with Code Objects =========================== The :class:`~codetransformer.code.Code` type is the foundational abstraction in ``codetransformer``. It provides high-level APIs for working with logically-grouped sets of instructions and for converting to and from CPython's native :class:`code ` type. Constructing Code Objects ========================= The most common way constructing a Code object is to use the :meth:`~codetransformer.code.Code.from_pycode` classmethod, which accepts a CPython :class:`code ` object. There are two common ways of building raw code objects: - CPython functions have a ``__code__`` attribute, which contains the bytecode executed by the function. - The :func:`compile` builtin can compile a string of Python source code into a code object. 
>>> co = Code.from_pycode(add2.__code__)
>>> co.instrs
(LOAD_FAST('x'), LOAD_CONST(2), BINARY_ADD, RETURN_VALUE)
>>> co.argnames
('x',)
>>> co.consts
(2,)

We can convert our Code object back into its raw form via the
:meth:`~codetransformer.code.Code.to_pycode` method::

   >>> co.to_pycode()
   <code object add2 at 0x..., file "<show>", line 1>

Building Transformers
=====================

Once we have the ability to convert to and from an abstract code
representation, we gain the ability to perform transformations on that
abstract representation.
For each matching sequence of instructions, the decorated method is called with all matching instructions \*-unpacked into the method. The method's job is to take the input instructions and return an iterable of new instructions to serve as replacements. It is often convenient to implement transformer methods as `generator functions`_, as we've done here. In this example, we've supplied the simplest possible pattern: a single instruction type to match. [#f2]_ Our transformer method will be called on every ``BINARY_ADD`` instruction in the target code object, and it will yield a ``BINARY_MULTIPLY`` as replacement each time. Applying Transformers ===================== To apply a :class:`~codetransformer.core.CodeTransformer` to a function, we construct an instance of the transformer and call it on the function we want to modify. The result is a new function whose instructions have been rewritten applying our transformer's methods to matched sequences of the input function's instructions. The original function is not mutated in place. **Example:** .. code-block:: python >>> transformer = add2mul() >>> mul2 = transformer(add2) # mult2 is a brand-new function >>> mul2(5) 10 When we don't care about having access to the pre-transformed version of a function, it's convenient and idiomatic to apply transformers as decorators:: >>> @add2mul() ... def mul2(x): ... return x + 2 ... >>> mul2(5) 10 .. [#f1] For example, if we add a new constant, we have to ensure that we correctly maintain the indices of existing constants in the generated code's ``co_consts``, and if we replace an instruction that was the target of a jump, we have to make sure that the jump instruction resolves correctly to our new instruction. .. [#f2] Many more complex patterns are possible. See the docs for :class:`codetransformer.patterns.pattern` for more examples. .. 
_`generator functions` : https://docs.python.org/2/tutorial/classes.html#generators ================================================ FILE: docs/source/conf.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # # codetransformer documentation build configuration file, created by # sphinx-quickstart on Sat Sep 5 21:06:06 2015. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys import os import shlex # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('..')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. 
project = 'codetransformer'
copyright = '2016, Joe Jevnik and Scott Sanderson'
author = 'Joe Jevnik and Scott Sanderson'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# NOTE(review): these are hard-coded here; keep them in sync with the package
# version produced by versioneer (codetransformer/_version.py) when releasing.
#
# The short X.Y version.
version = '0.6.0'
# The full version, including alpha/beta/rc tags.
release = '0.6.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
#   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
#   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
#html_search_language = 'en'

# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
#html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = 'codetransformerdoc'

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',

    # Latex figure (float) alignment
    #'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'codetransformer.tex', 'codetransformer Documentation',
     'Joe Jevnik and Scott Sanderson', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'codetransformer', 'codetransformer Documentation',
     [author], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'codetransformer', 'codetransformer Documentation',
     author, 'codetransformer', 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/3/': None} # This makes a big difference for Code's many attributes. napoleon_use_ivar = True ================================================ FILE: docs/source/index.rst ================================================ codetransformer =============== Bytecode transformers for CPython inspired by the ``ast`` module's ``NodeTransformer``. ``codetransformer`` is a library that provides utilities for working with CPython bytecode at runtime. Among other things, it provides: - A :class:`~codetransformer.code.Code` type for representing and manipulating Python bytecode. - An :class:`~codetransformer.instructions.Instruction` type, with :class:`subclasses ` for each opcode used by the CPython interpreter. - A :class:`~codetransformer.core.CodeTransformer` type providing a pattern-based API for describing transformations on :class:`~codetransformer.code.Code` objects. Example transformers can be found in :mod:`codetransformer.transformers`. - An experimental :mod:`decompiler ` for determining the AST tree that would generate a code object. The existence of ``codetransformer`` is motivated by the desire to override parts of the python language that cannot be easily hooked via more standard means. Examples of program transformations made possible using code transformers include: * Overriding the ``is`` and ``not`` operators. * `Overloading Python's data structure literals`_. * `Optimizing functions by freezing globals as constants`_. * `Exception handlers that match on exception instances`_. Contents: .. toctree:: :maxdepth: 2 code-objects.rst patterns.rst magics.rst appendix.rst Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` .. _lazy: https://github.com/llllllllll/lazy_python .. _Overloading Python's data structure literals: appendix.html\#codetransformer.transformers.literals.overloaded_dicts .. 
_Optimizing functions by freezing globals as constants: appendix.html#codetransformer.transformers.asconstants .. _Exception handlers that match on exception instances: appendix.html#codetransformer.transformers.exc_patterns.pattern_matched_exceptions ================================================ FILE: docs/source/magics.rst ================================================ Interactive Conveniences ======================== When developing projects using :mod:`codetransformer`, it's often helpful to be able to quickly and easily visualize the AST and/or disassembly generated by CPython for a given source text. The :mod:`codetransformer.utils.pretty` module provides utilities for viewing AST trees and the disassembly of nested code objects: .. autosummary:: ~codetransformer.utils.pretty.a ~codetransformer.utils.pretty.d ~codetransformer.utils.pretty.display ~codetransformer.utils.pretty.extract_code For users of `IPython`_, :mod:`codetransformer` provides an IPython extension that adds ``%%ast`` and ``%%dis`` magics. .. code-block:: python In [1]: %load_ext codetransformer In [2]: %%dis ...: def foo(a, b): ...: return a + b ...: -------- 1 0 LOAD_CONST 0 (", line 1>) 3 LOAD_CONST 1 ('foo') 6 MAKE_FUNCTION 0 9 STORE_NAME 0 (foo) 12 LOAD_CONST 2 (None) 15 RETURN_VAL .foo ------------ 2 0 LOAD_FAST 0 (a) 3 LOAD_FAST 1 (b) 6 BINARY_ADD 7 RETURN_VAL In [3]: %%ast ...: def foo(a, b): ...: return a + b ...: Module( body=[ FunctionDef( name='foo', args=arguments( args=[ arg( arg='a', annotation=None, ), arg( arg='b', annotation=None, ), ], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[], ), body=[ Return( value=BinOp( left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load()), ), ), ], decorator_list=[], returns=None, ), ], ) .. 
_`IPython` : https://ipython.readthedocs.org/en/stable/ ================================================ FILE: docs/source/patterns.rst ================================================ ============ Pattern API ============ Most bytecode transformations are best expressed by identifying a pattern in the bytecode and emitting some replacement. ``codetransformer`` makes it easy to express and work on these patterns by defining a small dsl for use in :class:`~codetransformer.core.CodeTransformer` classes. Matchables ========== A pattern is expressed by a sequence of matchables paired with the startcode. A matchable is anything that we can compare a sequence of bytecode to. Instructions ------------ The most atomic matchable is any :class:`~codetransformer.instructions.Instruction` class. These classes each can be used to define a pattern that matches instances of that instruction. For example, the pattern:: LOAD_CONST will match a single :class:`~codetransformer.instructions.LOAD_CONST` instance. All matchables support the following operations: ``or`` ------ Matchables can be or'd together to create a new matchable that matches either the lhs or the rhs. For example:: LOAD_CONST | LOAD_FAST will match a either a single :class:`~codetransformer.instructions.LOAD_CONST` or a :class:`~codetransformer.instructions.LOAD_FAST`. ``not`` ------- Matchables may be negated to create a new matchable that matches anything the original did not match. For example:: ~LOAD_CONST will match any instruction except an instance of :class:`~codetransformer.instructions.LOAD_CONST`. ``matchrange`` -------------- It is possible to create a matchable from another such that it matches the same pattern repeated multiple times. For example:: LOAD_CONST[3] will match exactly three :class:`~codetransformer.instructions.LOAD_CONST` instances in a row. 
This will not match on any fewer than three and will match on the first
three if there are more than three
:class:`~codetransformer.instructions.LOAD_CONST` instructions in a row.

This can be specified with an upper bound also like::

    LOAD_CONST[3, 5]

This matches between three and five
:class:`~codetransformer.instructions.LOAD_CONST` instructions. This is
greedy, meaning that if four or five
:class:`~codetransformer.instructions.LOAD_CONST` instructions exist it will
consume as many as possible, up to five.

``var``
-------

:data:`~codetransformer.patterns.var` is a modifier that matches zero or more
instances of another matchable. For example::

    LOAD_CONST[var]

will match as many :class:`~codetransformer.instructions.LOAD_CONST`
instructions as appear in a row, or an empty instruction set.

``plus``
--------

:data:`~codetransformer.patterns.plus` is a modifier that matches one or more
instances of another matchable. For example::

    LOAD_CONST[plus]

will match as many :class:`~codetransformer.instructions.LOAD_CONST`
instructions as appear in a row, as long as there is at least one.

``option``
----------

:data:`~codetransformer.patterns.option` is a modifier that matches zero or
one instance of another matchable. For example::

    LOAD_CONST[option]

will match either an empty instruction set or exactly one
:class:`~codetransformer.instructions.LOAD_CONST`.

``matchany``
------------

:data:`~codetransformer.patterns.matchany` is a special matchable that
matches any single instruction. ``...`` is an alias for
:data:`~codetransformer.patterns.matchany`.

``seq``
-------

:class:`~codetransformer.patterns.seq` is a matchable that matches a sequence
of other matchables. For example::

    seq(LOAD_CONST, ..., ~LOAD_CONST)

will match a single :class:`~codetransformer.instructions.LOAD_CONST`
followed by any instruction followed by any instruction that is not a
:class:`~codetransformer.instructions.LOAD_CONST`.
This example shows how we can compose all of our matchables together to
build more complex matchables.

``pattern``
===========

In order to use our DSL we need a way to register transformations to these
matchables. To do this we may decorate methods of a
:class:`~codetransformer.core.CodeTransformer` with
:class:`~codetransformer.patterns.pattern`. This registers the function to
the pattern. For example::

    class MyTransformer(CodeTransformer):
        @pattern(LOAD_CONST, ..., ~LOAD_CONST)
        def _f(self, load_const, any, not_load_const):
            ...

The argument list of a :class:`~codetransformer.patterns.pattern` is
implicitly made into a `seq`_.

When using ``MyTransformer`` to transform some bytecode ``_f`` will be called
only when we see a :class:`~codetransformer.instructions.LOAD_CONST` followed
by any instruction followed by any instruction that is not a
:class:`~codetransformer.instructions.LOAD_CONST`. This function will be
passed these three instruction objects positionally and should yield the
instructions to replace them with.

Resolution Order
----------------

Patterns are checked in the order they are defined in the class body. This is
because some patterns may overlap with each other. For example, given the two
classes::

    class OrderOne(CodeTransformer):
        @pattern(LOAD_CONST)
        def _load_const(self, instr):
            print('LOAD_CONST')
            yield instr

        @pattern(...)
        def _any(self, instr):
            print('...')
            yield instr


    class OrderTwo(CodeTransformer):
        @pattern(...)
        def _any(self, instr):
            print('...')
            yield instr

        @pattern(LOAD_CONST)
        def _load_const(self, instr):
            print('LOAD_CONST')
            yield instr

and the following bytecode sequence::

    LOAD_CONST
    POP_TOP
    LOAD_CONST
    RETURN_VALUE

When running with ``OrderOne`` we would see::

    LOAD_CONST
    ...
    LOAD_CONST
    ...

but when running with ``OrderTwo``::

    ...
    ...
    ...
    ...
This is because we will always match on the ``...`` pattern, whereas
``OrderOne`` will check against
:class:`~codetransformer.instructions.LOAD_CONST` before falling back to the
:data:`~codetransformer.patterns.matchany`.

Contextual Patterns
-------------------

Sometimes a pattern should only be matched given that some condition has been
met. An example of this is that you want to modify comprehensions. In order
to be sure that you are only modifying the bodies of the comprehensions we
must only match when we know we are in one.

:class:`~codetransformer.patterns.pattern` accepts a keyword-only argument
``startcodes`` which is a set of contexts where this pattern should apply. By
default this is :data:`~codetransformer.patterns.DEFAULT_STARTCODE` which is
the default state. A startcode may be anything hashable; however it is best
to use strings or integer constants to make it easy to debug.

The :meth:`~codetransformer.core.CodeTransformer.begin` method enters a new
startcode. For example::

    class FindDictComprehensions(CodeTransformer):
        @pattern(BUILD_MAP, matchany[var], MAP_ADD)
        def _start_comprehension(self, *instrs):
            print('starting dict comprehension')
            self.begin('in_comprehension')
            yield from instrs

        @pattern(RETURN_VALUE, startcodes=('in_comprehension',))
        def _return_from_comprehension(self, instr):
            print('returning from comprehension')
            yield instr

        @pattern(RETURN_VALUE)
        def _return_default(self, instr):
            print('returning from non-comprehension')
            yield instr

This transformer will find dictionary comprehensions and enter a new
startcode. Inside this startcode we will handle
:class:`~codetransformer.instructions.RETURN_VALUE` instructions differently.

.. code-block:: python

   >>> @FindDictComprehensions()
   ... def f():
   ...     pass
   ...
   returning from non-comprehension

   >>> @FindDictComprehensions()
   ... def g():
   ...     {a: b for a, b in it}
   ...
starting dict comprehension returning from comprehension returning from non-comprehension It is important to remember that when we recurse into a nested code object (like a comprehension) that we do not inherit the startcode from our parent. Instead it always starts at :data:`~codetransformer.patterns.DEFAULT_STARTCODE`. ================================================ FILE: requirements_doc.txt ================================================ Sphinx==1.3.5 sphinx-rtd-theme==0.1.9 ================================================ FILE: setup.cfg ================================================ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] VCS=git style=pep440 versionfile_source=codetransformer/_version.py versionfile_build=codetransformer/_version.py tag_prefix= parentdir_prefix=codetransformer- ================================================ FILE: setup.py ================================================ #!/usr/bin/env python from setuptools import setup, find_packages import sys import versioneer long_description = '' if 'upload' in sys.argv: with open('README.rst') as f: long_description = f.read() setup( name='codetransformer', version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description='Python code object transformers', author='Joe Jevnik and Scott Sanderson', author_email='joejev@gmail.com', packages=find_packages(), long_description=long_description, license='GPL-2', classifiers=[ 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', 'Natural Language :: English', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Operating System :: POSIX', 'Topic :: Software Development :: Pre-processors', ], 
url='https://github.com/llllllllll/codetransformer', install_requires=['toolz'], extras_require={ 'dev': [ 'flake8==3.3.0', 'pytest==2.8.4', 'pytest-cov==2.2.1', ], }, ) ================================================ FILE: tox.ini ================================================ [tox] envlist=py{34,35,36} skip_missing_interpreters=True [testenv] commands= pip install -e .[dev] py.test [pytest] addopts = --doctest-modules --cov codetransformer --cov-report term-missing --ignore setup.py testpaths = codetransformer norecursedirs = decompiler ================================================ FILE: versioneer.py ================================================ # Version: 0.15 """ The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy * [![Latest Version] (https://pypip.in/version/versioneer/badge.svg?style=flat) ](https://pypi.python.org/pypi/versioneer/) * [![Build Status] (https://travis-ci.org/warner/python-versioneer.png?branch=master) ](https://travis-ci.org/warner/python-versioneer) This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. 
## Quick Install

* `pip install versioneer` to somewhere in your $PATH
* add a `[versioneer]` section to your setup.cfg (see below)
* run `versioneer install` in your source tree, commit the results

## Version Identifiers

Source trees come from a variety of places:

* a version-control system checkout (mostly used by developers)
* a nightly tarball, produced by build automation
* a snapshot tarball, produced by a web-based VCS browser, like github's
  "tarball from tag" feature
* a release tarball, produced by "setup.py sdist", distributed through PyPI

Within each source tree, the version identifier (either a string or a number,
this tool is format-agnostic) can come from a variety of places:

* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
  about recent "tags" and an absolute revision-id
* the name of the directory into which the tarball was unpacked
* an expanded VCS keyword ($Id$, etc)
* a `_version.py` created by some earlier build step

For released software, the version identifier is closely related to a VCS
tag. Some projects use tag names that include more than just the version
string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
needs to strip the tag prefix to extract the version identifier. For
unreleased software (between tags), the version identifier should provide
enough information to help developers recreate the same tree, while also
giving them an idea of roughly how old the tree is (after version 1.2, before
version 1.3). Many VCS systems can report a description that captures this,
for example `git describe --tags --dirty --always` reports things like
"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
uncommitted changes).
The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation First, decide on values for the following configuration variables: * `VCS`: the version control system you use. Currently accepts "git". * `style`: the style of version string to be produced. See "Styles" below for details. Defaults to "pep440", which looks like `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. * `versionfile_source`: A project-relative pathname into which the generated version strings should be written. This is usually a `_version.py` next to your project's main `__init__.py` file, so it can be imported at runtime. If your project uses `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. This file should be checked in to your VCS as usual: the copy created below by `setup.py setup_versioneer` will include code that parses expanded VCS keywords in generated tarballs. 
The 'build' and 'sdist' commands will replace it with a copy that has just the calculated version string. This must be set even if your project does not have any modules (and will therefore never import `_version.py`), since "setup.py sdist" -based trees still need somewhere to record the pre-calculated version strings. Anywhere in the source tree should do. If there is a `__init__.py` next to your `_version.py`, the `setup.py setup_versioneer` command (described below) will append some `__version__`-setting assignments, if they aren't already present. * `versionfile_build`: Like `versionfile_source`, but relative to the build directory instead of the source directory. These will differ when your setup.py uses 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, then you will probably have `versionfile_build='myproject/_version.py'` and `versionfile_source='src/myproject/_version.py'`. If this is set to None, then `setup.py build` will not attempt to rewrite any `_version.py` in the built tree. If your project does not have any libraries (e.g. if it only builds a script), then you should use `versionfile_build = None` and override `distutils.command.build_scripts` to explicitly insert a copy of `versioneer.get_version()` into your generated script. * `tag_prefix`: a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. If your tags look like 'myproject-1.2.0', then you should use tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this should be an empty string. * `parentdir_prefix`: a optional string, frequently the same as tag_prefix, which appears at the start of all unpacked tarball filenames. If your tarball unpacks into 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, just omit the field from your `setup.cfg`. This tool provides one script, named `versioneer`. 
That script has one mode, "install", which writes a copy of `versioneer.py` into the current directory and runs `versioneer.py setup` to finish the installation. To versioneer-enable your project: * 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and populating it with the configuration values you decided earlier (note that the option names are not case-sensitive): ```` [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = "" parentdir_prefix = myproject- ```` * 2: Run `versioneer install`. This will do the following: * copy `versioneer.py` into the top of your source tree * create `_version.py` in the right place (`versionfile_source`) * modify your `__init__.py` (if one exists next to `_version.py`) to define `__version__` (by calling a function from `_version.py`) * modify your `MANIFEST.in` to include both `versioneer.py` and the generated `_version.py` in sdist tarballs `versioneer install` will complain about any problems it finds with your `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all the problems. * 3: add a `import versioneer` to your setup.py, and add the following arguments to the setup() call: version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), * 4: commit these changes to your VCS. To make sure you won't forget, `versioneer install` will mark everything it touched for addition using `git add`. Don't forget to add `setup.py` and `setup.cfg` too. ## Post-Installation Usage Once established, all uses of your tree from a VCS checkout should get the current version string. All generated tarballs should include an embedded version string (so users who unpack them will not need a VCS tool installed). If you distribute your project through PyPI, then the release process should boil down to two steps: * 1: git tag 1.0 * 2: python setup.py register sdist upload If you distribute it through github (i.e. 
users use github to generate tarballs with `git archive`), the process is: * 1: git tag 1.0 * 2: git push; git push --tags Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at least one tag in its history. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. 
The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See details.md in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ### Upgrading to 0.15 Starting with this version, Versioneer is configured with a `[versioneer]` section in your `setup.cfg` file. 
Earlier versions required the `setup.py` to set attributes on the `versioneer` module immediately after import. The new version will refuse to run (raising an exception during import) until you have provided the necessary `setup.cfg` section. In addition, the Versioneer package provides an executable named `versioneer`, and the installation process is driven by running `versioneer install`. In 0.14 and earlier, the executable was named `versioneer-installer` and was run without an argument. ### Upgrading to 0.14 0.14 changes the format of the version string. 0.13 and earlier used hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a plus-separated "local version" section strings, with dot-separated components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old format, but should be ok with the new one. ### Upgrading from 0.11 to 0.12 Nothing special. ### Upgrading from 0.10 to 0.11 You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running `setup.py setup_versioneer`. This will enable the use of additional version-control systems (SVN, etc) in the future. ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## License To make Versioneer easier to embed, all its code is hereby released into the public domain. The `_version.py` that it creates is also in the public domain. 
def get_root():
    """Locate and return the project root directory.

    The root is the directory containing setup.py, setup.cfg, and
    versioneer.py.  We first try the current working directory, then fall
    back to the directory containing the script named in ``sys.argv[0]``
    (to allow ``python path/to/setup.py COMMAND``).

    Raises VersioneerBadRootError if neither location looks like a
    project root.
    """
    # we require that all commands are run from the project root, i.e. the
    # directory that contains setup.py, setup.cfg, and versioneer.py .
    root = os.path.realpath(os.path.abspath(os.getcwd()))
    setup_py = os.path.join(root, "setup.py")
    versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        # allow 'python path/to/setup.py COMMAND': look next to the script
        # being executed instead of next to the current directory
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        setup_py = os.path.join(root, "setup.py")
        versioneer_py = os.path.join(root, "versioneer.py")
    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
        # BUGFIX: the message previously read "unable to run the project
        # root directory", which garbled what this failure actually means.
        err = ("Versioneer was unable to find the project root directory. "
               "Versioneer requires setup.py to be executed from "
               "its immediate directory (like 'python setup.py COMMAND'), "
               "or in a way that lets it use sys.argv[0] to find the root "
               "(like 'python path/to/setup.py COMMAND').")
        raise VersioneerBadRootError(err)
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one. So we can't use
        # os.path.dirname(__file__), as that will find whichever
        # versioneer.py was first imported, even in later projects.
        me = os.path.realpath(os.path.abspath(__file__))
        if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]:
            print("Warning: build in %s is using versioneer.py from %s"
                  % (os.path.dirname(me), versioneer_py))
    except NameError:
        # __file__ is undefined in some embedded/frozen environments
        pass
    return root
if verbose: print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) return None return stdout LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.15 (https://github.com/warner/python-versioneer) import errno import os import re import subprocess import sys def get_keywords(): # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full} return keywords class VersioneerConfig: pass def get_config(): # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): pass LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator def decorate(f): if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None stdout = p.communicate()[0].strip() if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) return None return stdout def versions_from_parentdir(parentdir_prefix, root, verbose): # Source tarballs conventionally unpack into a directory that includes # both the project name and a version string. 
dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: print("guessing rootdir is '%%s', but '%%s' doesn't start with " "prefix '%%s'" %% (root, dirname, parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None} @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): if not keywords: raise NotThisMethod("no keywords at all, weird") refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. 
The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs-tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags"} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # this runs 'git' from the root of the source tree. This only gets called # if the git-archive 'subst' keywords were *not* expanded, and # _version.py hasn't already been rewritten with a short version string, # meaning we're inside a checked out source tree. 
if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %%s" %% root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag, this yields TAG-NUM-gHEX[-dirty] # if there are no tags, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces def plus_or_dot(pieces): if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): # now build up version string, with post-release "local version # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty # exceptions: # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): # TAG[.post.devDISTANCE] . No -dirty # exceptions: # 1: no tags. 0.post.devDISTANCE if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. 
Note that # .dev0 sorts backwards (a dirty tree will appear "older" than the # corresponding clean one), but you shouldn't be releasing software with # -dirty anyways. # exceptions: # 1: no tags. 0.postDISTANCE[.dev0] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. # exceptions: # 1: no tags. 0.postDISTANCE[.dev0] if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty # --always' # exceptions: # 1: no tags. HEX[-dirty] (note: no 'g' prefix) if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty # --always -long'. The distance/hash is unconditional. # exceptions: # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"]} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} def get_versions(): # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Derive version information from expanded git-archive keywords.

    `keywords` is the dict produced by git_get_keywords ("refnames" and
    "full").  Returns a version-info dict (version / full-revisionid /
    dirty / error) or raises NotThisMethod when the keywords are missing
    or were never expanded (i.e. we are not inside a git-archive tarball).
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        # keywords still contain the literal $Format$ placeholder: this
        # tree did not come from 'git archive', so this method can't help
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    # refnames looks like "(HEAD, tag: v1.0, master)"; split into parts
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs-tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            # first tag matching the prefix wins; a tagged archive is
            # never considered dirty
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None
                    }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags"}
if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) raise NotThisMethod("no .git directory") GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # if there is a tag, this yields TAG-NUM-gHEX[-dirty] # if there are no tags, this yields HEX[-dirty] (no NUM) describe_out = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
def do_vcs_install(manifest_in, versionfile_source, ipy):
    """Register versioneer's files with git.

    Runs 'git add' on MANIFEST.in, the version file, the package
    __init__.py (if any), and versioneer.py itself, and ensures that
    .gitattributes marks the version file with 'export-subst' so that
    'git archive' expands the keywords inside it.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        # shell=False is used for git, so the Windows wrappers must be
        # named explicitly
        GITS = ["git.cmd", "git.exe"]
    files = [manifest_in, versionfile_source]
    if ipy:
        files.append(ipy)
    try:
        me = __file__
        if me.endswith(".pyc") or me.endswith(".pyo"):
            # git should track the .py source, not the compiled cache
            me = os.path.splitext(me)[0] + ".py"
        versioneer_file = os.path.relpath(me)
    except NameError:
        # __file__ undefined (frozen/embedded); fall back to the
        # conventional name
        versioneer_file = "versioneer.py"
    files.append(versioneer_file)
    present = False
    try:
        # check whether .gitattributes already has an export-subst entry
        # for the version file
        f = open(".gitattributes", "r")
        for line in f.readlines():
            if line.strip().startswith(versionfile_source):
                if "export-subst" in line.strip().split()[1:]:
                    present = True
        f.close()
    except EnvironmentError:
        # no .gitattributes yet; we'll create one below
        pass
    if not present:
        f = open(".gitattributes", "a+")
        f.write("%s export-subst\n" % versionfile_source)
        f.close()
        files.append(".gitattributes")
    run_command(GITS, ["add", "--"] + files)
def versions_from_file(filename):
    """Try to determine the version from an already-rewritten _version.py.

    Returns the decoded version-info dict, or raises NotThisMethod when
    the file is unreadable or does not contain a version_json block.
    """
    try:
        with open(filename) as f:
            contents = f.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")
    # SHORT_VERSION_PY embeds the version dict as JSON between the
    # version_json marker and the END VERSION_JSON sentinel comment
    match = re.search(
        r"version_json = '''\n(.*)''' # END VERSION_JSON",
        contents,
        re.M | re.S,
    )
    if match is None:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(match.group(1))
def render_pep440_pre(pieces):
    """Render a PEP440-ish pre-release string: TAG[.post.devDISTANCE].

    The dirty flag is deliberately ignored for this style.  When there is
    no tag at all, "0" is used as the base: 0.post.devDISTANCE.
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]
    if not tag:
        # exception #1: no tags at all
        return "0.post.dev%d" % distance
    if distance:
        return tag + ".post.dev%d" % distance
    return tag
def render(pieces, style):
    """Render the VCS `pieces` dict into a version-info dict.

    `style` selects the rendering scheme ("pep440" by default); an
    unrecognized style raises ValueError.  The result always carries the
    keys "version", "full-revisionid", "dirty" and "error".
    """
    if pieces["error"]:
        # the VCS query failed; surface the error instead of a version
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"]}

    if not style or style == "default":
        style = "pep440"  # the default

    # pick the renderer first, then invoke it once
    if style == "pep440":
        renderer = render_pep440
    elif style == "pep440-pre":
        renderer = render_pep440_pre
    elif style == "pep440-post":
        renderer = render_pep440_post
    elif style == "pep440-old":
        renderer = render_pep440_old
    elif style == "git-describe":
        renderer = render_git_describe
    elif style == "git-describe-long":
        renderer = render_git_describe_long
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": renderer(pieces),
            "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"],
            "error": None}
def get_versions(verbose=False):
    """Compute the project version, trying each available strategy in turn.

    Strategies, in order: expanded git-archive keywords in the version
    file, a previously rewritten version file, a live VCS query ('git
    describe'), and finally the parent-directory name.  Returns a dict
    with the keys "version", "full-revisionid", "dirty" and "error".
    """
    if "versioneer" in sys.modules:
        # see the discussion in cmdclass.py:get_cmdclass()
        del sys.modules["versioneer"]

    root = get_root()
    cfg = get_config_from_root(root)

    # sanity-check the setup.cfg [versioneer] section before proceeding
    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, \
        "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.

    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")
    # every strategy failed; report an unknown version with an error note
    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version"}
# Also see https://github.com/warner/python-versioneer/issues/52 cmds = {} # we add "version" to both distutils and setuptools from distutils.core import Command class cmd_version(Command): description = "report generated version string" user_options = [] boolean_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) print(" dirty: %s" % vers.get("dirty")) if vers["error"]: print(" error: %s" % vers["error"]) cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py # distutils/install -> distutils/build ->.. # setuptools/bdist_wheel -> distutils/install ->.. # setuptools/bdist_egg -> distutils/install_lib -> build_py # setuptools/install -> bdist_egg ->.. # setuptools/develop -> ? from distutils.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? 
from cx_Freeze.dist import build_exe as _build_exe class cmd_build_exe(_build_exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] # we override different "sdist" commands for both environments if "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self): versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir, files): root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. 
You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = "" parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. """ SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ INIT_PY_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(INIT_PY_SNIPPET) else: print(" %s unmodified" % ipy) else: 
print(" %s doesn't exist, ok" % ipy) ipy = None # Make sure both the top-level "versioneer.py" and versionfile_source # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so # they'll be copied into source distributions. Pip won't be able to # install the package without this. manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so # it might give some false negatives. Appending redundant 'include' # lines is safe, though. if "versioneer.py" not in simple_includes: print(" appending 'versioneer.py' to MANIFEST.in") with open(manifest_in, "a") as f: f.write("include versioneer.py\n") else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: print(" versionfile_source already in MANIFEST.in") # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-time keyword # substitution. do_vcs_install(manifest_in, cfg.versionfile_source, ipy) return 0 def scan_setup_py(): found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). 
Please make sure it has something") print("roughly like the following:") print("") print(" import versioneer") print(" setup( version=versioneer.get_version(),") print(" cmdclass=versioneer.get_cmdclass(), ...)") print("") errors += 1 if setters: print("You should remove lines like 'versioneer.VCS = ' and") print("'versioneer.versionfile_source = ' . This configuration") print("now lives in setup.cfg, and should be removed from setup.py") print("") errors += 1 return errors if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": errors = do_setup() errors += scan_setup_py() if errors: sys.exit(1)