Repository: yandex/gixy Branch: master Commit: 6f68624a7540 Files: 178 Total size: 268.2 KB Directory structure: gitextract__w1ux7cu/ ├── .dockerignore ├── .editorconfig ├── .gitignore ├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.RU.md ├── README.md ├── docs/ │ ├── en/ │ │ └── plugins/ │ │ ├── addheadermultiline.md │ │ ├── addheaderredefinition.md │ │ ├── aliastraversal.md │ │ ├── hostspoofing.md │ │ ├── httpsplitting.md │ │ ├── origins.md │ │ ├── ssrf.md │ │ └── validreferers.md │ └── ru/ │ └── plugins/ │ ├── addheadermultiline.md │ ├── addheaderredefinition.md │ ├── aliastraversal.md │ ├── hostspoofing.md │ ├── httpsplitting.md │ ├── origins.md │ ├── ssrf.md │ └── validreferers.md ├── gixy/ │ ├── __init__.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── argparser.py │ │ └── main.py │ ├── core/ │ │ ├── __init__.py │ │ ├── builtin_variables.py │ │ ├── config.py │ │ ├── context.py │ │ ├── exceptions.py │ │ ├── issue.py │ │ ├── manager.py │ │ ├── plugins_manager.py │ │ ├── regexp.py │ │ ├── severity.py │ │ ├── sre_parse/ │ │ │ ├── __init__.py │ │ │ ├── sre_constants.py │ │ │ └── sre_parse.py │ │ ├── utils.py │ │ └── variable.py │ ├── directives/ │ │ ├── __init__.py │ │ ├── block.py │ │ └── directive.py │ ├── formatters/ │ │ ├── __init__.py │ │ ├── _jinja.py │ │ ├── base.py │ │ ├── console.py │ │ ├── json.py │ │ ├── templates/ │ │ │ ├── console.j2 │ │ │ └── text.j2 │ │ └── text.py │ ├── parser/ │ │ ├── __init__.py │ │ ├── nginx_parser.py │ │ └── raw_parser.py │ ├── plugins/ │ │ ├── __init__.py │ │ ├── add_header_multiline.py │ │ ├── add_header_redefinition.py │ │ ├── alias_traversal.py │ │ ├── host_spoofing.py │ │ ├── http_splitting.py │ │ ├── origins.py │ │ ├── plugin.py │ │ ├── ssrf.py │ │ └── valid_referers.py │ └── utils/ │ ├── __init__.py │ └── text.py ├── requirements.dev.txt ├── requirements.txt ├── rpm/ │ ├── gixy.spec │ └── python-argparse.spec ├── setup.py ├── tests/ │ ├── __init__.py │ ├── 
asserts.py │ ├── core/ │ │ ├── __init__.py │ │ ├── test_context.py │ │ ├── test_regexp.py │ │ └── test_variable.py │ ├── directives/ │ │ ├── __init__.py │ │ ├── test_block.py │ │ └── test_directive.py │ ├── parser/ │ │ ├── __init__.py │ │ ├── test_nginx_parser.py │ │ └── test_raw_parser.py │ ├── plugins/ │ │ ├── __init__.py │ │ ├── simply/ │ │ │ ├── add_header_multiline/ │ │ │ │ ├── add_header.conf │ │ │ │ ├── add_header_fp.conf │ │ │ │ ├── config.json │ │ │ │ ├── more_set_headers.conf │ │ │ │ ├── more_set_headers_fp.conf │ │ │ │ ├── more_set_headers_multiple.conf │ │ │ │ ├── more_set_headers_replace.conf │ │ │ │ ├── more_set_headers_replace_fp.conf │ │ │ │ ├── more_set_headers_status_fp.conf │ │ │ │ └── more_set_headers_type_fp.conf │ │ │ ├── add_header_redefinition/ │ │ │ │ ├── config.json │ │ │ │ ├── duplicate_fp.conf │ │ │ │ ├── if_replaces.conf │ │ │ │ ├── location_replaces.conf │ │ │ │ ├── nested_block.conf │ │ │ │ ├── non_block_fp.conf │ │ │ │ ├── not_secure_both_fp.conf │ │ │ │ ├── not_secure_outer_fp.conf │ │ │ │ └── step_replaces.conf │ │ │ ├── alias_traversal/ │ │ │ │ ├── config.json │ │ │ │ ├── nested.conf │ │ │ │ ├── nested_fp.conf │ │ │ │ ├── not_slashed_alias.conf │ │ │ │ ├── not_slashed_alias_fp.conf │ │ │ │ ├── simple.conf │ │ │ │ ├── simple_fp.conf │ │ │ │ ├── slashed_alias.conf │ │ │ │ └── slashed_alias_fp.conf │ │ │ ├── host_spoofing/ │ │ │ │ ├── config.json │ │ │ │ ├── http_fp.conf │ │ │ │ ├── http_host.conf │ │ │ │ ├── http_host_diff_case.conf │ │ │ │ └── some_arg.conf │ │ │ ├── http_splitting/ │ │ │ │ ├── add_header_uri.conf │ │ │ │ ├── config.json │ │ │ │ ├── dont_report_not_resolved_var_fp.conf │ │ │ │ ├── proxy_from_location_var.conf │ │ │ │ ├── proxy_from_location_var_var.conf │ │ │ │ ├── proxy_from_location_var_var_fp.conf │ │ │ │ ├── proxy_from_location_var_var_var.conf │ │ │ │ ├── proxy_pass_cr_fp.conf │ │ │ │ ├── proxy_pass_ducument_uri.conf │ │ │ │ ├── proxy_pass_lf.conf │ │ │ │ ├── proxy_set_header_ducument_uri.conf │ │ │ │ ├── 
return_403_fp.conf │ │ │ │ ├── return_request_uri_fp.conf │ │ │ │ ├── rewrite_extract_fp.conf │ │ │ │ ├── rewrite_uri.conf │ │ │ │ └── rewrite_uri_after_var.conf │ │ │ ├── origins/ │ │ │ │ ├── config.json │ │ │ │ ├── metrika.conf │ │ │ │ ├── origin.conf │ │ │ │ ├── origin_fp.conf │ │ │ │ ├── origin_https.conf │ │ │ │ ├── origin_https_fp.conf │ │ │ │ ├── origin_w_slash_anchored_fp.conf │ │ │ │ ├── origin_w_slash_fp.conf │ │ │ │ ├── origin_wo_slash.conf │ │ │ │ ├── referer.conf │ │ │ │ ├── referer_fp.conf │ │ │ │ ├── referer_subdomain.conf │ │ │ │ ├── referer_subdomain_fp.conf │ │ │ │ ├── structure_dot.conf │ │ │ │ ├── structure_fp.conf │ │ │ │ ├── structure_prefix.conf │ │ │ │ ├── structure_suffix.conf │ │ │ │ └── webvisor.conf │ │ │ ├── ssrf/ │ │ │ │ ├── config.json │ │ │ │ ├── have_internal_fp.conf │ │ │ │ ├── host_w_const_start.conf │ │ │ │ ├── host_w_const_start_arg.conf │ │ │ │ ├── not_host_var_fp.conf │ │ │ │ ├── request_uri_fp.conf │ │ │ │ ├── request_uri_var_fp.conf │ │ │ │ ├── scheme_var.conf │ │ │ │ ├── single_var.conf │ │ │ │ ├── used_arg.conf │ │ │ │ ├── vars_from_loc.conf │ │ │ │ └── with_const_scheme.conf │ │ │ └── valid_referers/ │ │ │ ├── config.json │ │ │ ├── none_first.conf │ │ │ ├── none_last.conf │ │ │ ├── none_middle.conf │ │ │ └── wo_none_fp.conf │ │ └── test_simply.py │ └── utils.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Byte-compiled / optimized / DLL files **/__pycache__/ **/*.py[cod] # C extensions ***/*.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml cover # Translations 
**/*.mo **/*.pot # PyBuilder target/ venv/ venv3/ .idea/ # 100% unnecessary for docker image .* *.md docs rpm Dockerfile ================================================ FILE: .editorconfig ================================================ root = true [*] end_of_line = lf insert_final_newline = true [*.{py,j2}] charset = utf-8 [*.py] indent_style = space indent_size = 4 [Makefile] indent_style = tab [.travis.yml] indent_style = space indent_size = 2 ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec !rpm/*.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover cover # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ venv/ venv3/ .idea/ ================================================ FILE: .travis.yml ================================================ language: python dist: xenial sudo: false python: - "2.7" - "3.5" - "3.6" - "3.7" - "pypy" - "pypy3" install: - pip install -r requirements.txt - pip install -r requirements.dev.txt script: - nosetests --with-coverage --cover-package gixy -v - if [[ $TRAVIS_PYTHON_VERSION != '2.6' ]]; then flake8 --max-line-length=120 setup.py gixy; fi ================================================ FILE: AUTHORS ================================================ The following authors have created the source code of "Gixy" published and distributed by YANDEX LLC as the owner: Andrew Krasichkov 
================================================ FILE: CONTRIBUTING.md ================================================ # Notice to external contributors ## General info Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**”). You can find the current version of the CLA here: 1) https://yandex.ru/legal/cla/?lang=en (in English) and 2) https://yandex.ru/legal/cla/?lang=ru (in Russian). By adopting the CLA, you state the following: * You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, * You have read the terms and conditions of the CLA and agree with them in full, * You are legally able to provide and license your contributions as stated, * We may use your contributions for our open source projects and for any of our other projects too, * We rely on your assurances concerning the rights of third parties in relation to your contributions. If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you have already read and adopted our CLA, and we may freely merge your contributions with our corresponding open source project and use them further in accordance with the terms and conditions of the CLA. ## Provide contributions If you have already adopted the terms and conditions of the CLA, you are able to provide your contributions. When you submit your pull request, please add the following information into it: ` I hereby agree to the terms of the CLA available at: [link]. ` Replace the bracketed text as follows: * [link] is the link to the current version of the CLA (you may add here a link https://yandex.ru/legal/cla/?lang=en (in English) or a link https://yandex.ru/legal/cla/?lang=ru (in Russian)). It is enough to provide us with such notification once. 
## Other questions If you have any questions, please mail us at opensource@yandex-team.ru. ================================================ FILE: Dockerfile ================================================ FROM python:alpine ADD . /src WORKDIR /src RUN python3 setup.py install ENTRYPOINT ["gixy"] ================================================ FILE: LICENSE ================================================ (C) YANDEX LLC, 2017 Mozilla Public License Version 2.0 ================================== 1. Definitions -------------- 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means (a) that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or (b) that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. 
"Modifications" means any of the following: (a) any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or (b) any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions -------------------------------- 2.1. 
Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and (b) under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: (a) for any code that a Contributor has removed from Covered Software; or (b) for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or (c) under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. 
Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities ------------------- 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: (a) such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and (b) You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. 
If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation --------------------------------------------------- If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. 
Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination -------------- 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. ************************************************************************ * * * 6. 
Disclaimer of Warranty * * ------------------------- * * * * Covered Software is provided under this License on an "as is" * * basis, without warranty of any kind, either expressed, implied, or * * statutory, including, without limitation, warranties that the * * Covered Software is free of defects, merchantable, fit for a * * particular purpose or non-infringing. The entire risk as to the * * quality and performance of the Covered Software is with You. * * Should any Covered Software prove defective in any respect, You * * (not any Contributor) assume the cost of any necessary servicing, * * repair, or correction. This disclaimer of warranty constitutes an * * essential part of this License. No use of any Covered Software is * * authorized under this License except under this disclaimer. * * * ************************************************************************ ************************************************************************ * * * 7. Limitation of Liability * * -------------------------- * * * * Under no circumstances and under no legal theory, whether tort * * (including negligence), contract, or otherwise, shall any * * Contributor, or anyone who distributes Covered Software as * * permitted above, be liable to You for any direct, indirect, * * special, incidental, or consequential damages of any character * * including, without limitation, damages for lost profits, loss of * * goodwill, work stoppage, computer failure or malfunction, or any * * and all other commercial damages or losses, even if such party * * shall have been informed of the possibility of such damages. This * * limitation of liability shall not apply to liability for death or * * personal injury resulting from such party's negligence to the * * extent applicable law prohibits such limitation. Some * * jurisdictions do not allow the exclusion or limitation of * * incidental or consequential damages, so this exclusion and * * limitation may not apply to You. 
* * * ************************************************************************ 8. Litigation ------------- Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous ---------------- This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License --------------------------- 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. 
Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. ================================================ FILE: MANIFEST.in ================================================ include gixy/formatters/templates/* graft tests ================================================ FILE: Makefile ================================================ .PHONY: all build publish all: build publish build: python setup.py bdist_wheel --universal sdist publish: twine upload dist/gixy-`grep -oP "(?<=version\s=\s['\"])[^'\"]*(?=['\"])" gixy/__init__.py`* ================================================ FILE: README.RU.md ================================================ GIXY ==== [![Mozilla Public License 2.0](https://img.shields.io/github/license/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/blob/master/LICENSE) [![Build Status](https://img.shields.io/travis/yandex/gixy.svg?style=flat-square)](https://travis-ci.org/yandex/gixy) [![Your feedback is greatly appreciated](https://img.shields.io/maintenance/yes/2018.svg?style=flat-square)](https://github.com/yandex/gixy/issues/new) [![GitHub issues](https://img.shields.io/github/issues/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/issues) [![GitHub pull requests](https://img.shields.io/github/issues-pr/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/pulls) # Overview Gixy — это утилита для анализа конфигурации Nginx. Большей частью служит для обнаружения проблем безопасности, но может искать и иные ошибки. 
Официально поддерживаются версии Python 2.7, 3.5, 3.6 и 3.7   # Что умеет На текущий момент Gixy способна обнаружить: * [[ssrf] Server Side Request Forgery](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/ssrf.md) * [[http_splitting] HTTP Splitting](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/httpsplitting.md) * [[origins] Проблемы валидации referrer/origin](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/origins.md) * [[add_header_redefinition] Переопределение "вышестоящих" заголовков ответа директивой "add_header"](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/addheaderredefinition.md) * [[host_spoofing] Подделка заголовка запроса Host](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/hostspoofing.md) * [[valid_referers] none in valid_referers](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/validreferers.md) * [[add_header_multiline] Многострочные заголовки ответа](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/addheadermultiline.md) * [[alias_traversal] Path traversal при использовании alias](https://github.com/yandex/gixy/blob/master/docs/ru/plugins/aliastraversal.md) Проблемы, которым Gixy только учится, можно найти в [Issues с меткой "new plugin"](https://github.com/yandex/gixy/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+plugin%22) # Установка Наиболее простой способ установки Gixy - воспользоваться pip для установки из [PyPI](https://pypi.python.org/pypi/gixy): ```bash pip install gixy ``` # Использование После установки должна стать доступна консольная утилита `gixy`. По умолчанию Gixy ищет конфигурацию по стандартному пути `/etc/nginx/nginx.conf`, однако вы можете указать специфичное расположение: ``` $ gixy /etc/nginx/nginx.conf ==================== Results =================== Problem: [http_splitting] Possible HTTP-Splitting vulnerability. Description: Using variables that can contain "\n" may lead to http injection. 
Additional info: https://github.com/yandex/gixy/wiki/ru/httpsplitting Reason: At least variable "$action" can contain "\n" Pseudo config: include /etc/nginx/sites/default.conf; server { location ~ /v1/((?<action>[^.]*)\.json)?$ { add_header X-Action $action; } } ==================== Summary =================== Total issues: Unspecified: 0 Low: 0 Medium: 0 High: 1 ``` Gixy умеет обрабатывать директиву `include` и попробует максимально корректно обработать все зависимости, если что-то пошло не так, можно попробовать запустить `gixy` с флагом `-d` для вывода дополнительной информации. Все доступные опции: ``` $ gixy -h usage: gixy [-h] [-c CONFIG_FILE] [--write-config CONFIG_OUTPUT_PATH] [-v] [-l] [-f {console,text,json}] [-o OUTPUT_FILE] [-d] [--tests TESTS] [--skips SKIPS] [--disable-includes] [--origins-domains domains] [--origins-https-only https_only] [--add-header-redefinition-headers headers] [nginx.conf] Gixy - a Nginx configuration [sec]analyzer positional arguments: nginx.conf Path to nginx.conf, e.g. 
/etc/nginx/nginx.conf optional arguments: -h, --help show this help message and exit -c CONFIG_FILE, --config CONFIG_FILE config file path --write-config CONFIG_OUTPUT_PATH takes the current command line args and writes them out to a config file at the given path, then exits -v, --version show program's version number and exit -l, --level Report issues of a given severity level or higher (-l for LOW, -ll for MEDIUM, -lll for HIGH) -f {console,text,json}, --format {console,text,json} Specify output format -o OUTPUT_FILE, --output OUTPUT_FILE Write report to file -d, --debug Turn on debug mode --tests TESTS Comma-separated list of tests to run --skips SKIPS Comma-separated list of tests to skip --disable-includes Disable "include" directive processing plugins options: --origins-domains domains Default: * --origins-https-only https_only Default: False --add-header-redefinition-headers headers Default: content-security-policy,x-xss- protection,x-frame-options,x-content-type- options,strict-transport-security,cache-control available plugins: host_spoofing add_header_multiline http_splitting valid_referers origins add_header_redefinition ssrf ``` # Contributing Contributions to Gixy are always welcome! You can help us in different ways: * Open an issue with suggestions for improvements and errors you're facing; * Fork this repository and submit a pull request; * Improve the documentation. Code guidelines: * Python code style should follow [pep8](https://www.python.org/dev/peps/pep-0008/) standards whenever possible; * Pull requests with new plugins must have unit tests for it. 
================================================ FILE: README.md ================================================ GIXY ==== [![Mozilla Public License 2.0](https://img.shields.io/github/license/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/blob/master/LICENSE) [![Build Status](https://img.shields.io/travis/yandex/gixy.svg?style=flat-square)](https://travis-ci.org/yandex/gixy) [![Your feedback is greatly appreciated](https://img.shields.io/maintenance/yes/2019.svg?style=flat-square)](https://github.com/yandex/gixy/issues/new) [![GitHub issues](https://img.shields.io/github/issues/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/issues) [![GitHub pull requests](https://img.shields.io/github/issues-pr/yandex/gixy.svg?style=flat-square)](https://github.com/yandex/gixy/pulls) # Overview Gixy is a tool to analyze Nginx configuration. The main goal of Gixy is to prevent security misconfiguration and automate flaw detection. Currently supported Python versions are 2.7, 3.5, 3.6 and 3.7. Disclaimer: Gixy is well tested only on GNU/Linux, other OSs may have some issues. 
# What it can do Right now Gixy can find: * [[ssrf] Server Side Request Forgery](https://github.com/yandex/gixy/blob/master/docs/en/plugins/ssrf.md) * [[http_splitting] HTTP Splitting](https://github.com/yandex/gixy/blob/master/docs/en/plugins/httpsplitting.md) * [[origins] Problems with referrer/origin validation](https://github.com/yandex/gixy/blob/master/docs/en/plugins/origins.md) * [[add_header_redefinition] Redefining of response headers by "add_header" directive](https://github.com/yandex/gixy/blob/master/docs/en/plugins/addheaderredefinition.md) * [[host_spoofing] Request's Host header forgery](https://github.com/yandex/gixy/blob/master/docs/en/plugins/hostspoofing.md) * [[valid_referers] none in valid_referers](https://github.com/yandex/gixy/blob/master/docs/en/plugins/validreferers.md) * [[add_header_multiline] Multiline response headers](https://github.com/yandex/gixy/blob/master/docs/en/plugins/addheadermultiline.md) * [[alias_traversal] Path traversal via misconfigured alias](https://github.com/yandex/gixy/blob/master/docs/en/plugins/aliastraversal.md) You can find things that Gixy is learning to detect at [Issues labeled with "new plugin"](https://github.com/yandex/gixy/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+plugin%22) # Installation Gixy is distributed on [PyPI](https://pypi.python.org/pypi/gixy). The best way to install it is with pip: ```bash pip install gixy ``` Run Gixy and check results: ```bash gixy ``` # Usage By default Gixy will try to analyze Nginx configuration placed in `/etc/nginx/nginx.conf`. But you can always specify needed path: ``` $ gixy /etc/nginx/nginx.conf ==================== Results =================== Problem: [http_splitting] Possible HTTP-Splitting vulnerability. Description: Using variables that can contain "\n" may lead to http injection. 
Additional info: https://github.com/yandex/gixy/blob/master/docs/ru/plugins/httpsplitting.md Reason: At least variable "$action" can contain "\n" Pseudo config: include /etc/nginx/sites/default.conf; server { location ~ /v1/((?<action>[^.]*)\.json)?$ { add_header X-Action $action; } } ==================== Summary =================== Total issues: Unspecified: 0 Low: 0 Medium: 0 High: 1 ``` Or skip some tests: ``` $ gixy --skips http_splitting /etc/nginx/nginx.conf ==================== Results =================== No issues found. ==================== Summary =================== Total issues: Unspecified: 0 Low: 0 Medium: 0 High: 0 ``` Or something else, you can find all other `gixy` arguments with the help command: `gixy --help` ## Docker usage Gixy is available as a Docker image [from the Docker hub](https://hub.docker.com/r/yandex/gixy/). To use it, mount the configuration that you want to analyse as a volume and provide the path to the configuration file when running the Gixy image. ``` $ docker run --rm -v `pwd`/nginx.conf:/etc/nginx/conf/nginx.conf yandex/gixy /etc/nginx/conf/nginx.conf ``` If you have an image that already contains your nginx configuration, you can share the configuration with the Gixy container as a volume. ``` $ docker run --rm --name nginx -d -v /etc/nginx nginx:alpine f68f2833e986ae69c0a5375f9980dc7a70684a6c233a9535c2a837189f14e905 $ docker run --rm --volumes-from nginx yandex/gixy /etc/nginx/nginx.conf ==================== Results =================== No issues found. ==================== Summary =================== Total issues: Unspecified: 0 Low: 0 Medium: 0 High: 0 ``` # Contributing Contributions to Gixy are always welcome! You can help us in different ways: * Open an issue with suggestions for improvements and errors you're facing; * Fork this repository and submit a pull request; * Improve the documentation. 
Code guidelines: * Python code style should follow [pep8](https://www.python.org/dev/peps/pep-0008/) standards whenever possible; * Pull requests with new plugins must have unit tests for it. ================================================ FILE: docs/en/plugins/addheadermultiline.md ================================================ # [add_header_multiline] Multiline response headers You should avoid using multiline response headers, because: * they are deprecated (see [RFC 7230](https://tools.ietf.org/html/rfc7230#section-3.2.4)); * some HTTP-clients and web browser never supported them (e.g. IE/Edge/Nginx). ## How can I find it? Misconfiguration example: ```nginx # http://nginx.org/en/docs/http/ngx_http_headers_module.html#add_header add_header Content-Security-Policy " default-src: 'none'; script-src data: https://yastatic.net; style-src data: https://yastatic.net; img-src data: https://yastatic.net; font-src data: https://yastatic.net;"; # https://www.nginx.com/resources/wiki/modules/headers_more/ more_set_headers -t 'text/html text/plain' 'X-Foo: Bar multiline'; ``` ## What can I do? The only solution is to never use multiline response headers. ================================================ FILE: docs/en/plugins/addheaderredefinition.md ================================================ # [add_header_redefinition] Redefining of response headers by "add_header" directive Unfortunately, many people don't know how the inheritance of directives works. Most often this leads to misuse of the `add_header` directive while trying to add a new response header on the nested level. This feature is mentioned in Nginx [docs](http://nginx.org/en/docs/http/ngx_http_headers_module.html#add_header): > There could be several `add_header` directives. These directives are inherited from the previous level if and only if there are no `add_header` directives defined on the current level. 
The logic is quite simple: if you set headers at one level (for example, in `server` section) and then at a lower level (let's say `location`) you set some other headers, then the first headers will be discarded. It's easy to check: - Configuration: ```nginx server { listen 80; add_header X-Frame-Options "DENY" always; location / { return 200 "index"; } location /new-headers { # Add special cache control add_header Cache-Control "no-cache, no-store, max-age=0, must-revalidate" always; add_header Pragma "no-cache" always; return 200 "new-headers"; } } ``` - Request to location `/` (`X-Frame-Options` header is in server response): ```http GET / HTTP/1.0 HTTP/1.1 200 OK Server: nginx/1.10.2 Date: Mon, 09 Jan 2017 19:28:33 GMT Content-Type: application/octet-stream Content-Length: 5 Connection: close X-Frame-Options: DENY index ``` - Request to location `/new-headers` (headers `Cache-Control` and `Pragma` are present, but there's no `X-Frame-Options`): ```http GET /new-headers HTTP/1.0 HTTP/1.1 200 OK Server: nginx/1.10.2 Date: Mon, 09 Jan 2017 19:29:46 GMT Content-Type: application/octet-stream Content-Length: 11 Connection: close Cache-Control: no-cache, no-store, max-age=0, must-revalidate Pragma: no-cache new-headers ``` ## What can I do? There are several ways to solve this problem: - duplicate important headers; - set all headers at one level (`server` section is a good choice) - use [ngx_headers_more](https://www.nginx.com/resources/wiki/modules/headers_more/) module. ================================================ FILE: docs/en/plugins/aliastraversal.md ================================================ # [alias_traversal] Path traversal via misconfigured alias The [alias](https://nginx.ru/en/docs/http/ngx_http_core_module.html#alias) directive is used to replace the path of the specified location. 
For example, with the following configuration: ```nginx location /i/ { alias /data/w3/images/; } ``` on request of `/i/top.gif`, the file `/data/w3/images/top.gif` will be sent. But, if the location doesn't end with a directory separator (i.e. `/`): ```nginx location /i { alias /data/w3/images/; } ``` on request of `/i../app/config.py`, the file `/data/w3/app/config.py` will be sent. In other words, the incorrect configuration of `alias` could allow an attacker to read files stored outside the target folder. ## What can I do? It's pretty simple: - you must find all the `alias` directives; - make sure that the parent prefixed location ends with a directory separator. - or if you want to map a single file make sure the location starts with a `=`, e.g. `=/i.gif` instead of `/i.gif`. ================================================ FILE: docs/en/plugins/hostspoofing.md ================================================ # [host_spoofing] Request's Host header forgery Often, an application located behind Nginx needs a correct `Host` header for URL generation (redirects, resources, links in emails etc.). Spoofing of this header may lead to a variety of problems, from phishing to SSRF. > Notice: your application may also use the `X-Forwarded-Host` request header for this functionality. > In this case you have to ensure the header is set correctly; ## How can I find it? Most of the time it's a result of using `$http_host` variable instead of `$host`. And they are quite different: * `$host` - host in this order of precedence: host name from the request line, or host name from the “Host” request header field, or the server name matching a request; * `$http_host` - "Host" request header. Config sample: ```nginx location @app { proxy_set_header Host $http_host; # Other proxy params proxy_pass http://backend; } ``` ## What can I do? Luckily, all is quite obvious: * list all the correct server names in `server_name` directive; * always use `$host` instead of `$http_host`. 
## Additional info * [Host of Troubles Vulnerabilities](https://hostoftroubles.com/) * [Practical HTTP Host header attacks](http://www.skeletonscribe.net/2013/05/practical-http-host-header-attacks.html) ================================================ FILE: docs/en/plugins/httpsplitting.md ================================================ # [http_splitting] HTTP Splitting HTTP Splitting - an attack that uses improper input validation. It usually targets web application located behind Nginx (HTTP Request Splitting) or its users (HTTP Response Splitting). Vulnerability is created when an attacker can insert newline character `\n` or `\r` into request or into response, created by Nginx. ## How can I find it? You should always pay attention to: - variables that are used in directives, responsible for the request creation (for they may contain CRLF), e.g. `rewrite`, `return`, `add_header`, `proxy_set_header` or `proxy_pass`; - `$uri` and `$document_uri` variables, and in which directives they are used, because these variables contain decoded URL-encoded value; - variables, that are selected from an exclusive range, e.g. `(?P<variable>[^.]+)`. An example of a configuration that contains a variable selected from an exclusive range: ```nginx server { listen 80 default; location ~ /v1/((?<action>[^.]*)\.json)?$ { add_header X-Action $action; return 200 "OK"; } } ``` Exploitation: ```http GET /v1/see%20below%0d%0ax-crlf-header:injected.json HTTP/1.0 Host: localhost HTTP/1.1 200 OK Server: nginx/1.11.10 Date: Mon, 13 Mar 2017 21:21:29 GMT Content-Type: application/octet-stream Content-Length: 2 Connection: close X-Action: see below x-crlf-header:injected OK ``` As you can see, an attacker could add `x-crlf-header: injected` response header. 
This was possible because: - `add_header` doesn't encode or validate input value on the assumption that the author knows about the consequences; - the path value is normalized before location processing; - `$action` value was given from a regexp with an exclusive range: `[^.]*`; - as the result, `$action` value is equal to `see below\r\nx-crlf-header:injected` and on its use the response header was added. ## What can I do? - try to use safe variables, e.g. `$request_uri` instead of `$uri`; - forbid the use of the new line symbol in the exclusive range by using `/some/(?<action>[^/\s]+)` instead of `/some/(?<action>[^/]+)` - it could be a good idea to validate `$uri` (only if you're sure you know what you are getting into). ================================================ FILE: docs/en/plugins/origins.md ================================================ # [origins] Problems with referrer/origin validation It's not unusual to use regex for `Referer` or `Origin` headers validation. Often it is needed for setting the `X-Frame-Options` header (ClickJacking protection) or Cross-Origin Resource Sharing. The most common errors with this configuration are: - regex errors; - allow 3rd-party origins. > Notice: by default Gixy doesn't check regexes for 3rd-party origins matching. > You can pass a list of trusted domains by using the option `--origins-domains example.com,foo.bar` ## How can I find it? "Eazy"-breezy: - you have to find all the `if` directives that are in charge of `$http_origin` or `$http_referer` check; - make sure your regexes are a-ok. Misconfiguration example: ```nginx if ($http_origin ~* ((^https://www\.yandex\.ru)|(^https://ya\.ru)$)) { add_header 'Access-Control-Allow-Origin' "$http_origin"; add_header 'Access-Control-Allow-Credentials' 'true'; } ``` TODO(buglloc): cover typical regex-writing problems TODO(buglloc): Regex Ninja? ## What can I do? 
- fix your regex or toss it away :) - if you use regex validation for `Referer` request header, then, possibly (not 100%), you could use [ngx_http_referer_module](http://nginx.org/en/docs/http/ngx_http_referer_module.html); - sometimes it is much better to use the `map` directive without any regex at all. ================================================ FILE: docs/en/plugins/ssrf.md ================================================ # [ssrf] Server Side Request Forgery Server Side Request Forgery - attack that forces a server to perform arbitrary requests (from Nginx in our case). It's possible when an attacker controls the address of a proxied server (second argument of the `proxy_pass` directive). ## How can I find it? There are two types of errors that make a server vulnerable: - lack of the [internal](http://nginx.org/en/docs/http/ngx_http_core_module.html#internal) directive. It is used to point out a location that can be used for internal requests only; - unsafe internal redirection. ### Lack of the internal directive Classical misconfiguration, based on lack of the `internal` directive, that makes SSRF possible: ```nginx location ~ /proxy/(.*)/(.*)/(.*)$ { proxy_pass $1://$2/$3; } ``` An attacker has complete control over the proxied address, that makes sending requests on behalf of Nginx possible. ### Unsafe internal redirection Let's say you have internal location in your config and that location uses some request data as proxied server's address. 
E.g.: ```nginx location ~* ^/internal-proxy/(?<proxy_proto>https?)/(?<proxy_host>.*?)/(?<proxy_path>.*)$ { internal; proxy_pass $proxy_proto://$proxy_host/$proxy_path ; proxy_set_header Host $proxy_host; } ``` According to Nginx docs, internal requests are the following: > - requests redirected by the **error_page**, index, random_index, and **try_files** directives; > - requests redirected by the “X-Accel-Redirect” response header field from an upstream server; > - subrequests formed by the “include virtual” command of the ngx_http_ssi_module module and by the ngx_http_addition_module module directives; > - requests changed by the **rewrite** directive. Accordingly, any unsafe rewrite allows an attacker to make an internal request and control a proxied server's address. Misconfiguration example: ```nginx rewrite ^/(.*)/some$ /$1/ last; location ~* ^/internal-proxy/(?<proxy_proto>https?)/(?<proxy_host>.*?)/(?<proxy_path>.*)$ { internal; proxy_pass $proxy_proto://$proxy_host/$proxy_path ; proxy_set_header Host $proxy_host; } ``` ## What can I do? There are several rules you better follow when writing such configurations: - use only "internal locations" for proxying; - if possible, forbid user data transmission; - protect proxied server's address: * if the quantity of proxied hosts is limited (when you have S3 or something similar), you better hardcode them and choose them with `map` or do it some other way; * if you can't list all possible hosts to proxy, you should sign the address. ================================================ FILE: docs/en/plugins/validreferers.md ================================================ # [valid_referers] none in valid_referers Module [ngx_http_referer_module](http://nginx.org/en/docs/http/ngx_http_referer_module.html) allows blocking access to a service for requests with a wrong `Referer` value. It's often used for setting `X-Frame-Options` header (ClickJacking protection), but there may be other cases. 
Typical problems with this module's config: * use of `server_names` with bad server name (`server_name` directive); * too broad and/or bad regexes; * use of `none`. > Notice: at the moment, Gixy can only detect the use of `none` as a valid referer. ## Why is none bad? According to [docs](http://nginx.org/en/docs/http/ngx_http_referer_module.html#valid_referers): > `none` - the “Referer” field is missing in the request header; Still, it's important to remember that any resource can make user's browser to make a request without a `Referer` request header. E.g.: - in case of redirect from HTTPS to HTTP; - by setting up the [Referrer Policy](https://www.w3.org/TR/referrer-policy/); - a request with opaque origin, `data:` scheme, for example. So, by using `none` as a valid referer, you nullify any attempts at referer validation. ================================================ FILE: docs/ru/plugins/addheadermultiline.md ================================================ # [add_header_multiline] Многострочные заголовки ответа Многострочных заголовков ответа стоит избегать по нескольким причинам: * они признаны устаревшими (см. [RFC 7230](https://tools.ietf.org/html/rfc7230#section-3.2.4)); * они никогда не поддерживались многими HTTP-клиентами и браузерами. Например, IE/Edge/Nginx. ## Как самостоятельно обнаружить? Пример плохой конфигурации: ```nginx # http://nginx.org/ru/docs/http/ngx_http_headers_module.html#add_header add_header Content-Security-Policy " default-src: 'none'; script-src data: https://yastatic.net; style-src data: https://yastatic.net; img-src data: https://yastatic.net; font-src data: https://yastatic.net;"; # https://www.nginx.com/resources/wiki/modules/headers_more/ more_set_headers -t 'text/html text/plain' 'X-Foo: Bar multiline'; ``` ## Что делать? Единственный выход - отказ от многострочных заголовков ответа. 
================================================ FILE: docs/ru/plugins/addheaderredefinition.md ================================================ # [add_header_redefinition] Переопределение "вышестоящих" заголовков ответа директивой "add_header" К сожалению, многие считают что с помощью директивы `add_header` можно произвольно доопределять заголовки ответа. Это не так, о чем сказано в [документации](http://nginx.org/ru/docs/http/ngx_http_headers_module.html#add_header) к Nginx: > Директив `add_header` может быть несколько. Директивы наследуются с предыдущего уровня при условии, что на данном уровне не описаны свои директивы `add_header`. К слову, так работает наследование большинства директив в nginx'е. Если вы задаёте что-то на каком-то уровне конфигурации (например, в локейшене), то наследования с предыдущих уровней (например, с http секции) - не будет. В этом довольно легко убедится: - Конфигурация: ```nginx server { listen 80; add_header X-Frame-Options "DENY" always; location / { return 200 "index"; } location /new-headers { # Add special cache control add_header Cache-Control "no-cache, no-store, max-age=0, must-revalidate" always; add_header Pragma "no-cache" always; return 200 "new-headers"; } } ``` - Запрос к локейшену `/` (заголовок `X-Frame-Options` есть в ответе сервера): ```http GET / HTTP/1.0 HTTP/1.1 200 OK Server: nginx/1.10.2 Date: Mon, 09 Jan 2017 19:28:33 GMT Content-Type: application/octet-stream Content-Length: 5 Connection: close X-Frame-Options: DENY index ``` - Запрос к локейшену `/new-headers` (есть заголовки `Cache-Control` и `Pragma`, но нет `X-Frame-Options`): ```http GET /new-headers HTTP/1.0 HTTP/1.1 200 OK Server: nginx/1.10.2 Date: Mon, 09 Jan 2017 19:29:46 GMT Content-Type: application/octet-stream Content-Length: 11 Connection: close Cache-Control: no-cache, no-store, max-age=0, must-revalidate Pragma: no-cache new-headers ``` ## Что делать? 
Существует несколько способов решить эту проблему: - продублировать важные заголовки; - устанавливать заголовки на одном уровне, например, в серверной секции; - использовать модуль [ngx_headers_more](https://www.nginx.com/resources/wiki/modules/headers_more/). Каждый из способов имеет свои преимущества и недостатки, какой предпочесть зависит от ваших потребностей. ================================================ FILE: docs/ru/plugins/aliastraversal.md ================================================ # [alias_traversal] Path traversal при использовании alias Директива [alias](https://nginx.ru/ru/docs/http/ngx_http_core_module.html#alias) используется для замены пути указанного локейшена. К примеру, для конфигурации: ```nginx location /i/ { alias /data/w3/images/; } ``` на запрос `/i/top.gif` будет отдан файл `/data/w3/images/top.gif`. Однако, если локейшен не оканчивается разделителем директорий (`/`): ```nginx location /i { alias /data/w3/images/; } ``` то на запрос `/i../app/config.py` будет отдан файл `/data/w3/app/config.py`. Иными словами, не корректная конфигурация `alias` может позволить злоумышленнику прочесть файл за пределами целевой директории. ## Что делать? Все довольно просто: - необходимо найти все директивы `alias`; - убедится что вышестоящий префиксный локейшен оканчивается на `/`. ================================================ FILE: docs/ru/plugins/hostspoofing.md ================================================ # [host_spoofing] Подделка заголовка запроса Host Зачастую, приложению, стоящему за Nginx, необходимо передать корректный заголовок `Host` для корректной генерации различных URL-адресов (редиректы, ресурсы, ссылки в письмах и т.д.). Возможность его подмены злоумышленником может повлечь множестве проблем от фишинговых атак до SSRF, поэтому следует избегать таких ситуаций. > Возможно, ваше приложение так же ориентируется на заголовок запроса `X-Forwarded-Host`. 
> В этом случае вам необходимо самостоятельно позаботится о его корректной установке при проксировании. ## Как самостоятельно обнаружить? Чаще всего эта проблема возникает в результате использования переменной `$http_host` вместо `$host`. Несмотря на их схожесть, они сильно отличаются: * `$host` - хост в порядке приоритета: имя хоста из строки запроса, или имя хоста из заголовка `Host` заголовка запроса, или имя сервера, соответствующего запросу; * `$http_host` - заголовок запроса "Host". Пример такой конфигурации: ```nginx location @app { proxy_set_header Host $http_host; # Other proxy params proxy_pass http://backend; } ``` ## Что делать? К счастью, все довольно очевидно: * перечислить корректные имена сервера в директиве `server_name`; * всегда использовать переменную `$host`, вместо `$http_host`. ## Дополнительная информация * [Host of Troubles Vulnerabilities](https://hostoftroubles.com/) * [Practical HTTP Host header attacks](http://www.skeletonscribe.net/2013/05/practical-http-host-header-attacks.html) ================================================ FILE: docs/ru/plugins/httpsplitting.md ================================================ # [http_splitting] HTTP Splitting HTTP Splitting - уязвимость, возникающая из-за неправильной обработки входных данных. Зачастую может быть для атак на приложение стоящее за Nginx (HTTP Request Splitting) или на клиентов приложения (HTTP Response Splitting). Уязвимость возникает в случае, когда атакующий может внедрить символ перевода строки `\n` или `\r` в запрос или ответ формируемый Nginx. ## Как самостоятельно обнаружить? При анализе конфигурации всега стоит обращать внимание на: - какие переменные используются в директивах, отвечающих за формирование запросов (могут ли они содержать CRLF), например: `rewrite`, `return`, `add_header`, `proxy_set_header` или `proxy_pass`; - используются ли переменные `$uri` и `$document_uri` и если да, то в каких директивах, т.к. 
они гарантированно содержат урлдекодированное значение; - переменные, выделенные из групп с исключающим диапазоном: `(?P<variable>[^.]+)`. Пример плохой конфигурации с переменной, полученной из группы с исключающим диапазоном: ```nginx server { listen 80 default; location ~ /v1/((?<action>[^.]*)\.json)?$ { add_header X-Action $action; return 200 "OK"; } } ``` Пример эксплуатации данной конфигурации: ```http GET /v1/see%20below%0d%0ax-crlf-header:injected.json HTTP/1.0 Host: localhost HTTP/1.1 200 OK Server: nginx/1.11.10 Date: Mon, 13 Mar 2017 21:21:29 GMT Content-Type: application/octet-stream Content-Length: 2 Connection: close X-Action: see below x-crlf-header:injected OK ``` Из примера видно, что злоумышленник смог добавить заголовок ответа `x-crlf-header: injected`. Это случилось благодаря стечению нескольких обстоятельств: - `add_header` не кодирует/валидирует переданные ему значения, считая что автор знает о последствиях; - значение пути нормализуется перед обработкой локейшена; - переменная `$action` была выделена из группы регулярного выражения с исключающим диапазоном: `[^.]*`; - таким образом, значение переменной `$action` равно `see below\r\nx-crlf-header:injected` и при её использовании в формировании ответа добавился заголовок. ## Что делать? - старайтесь использовать более безопасные переменные, например, `$request_uri` вместо `$uri`; - запретите перевод строки в исключающем диапазоне, например, `/some/(?<action>[^/\s]+)` вместо `/some/(?<action>[^/]+)`; - возможно, хорошей идеей будет добавить валидацию `$uri` (только если вы знаете, что делаете). ================================================ FILE: docs/ru/plugins/origins.md ================================================ # [origins] Проблемы валидации referrer/origin Нередко валидация заголовка запроса `Referer` или `Origin` делается при помощи регулярного выражения. 
Зачастую, это необходимо для условного выставления заголовка `X-Frame-Options` (защита от ClickJacking) или реализации Cross-Origin Resource Sharing. 
Наиболее распространенно два класса ошибок конфигурации, которые приводят к этой проблеме: - ошибки в составлении регулярного выражения; - разрешение не доверенных third-party доменов. > По умолчанию Gixy не валидирует регулярные выражение на предмет матчинга third-party доменов, т.к. не знает кому можно верить. Передать список доверенных доменом можно при помощи опции `--origins-domains example.com,foo.bar` ## Как самостоятельно обнаружить? Все довольно "просто": - необходимо найти все директивы `if`, которые делают проверку переменной `$http_origin` или `$http_referer`; - убедится что в регулярном выражении нет проблем. Пример плохой конфигурации: ```nginx if ($http_origin ~* ((^https://www\.yandex\.ru)|(^https://ya\.ru)$)) { add_header 'Access-Control-Allow-Origin' "$http_origin"; add_header 'Access-Control-Allow-Credentials' 'true'; } ``` TODO(buglloc): описать типичные проблемы при составлении регулярных выражений TODO(buglloc): Regex Ninja? ## Что делать? - исправить регулярное выражение или отказаться от него вовсе :) - если вы проверяете заголовок запроса `Referer` то, возможно (имеются противопоказания), лучшим решением было бы воспользоваться модулем [ngx_http_referer_module](http://nginx.org/ru/docs/http/ngx_http_referer_module.html); - если вы проверяете заголовов запроса `Origin` то, зачастую, лучше использовать `map` и отказаться от регулярных выражений. ================================================ FILE: docs/ru/plugins/ssrf.md ================================================ # [ssrf] Server Side Request Forgery Server Side Request Forgery - уязвимость, позволяющая выполнять различного рода запросы от имени веб-приложения (в нашем случае от имени Nginx). Возникает, когда атакующий может контролировать адрес проксируемого сервера (второй аргумент директивы `proxy_pass`). ## Как самостоятельно обнаружить? 
Наиболее распространенно два класса ошибок конфигурации, которые приводят к этой проблеме: - отсутствие директивы [internal](http://nginx.org/ru/docs/http/ngx_http_core_module.html#internal). Её смысл заключается в указании того, что определенный location может использоваться только для внутренних запросов; - небезопасное внутреннее перенаправление. ### Отсутствие директивы internal Классический пример уязвимости типа SSRF в виду отсутствия директивы `internal` выглядит следующим образом: ```nginx location ~ /proxy/(.*)/(.*)/(.*)$ { proxy_pass $1://$2/$3; } ``` Злоумышленник, полностью контролируя адрес проксируемого сервера, может выполнять произвольные запросы от имени Nginx. ### Небезопасное внутреннее перенаправление Подразумевается, что в вашей конфигурации есть internal location, которые использует какие-либо данные из запроса в качестве адреса проксируемого сервера. Например: ```nginx location ~* ^/internal-proxy/(?<proxy_proto>https?)/(?<proxy_host>.*?)/(?<proxy_path>.*)$ { internal; proxy_pass $proxy_proto://$proxy_host/$proxy_path ; proxy_set_header Host $proxy_host; } ``` Согласно документации Nginx внутренними запросами являются: > - запросы, перенаправленные директивами **error_page**, index, random_index и **try_files**; > - запросы, перенаправленные с помощью поля “X-Accel-Redirect” заголовка ответа вышестоящего сервера; > - подзапросы, формируемые командой “include virtual” модуля ngx_http_ssi_module и директивами модуля ngx_http_addition_module; > - запросы, изменённые директивой **rewrite**. Соответственно, любой "неосторожный" реврайт позволит злоумышленнику сделать внутренний запрос и контролировать адрес проксируемого сервера. Пример плохой конфигурации: ```nginx rewrite ^/(.*)/some$ /$1/ last; location ~* ^/internal-proxy/(?<proxy_proto>https?)/(?<proxy_host>.*?)/(?<proxy_path>.*)$ { internal; proxy_pass $proxy_proto://$proxy_host/$proxy_path ; proxy_set_header Host $proxy_host; } ``` ## Что делать? 
Есть несколько правил, которых стоит придерживаться в подобного рода конфигурациях: - использовать только internal location для проксирования; - по возможности запретить передачу пользовательских данных; - обезопасить адрес проксируемого сервера: * если количество проксируемых хостов ограниченно (например, у вас S3), то лучше их захардкодить и выбирать при помощи `map` или иным удобным для вас образом; * если по какой-то причине нет возможности перечислить все возможные хосты для проксирования, его стоит подписать. ================================================ FILE: docs/ru/plugins/validreferers.md ================================================ # [valid_referers] none in valid_referers Модуль [ngx_http_referer_module](http://nginx.org/ru/docs/http/ngx_http_referer_module.html) позволяет блокировать доступ к сервису для запросов с неверными значениями заголовка запроса `Referer`. Зачастую используется для условного выставления заголовка `X-Frame-Options` (защита от ClickJacking), но могут быть и иные случаи. Типичные проблемы при конфигурировании этого модуля: * использование `server_names` при не корректном имени сервера (директива `server_name`); * слишком общие и/или не корректные регулярные выражения; * использование `none`. > На текущий момент, Gixy умеет определять только использование `none` в качестве валидного реферера. ## Чем плох none? Согласно [документации](http://nginx.org/ru/docs/http/ngx_http_referer_module.html#valid_referers): > `none` - поле “Referer” в заголовке запроса отсутствует; Однако, важно помнить, что любой ресурс может заставить браузер пользователя выполнить запрос без заголовка запроса `Referer`, к примеру: - в случае редиректа с HTTPS на HTTP; - указав соответствующую [Referrer Policy](https://www.w3.org/TR/referrer-policy/); - обращение с opaque origin, например, используя схему `data:`. Таким образом, используя `none` в качестве валидного реферера вы сводите на нет любые попытки валидации реферера. 
================================================ FILE: gixy/__init__.py ================================================ # flake8: noqa from gixy.core import severity version = '0.1.21' ================================================ FILE: gixy/cli/__init__.py ================================================ ================================================ FILE: gixy/cli/argparser.py ================================================ # flake8: noqa from configargparse import * from six.moves import StringIO from gixy.core.plugins_manager import PluginsManager # used while parsing args to keep track of where they came from _COMMAND_LINE_SOURCE_KEY = 'command_line' _ENV_VAR_SOURCE_KEY = 'environment_variables' _CONFIG_FILE_SOURCE_KEY = 'config_file' _DEFAULTS_SOURCE_KEY = 'defaults' class GixyConfigParser(DefaultConfigFileParser): def get_syntax_description(self): return '' def parse(self, stream): """Parses the keys + values from a config file.""" items = OrderedDict() prefix = '' for i, line in enumerate(stream): line = line.strip() if not line or line[0] in ['#', ';'] or line.startswith('---'): continue if line[0] == '[': prefix = '%s-' % line[1:-1].replace('_', '-') continue white_space = '\\s*' key = '(?P[^:=;#\s]+?)' value = white_space + '[:=\s]' + white_space + '(?P.+?)' comment = white_space + '(?P\\s[;#].*)?' 
key_only_match = re.match('^' + key + comment + '$', line) if key_only_match: key = key_only_match.group('key') items[key] = 'true' continue key_value_match = re.match('^' + key + value + comment + '$', line) if key_value_match: key = key_value_match.group('key') value = key_value_match.group('value') if value.startswith('[') and value.endswith(']'): # handle special case of lists value = [elem.strip() for elem in value[1:-1].split(',')] items[prefix + key] = value continue raise ConfigFileParserException('Unexpected line %s in %s: %s' % (i, getattr(stream, 'name', 'stream'), line)) return items def serialize(self, items): """Does the inverse of config parsing by taking parsed values and converting them back to a string representing config file contents. """ r = StringIO() for key, value in items.items(): if type(value) == OrderedDict: r.write('\n[%s]\n' % key) r.write(self.serialize(value)) else: value, help = value if help: r.write('; %s\n' % help) r.write('%s = %s\n' % (key, value)) return r.getvalue() class GixyHelpFormatter(HelpFormatter): def format_help(self): manager = PluginsManager() help_message = super(GixyHelpFormatter, self).format_help() if 'plugins options:' in help_message: # Print available blugins _only_ if we prints options for it plugins = '\n'.join('\t' + plugin.__name__ for plugin in manager.plugins_classes) help_message = '{orig}\n\navailable plugins:\n{plugins}\n'.format(orig=help_message, plugins=plugins) return help_message class ArgsParser(ArgumentParser): def get_possible_config_keys(self, action): """This method decides which actions can be set in a config file and what their keys will be. It returns a list of 0 or more config keys that can be used to set the given action's value in a config file. """ keys = [] for arg in action.option_strings: if arg in ['--config', '--write-config', '--version']: continue if any([arg.startswith(2 * c) for c in self.prefix_chars]): keys += [arg[2:], arg] # eg. 
for '--bla' return ['bla', '--bla'] return keys def get_items_for_config_file_output(self, source_to_settings, parsed_namespace): """Converts the given settings back to a dictionary that can be passed to ConfigFormatParser.serialize(..). Args: source_to_settings: the dictionary described in parse_known_args() parsed_namespace: namespace object created within parse_known_args() Returns: an OrderedDict where keys are strings and values are either strings or lists """ config_file_items = OrderedDict() for source, settings in source_to_settings.items(): if source == _COMMAND_LINE_SOURCE_KEY: _, existing_command_line_args = settings[''] for action in self._actions: config_file_keys = self.get_possible_config_keys(action) if config_file_keys and not action.is_positional_arg and \ already_on_command_line(existing_command_line_args, action.option_strings): value = getattr(parsed_namespace, action.dest, None) if value is not None: if type(value) is bool: value = str(value).lower() if ':' in action.dest: section, key = action.dest.split(':', 2) key = key.replace('_', '-') if section not in config_file_items: config_file_items[section] = OrderedDict() config_file_items[section][key] = (value, action.help) else: config_file_items[config_file_keys[0]] = (value, action.help) elif source.startswith(_CONFIG_FILE_SOURCE_KEY): for key, (action, value) in settings.items(): if ':' in action.dest: section, key = action.dest.split(':', 2) key = key.replace('_', '-') if section not in config_file_items: config_file_items[section] = OrderedDict() config_file_items[section][key] = (value, action.help) else: config_file_items[key] = (value, action.help) return config_file_items def create_parser(): return ArgsParser( description='Gixy - a Nginx configuration [sec]analyzer\n\n', formatter_class=GixyHelpFormatter, config_file_parser_class=GixyConfigParser, auto_env_var_prefix='GIXY_', add_env_var_help=False, default_config_files=['/etc/gixy/gixy.cfg', '~/.config/gixy/gixy.conf'], 
args_for_setting_config_path=['-c', '--config'], args_for_writing_out_config_file=['--write-config'], add_config_file_help=False ) ================================================ FILE: gixy/cli/main.py ================================================ import os import sys import logging import copy import gixy from gixy.core.manager import Manager as Gixy from gixy.formatters import get_all as formatters from gixy.core.plugins_manager import PluginsManager from gixy.core.config import Config from gixy.cli.argparser import create_parser from gixy.core.exceptions import InvalidConfiguration LOG = logging.getLogger() def _init_logger(debug=False): LOG.handlers = [] log_level = logging.DEBUG if debug else logging.INFO LOG.setLevel(log_level) handler = logging.StreamHandler(sys.stderr) handler.setFormatter(logging.Formatter('[%(module)s]\t%(levelname)s\t%(message)s')) LOG.addHandler(handler) LOG.debug("logging initialized") def _create_plugin_help(option): if isinstance(option, (tuple, list, set)): default = ','.join(list(option)) else: default = str(option) return 'Default: {0}'.format(default) def _get_cli_parser(): parser = create_parser() parser.add_argument('nginx_files', nargs='*', type=str, default=['/etc/nginx/nginx.conf'], metavar='nginx.conf', help='Path to nginx.conf, e.g. 
/etc/nginx/nginx.conf') parser.add_argument( '-v', '--version', action='version', version='Gixy v{0}'.format(gixy.version)) parser.add_argument( '-l', '--level', dest='level', action='count', default=0, help='Report issues of a given severity level or higher (-l for LOW, -ll for MEDIUM, -lll for HIGH)') default_formatter = 'console' if sys.stdout.isatty() else 'text' available_formatters = formatters().keys() parser.add_argument( '-f', '--format', dest='output_format', choices=available_formatters, default=default_formatter, type=str, help='Specify output format') parser.add_argument( '-o', '--output', dest='output_file', type=str, help='Write report to file') parser.add_argument( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on debug mode') parser.add_argument( '--tests', dest='tests', type=str, help='Comma-separated list of tests to run') parser.add_argument( '--skips', dest='skips', type=str, help='Comma-separated list of tests to skip') parser.add_argument( '--disable-includes', dest='disable_includes', action='store_true', default=False, help='Disable "include" directive processing') group = parser.add_argument_group('plugins options') for plugin_cls in PluginsManager().plugins_classes: name = plugin_cls.__name__ if not plugin_cls.options: continue options = copy.deepcopy(plugin_cls.options) for opt_key, opt_val in options.items(): option_name = '--{plugin}-{key}'.format(plugin=name, key=opt_key).replace('_', '-') dst_name = '{plugin}:{key}'.format(plugin=name, key=opt_key) opt_type = str if isinstance(opt_val, (tuple, list, set)) else type(opt_val) group.add_argument( option_name, metavar=opt_key, dest=dst_name, type=opt_type, help=_create_plugin_help(opt_val) ) return parser def main(): parser = _get_cli_parser() args = parser.parse_args() _init_logger(args.debug) if len(args.nginx_files) == 1 and args.nginx_files[0] != '-': path = os.path.expanduser(args.nginx_files[0]) if not os.path.exists(path): sys.stderr.write('File 
{path!r} was not found.\nPlease specify correct path to configuration.\n'.format( path=path)) sys.exit(1) try: severity = gixy.severity.ALL[args.level] except IndexError: sys.stderr.write('Too high level filtering. Maximum level: -{0}\n'.format('l' * (len(gixy.severity.ALL) - 1))) sys.exit(1) if args.tests: tests = [x.strip() for x in args.tests.split(',')] else: tests = None if args.skips: skips = [x.strip() for x in args.skips.split(',')] else: skips = None config = Config( severity=severity, output_format=args.output_format, output_file=args.output_file, plugins=tests, skips=skips, allow_includes=not args.disable_includes ) for plugin_cls in PluginsManager().plugins_classes: name = plugin_cls.__name__ options = copy.deepcopy(plugin_cls.options) for opt_key, opt_val in options.items(): option_name = '{name}:{key}'.format(name=name, key=opt_key) if option_name not in args: continue val = getattr(args, option_name) if val is None: continue if isinstance(opt_val, tuple): val = tuple([x.strip() for x in val.split(',')]) elif isinstance(opt_val, set): val = set([x.strip() for x in val.split(',')]) elif isinstance(opt_val, list): val = [x.strip() for x in val.split(',')] options[opt_key] = val config.set_for(name, options) formatter = formatters()[config.output_format]() failed = False for input_path in args.nginx_files: path = os.path.abspath(os.path.expanduser(input_path)) if not os.path.exists(path): LOG.error('File %s was not found', path) continue with Gixy(config=config) as yoda: try: if path == '-': with os.fdopen(sys.stdin.fileno(), 'rb') as fdata: yoda.audit('', fdata, is_stdin=True) else: with open(path, mode='rb') as fdata: yoda.audit(path, fdata, is_stdin=False) except InvalidConfiguration: failed = True formatter.feed(path, yoda) failed = failed or sum(yoda.stats.values()) > 0 if args.output_file: with open(config.output_file, 'w') as f: f.write(formatter.flush()) else: print(formatter.flush()) if failed: # If something found - exit code must be 1, 
otherwise 0 sys.exit(1) sys.exit(0) ================================================ FILE: gixy/core/__init__.py ================================================ ================================================ FILE: gixy/core/builtin_variables.py ================================================ from gixy.core.regexp import Regexp from gixy.core.variable import Variable BUILTIN_VARIABLES = { # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_uri 'uri': r'/[^\x20\t]*', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_document_uri 'document_uri': r'/[^\x20\t]*', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_arg_ 'arg_': r'[^\s&]+', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args 'args': r'[^\s]+', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_query_string 'query_string': r'[^\s]+', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_uri 'request_uri': r'/[^\s]*', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_http_ 'http_': r'[\x21-\x7e]', # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_http_ 'upstream_http_': '', # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cookie_ 'upstream_cookie_': '', # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_add_x_forwarded_for 'proxy_add_x_forwarded_for': '', # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_host 'proxy_host': '', # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#var_proxy_port 'proxy_port': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_proxy_protocol_addr # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_proxy_protocol_addr # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_proxy_protocol_port # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_proxy_protocol_port 'proxy_protocol_port': '', # 
http://nginx.org/en/docs/http/ngx_http_fastcgi_module.html#var_fastcgi_path_info 'fastcgi_path_info': '', # http://nginx.org/en/docs/http/ngx_http_fastcgi_module.html#var_fastcgi_script_name 'fastcgi_script_name': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_content_type 'content_type': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_cookie_ 'cookie_': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_host 'host': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_hostname # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_hostname 'hostname': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_limit_rate 'limit_rate': '', # http://nginx.org/en/docs/http/ngx_http_memcached_module.html#var_memcached_key 'memcached_key': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_realpath_root 'realpath_root': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_user 'remote_user': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request 'request': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_body 'request_body': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_completion 'request_completion': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_filename 'request_filename': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_id 'request_id': '', # http://nginx.org/en/docs/http/ngx_http_slice_module.html#var_slice_range 'slice_range': '', # http://nginx.org/en/docs/http/ngx_http_secure_link_module.html#var_secure_link 'secure_link': '', # http://nginx.org/en/docs/http/ngx_http_secure_link_module.html#var_secure_link_expires 'secure_link_expires': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_sent_http_ 'sent_http_': '', # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_name 'server_name': '', # 
"Secure" variables that can't content or strictly limited user input # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_ancient_browser 'ancient_browser': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_binary_remote_addr # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_binary_remote_addr 'binary_remote_addr': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_body_bytes_sent 'body_bytes_sent': None, # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_bytes_received 'bytes_received': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_bytes_sent # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_bytes_sent # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_bytes_sent 'bytes_sent': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_connection # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_connection 'connection': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection_requests # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_connection_requests 'connection_requests': None, # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_active 'connections_active': None, # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_reading 'connections_reading': None, # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_waiting 'connections_waiting': None, # http://nginx.org/en/docs/http/ngx_http_stub_status_module.html#var_connections_writing 'connections_writing': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_content_length 'content_length': None, # http://nginx.org/en/docs/http/ngx_http_ssi_module.html#var_date_gmt 'date_gmt': None, # http://nginx.org/en/docs/http/ngx_http_ssi_module.html#var_date_local 'date_local': 
None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_document_root 'document_root': '/etc/nginx', # http://nginx.org/en/docs/http/ngx_http_geoip_module.html # http://nginx.org/en/docs/stream/ngx_stream_geoip_module.html 'geoip_': None, # http://nginx.org/en/docs/http/ngx_http_gzip_module.html#var_gzip_ratio 'gzip_ratio': None, # http://nginx.org/en/docs/http/ngx_http_v2_module.html#var_http2 'http2': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_https 'https': None, # http://nginx.org/en/docs/http/ngx_http_referer_module.html#var_invalid_referer 'invalid_referer': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_is_args 'is_args': None, # http://nginx.org/en/docs/http/ngx_http_auth_jwt_module.html 'jwt_': None, # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_modern_browser 'modern_browser': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_msec # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_msec # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_msec 'msec': None, # http://nginx.org/en/docs/http/ngx_http_browser_module.html#var_msie 'msie': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_nginx_version # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_nginx_version 'nginx_version': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_pid # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_pid 'pid': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_pipe # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_pipe 'pipe': None, # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_protocol 'protocol': None, # http://nginx.org/en/docs/http/ngx_http_realip_module.html#var_realip_remote_addr # http://nginx.org/en/docs/stream/ngx_stream_realip_module.html#var_realip_remote_addr # 
http://nginx.org/en/docs/http/ngx_http_realip_module.html#var_realip_remote_port # http://nginx.org/en/docs/stream/ngx_stream_realip_module.html#var_realip_remote_port 'realip_remote_port': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_addr # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_remote_addr 'remote_addr': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_remote_port # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_remote_port 'remote_port': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_body_file 'request_body_file': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_length # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_request_length 'request_length': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_method 'request_method': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_request_time # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_request_time 'request_time': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_scheme 'scheme': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_addr # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_server_addr 'server_addr': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_port # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_server_port 'server_port': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_server_protocol 'server_protocol': None, # http://nginx.org/en/docs/http/ngx_http_session_log_module.html#var_session_log_binary_id 'session_log_binary_id': None, # http://nginx.org/en/docs/http/ngx_http_session_log_module.html#var_session_log_id 'session_log_id': None, # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_session_time 'session_time': None, # 
http://nginx.org/en/docs/http/ngx_http_spdy_module.html#var_spdy 'spdy': None, # http://nginx.org/en/docs/http/ngx_http_spdy_module.html#var_spdy_request_priority 'spdy_request_priority': None, # http://nginx.org/en/docs/http/ngx_http_ssl_module.html # http://nginx.org/en/docs/stream/ngx_stream_ssl_module.html 'ssl_': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html#var_status # http://nginx.org/en/docs/http/ngx_http_log_module.html#var_status # http://nginx.org/en/docs/stream/ngx_stream_core_module.html#var_status 'status': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html 'tcpinfo_': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html # http://nginx.org/en/docs/http/ngx_http_log_module.html # http://nginx.org/en/docs/stream/ngx_stream_core_module.html 'time_iso8601': None, # http://nginx.org/en/docs/http/ngx_http_core_module.html # http://nginx.org/en/docs/http/ngx_http_log_module.html # http://nginx.org/en/docs/stream/ngx_stream_core_module.html 'time_local': None, # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_got 'uid_got': None, # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_reset 'uid_reset': None, # http://nginx.org/en/docs/http/ngx_http_userid_module.html#var_uid_set 'uid_set': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_addr # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_addr 'upstream_addr': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_bytes_received # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_bytes_received 'upstream_bytes_received': None, # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_bytes_sent 'upstream_bytes_sent': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cache_status 'upstream_cache_status': None, # 
http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_connect_time # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_connect_time 'upstream_connect_time': None, # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_first_byte_time 'upstream_first_byte_time': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_header_time 'upstream_header_time': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_response_length 'upstream_response_length': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_response_time 'upstream_response_time': None, # http://nginx.org/en/docs/stream/ngx_stream_upstream_module.html#var_upstream_session_time 'upstream_session_time': None, # http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_status 'upstream_status': None } def is_builtin(name): if isinstance(name, int): # Indexed variables can't be builtin return False for builtin in BUILTIN_VARIABLES: if builtin.endswith('_'): if name.startswith(builtin): return True elif name == builtin: return True return False def builtin_var(name): for builtin, regexp in BUILTIN_VARIABLES.items(): if builtin.endswith('_'): if not name.startswith(builtin): continue elif name != builtin: continue if regexp: return Variable(name=name, value=Regexp(regexp, strict=True, case_sensitive=False)) return Variable(name=name, value='builtin', have_script=False) return None ================================================ FILE: gixy/core/config.py ================================================ import gixy class Config(object): def __init__(self, plugins=None, skips=None, severity=gixy.severity.UNSPECIFIED, output_format=None, output_file=None, allow_includes=True): self.severity = severity self.output_format = output_format self.output_file = output_file self.plugins = plugins self.skips = skips self.allow_includes = allow_includes 
self.plugins_options = {} def set_for(self, name, options): self.plugins_options[name] = options def get_for(self, name): if self.has_for(name): return self.plugins_options[name] return {} def has_for(self, name): return name in self.plugins_options ================================================ FILE: gixy/core/context.py ================================================ import logging import copy from gixy.core.utils import is_indexed_name LOG = logging.getLogger(__name__) CONTEXTS = [] def get_context(): return CONTEXTS[-1] def purge_context(): del CONTEXTS[:] def push_context(block): if len(CONTEXTS): context = copy.deepcopy(get_context()) else: context = Context() context.set_block(block) CONTEXTS.append(context) return context def pop_context(): return CONTEXTS.pop() class Context(object): def __init__(self): self.block = None self.variables = { 'index': {}, 'name': {} } def set_block(self, directive): self.block = directive return self def clear_index_vars(self): self.variables['index'] = {} return self def add_var(self, name, var): if is_indexed_name(name): var_type = 'index' name = int(name) else: var_type = 'name' self.variables[var_type][name] = var return self def get_var(self, name): if is_indexed_name(name): var_type = 'index' name = int(name) else: var_type = 'name' result = None try: result = self.variables[var_type][name] except KeyError: if var_type == 'name': # Only named variables can be builtins import gixy.core.builtin_variables as builtins if builtins.is_builtin(name): result = builtins.builtin_var(name) if not result: LOG.info("Can't find variable '{0}'".format(name)) return result def __deepcopy__(self, memo): cls = self.__class__ result = cls.__new__(cls) memo[id(self)] = result result.block = copy.copy(self.block) result.variables = { 'index': copy.copy(self.variables['index']), 'name': copy.copy(self.variables['name']) } return result ================================================ FILE: gixy/core/exceptions.py 
================================================ class InvalidConfiguration(Exception): pass ================================================ FILE: gixy/core/issue.py ================================================ class Issue(object): def __init__(self, plugin, summary=None, description=None, severity=None, reason=None, help_url=None, directives=None): self.plugin = plugin self.summary = summary self.description = description self.severity = severity self.reason = reason self.help_url = help_url if not directives: self.directives = [] elif not hasattr(directives, '__iter__'): self.directives = [directives] else: self.directives = directives ================================================ FILE: gixy/core/manager.py ================================================ import os import logging import gixy from gixy.core.plugins_manager import PluginsManager from gixy.core.context import get_context, pop_context, push_context, purge_context from gixy.parser.nginx_parser import NginxParser from gixy.core.config import Config LOG = logging.getLogger(__name__) class Manager(object): def __init__(self, config=None): self.root = None self.config = config or Config() self.auditor = PluginsManager(config=self.config) def audit(self, file_path, file_data, is_stdin=False): LOG.debug("Audit config file: {fname}".format(fname=file_path)) parser = NginxParser( cwd=os.path.dirname(file_path) if not is_stdin else '', allow_includes=self.config.allow_includes) self.root = parser.parse(content=file_data.read(), path_info=file_path) push_context(self.root) self._audit_recursive(self.root.children) @property def results(self): for plugin in self.auditor.plugins: if plugin.issues: yield plugin @property def stats(self): stats = dict.fromkeys(gixy.severity.ALL, 0) for plugin in self.auditor.plugins: base_severity = plugin.severity for issue in plugin.issues: # TODO(buglloc): encapsulate into Issue class? 
severity = issue.severity if issue.severity else base_severity stats[severity] += 1 return stats def _audit_recursive(self, tree): for directive in tree: self._update_variables(directive) self.auditor.audit(directive) if directive.is_block: if directive.self_context: push_context(directive) self._audit_recursive(directive.children) if directive.self_context: pop_context() def _update_variables(self, directive): # TODO(buglloc): finish him! if not directive.provide_variables: return context = get_context() for var in directive.variables: if var.name == 0: # All regexps must clean indexed variables context.clear_index_vars() context.add_var(var.name, var) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): purge_context() ================================================ FILE: gixy/core/plugins_manager.py ================================================ import os import gixy from gixy.plugins.plugin import Plugin class PluginsManager(object): def __init__(self, config=None): self.imported = False self.config = config self._plugins = [] def import_plugins(self): if self.imported: return files_list = os.listdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'plugins')) for plugin_file in files_list: if not plugin_file.endswith('.py') or plugin_file.startswith('_'): continue __import__('gixy.plugins.' 
+ os.path.splitext(plugin_file)[0], None, None, ['']) self.imported = True def init_plugins(self): self.import_plugins() exclude = self.config.skips if self.config else None include = self.config.plugins if self.config else None severity = self.config.severity if self.config else None for plugin_cls in Plugin.__subclasses__(): name = plugin_cls.__name__ if include and name not in include: # Skip not needed plugins continue if exclude and name in exclude: # Skipped plugins continue if severity and not gixy.severity.is_acceptable(plugin_cls.severity, severity): # Skip plugin by severity level continue if self.config and self.config.has_for(name): options = self.config.get_for(name) else: options = plugin_cls.options self._plugins.append(plugin_cls(options)) @property def plugins(self): if not self._plugins: self.init_plugins() return self._plugins @property def plugins_classes(self): self.import_plugins() return Plugin.__subclasses__() def get_plugins_descriptions(self): return map(lambda a: a.name, self.plugins) def audit(self, directive): for plugin in self.plugins: if plugin.directives and directive.name not in plugin.directives: continue plugin.audit(directive) def issues(self): result = [] for plugin in self.plugins: if not plugin.issues: continue result.extend(plugin.issues) return result ================================================ FILE: gixy/core/regexp.py ================================================ import six import logging import re import random import itertools from cached_property import cached_property import gixy.core.sre_parse.sre_parse as sre_parse LOG = logging.getLogger(__name__) def _build_reverse_list(original): result = [] for c in range(1, 126): c = six.unichr(c) if c not in original: result.append(c) return frozenset(result) FIX_NAMED_GROUPS_RE = re.compile(r"(?|')") CATEGORIES = { # TODO(buglloc): unicode? 
sre_parse.CATEGORY_SPACE: sre_parse.WHITESPACE, sre_parse.CATEGORY_NOT_SPACE: _build_reverse_list(sre_parse.WHITESPACE), sre_parse.CATEGORY_DIGIT: sre_parse.DIGITS, sre_parse.CATEGORY_NOT_DIGIT: _build_reverse_list(sre_parse.DIGITS), sre_parse.CATEGORY_WORD: frozenset('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '0123456789_'), sre_parse.CATEGORY_NOT_WORD: _build_reverse_list(frozenset('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '0123456789_')), sre_parse.CATEGORY_LINEBREAK: frozenset('\n'), sre_parse.CATEGORY_NOT_LINEBREAK: _build_reverse_list(frozenset('\n')), 'ANY': [six.unichr(x) for x in range(1, 127) if x != 10] } CATEGORIES_NAMES = { sre_parse.CATEGORY_DIGIT: r'\d', sre_parse.CATEGORY_NOT_DIGIT: r'\D', sre_parse.CATEGORY_SPACE: r'\s', sre_parse.CATEGORY_NOT_SPACE: r'\S', sre_parse.CATEGORY_WORD: r'\w', sre_parse.CATEGORY_NOT_WORD: r'\W', } def extract_groups(parsed, top=True): result = {} if top: result[0] = parsed for token in parsed: if not token: # Skip empty tokens pass elif token[0] == sre_parse.SUBPATTERN: if isinstance(token[1][0], int): # Captured group index can't be a string. E.g. 
for pattern "(?:la)" group name is "None" result[token[1][0]] = token[1][1] result.update(extract_groups(token[1][1], False)) elif token[0] == sre_parse.MIN_REPEAT: result.update(extract_groups(token[1][2], False)) elif token[0] == sre_parse.MAX_REPEAT: result.update(extract_groups(token[1][2], False)) elif token[0] == sre_parse.BRANCH: result.update(extract_groups(token[1][1], False)) elif token[0] == sre_parse.SUBPATTERN: result.update(extract_groups(token[1][1], False)) elif token[0] == sre_parse.IN: result.update(extract_groups(token[1], False)) elif isinstance(token, sre_parse.SubPattern): result.update(extract_groups(token, False)) return result def _gen_combinator(variants, _merge=True): if not hasattr(variants, '__iter__'): return [variants] if variants is not None else [] res = [] need_product = False for var in variants: if isinstance(var, list): sol = _gen_combinator(var, _merge=False) res.append(sol) need_product = True elif var is not None: res.append(var) if need_product: producted = itertools.product(*res) if _merge: # TODO(buglloc): ??! 
return list(six.moves.map(_merge_variants, producted)) return producted elif _merge: return list(six.moves.map(_merge_variants, [res])) return res def _merge_variants(variants): result = [] for var in variants: if isinstance(var, tuple): result.append(_merge_variants(var)) else: result.append(var) return ''.join(result) class Token(object): type = None def __init__(self, token, parent, regexp): self.token = token self.childs = None self.parent = parent self.regexp = regexp self._parse() def parse(self): pass def _parse(self): pass def _parse_childs(self, childs): self.childs = parse(childs, self, regexp=self.regexp) def _get_group(self, gid): return self.regexp.group(gid) def _reg_group(self, gid): self.regexp.reg_group(gid, self) def can_contain(self, char, skip_literal=True): raise NotImplementedError('can_contain must be implemented') def can_startswith(self, char, strict=False): return self.can_contain(char, skip_literal=False) def must_contain(self, char): raise NotImplementedError('must_contain must be implemented') def must_startswith(self, char, strict=False): return self.must_contain(char) def generate(self, context): raise NotImplementedError('generate must be implemented') def __str__(self): raise NotImplementedError('__str__ must be implemented') class AnyToken(Token): type = sre_parse.ANY def can_contain(self, char, skip_literal=True): return char in CATEGORIES['ANY'] def must_contain(self, char, skip_literal=True): # Char may not be present in ANY token return False def generate(self, context): if context.char in CATEGORIES['ANY']: return context.char return 'a' def __str__(self): return '.' 
class NotLiteralToken(Token):
    """Token for a negated single character, e.g. ``[^b]``."""
    type = sre_parse.NOT_LITERAL

    def _parse(self):
        # token[1] is the code point of the one character that must NOT match.
        self.char = six.unichr(self.token[1])
        # Candidate characters for generate(), built by _build_reverse_list
        # (defined elsewhere in this module) from the excluded character.
        self.gen_char_list = list(_build_reverse_list(frozenset(self.char)))

    def can_contain(self, char, skip_literal=True):
        """Return True for every character except the excluded one."""
        return self.char != char

    def must_contain(self, char):
        # Any char MAY not be present in NotLiteral, e.g.: "a" not present in "[^b]"
        return False

    def generate(self, context):
        """Return ``context.char`` when it is allowed, else a random candidate."""
        if self.can_contain(context.char):
            return context.char
        return random.choice(self.gen_char_list)

    def __str__(self):
        """Render the token as a character class that parses back to itself."""
        char = self.char
        # A raw backslash would escape the closing bracket ("[^\]" is not a
        # valid class), and a raw "]" is ambiguous across regex dialects, so
        # both are escaped. Every other character is emitted unchanged.
        if char in ('\\', ']'):
            char = '\\' + char
        return '[^{char}]'.format(char=char)
class MinRepeatToken(Token):
    """Lazy (non-greedy) repetition, e.g. ``a*?``, ``a+?``, ``a??``, ``a{2,5}?``."""
    type = sre_parse.MIN_REPEAT

    def _parse(self):
        # token[1] is indexed as (min count, max count, repeated sub-pattern).
        self._parse_childs(self.token[1][2])
        self.min = self.token[1][0]
        self.max = self.token[1][1]

    def can_contain(self, char, skip_literal=True):
        """Return True if any repeated child can contain ``char``."""
        if self.max == 0:
            # [a-z]{0}
            return False
        return any(child.can_contain(char, skip_literal=skip_literal)
                   for child in self.childs)

    def must_contain(self, char):
        """Return True only if the repetition is mandatory and a child must contain ``char``."""
        if self.max == 0 or self.min == 0:
            # [a-z]{0} matches nothing, [a-z]*? may match nothing
            return False
        return any(child.must_contain(char) for child in self.childs)

    def can_startswith(self, char, strict=False):
        """Delegate to the first child; a zero-width repeat answers False or None."""
        can = self.childs[0].can_startswith(char, strict)
        if self.max == 0:
            # [a-z]{0}
            return False if can else None
        return can

    def must_startswith(self, char, strict=False):
        """Return None (undecided) for optional/zero repeats, else ask the first child."""
        if self.min == 0 or self.max == 0:
            # [a-z]*? or [a-z]{0}
            return None
        return self.childs[0].must_startswith(char, strict=strict)

    def generate(self, context):
        """Return the list of generated values, each repeated up to ``context.max_repeat`` times."""
        res = []
        if self.min == 0:
            # [a-z]*
            res.append('')

        if self.max == 0:
            # [a-z]{0}
            return res

        for child in self.childs:
            res.extend(child.generate(context))

        # Never repeat more often than the generation context allows.
        repeat = min(self.max, context.max_repeat)
        return [val * repeat for val in _gen_combinator([res])]

    def __str__(self):
        childs = ''.join(str(x) for x in self.childs)
        if self.min == self.max:
            return '{childs}{{{count}}}?'.format(childs=childs, count=self.min)
        if self.min == 0 and self.max == 1:
            # Lazy optional: the second "?" is what makes it non-greedy.
            # Emitting a single "?" would turn it into the greedy form.
            return '{childs}??'.format(childs=childs)
        if self.min == 0 and self.max == sre_parse.MAXREPEAT:
            return '{childs}*?'.format(childs=childs)
        if self.min == 1 and self.max == sre_parse.MAXREPEAT:
            return '{childs}+?'.format(childs=childs)
        return '{childs}{{{min},{max}}}?'.format(childs=childs, min=self.min, max=self.max)
class BranchToken(Token):
    """Alternation between several sub-patterns, e.g. ``foo|bar``."""
    type = sre_parse.BRANCH

    def _parse(self):
        # token[1][1] is the list of alternatives. Children are attached to
        # the branch's own parent, exactly as the alternatives are stored.
        self.childs = []
        for token in self.token[1][1]:
            if not token:
                # Empty alternative, e.g. the right side of "a|"
                self.childs.append(EmptyToken(token=token, parent=self.parent, regexp=self.regexp))
            elif isinstance(token, sre_parse.SubPattern):
                self.childs.append(InternalSubpatternToken(token=token, parent=self.parent, regexp=self.regexp))
            else:
                raise RuntimeError('Unexpected token {0} in branch'.format(token))

    def can_contain(self, char, skip_literal=True):
        """Return True if at least one alternative can contain ``char``."""
        return any(child.can_contain(char, skip_literal=skip_literal)
                   for child in self.childs)

    def must_contain(self, char):
        """Return True only if every alternative must contain ``char``."""
        return all(child.must_contain(char) for child in self.childs)

    def can_startswith(self, char, strict=False):
        """Return True if at least one alternative can start with ``char``."""
        return any(x.can_startswith(char, strict) for x in self.childs)

    def must_startswith(self, char, strict=False):
        """Return True only if every alternative must start with ``char``."""
        return all(x.must_startswith(char, strict) for x in self.childs)

    def generate(self, context):
        """Return the generated values of all alternatives as one flat list."""
        res = []
        for child in self.childs:
            # Generate once and reuse the result: a second call doubles the
            # work at every nesting level and, for tokens that pick random
            # characters, may return values other than the ones inspected.
            values = child.generate(context)
            if isinstance(values, list):
                res.extend(values)
            else:
                res.append(values)
        return res

    def __str__(self):
        return '(?:{0})'.format('|'.join(str(x) for x in self.childs))
class InternalSubpatternToken(Token):
    """Group-less sequence of tokens: a regexp root or one branch alternative."""
    type = sre_parse.SUBPATTERN

    def _parse(self):
        # Unlike SubpatternToken, the token itself is the sequence of children.
        self._parse_childs(self.token)
        self.group = None

    @staticmethod
    def _first_answer(answers):
        """Return the first answer that is not None, or None if there is none."""
        for answer in answers:
            if answer is not None:
                return answer
        return None

    def _first_child(self):
        """Return the first child, or None for an empty pattern."""
        return self.childs[0] if self.childs else None

    def can_contain(self, char, skip_literal=True):
        """Return True if any child can contain ``char``."""
        return any(child.can_contain(char, skip_literal=skip_literal)
                   for child in self.childs)

    def must_contain(self, char):
        """Return True if any child must contain ``char``."""
        return any(child.must_contain(char) for child in self.childs)

    def can_startswith(self, char, strict=False):
        """Check whether the sequence can start with ``char``.

        :param str char: character to test.
        :param bool strict: treat the pattern as anchored.
        :return: True/False, or None when no child gives an answer.
        """
        # An empty pattern has no first child; indexing childs[0] would raise.
        first = self._first_child()
        if isinstance(first, AtToken):
            # Anchor first: the tokens after it decide.
            answer = self._first_answer(
                child.can_startswith(char, strict) for child in self.childs[1:])
            return False if answer is None else answer
        elif not strict and not isinstance(first, (SubpatternToken, InternalSubpatternToken)):
            # Not strict regexp w/o ^ can starts with any character
            return char in CATEGORIES['ANY']

        return self._first_answer(
            child.can_startswith(char, strict) for child in self.childs)

    def must_startswith(self, char, strict=False):
        """Check whether the sequence must start with ``char``.

        :param str char: character to test.
        :param bool strict: treat the pattern as anchored.
        :return: True/False, or None when no child gives an answer.
        """
        first = self._first_child()
        if isinstance(first, AtToken):
            # After an anchor the remaining tokens are always checked strictly.
            answer = self._first_answer(
                child.must_startswith(char, strict=True) for child in self.childs[1:])
            return False if answer is None else answer
        elif not strict and not isinstance(first, (SubpatternToken, InternalSubpatternToken)):
            # Not strict regexp w/o ^ MAY NOT starts with any character
            return False

        return self._first_answer(
            child.must_startswith(char, strict=strict) for child in self.childs)

    def generate(self, context):
        """Combine the values generated by every child, in order."""
        res = [child.generate(context) for child in self.childs]
        return _gen_combinator(res)

    def __str__(self):
        return ''.join(str(x) for x in self.childs)
class AtToken(Token):
    """Zero-width position assertion such as ``^``, ``$``, ``\\b`` or ``\\A``."""
    type = sre_parse.AT

    # Textual form of the anchors that are neither "^" nor "$".
    _ESCAPES = {
        sre_parse.AT_BEGINNING_STRING: '\\A',
        sre_parse.AT_END_STRING: '\\Z',
        sre_parse.AT_BOUNDARY: '\\b',
        sre_parse.AT_NON_BOUNDARY: '\\B',
    }

    def _parse(self):
        # token[1] names the position; only "^" and "$" get dedicated flags.
        self.begin = self.token[1] == sre_parse.AT_BEGINNING
        self.end = self.token[1] == sre_parse.AT_END

    def can_contain(self, char, skip_literal=True):
        # An anchor consumes no characters.
        return False

    def must_contain(self, char):
        return False

    def generate(self, context):
        """Return "^" or "$" when the context asks for anchors, else None."""
        if context.anchored:
            if self.begin:
                return '^'
            if self.end:
                return '$'
        return None

    def __str__(self):
        if self.begin:
            return '^'
        if self.end:
            return '$'
        escape = self._ESCAPES.get(self.token[1])
        if escape is not None:
            return escape
        # __str__ has to return a string: falling through with None would make
        # str() raise TypeError. Log the surprise and render nothing instead.
        LOG.warning('unexpected AT token: %s', self.token)
        return ''
class EmptyToken(Token):
    """Token standing in for an empty (falsy) parse item, e.g. an empty alternative.

    It matches nothing and renders as the empty string; the "starts with"
    questions are answered with None so that callers move on to the next token.
    """
    type = None

    def generate(self, context):
        return ''

    def __str__(self):
        return ''

    def can_contain(self, char, skip_literal=True):
        return False

    def must_contain(self, char):
        # TODO(buglloc): Do it!
        return False

    def can_startswith(self, char, strict=False):
        return None

    def must_startswith(self, char, strict=False):
        return None
class Regexp(object):
    """Parsed regular expression with helpers to reason about what it can match."""

    def __init__(self, source, strict=False, case_sensitive=True, _root=None, _parsed=None):
        """
        Gixy Regexp class, parse and provide helpers to work with it.

        :param str source: regexp, e.g. ^foo$.
        :param bool strict: anchored or not.
        :param bool case_sensitive: case sensitive or not.
        :param _root: already built root token (internal use).
        :param _parsed: already parsed pattern (internal use); when given,
            ``source`` is not parsed at all.
        """
        self.source = source
        self.strict = strict
        self.case_sensitive = case_sensitive
        self._root = _root
        self._parsed = _parsed
        self._groups = {}

    def _fold_case(self, char):
        """Return ``char`` as is, or lower-cased for case-insensitive regexps."""
        return char if self.case_sensitive else char.lower()

    def can_startswith(self, char):
        """
        Checks if regex can starts with the specified char.
        Example:
          Regexp('[a-z][0-9]').can_startswith('s') -> True
          Regexp('[a-z][0-9]').can_startswith('0') -> True
          Regexp('^[a-z][0-9]').can_startswith('0') -> False
          Regexp('[a-z][0-9]', strict=True).can_startswith('0') -> False

        :param str char: character to test.
        :return bool: True if regex can starts with the specified char, False otherwise.
        """
        return self.root.can_startswith(
            char=self._fold_case(char),
            strict=self.strict
        )

    def can_contain(self, char, skip_literal=True):
        """
        Checks if regex can contain the specified char.
        Example:
          Regexp('[a-z][0-9]').can_contain('s') -> True
          Regexp('[a-z][0-9]').can_contain('0') -> True
          Regexp('[a-z][0-9]').can_contain('/') -> False
          Regexp('[a-z][0-9]/').can_contain('/') -> False
          Regexp('[a-z][0-9]/').can_contain('/', skip_literal=False) -> True

        :param str char: character to test.
        :param bool skip_literal: skip literal tokens.
        :return bool: True if regex can contain the specified char, False otherwise.
        """
        return self.root.can_contain(
            char=self._fold_case(char),
            skip_literal=skip_literal
        )

    def must_startswith(self, char):
        """
        Checks if regex MUST starts with the specified char.
        Example:
          Regexp('[a-z][0-9]').must_startswith('s') -> False
          Regexp('s[a-z]').must_startswith('s') -> False
          Regexp('^s[a-z]').must_startswith('s') -> True
          Regexp('s[a-z]', strict=True).must_startswith('s') -> True

        :param str char: character to test.
        :return bool: True if regex must starts with the specified char, False otherwise.
        """
        return self.root.must_startswith(
            char=self._fold_case(char),
            strict=self.strict
        )

    def must_contain(self, char):
        """
        Checks if regex MUST contain the specified char.
        Example:
          Regexp('[a-z][0-9]').must_contain('s') -> False
          Regexp('[a-z][0-9]s').must_contain('s') -> True

        :param str char: character to test.
        :return bool: True if regex MUST contain the specified char, False otherwise.
        """
        return self.root.must_contain(
            char=self._fold_case(char)
        )

    def generate(self, char, anchored=False, max_repeat=5):
        """
        Generate values that match regex.
        Example (values yielded):
          Regexp('.a?').generate('s') -> 's', 'sa'
          Regexp('(?:^http|https)://.').generate('s') -> 'http://s', 'https://s'
          Regexp('(?:^http|https)://.').generate('s', anchored=True) -> '^http://s', 'https://s'

        :param str char: "dangerous" character, generator try to place it wherever possible.
        :param bool anchored: place anchors in generated values.
        :param int max_repeat: maximum count of repeated group (e.g. "a+" provides "aaaaa").
        :return: generator of str, one generated value at a time.
        """
        context = GenerationContext(char, anchored=anchored, max_repeat=max_repeat)
        for val in self.root.generate(context=context):
            if anchored and self.strict and not val.startswith('^'):
                # A strict regexp is anchored even without an explicit "^".
                yield '^' + val
            else:
                yield val

    def group(self, name):
        """
        Returns group by specified name.

        :param name: name (or index) of the group.
        :return Regexp: Regexp object for this group; an empty Regexp when
            there is no such group.
        """
        if name in self.groups:
            return self.groups[name]
        return Regexp('')

    def reg_group(self, gid, token):
        """Remember ``token`` as the token of the group ``gid``."""
        self._groups[gid] = token

    def get_group(self, gid):
        """Return the token registered for the group ``gid`` (KeyError if unknown)."""
        return self._groups[gid]

    @cached_property
    def groups(self):
        """Mapping of group index and group name to the Regexp of that group."""
        result = {}
        for name, parsed in extract_groups(self.parsed).items():
            # The source is only a placeholder: the group is already parsed.
            result[name] = Regexp('compiled', _parsed=parsed, strict=True, case_sensitive=self.case_sensitive)
        # Make named groups reachable by name as well as by index.
        for name, group in self.parsed.pattern.groupdict.items():
            result[name] = result[group]
        return result

    @property
    def root(self):
        """Root token of the regexp, built on first access."""
        if self._root is not None:
            return self._root

        self._root = InternalSubpatternToken(self.parsed, parent=None, regexp=self)
        self._groups[0] = self._root
        return self._root

    @property
    def parsed(self):
        """Parsed form of the regexp, computed on first access."""
        # TODO(buglloc): Ugly hack!
        # Compare with None on purpose: an empty parsed pattern (e.g. the body
        # of "()") has length 0 and is therefore falsy, and a truthiness test
        # would discard it and parse the placeholder source instead.
        if self._parsed is not None:
            return self._parsed

        try:
            self._parsed = sre_parse.parse(FIX_NAMED_GROUPS_RE.sub('(?P<\\1>', self.source))
        except sre_parse.error as e:
            LOG.fatal('Failed to parse regex: %s (%s)', self.source, str(e))
            raise

        return self._parsed

    def __str__(self):
        return str(self.root)
# # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # """Internal support module for sre""" # update when constants are added or removed MAGIC = 20031017 try: from _sre import MAXREPEAT except ImportError: import _sre MAXREPEAT = _sre.MAXREPEAT = 65535 # SRE standard exception (access as sre.error) # should this really be here? class error(Exception): pass # operators FAILURE = "failure" SUCCESS = "success" ANY = "any" ANY_ALL = "any_all" ASSERT = "assert" ASSERT_NOT = "assert_not" AT = "at" BIGCHARSET = "bigcharset" BRANCH = "branch" CALL = "call" CATEGORY = "category" CHARSET = "charset" GROUPREF = "groupref" GROUPREF_IGNORE = "groupref_ignore" GROUPREF_EXISTS = "groupref_exists" IN = "in" IN_IGNORE = "in_ignore" INFO = "info" JUMP = "jump" LITERAL = "literal" LITERAL_IGNORE = "literal_ignore" MARK = "mark" MAX_REPEAT = "max_repeat" MAX_UNTIL = "max_until" MIN_REPEAT = "min_repeat" MIN_UNTIL = "min_until" NEGATE = "negate" NOT_LITERAL = "not_literal" NOT_LITERAL_IGNORE = "not_literal_ignore" RANGE = "range" REPEAT = "repeat" REPEAT_ONE = "repeat_one" SUBPATTERN = "subpattern" MIN_REPEAT_ONE = "min_repeat_one" # positions AT_BEGINNING = "at_beginning" AT_BEGINNING_LINE = "at_beginning_line" AT_BEGINNING_STRING = "at_beginning_string" AT_BOUNDARY = "at_boundary" AT_NON_BOUNDARY = "at_non_boundary" AT_END = "at_end" AT_END_LINE = "at_end_line" AT_END_STRING = "at_end_string" AT_LOC_BOUNDARY = "at_loc_boundary" AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" AT_UNI_BOUNDARY = "at_uni_boundary" AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" # categories CATEGORY_DIGIT = "category_digit" CATEGORY_NOT_DIGIT = "category_not_digit" CATEGORY_SPACE = "category_space" CATEGORY_NOT_SPACE = "category_not_space" CATEGORY_WORD = "category_word" CATEGORY_NOT_WORD = "category_not_word" CATEGORY_LINEBREAK = "category_linebreak" CATEGORY_NOT_LINEBREAK = "category_not_linebreak" CATEGORY_LOC_WORD = 
def makedict(list):
    """Map every item of ``list`` to its position in the sequence.

    If an item occurs more than once, its last position wins.
    """
    return dict((item, position) for position, item in enumerate(list))
AT_UNI_NON_BOUNDARY } CH_LOCALE = { CATEGORY_DIGIT: CATEGORY_DIGIT, CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, CATEGORY_SPACE: CATEGORY_SPACE, CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, CATEGORY_WORD: CATEGORY_LOC_WORD, CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK } CH_UNICODE = { CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, CATEGORY_SPACE: CATEGORY_UNI_SPACE, CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, CATEGORY_WORD: CATEGORY_UNI_WORD, CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK } # flags SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) SRE_FLAG_IGNORECASE = 2 # case insensitive SRE_FLAG_LOCALE = 4 # honour system locale SRE_FLAG_MULTILINE = 8 # treat target as multiline string SRE_FLAG_DOTALL = 16 # treat target as a single string SRE_FLAG_UNICODE = 32 # use unicode locale SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments SRE_FLAG_DEBUG = 128 # debugging # flags for INFO primitive SRE_INFO_PREFIX = 1 # has prefix SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) SRE_INFO_CHARSET = 4 # pattern starts with character from given set ================================================ FILE: gixy/core/sre_parse/sre_parse.py ================================================ # flake8: noqa # # Secret Labs' Regular Expression Engine # # convert re-style regular expression to sre pattern # # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. 
class Pattern:
    """Master pattern object: keeps track of global attributes while parsing.

    ``groups`` is the index the next opened group will get, ``open`` lists
    the groups that are opened but not yet closed, and ``groupdict`` maps
    group names to group indexes.
    """

    def __init__(self):
        self.flags = 0
        self.lookbehind = 0
        self.groups = 1
        self.groupdict = {}
        self.open = []

    def opengroup(self, name=None):
        """Allocate the next group index, optionally bind ``name`` to it, and mark it open."""
        gid = self.groups
        self.groups += 1
        if name is not None:
            previous = self.groupdict.get(name)
            if previous is not None:
                raise error(("redefinition of group name %s as group %d; "
                             "was group %d" % (repr(name), gid, previous)))
            self.groupdict[name] = gid
        self.open.append(gid)
        return gid

    def closegroup(self, gid):
        """Mark the group ``gid`` as closed."""
        self.open.remove(gid)

    def checkgroup(self, gid):
        """Return True if ``gid`` is an already allocated group that is closed."""
        if gid >= self.groups:
            return False
        return gid not in self.open
def isname(name):
    """Check that ``name`` is a valid group name.

    A valid name is not empty, starts with an identifier character (as
    accepted by ``isident``) and continues with identifier characters or
    digits only.

    :param name: candidate group name.
    :return bool: True if the name is valid, False otherwise.
    """
    if not name:
        # An empty name is invalid; indexing name[0] would raise IndexError.
        return False
    if not isident(name[0]):
        return False
    return all(isident(char) or isdigit(char) for char in name[1:])
def _parse_sub(source, state, nested=1):
    """
    Parses an alternation (``a|b|c``) and simplifies it where possible.

    :param source: tokenizer providing the pattern characters.
    :param state: parser state passed on to ``_parse`` and ``SubPattern``.
    :param nested: when true, the alternation has to be followed by the end of
        input or by ")" (which is only peeked at, not consumed).
    :return: the single alternative itself when there is only one, otherwise a
        new ``SubPattern`` holding the factored-out common prefix followed by
        either an ``IN`` set or a ``BRANCH`` item.
    :raises error: if a nested alternation is not properly closed.
    """
    # parse an alternation: a|b|c

    items = []
    itemsappend = items.append
    sourcematch = source.match
    while 1:
        itemsappend(_parse(source, state))
        if sourcematch("|"):
            continue
        if not nested:
            break
        # skip=0: only check for ")", leave it for the caller to consume
        if not source.next or sourcematch(")", 0):
            break
        else:
            raise error("pattern not properly closed")

    # A single alternative needs no branch wrapper.
    if len(items) == 1:
        return items[0]

    subpattern = SubPattern(state)
    subpatternappend = subpattern.append

    # check if all items share a common prefix
    while 1:
        prefix = None
        for item in items:
            # An exhausted alternative means there is no common prefix left.
            if not item:
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                break
        else:
            # all subitems start with a common "prefix".
            # move it out of the branch
            # (the alternatives are modified in place)
            for item in items:
                del item[0]
            subpatternappend(prefix)
            continue  # check next one
        break

    # check if the branch can be replaced by a character set
    for item in items:
        if len(item) != 1 or item[0][0] != LITERAL:
            break
    else:
        # we can store this as a character set instead of a
        # branch (the compiler may optimize this even more)
        set = []
        setappend = set.append
        for item in items:
            setappend(item[0])
        subpatternappend((IN, set))
        return subpattern

    subpattern.append((BRANCH, (None, items)))
    return subpattern
sourcematch("^"): setappend((NEGATE, None)) # check remaining characters start = set[:] while 1: this = sourceget() if this == "]" and set != start: break elif this and this[0] == "\\": code1 = _class_escape(source, this) elif this: code1 = LITERAL, ord(this) else: raise error("unexpected end of regular expression") if sourcematch("-"): # potential range this = sourceget() if this == "]": if code1[0] is IN: code1 = code1[1][0] setappend(code1) setappend((LITERAL, ord("-"))) break elif this: if this[0] == "\\": code2 = _class_escape(source, this) else: code2 = LITERAL, ord(this) if code1[0] != LITERAL or code2[0] != LITERAL: raise error("bad character range") lo = code1[1] hi = code2[1] if hi < lo: raise error("bad character range") setappend((RANGE, (lo, hi))) else: raise error("unexpected end of regular expression") else: if code1[0] is IN: code1 = code1[1][0] setappend(code1) # XXX: should move set optimization to compiler! if _len(set) == 1 and set[0][0] is LITERAL: subpatternappend(set[0]) # optimization elif _len(set) == 2 and set[0][0] is NEGATE and set[1][0] is LITERAL: subpatternappend((NOT_LITERAL, set[1][1])) # optimization else: # XXX: should add charmap optimization here subpatternappend((IN, set)) elif this and this[0] in REPEAT_CHARS: # repeat previous item if this == "?": min, max = 0, 1 elif this == "*": min, max = 0, MAXREPEAT elif this == "+": min, max = 1, MAXREPEAT elif this == "{": if source.next == "}": subpatternappend((LITERAL, ord(this))) continue here = source.tell() min, max = 0, MAXREPEAT lo = hi = "" while source.next in DIGITS: lo = lo + source.get() if sourcematch(","): while source.next in DIGITS: hi = hi + sourceget() else: hi = lo if not sourcematch("}"): subpatternappend((LITERAL, ord(this))) source.seek(here) continue if lo: min = int(lo) if min >= MAXREPEAT: raise OverflowError("the repetition number is too large") if hi: max = int(hi) if max >= MAXREPEAT: raise OverflowError("the repetition number is too large") if max < min: 
raise error("bad repeat interval") else: raise error("not supported") # figure out which item to repeat if subpattern: item = subpattern[-1:] else: item = None if not item or (_len(item) == 1 and item[0][0] == AT): raise error("nothing to repeat") if item[0][0] in REPEATCODES: raise error("multiple repeat") if sourcematch("?"): subpattern[-1] = (MIN_REPEAT, (min, max, item)) else: subpattern[-1] = (MAX_REPEAT, (min, max, item)) elif this == ".": subpatternappend((ANY, None)) elif this == "(": group = 1 name = None condgroup = None if sourcematch("?"): group = 0 # options if sourcematch("P"): # python extensions if sourcematch("<"): # named group: skip forward to end of name name = "" while 1: char = sourceget() if char is None: raise error("unterminated name") if char == ">": break name = name + char group = 1 if not name: raise error("missing group name") if not isname(name): raise error("bad character in group name %r" % name) elif sourcematch("="): # named backreference name = "" while 1: char = sourceget() if char is None: raise error("unterminated name") if char == ")": break name = name + char if not name: raise error("missing group name") if not isname(name): raise error("bad character in backref group name " "%r" % name) gid = state.groupdict.get(name) if gid is None: msg = "unknown group name: {0!r}".format(name) raise error(msg) if state.lookbehind: import warnings warnings.warn('group references in lookbehind ' 'assertions are not supported', RuntimeWarning) subpatternappend((GROUPREF, gid)) continue else: char = sourceget() if char is None: raise error("unexpected end of pattern") raise error("unknown specifier: ?P%s" % char) elif sourcematch(":"): # non-capturing group group = 2 elif sourcematch("#"): # comment while 1: if source.next is None or source.next == ")": break sourceget() if not sourcematch(")"): raise error("unbalanced parenthesis") continue elif source.next in ASSERTCHARS: # lookahead assertions char = sourceget() dir = 1 if char == "<": 
def parse(str, flags=0, pattern=None):
    """
    Parses an 're' pattern into a list of (opcode, argument) tuples.

    :param str: pattern source text.
    :param int flags: initial SRE flags.
    :param pattern: optional state object; a fresh ``Pattern`` is created when omitted.
    :return: the parsed pattern as returned by ``_parse_sub``.
    :raises error: on unbalanced parentheses or trailing garbage.
    """
    source = Tokenizer(str)

    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    pattern.str = str

    p = _parse_sub(source, pattern, 0)

    # Anything left after the top-level alternation is a syntax error.
    tail = source.get()
    if tail == ")":
        raise error("unbalanced parenthesis")
    elif tail:
        raise error("bogus characters at end of regular expression")

    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
        # the VERBOSE flag was switched on inside the pattern.  to be
        # on the safe side, we'll parse the whole thing again...
        return parse(str, p.pattern.flags)

    if flags & SRE_FLAG_DEBUG:
        # SubPattern in this module defines no dump() method, so an
        # unconditional p.dump() raises AttributeError; dump only if available.
        dump = getattr(p, "dump", None)
        if dump is not None:
            dump()

    return p
def is_indexed_name(name):
    """
    Tells whether a name refers to a positional (numbered) item.

    :param int|str name: name to check.
    :return bool: True for any int and for the one-character strings '1'..'9'.
    """
    if isinstance(name, int):
        return True
    if len(name) != 1:
        return False
    return '1' <= name <= '9'
def can_contain(self, char):
    """
    Checks whether the variable value may contain the given char.

    :param str char: character to test.
    :return: True if the variable can contain the char, False otherwise.
    """
    # The boundary set has the first word: if it rules the char out, stop here.
    boundary = self.boundary
    if boundary and not boundary.can_contain(char):
        return False

    # A regexp-backed variable delegates the decision to its regexp.
    if self.regexp:
        return self.regexp.can_contain(char, skip_literal=True)

    # Otherwise one dependency that can contain the char is enough.
    for dependency in self.depends or ():
        if dependency.can_contain(char):
            return True

    # Nothing user-controlled left to inspect.
    return False
def must_contain(self, char):
    """
    Checks if variable MUST contain the specified char.

    :param str char: character to test.
    :return bool: True if variable must contain the specified char, False otherwise.
    """
    # First of all check boundary set
    if self.boundary and self.boundary.must_contain(char):
        return True

    # Then regexp
    if self.regexp:
        return self.regexp.must_contain(char)

    # Then dependencies
    if self.depends:
        return any(dep.must_contain(char) for dep in self.depends)

    # Otherwise checks literal. bool() keeps the documented True/False
    # contract: a bare ``self.value and ...`` leaks '' or None to the caller.
    return bool(self.value) and char in self.value
class Block(Directive):
    """
    Base class for block directives, i.e. directives that hold child directives.

    The "flat" lookups below descend into child blocks whose ``self_context``
    attribute is false, treating their children as if they belonged to this block.
    """
    nginx_name = None
    is_block = True
    self_context = True

    def __init__(self, name, args):
        super(Block, self).__init__(name, args)
        self.children = []

    def some(self, name, flat=True):
        """
        Returns the first child directive with the given name.

        :param str name: directive name to look for.
        :param bool flat: also search child blocks that have no context of their own.
        :return: first matching directive or None.
        """
        for child in self.children:
            if child.name == name:
                return child
            if flat and child.is_block and not child.self_context:
                result = child.some(name, flat=flat)
                if result:
                    return result
        return None

    def find(self, name, flat=False):
        """
        Returns all child directives with the given name.

        :param str name: directive name to look for.
        :param bool flat: also search child blocks that have no context of their own.
        :return list: matching directives.
        """
        result = []
        for child in self.children:
            if child.name == name:
                result.append(child)
            if flat and child.is_block and not child.self_context:
                # Propagate ``flat`` (as some() does); without it the search
                # stopped after the first level of context-less blocks.
                result += child.find(name, flat=flat)
        return result

    def find_recursive(self, name):
        """
        Returns all directives with the given name from the whole subtree.

        :param str name: directive name to look for.
        :return list: matching directives, regardless of block context.
        """
        result = []
        for child in self.children:
            if child.name == name:
                result.append(child)
            if child.is_block:
                result += child.find_recursive(name)
        return result

    def append(self, directive):
        """
        Adds a directive as the last child and makes this block its parent.

        :param Directive directive: directive to add.
        """
        directive.set_parent(self)
        self.children.append(directive)

    def __str__(self):
        return '{name} {args} {{'.format(name=self.name, args=' '.join(self.args))
class MapBlock(Block):
    """
    Nginx "map" block: provides the variable named by its second argument.
    """
    nginx_name = 'map'
    self_context = False
    provide_variables = True

    def __init__(self, name, args):
        super(MapBlock, self).__init__(name, args)
        # First argument is the source, second one the variable being defined.
        self.source = args[0]
        # The name is stored with "$" stripped from both ends.
        self.variable = args[1].strip('$')

    @cached_property
    def variables(self):
        """
        Returns the variable provided by this block.

        :return Variable[]: a single script-less variable with an empty value.
        """
        # TODO(buglloc): Finish him!
        provided = Variable(name=self.variable, value='', boundary=None, provider=self, have_script=False)
        return [provided]
class Directive(object):
    """
    Base class of a parsed Nginx directive: a name, its arguments and a link
    to the parent directive it was attached to.
    """
    nginx_name = None
    is_block = False
    provide_variables = False

    def __init__(self, name, args, raw=None):
        self.name = name
        self.args = args
        self._raw = raw
        # Filled in later through set_parent().
        self.parent = None

    def set_parent(self, parent):
        self.parent = parent

    @property
    def parents(self):
        """
        Yields all ancestors, starting from the direct parent.
        """
        node = self
        while node.parent:
            node = node.parent
            yield node

    @property
    def variables(self):
        """
        Variables provided by the directive; has to be implemented by subclasses.
        """
        raise NotImplementedError()

    def __str__(self):
        joined_args = ' '.join(self.args)
        return '{name} {args};'.format(name=self.name, args=joined_args)
class RewriteDirective(Directive):
    """
    Nginx "rewrite" directive; the groups of its pattern are provided as variables.
    """
    nginx_name = 'rewrite'
    provide_variables = True
    boundary = Regexp(r'[^\s\r\n]')

    def __init__(self, name, args):
        super(RewriteDirective, self).__init__(name, args)
        self.pattern = args[0]
        self.replace = args[1]
        # The flag is optional and comes as the third argument.
        self.flag = args[2] if len(args) > 2 else None

    @property
    def variables(self):
        """
        Builds one variable per group of the rewrite pattern.

        :return Variable[]: variables limited by the class-level boundary set.
        """
        groups = Regexp(self.pattern, case_sensitive=True).groups
        return [
            Variable(name=group_name, value=group, boundary=self.boundary, provider=self)
            for group_name, group in groups.items()
        ]
class BaseFormatter(object):
    """
    Base class of report formatters.

    Collects results per config path via feed() and renders them all at once
    via flush(); subclasses implement format_reports().
    """
    # Block types that are never printed in the pseudo config.
    skip_parents = set([block.Root, block.HttpBlock])

    def __init__(self):
        self.reports = {}
        self.stats = dict.fromkeys(gixy.severity.ALL, 0)

    def format_reports(self, reports, stats):
        """
        Renders collected reports; has to be overridden by subclasses.

        :param dict reports: mapping of path to the list of report dicts.
        :param dict stats: number of issues per severity.
        """
        raise NotImplementedError("Formatter must override format_reports function")

    def feed(self, path, manager):
        """
        Collects stats and results of one audited config.

        :param str path: key under which the reports are stored.
        :param manager: object exposing ``stats``, ``results`` and ``root``.
        """
        for severity in gixy.severity.ALL:
            self.stats[severity] += manager.stats[severity]

        self.reports[path] = []
        for result in manager.results:
            report = self._prepare_result(manager.root,
                                          summary=result.summary,
                                          severity=result.severity,
                                          description=result.description,
                                          issues=result.issues,
                                          plugin=result.name,
                                          help_url=result.help_url)
            self.reports[path].extend(report)

    def flush(self):
        """
        Renders everything collected so far.
        """
        return self.format_reports(self.reports, self.stats)

    def _prepare_result(self, root, issues, severity, summary, description, plugin, help_url):
        """
        Groups issues that render identically and yields one report per group.

        Issue-level fields take precedence over the plugin-level defaults that
        are passed in as arguments.

        :return: generator of report dicts with a resolved ``config`` entry.
        """
        result = {}
        for issue in issues:
            report = dict(
                plugin=plugin,
                summary=issue.summary or summary,
                severity=issue.severity or severity,
                description=issue.description or description,
                help_url=issue.help_url or help_url,
                reason=issue.reason or '',
            )
            # A tuple key cannot collide the way concatenated strings can
            # ("ab" + "c" vs "a" + "bc") and does not require str values.
            key = (report['plugin'], report['summary'], report['severity'],
                   report['description'], report['help_url'], report['reason'])
            directives = issue.directives or []
            if key in result:
                result[key]['directives'].extend(directives)
            else:
                # Copy the list: extending it later must not modify the issue.
                report['directives'] = list(directives)
                result[key] = report

        for report in result.values():
            if report['directives']:
                config = self._resolve_config(root, report['directives'])
            else:
                config = ''

            del report['directives']
            report['config'] = config
            yield report

    def _resolve_config(self, root, directives):
        """
        Renders the pseudo config made of the given directives and their parents.
        """
        points = set()
        for directive in directives:
            points.add(directive)
            points.update(p for p in directive.parents)

        result = self._traverse_tree(root, points, 0)
        return '\n'.join(result)

    def _traverse_tree(self, tree, points, level):
        """
        Walks the tree and returns the output lines for the directives in ``points``.

        :param tree: block whose children are visited.
        :param set points: directives to print.
        :param int level: current indentation level (in tabs).
        :return list: lines of the pseudo config.
        """
        result = []
        for leap in tree.children:
            if leap not in points:
                continue
            printable = type(leap) not in self.skip_parents
            # Special hack for includes
            # TODO(buglloc): fix me
            have_parentheses = type(leap) != block.IncludeBlock

            if printable:
                if leap.is_block:
                    # Blank line in front of every printed block.
                    result.append('')
                directive = str(leap).replace('\n', '\n' + '\t' * (level + 1))
                result.append('{indent:s}{dir:s}'.format(indent='\t' * level, dir=directive))

            if leap.is_block:
                # Skipped parents do not add an indentation level.
                result.extend(self._traverse_tree(leap, points, level + 1 if printable else level))
                if printable and have_parentheses:
                    result.append('{indent:s}}}'.format(indent='\t' * level))

        return result
class JsonFormatter(BaseFormatter):
    """
    Formatter that renders all reports as a single JSON array.
    """

    def format_reports(self, reports, stats):
        """
        Serializes every issue of every path into one JSON document.

        :param dict reports: mapping of path to the list of issue dicts.
        :param stats: not used by this formatter.
        :return str: indented JSON with sorted keys.
        """
        flattened = [
            {
                'path': path,
                'plugin': issue['plugin'],
                'summary': issue['summary'],
                'severity': issue['severity'],
                'description': issue['description'],
                'reference': issue['help_url'],
                'reason': issue['reason'],
                'config': issue['config'],
            }
            for path, issues in reports.items()
            for issue in issues
        ]
        return json.dumps(flattened, sort_keys=True, indent=2, separators=(',', ': '))
{% else %} {% for issue in issues|sort(attribute='severity') %} {{ colors[issue.severity] }}>> Problem: [{{ issue.plugin }}] {{ issue.summary }}{{ colors.DEF }} {% if issue.description %} Description: {{ issue.description }} {% endif %} {% if issue.help_url %} Additional info: {{ issue.help_url }} {% endif %} {% if issue.reason %} Reason: {{ issue.reason }} {% endif %} Pseudo config: {{ issue.config | to_text }} {% if not loop.last %} ------------------------------------------------ {% endif %} {% endfor %} {% endif %} {% if not loop.last %} --------8<--------8<--------8<--------8<-------- {% endif %} {% endfor %} {% if stats %} {{ colors.TITLE }}==================== Summary ==================={{ colors.DEF }} Total issues: Unspecified: {{ stats.UNSPECIFIED }} Low: {{ stats.LOW }} Medium: {{ stats.MEDIUM }} High: {{ stats.HIGH }} {% endif %} ================================================ FILE: gixy/formatters/templates/text.j2 ================================================ ==================== Results =================== {% for path, issues in reports.items() %} {% if reports|length > 1 %} File path: {{ path }} {% endif %} {% if not issues %} No issues found. 
class NginxParser(object):
    """
    Builds a tree of gixy directive objects from nginx configuration text.

    Raw parsing is delegated to ``raw_parser.RawParser``; this class maps the
    raw results onto the ``block``/``directive`` classes and resolves
    ``include`` directives, either from the filesystem or from the files
    embedded in an nginx configuration dump.
    """

    def __init__(self, cwd='', allow_includes=True):
        """
        :param cwd: base directory that include patterns are joined with.
        :param allow_includes: when False, filesystem includes are skipped.
        """
        self.cwd = cwd
        # file name -> list of raw parsed entries; filled only for dumps
        self.configs = {}
        self.is_dump = False
        self.allow_includes = allow_includes
        self.directives = {}
        self.parser = raw_parser.RawParser()
        self._init_directives()

    def parse_file(self, path, root=None):
        """
        Read the file at ``path`` and parse its content.

        :param path: path of the configuration file to read.
        :param root: optional block to attach the parsed directives to.
        :return: the root block (see ``parse``).
        """
        LOG.debug("Parse file: {0}".format(path))
        # Use a context manager so the descriptor is closed right away;
        # includes can pull in many files during a single run.
        with open(path) as config_file:
            content = config_file.read()
        return self.parse(content=content, root=root, path_info=path)

    def parse(self, content, root=None, path_info=None):
        """
        Parse configuration text and append the result to ``root``.

        :param content: configuration text handed to the raw parser.
        :param root: block to fill; a new ``block.Root`` is created if falsy.
        :param path_info: file name used only in the error log message.
        :return: the root block.
        :raises InvalidConfiguration: when the raw parser rejects the content.
        """
        if not root:
            root = block.Root()
        try:
            parsed = self.parser.parse(content)
        except ParseException as e:
            error_msg = 'char {char} (line:{line}, col:{col})'.format(char=e.loc, line=e.lineno, col=e.col)
            if path_info:
                LOG.error('Failed to parse config "{file}": {error}'.format(file=path_info, error=error_msg))
            else:
                LOG.error('Failed to parse config: {error}'.format(error=error_msg))
            raise InvalidConfiguration(error_msg)

        if len(parsed) and parsed[0].getName() == 'file_delimiter':
            # We are parsing an nginx configuration dump: split it into files
            # and continue with the first one as the root config.
            LOG.info('Switched to parse nginx configuration dump.')
            root_filename = self._prepare_dump(parsed)
            self.is_dump = True
            self.cwd = os.path.dirname(root_filename)
            parsed = self.configs[root_filename]

        self.parse_block(parsed, root)
        return root

    def parse_block(self, parsed_block, parent):
        """Convert every raw entry of ``parsed_block`` and append it to ``parent``."""
        for parsed in parsed_block:
            parsed_type = parsed.getName()
            parsed_name = parsed[0]
            parsed_args = parsed[1:]
            if parsed_type == 'include':
                # TODO: WTF?!
                self._resolve_include(parsed_args, parent)
            else:
                directive_inst = self.directive_factory(parsed_type, parsed_name, parsed_args)
                if directive_inst:
                    parent.append(directive_inst)

    def directive_factory(self, parsed_type, parsed_name, parsed_args):
        """
        Instantiate the directive class matching a raw entry.

        :return: a directive/block instance, or None when no class applies.
        """
        klass = self._get_directive_class(parsed_type, parsed_name)
        if not klass:
            return None

        if klass.is_block:
            # For blocks the raw args are [block arguments, children]
            args = [to_native(v).strip() for v in parsed_args[0]]
            children = parsed_args[1]

            inst = klass(parsed_name, args)
            self.parse_block(children, inst)
            return inst
        else:
            args = [to_native(v).strip() for v in parsed_args]
            return klass(parsed_name, args)

    def _get_directive_class(self, parsed_type, parsed_name):
        """Pick a registered override first, then the generic class for the type."""
        if parsed_type in self.directives and parsed_name in self.directives[parsed_type]:
            return self.directives[parsed_type][parsed_name]
        elif parsed_type == 'block':
            return block.Block
        elif parsed_type == 'directive':
            return directive.Directive
        elif parsed_type == 'unparsed_block':
            LOG.warning('Skip unparseable block: "%s"', parsed_name)
            return None
        else:
            return None

    def _init_directives(self):
        """Load the name -> class overrides for blocks and plain directives."""
        self.directives['block'] = block.get_overrides()
        self.directives['directive'] = directive.get_overrides()

    def _resolve_include(self, args, parent):
        """
        Resolve an ``include`` directive whose first argument is the pattern.

        In dump mode the include is looked up among the dumped files;
        otherwise the filesystem is used, unless includes are disallowed.
        """
        pattern = args[0]
        #  TODO(buglloc): maybe file providers?
        if self.is_dump:
            return self._resolve_dump_include(pattern=pattern, parent=parent)
        if not self.allow_includes:
            LOG.debug('Includes are disallowed, skip: {0}'.format(pattern))
            return

        return self._resolve_file_include(pattern=pattern, parent=parent)

    def _resolve_file_include(self, pattern, parent):
        """Parse every existing file matching the glob ``pattern`` under ``cwd``."""
        path = os.path.join(self.cwd, pattern)
        exists = False
        for file_path in glob.iglob(path):
            if not os.path.exists(file_path):
                continue
            exists = True
            include = block.IncludeBlock('include', [file_path])
            parent.append(include)
            self.parse_file(file_path, include)

        if not exists:
            LOG.warning('File not found: {0}'.format(path))

    def _resolve_dump_include(self, pattern, parent):
        """Attach every dumped file whose name matches ``pattern`` under ``cwd``."""
        path = os.path.join(self.cwd, pattern)
        found = False
        for file_path, parsed in self.configs.items():
            if fnmatch.fnmatch(file_path, path):
                found = True
                include = block.IncludeBlock('include', [file_path])
                parent.append(include)
                self.parse_block(parsed, include)

        if not found:
            LOG.warning("File not found: {0}".format(path))

    def _prepare_dump(self, parsed_block):
        """
        Split a parsed dump into per-file entry lists stored in ``self.configs``.

        Each ``file_delimiter`` entry starts a new file; the entries that
        follow belong to it.

        :return: the file name of the first delimiter (the root config).
        """
        filename = ''
        root_filename = ''
        for parsed in parsed_block:
            if parsed.getName() == 'file_delimiter':
                if not filename:
                    root_filename = parsed[0]
                filename = parsed[0]
                self.configs[filename] = []
                continue
            self.configs[filename].append(parsed)
        return root_filename
class RawParser(object):
    """
    A class that parses nginx configuration with pyparsing.

    The grammar is built lazily by the ``script`` property and applied by
    ``parse``.
    """

    def parse(self, data):
        """
        Returns the parsed tree.

        :param data: configuration content, either binary or text.
        :return: pyparsing ``ParseResults``; empty when the content is blank.
        """
        if isinstance(data, six.binary_type):
            # Binary input: honour a UTF-8 BOM, otherwise decode as latin1
            if data[:3] == codecs.BOM_UTF8:
                encoding = 'utf-8-sig'
            else:
                encoding = 'latin1'
            content = data.decode(encoding).strip()
        else:
            content = data.strip()

        if not content:
            # Nothing to parse, return an empty result instead of running the grammar
            return ParseResults()

        # parseAll=True: the whole content must match the grammar
        return self.script.parseString(content, parseAll=True)

    @cached_property
    def script(self):
        """
        Builds the pyparsing grammar and returns its top-level expression.

        :return: ``sub_block``, one or more grouped entries (blocks, includes,
            directives, file delimiters, comments or unparsed blocks).
        """
        # constants
        left_bracket = Suppress("{")
        right_bracket = Suppress("}")
        semicolon = Suppress(";")
        space = White().suppress()
        keyword = Word(alphanums + ".+-_/")
        path = Word(alphanums + ".-_/")
        variable = Word("$_-" + alphanums)
        # unquoted value: a run of parenthesized groups, "${name}" references
        # or any characters other than whitespace, ";", parentheses and braces
        value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
        value_sq = NginxQuotedString(quoteChar="'")
        value_dq = NginxQuotedString(quoteChar='"')
        # quoted alternatives are tried before the unquoted one
        value = (value_dq | value_sq | value_wq)
        # modifier for location uri [ = | ~ | ~* | ^~ ]
        location_modifier = (
            Keyword("=") |
            Keyword("~*") | Keyword("~") |
            Keyword("^~"))
        # modifier for if statement
        if_modifier = Combine(Optional("!") + (
            Keyword("=") |
            Keyword("~*") | Keyword("~") |
            (Literal("-") + (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
        # This ugly workaround needed to parse unquoted regex with nested parentheses
        # so we capture all content between parentheses and then parse it :(
        # TODO(buglloc): may be use something better?
        condition_body = (
            (if_modifier + Optional(space) + value) |
            (variable + Optional(space + if_modifier + Optional(space) + value))
        )
        # The parse action strips the outer parentheses and re-parses the inside
        condition = Regex(r'\((?:[^()\n\r\\]|(?:\(.*\))|(?:\\.))+?\)')\
            .setParseAction(lambda s, l, t: condition_body.parseString(t[0][1:-1]))

        # rules
        include = (
            Keyword("include") +
            space +
            value +
            semicolon
        )("include")

        directive = (
            keyword +
            ZeroOrMore(space + value) +
            semicolon
        )("directive")

        # Header line that starts each file in a configuration dump
        file_delimiter = (
            Suppress("# configuration file ") +
            path +
            Suppress(":")
        )("file_delimiter")

        comment = (
            Regex(r"#.*")
        )("comment").setParseAction(_fix_comment)

        # One "value value ...;" entry inside a hash block
        hash_value = Group(
            value +
            ZeroOrMore(space + value) +
            semicolon
        )("hash_value")

        # Forward declarations: the block rules and sub_block refer to each other
        generic_block = Forward()
        if_block = Forward()
        location_block = Forward()
        hash_block = Forward()
        unparsed_block = Forward()

        # Alternatives are tried in this order; unparsed_block comes last
        sub_block = OneOrMore(Group(if_block |
                                    location_block |
                                    hash_block |
                                    generic_block |
                                    include |
                                    directive |
                                    file_delimiter |
                                    comment |
                                    unparsed_block))

        if_block << (
            Keyword("if") +
            Group(condition) +
            Suppress(Optional(comment)) +
            Group(
                left_bracket +
                Optional(sub_block) +
                right_bracket)
        )("block")

        location_block << (
            Keyword("location") +
            Group(
                Optional(space + location_modifier) +
                Optional(space) + value) +
            Suppress(Optional(comment)) +
            Group(
                left_bracket +
                Optional(sub_block) +
                right_bracket)
        )("block")

        # Block whose body consists of hash_value entries only
        hash_block << (
            keyword +
            Group(OneOrMore(space + value)) +
            Group(
                left_bracket +
                Optional(OneOrMore(hash_value)) +
                right_bracket)
        )("block")

        generic_block << (
            keyword +
            Group(ZeroOrMore(space + value)) +
            Suppress(Optional(comment)) +
            Group(
                left_bracket +
                Optional(sub_block) +
                right_bracket)
        )("block")

        # Block whose braces are matched with nestedExpr instead of sub_block
        unparsed_block << (
            keyword +
            Group(ZeroOrMore(space + value)) +
            nestedExpr(opener="{", closer="}")
        )("unparsed_block")

        return sub_block
def get_header_values(directive):
    """
    Collect the header values carried by a header-setting directive.

    :param directive: object exposing ``name`` and ``args``.
    :return: list of values; for ``add_header`` the second argument (or an
        empty list when it is missing), for any other directive every
        argument that is neither an option nor an option value.
    """
    if directive.name == 'add_header':
        # Slice instead of indexing: an "add_header" without a value must not
        # abort the audit with IndexError.
        return list(directive.args[1:2])

    # See headers more documentation: https://github.com/openresty/headers-more-nginx-module#description
    result = []
    skip_next = False
    for arg in directive.args:
        if arg in ['-s', '-t']:
            # Skip next value, because it's not a header
            skip_next = True
        elif arg.startswith('-'):
            # Skip any options
            pass
        elif skip_next:
            skip_next = False
        else:
            result.append(arg)
    return result
def get_headers(directive):
    """
    Return the set of ``header`` attributes of the "add_header" directives
    found via ``directive.find``; an empty set when nothing is found.
    """
    found = directive.find('add_header')
    names = set()
    if found:
        for item in found:
            names.add(item.header)
    return names
class host_spoofing(Plugin):
    """
    Insecure example:
        proxy_set_header Host $http_host
    """
    summary = 'The proxied Host header may be spoofed.'
    severity = gixy.severity.MEDIUM
    description = 'In most cases "$host" variable are more appropriate, just use it.'
    help_url = 'https://github.com/yandex/gixy/blob/master/docs/en/plugins/hostspoofing.md'
    directives = ['proxy_set_header']

    def audit(self, directive):
        """
        Report a "proxy_set_header Host" whose value is "$http_host" or
        starts with "$arg_".

        :param directive: the "proxy_set_header" directive under audit.
        """
        if len(directive.args) != 2:
            # Not the "field value" form: skip it rather than abort the whole
            # audit with ValueError on the unpacking below.
            return

        name, value = directive.args
        if name.lower() != 'host':
            # Not a "Host" header
            return

        if value == '$http_host' or value.startswith('$arg_'):
            self.add_issue(directive=directive)
def _get_value(directive):
    """
    Pick the argument of ``directive`` that should be inspected.

    "proxy_pass" yields its first argument, any other directive its second
    one; None is returned when that argument is absent.
    """
    args = directive.args
    # Position of the interesting argument depends only on the directive name
    position = 0 if directive.name == 'proxy_pass' else 1
    if len(args) > position:
        return args[position]
    return None
def __init__(self, config):
    """
    Compile the regular expression that recognizes a trusted referrer/origin.

    :param config: plugin options; "domains" and "https_only" are read here.
    """
    super(origins, self).__init__(config)
    if self.config.get('domains') and self.config.get('domains')[0] and self.config.get('domains')[0] != '*':
        # Explicit list of trusted domains: match them literally
        domains = '|'.join(re.escape(d) for d in self.config.get('domains'))
    else:
        domains = r'[^/.]*\.[^/]{2,7}'

    scheme = 'https{http}'.format(http=('?' if not self.config.get('https_only') else ''))
    # The group must be named "domain": audit() reads valid.group('domain').
    # "{{0,10}}" is an escaped quantifier, str.format turns it into "{0,10}".
    regex = r'^{scheme}://(?:[^/.]*\.){{0,10}}(?P<domain>{domains})(?::\d*)?(?:/|\?|$)'.format(
        scheme=scheme,
        domains=domains
    )
    self.valid_re = re.compile(regex)
def __init__(self, config):
    """
    Compile the regular expression used to split a "proxy_pass" value.

    :param config: plugin options, passed through to the base class.
    """
    super(ssrf, self).__init__(config)
    # Groups must be named "scheme" and "host": audit() reads both by name.
    # The scheme part (ending in "://") is optional.
    self.parse_uri_re = re.compile(r'(?P<scheme>[^?#/)]+://)?(?P<host>[^?#/)]+)')
def to_text(obj, encoding='latin1', errors='strict', nonstring='replace'):
    """
    Convert ``obj`` to a text string.

    :param obj: value to convert; text is returned unchanged, binary data is
        decoded.
    :param encoding: codec used to decode binary data.
    :param errors: error handler passed to ``decode``.
    :param nonstring: what to do with values that are not strings:
        'simplerepr' converts ``str(obj)`` (falling back to ``repr(obj)``),
        'passthru' returns ``obj`` untouched, 'replace' returns the
        placeholder text and 'strict' raises.
    :return: text, or ``obj`` itself for 'passthru'.
    :raises TypeError: for 'strict' with a non-string value or for an unknown
        ``nonstring`` value.
    """
    if isinstance(obj, text_type):
        return obj

    if isinstance(obj, binary_type):
        try:
            return obj.decode(encoding, errors)
        except UnicodeDecodeError:
            # decode() raises UnicodeDecodeError, not UnicodeEncodeError, so
            # this is the error that must map to the placeholder.
            return u'failed_to_encode'

    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Giving up
                return u'failed_to_encode'
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'replace':
        return u'failed_to_encode'
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)

    return to_text(value, encoding, errors)
######################################################################################## %description Gixy is a tool to analyze Nginx configuration. The main goal of Gixy is to prevent misconfiguration and automate flaw detection. ######################################################################################## %prep %setup -qn %{name}-%{version} %clean rm -rf %{buildroot} %build python setup.py build %install rm -rf %{buildroot} python setup.py install --prefix=%{_prefix} \ --root=%{buildroot} ######################################################################################## %files %defattr(-,root,root,-) %doc LICENSE AUTHORS README.md docs/* %{python_sitelib}/* %{_bindir}/%{name} ######################################################################################## %changelog * Sun May 21 2017 Yandex Team - 0.1.5-0 - Supported Python 2.6 - Supported multiple config files scanning - Fixed summary count - Fixed symlink resolution - Minor improvements and fixes * Sun May 14 2017 Yandex Team - 0.1.4-0 - Allow processing stdin, file descriptors - Fixed configuration parser * Thu May 11 2017 Yandex Team - 0.1.3-0 - Uses english versions in plugins references * Tue May 02 2017 Yandex Team - 0.1.2-0 - Fixed blank comments parsing - Added "auth_request_set" directive * Sat Apr 29 2017 Yandex Team - 0.1.1-0 - Initial build ================================================ FILE: rpm/python-argparse.spec ================================================ ######################################################################################## %{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")} ######################################################################################## %define pkg_name argparse %define pkg_version r140 ######################################################################################## Summary: Python command-line parsing library Name: 
python-argparse Version: 1.4.0 Release: 0%{?dist} License: Python License Group: Development/Libraries URL: https://github.com/ThomasWaldmann/argparse Source: https://github.com/ThomasWaldmann/%{pkg_name}/archive/%{pkg_version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildArch: noarch BuildRequires: python >= 2.3 python-setuptools Requires: python >= 2.3 python-setuptools Provides: %{name} = %{version}-%{release} ######################################################################################## %description The argparse module makes it easy to write user friendly command line interfaces. The program defines what arguments it requires, and argparse will figure out how to parse those out of sys.argv. The argparse module also automatically generates help and usage messages and issues errors when users give the program invalid arguments. As of Python >= 2.7 and >= 3.2, the argparse module is maintained within the Python standard library. For users who still need to support Python < 2.7 or < 3.2, it is also provided as a separate package, which tries to stay compatible with the module in the standard library, but also supports older Python versions. argparse is licensed under the Python license, for details see LICENSE.txt. 
import re
from setuptools import setup, find_packages

# The version is read from the package source with a regular expression.
with open('gixy/__init__.py', 'r') as fd:
    version_match = re.search(r'^version\s*=\s*[\'"]([^\'"]*)[\'"]',
                              fd.read(), re.MULTILINE)

# re.search() returns None when nothing matches; check the match before
# calling .group() so the RuntimeError below is raised, not AttributeError.
version = version_match.group(1) if version_match else None

if not version:
    raise RuntimeError('Cannot find version information')

setup(
    name='gixy',
    version=version,
    description='Nginx configuration [sec]analyzer',
    keywords='nginx security lint static-analysis',
    author='Yandex IS Team',
    author_email='buglloc@yandex.ru',
    url='https://github.com/yandex/gixy',
    install_requires=[
        'pyparsing>=1.5.5,<3',
        'cached-property>=1.2.0',
        'argparse>=1.4.0;python_version<"3.2"',
        'six>=1.1.0',
        'Jinja2>=2.8',
        'ConfigArgParse>=0.11.0'
    ],
    entry_points={
        'console_scripts': ['gixy=gixy.cli.main:main'],
    },
    test_suite='nose.collector',
    packages=find_packages(exclude=['tests', 'tests.*']),
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'Intended Audience :: Developers',
        'Topic :: Security',
        'Topic :: Software Development :: Quality Assurance',
        'Topic :: Software Development :: Testing'
    ],
    include_package_data=True
)
def assert_in(member, container, msg=None):
    """Assert that ``member in container``; a default message is built when ``msg`` is falsy."""
    message = msg if msg else '{member!r} not found in {container!r}'.format(
        member=member, container=container)
    assert_true(member in container, msg=message)
# Checks how Context.add_var() files variables into the 'index' and 'name'
# stores, and that clear_index_vars() empties only the 'index' store.
@with_setup(setup, tear_down)
def test_add_variables():
    context = push_context(Root())
    # A freshly pushed context starts with both stores empty.
    assert_equals(len(context.variables['index']), 0)
    assert_equals(len(context.variables['name']), 0)

    # Three variables are registered: string key '1', integer key 1 and
    # the plain name 'some'.
    one_str_var = Variable('1')
    context.add_var('1', one_str_var)
    one_int_var = Variable(1)
    context.add_var(1, one_int_var)
    some_var = Variable('some')
    context.add_var('some', some_var)

    # Only one index entry is expected, and it must be the integer-keyed
    # variable; the string '1' must not show up in the 'name' store either.
    # NOTE(review): presumably add_var() treats the numeric string '1' as
    # index 1 and the later integer registration overwrites it - confirm
    # against Context.add_var.
    assert_equals(len(context.variables['index']), 1)
    assert_equals(context.variables['index'][1], one_int_var)
    assert_equals(len(context.variables['name']), 1)
    assert_equals(context.variables['name']['some'], some_var)
    # Clearing index variables must leave the named variables untouched.
    context.clear_index_vars()
    assert_equals(len(context.variables['index']), 0)
    assert_equals(len(context.variables['name']), 1)
    assert_equals(context.variables['name']['some'], some_var)
assert_true(context.get_var('document_uri')) assert_true(context.get_var('arg_asdsadasd')) assert_true(context.get_var('args')) @with_setup(setup, tear_down) def test_context_depend_variables(): push_context(Root()) assert_equals(len(get_context().variables['index']), 0) assert_equals(len(get_context().variables['name']), 0) get_context().add_var(1, Variable(1, value='one')) get_context().add_var('some', Variable('some', value='some')) assert_equals(get_context().get_var(1).value, 'one') assert_equals(get_context().get_var('some').value, 'some') # Checks top context variables are still exists push_context(Root()) assert_equals(get_context().get_var(1).value, 'one') assert_equals(get_context().get_var('some').value, 'some') # Checks variable overriding get_context().add_var('some', Variable('some', value='some_new')) get_context().add_var('foo', Variable('foo', value='foo')) assert_not_equals(get_context().get_var('some').value, 'some') assert_equals(get_context().get_var('some').value, 'some_new') assert_equals(get_context().get_var('foo').value, 'foo') assert_equals(get_context().get_var(1).value, 'one') # Checks variables after restore previous context pop_context() assert_not_equals(get_context().get_var('some').value, 'some_new') assert_equals(get_context().get_var('some').value, 'some') assert_equals(get_context().get_var('foo'), None) assert_equals(get_context().get_var(1).value, 'one') @with_setup(setup, tear_down) def test_push_failed_with_regexp_py35_gixy_10(): push_context(Root()) assert_equals(len(get_context().variables['index']), 0) assert_equals(len(get_context().variables['name']), 0) regexp = Regexp('^/some/(.*?)') for name, group in regexp.groups.items(): get_context().add_var(name, Variable(name=name, value=group)) push_context(Root()) ================================================ FILE: tests/core/test_regexp.py ================================================ from nose.tools import assert_true, assert_false, assert_equals from gixy.core.regexp 
def test_groups_names():
    # Generator test: each case is (pattern, expected group keys).
    # Group 0 (the whole match) is always present, numbered groups follow,
    # and named groups appear under their names as well.
    #
    # The named groups are spelled out in full: "(?'name'...)",
    # "(?P<name>...)" and "(?<name>...)". Without the <name> part the
    # patterns are invalid and cannot produce the expected 'outer', 'inner'
    # and 'proxy' keys.
    cases = (
        ('foo', [0]),
        ('(1)(2)(?:3)', [0, 1, 2]),
        ('(1)((2)|(?:3))', [0, 1, 2, 3]),
        ("(?'pcre_7'1as)(?P<outer>(?<inner>2)|(?:3))", [0, 1, 2, 3, 'pcre_7', 'outer', 'inner']),
        ('/proxy/(?<proxy>.*)$', [0, 1, 'proxy'])
    )
    for regexp, groups in cases:
        yield check_groups_names, regexp, groups


def test_to_string():
    # Generator test: each case is (pattern, expected str(Regexp(pattern))).
    # Expected values are raw strings so that backslashes are literal and no
    # invalid escape sequence (such as "\w" in a normal string) is used.
    cases = (
        (r'foo', 'foo'),
        (r'(1)(2)(?:3)', '(1)(2)(?:3)'),
        (r'(1)((2)|(?:3))', '(1)((?:(2)|(?:3)))'),
        (r'\w|1|3-5|[a-z]', r'(?:[\w]|1|3\-5|[a-z])'),
        (r'(1|(?:3)|([4-6]))', '((?:1|(?:3)|([4-6])))'),
        # Named group: the expected output drops the name, so the name itself
        # is arbitrary here.
        # NOTE(review): 'aaa' is a placeholder name - confirm against upstream.
        (r'(1|(?:3)|(?P<aaa>[4-6]))', '((?:1|(?:3)|([4-6])))'),
        (r'^sss', '^sss'),
        (r'(^bb|11)$', '((?:^bb|11))$'),
        (r'(http|https)', '(http(?:|s))'),
        (r'1*', '1*'),
        (r'1*?', '1*?'),
        (r'1+', '1+'),
    )
    for regexp, string in cases:
        yield check_to_string, regexp, string
False), (r'foo', 'f', True), (r'^foo', 'f', False), (r'(^foo)', 'f', False), (r'(^foo)', 'f', True), (r'(^foo|g)', 'f', True), (r'(^foo|g)', 'g', True), (r'(^foo|g)', 'q', False), (r'^[^/]+', '\n', True), (r'/[^/]+', '/', True), (r'((a))', 'a', False), (r'((a))', 'b', False), (r'^[a-z]{0}0', '0', False), (r'^[a-z]{1}0', 'a', False), ) for case in cases: regexp, check, strict = case yield check_positive_startswith, regexp, check, strict def test_negative_startswith(): cases = ( (r'foo', '\n', False), (r'foo', 'o', True), (r'^foo', 'o', False), (r'(^foo)', 'q', False), (r'(^foo)', 'q', True), (r'(^foo|g)', 'q', True), (r'(^foo|g)', 'o', True), (r'(^foo|g)', '\n', False), (r'^[^/]+', '/', True), (r'/[^/]+', 'a', True), (r'((abc)|(ss))', 'b', True), (r'^[a-z]{0}0', 'a', False), (r'^[a-z]{0}0', 'g', False), ) for case in cases: regexp, check, strict = case yield check_negative_startswith, regexp, check, strict def test_positive_must_contain(): cases = ( (r'abc', 'a'), (r'abc', 'b'), (r'abc', 'c'), (r'3+', '3'), (r'[0]', '0'), (r'([0])', '0'), (r'(?:[0])', '0'), (r'(?:[0])|0|((((0))))', '0'), ) for case in cases: regexp, char = case yield check_positive_must_contain, regexp, char def test_negative_must_contain(): cases = ( (r'[a-z]', '1'), (r'2{0}1', '2'), (r'3?', '3'), (r'3*', '3'), (r'3*?', '3'), (r'3+a', 'b'), (r'[a-z]', 'a'), (r'(?:a|b)', 'a'), (r'(?:a|b)', 'b'), (r'(/|:|[a-z])', '/'), (r'(/|:|[a-z])', 'z'), (r'[^a-z]', '\n'), (r'[^0]', '0'), (r'[^0-2]', '0'), (r'[^0123a-z]', 'z'), (r'\s', '\x20'), (r'[^\s]', '\n'), (r'\d', '3'), (r'[^\d]', 'a'), (r'\w', 'a'), (r'[^\w]', '\n'), (r'\W', '\n'), (r'[^\W]', 'a'), (r'.', '\n') ) for case in cases: regexp, char = case yield check_negative_must_contain, regexp, char def test_positive_must_startswith(): cases = ( (r'foo', 'f', True), (r'^foo', 'f', False), (r'(^foo)', 'f', True), (r'^((a))', 'a', False), (r'((a))', 'a', True), (r'^[a-z]{0}0', '0', False), (r'^a{1}0', 'a', False), ) for case in cases: regexp, check, strict = 
def test_group_can_contains():
    # The capture group must be named "action": the assertions below look it
    # up both by name and by index (1). Without the <action> part the pattern
    # is invalid and reg.group('action') cannot resolve.
    source = '/some/(?P<action>[^/:.]+)/'
    reg = Regexp(source)

    # The negated class [^/:.] does not exclude a newline, so the whole
    # regexp and every view of the group may contain '\n'.
    assert_true(reg.can_contain('\n'),
                'Whole regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))

    assert_true(reg.group(0).can_contain('\n'),
                'Group 0 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))

    assert_true(reg.group('action').can_contain('\n'),
                'Group "action" from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))

    assert_true(reg.group(1).can_contain('\n'),
                'Group 1 from regex "{src}" can contains {sym!r}'.format(src=source, sym='\\n'))

    # '/' is explicitly excluded by the character class of the group.
    assert_false(reg.group('action').can_contain('/'),
                 'Group "action" from regex "{src}" CAN\'T (!) contain {sym!r}'.format(src=source, sym='/'))
assert_false(reg.can_startswith(char), '{reg!r} (case insensitive) can\'t start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_groups_names(regexp, groups): reg = Regexp(regexp) assert_equals(set(reg.groups.keys()), set(groups)) def check_to_string(regexp, string): reg = Regexp(regexp) assert_equals(str(reg), string) def check_positive_must_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_true(reg.must_contain(char), '{reg!r} must contain with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_true(reg.must_contain(char), '{reg!r} (case insensitive) must contain with {chr!r}'.format(reg=regexp, chr=char)) def check_negative_must_contain(regexp, char): reg = Regexp(regexp, case_sensitive=True) assert_false(reg.must_contain(char), '{reg!r} must NOT contain with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False) char = char.upper() assert_false(reg.must_contain(char), '{reg!r} (case insensitive) must NOT contain with {chr!r}'.format(reg=regexp, chr=char)) def check_positive_must_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_true(reg.must_startswith(char), '{reg!r} MUST start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_true(reg.must_startswith(char), '{reg!r} (case insensitive) MUST start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_negative_must_startswith(regexp, char, strict): reg = Regexp(regexp, case_sensitive=True, strict=strict) assert_false(reg.must_startswith(char), '{reg!r} MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char)) reg = Regexp(regexp, case_sensitive=False, strict=strict) char = char.upper() assert_false(reg.must_startswith(char), '{reg!r} (case insensitive) MUST NOT start\'s with {chr!r}'.format(reg=regexp, chr=char)) def check_generate(regexp, values): reg = 
Regexp(regexp) assert_equals(sorted(reg.generate('|', anchored=True)), sorted(values)) ================================================ FILE: tests/core/test_variable.py ================================================ from nose.tools import assert_true, assert_false, assert_equals, with_setup from gixy.core.context import get_context, push_context, purge_context from gixy.directives.block import Root from gixy.core.regexp import Regexp from gixy.core.variable import Variable def setup(): push_context(Root()) def tear_down(): purge_context() @with_setup(setup, tear_down) def test_literal(): var = Variable(name='simple', value='$uri', have_script=False) assert_false(var.depends) assert_false(var.regexp) assert_equals(var.value, '$uri') assert_false(var.can_startswith('$')) assert_false(var.can_contain('i')) assert_true(var.must_contain('$')) assert_true(var.must_contain('u')) assert_false(var.must_contain('a')) assert_true(var.must_startswith('$')) assert_false(var.must_startswith('u')) @with_setup(setup, tear_down) def test_regexp(): var = Variable(name='simple', value=Regexp('^/.*')) assert_false(var.depends) assert_true(var.regexp) assert_true(var.can_startswith('/')) assert_false(var.can_startswith('a')) assert_true(var.can_contain('a')) assert_false(var.can_contain('\n')) assert_true(var.must_contain('/')) assert_false(var.must_contain('a')) assert_true(var.must_startswith('/')) assert_false(var.must_startswith('a')) @with_setup(setup, tear_down) def test_script(): get_context().add_var('foo', Variable(name='foo', value=Regexp('.*'))) var = Variable(name='simple', value='/$foo') assert_true(var.depends) assert_false(var.regexp) assert_false(var.can_startswith('/')) assert_false(var.can_startswith('a')) assert_true(var.can_contain('/')) assert_true(var.can_contain('a')) assert_false(var.can_contain('\n')) assert_true(var.must_contain('/')) assert_false(var.must_contain('a')) assert_true(var.must_startswith('/')) assert_false(var.must_startswith('a')) 
@with_setup(setup, tear_down) def test_regexp_boundary(): var = Variable(name='simple', value=Regexp('.*'), boundary=Regexp('/[a-z]', strict=True)) assert_false(var.depends) assert_true(var.regexp) assert_true(var.can_startswith('/')) assert_false(var.can_startswith('a')) assert_false(var.can_contain('/')) assert_true(var.can_contain('a')) assert_false(var.can_contain('0')) assert_false(var.can_contain('\n')) assert_true(var.must_contain('/')) assert_false(var.must_contain('a')) assert_true(var.must_startswith('/')) assert_false(var.must_startswith('a')) @with_setup(setup, tear_down) def test_script_boundary(): get_context().add_var('foo', Variable(name='foo', value=Regexp('.*'), boundary=Regexp('[a-z]', strict=True))) var = Variable(name='simple', value='/$foo', boundary=Regexp('[/a-z0-9]', strict=True)) assert_true(var.depends) assert_false(var.regexp) assert_false(var.can_startswith('/')) assert_false(var.can_startswith('a')) assert_false(var.can_contain('/')) assert_true(var.can_contain('a')) assert_false(var.can_contain('\n')) assert_false(var.can_contain('0')) assert_true(var.must_contain('/')) assert_false(var.must_contain('a')) assert_true(var.must_startswith('/')) assert_false(var.must_startswith('a')) ================================================ FILE: tests/directives/__init__.py ================================================ ================================================ FILE: tests/directives/test_block.py ================================================ from nose.tools import assert_equals, assert_true, assert_false from tests.asserts import assert_is_instance, assert_is_none, assert_is_not_none from gixy.parser.nginx_parser import NginxParser from gixy.directives.block import * # TODO(buglloc): what about include block? 
def _get_parsed(config):
    """Parse ``config`` (includes disabled) and return the first top-level child."""
    parser = NginxParser(cwd='', allow_includes=False)
    tree = parser.parse(config)
    return tree.children[0]
'$some') assert_is_none(directive.operand) assert_is_none(directive.value) def test_if_modifier(): config = ''' if (-f /some) { } ''' directive = _get_parsed(config) assert_is_instance(directive, IfBlock) assert_equals(directive.operand, '-f') assert_equals(directive.value, '/some') assert_is_none(directive.variable) def test_if_variable(): config = ''' if ($http_some = '/some') { } ''' directive = _get_parsed(config) assert_is_instance(directive, IfBlock) assert_equals(directive.variable, '$http_some') assert_equals(directive.operand, '=') assert_equals(directive.value, '/some') def test_block_some_flat(): config = ''' some { default_type application/octet-stream; sendfile on; if (-f /some/) { keepalive_timeout 65; } } ''' directive = _get_parsed(config) for d in ['default_type', 'sendfile', 'keepalive_timeout']: c = directive.some(d, flat=True) assert_is_not_none(c) assert_equals(c.name, d) def test_block_some_not_flat(): config = ''' some { default_type application/octet-stream; sendfile on; if (-f /some/) { keepalive_timeout 65; } } ''' directive = _get_parsed(config) c = directive.some('keepalive_timeout', flat=False) assert_is_none(c) def test_block_find_flat(): config = ''' some { directive 1; if (-f /some/) { directive 2; } } ''' directive = _get_parsed(config) finds = directive.find('directive', flat=True) assert_equals(len(finds), 2) assert_equals([x.name for x in finds], ['directive', 'directive']) assert_equals([x.args[0] for x in finds], ['1', '2']) def test_block_find_not_flat(): config = ''' some { directive 1; if (-f /some/) { directive 2; } } ''' directive = _get_parsed(config) finds = directive.find('directive', flat=False) assert_equals(len(finds), 1) assert_equals([x.name for x in finds], ['directive']) assert_equals([x.args[0] for x in finds], ['1']) def test_block_map(): config = ''' map $some_var $some_other_var { a b; default c; } ''' directive = _get_parsed(config) assert_is_instance(directive, MapBlock) assert_true(directive.is_block) 
assert_false(directive.self_context) assert_true(directive.provide_variables) assert_equals(directive.variable, 'some_other_var') def test_block_geo_two_vars(): config = ''' geo $some_var $some_other_var { 1.2.3.4 b; default c; } ''' directive = _get_parsed(config) assert_is_instance(directive, GeoBlock) assert_true(directive.is_block) assert_false(directive.self_context) assert_true(directive.provide_variables) assert_equals(directive.variable, 'some_other_var') def test_block_geo_one_var(): config = ''' geo $some_var { 5.6.7.8 d; default e; } ''' directive = _get_parsed(config) assert_is_instance(directive, GeoBlock) assert_true(directive.is_block) assert_false(directive.self_context) assert_true(directive.provide_variables) assert_equals(directive.variable, 'some_var') ================================================ FILE: tests/directives/test_directive.py ================================================ from nose.tools import assert_equals, assert_false, assert_true from tests.asserts import assert_is_instance from gixy.parser.nginx_parser import NginxParser from gixy.directives.directive import * def _get_parsed(config): root = NginxParser(cwd='', allow_includes=False).parse(config) return root.children[0] def test_directive(): config = 'some "foo" "bar";' directive = _get_parsed(config) assert_is_instance(directive, Directive) assert_equals(directive.name, 'some') assert_equals(directive.args, ['foo', 'bar']) assert_equals(str(directive), 'some foo bar;') def test_add_header(): config = 'add_header "X-Foo" "bar";' directive = _get_parsed(config) assert_is_instance(directive, AddHeaderDirective) assert_equals(directive.name, 'add_header') assert_equals(directive.args, ['X-Foo', 'bar']) assert_equals(directive.header, 'x-foo') assert_equals(directive.value, 'bar') assert_false(directive.always) assert_equals(str(directive), 'add_header X-Foo bar;') def test_add_header_always(): config = 'add_header "X-Foo" "bar" always;' directive = _get_parsed(config) 
assert_is_instance(directive, AddHeaderDirective) assert_equals(directive.name, 'add_header') assert_equals(directive.args, ['X-Foo', 'bar', 'always']) assert_equals(directive.header, 'x-foo') assert_equals(directive.value, 'bar') assert_true(directive.always) assert_equals(str(directive), 'add_header X-Foo bar always;') def test_set(): config = 'set $foo bar;' directive = _get_parsed(config) assert_is_instance(directive, SetDirective) assert_equals(directive.name, 'set') assert_equals(directive.args, ['$foo', 'bar']) assert_equals(directive.variable, 'foo') assert_equals(directive.value, 'bar') assert_equals(str(directive), 'set $foo bar;') assert_true(directive.provide_variables) def test_rewrite(): config = 'rewrite ^ http://some;' directive = _get_parsed(config) assert_is_instance(directive, RewriteDirective) assert_equals(directive.name, 'rewrite') assert_equals(directive.args, ['^', 'http://some']) assert_equals(str(directive), 'rewrite ^ http://some;') assert_true(directive.provide_variables) assert_equals(directive.pattern, '^') assert_equals(directive.replace, 'http://some') assert_equals(directive.flag, None) def test_rewrite_flags(): config = 'rewrite ^/(.*)$ http://some/$1 redirect;' directive = _get_parsed(config) assert_is_instance(directive, RewriteDirective) assert_equals(directive.name, 'rewrite') assert_equals(directive.args, ['^/(.*)$', 'http://some/$1', 'redirect']) assert_equals(str(directive), 'rewrite ^/(.*)$ http://some/$1 redirect;') assert_true(directive.provide_variables) assert_equals(directive.pattern, '^/(.*)$') assert_equals(directive.replace, 'http://some/$1') assert_equals(directive.flag, 'redirect') def test_root(): config = 'root /var/www/html;' directive = _get_parsed(config) assert_is_instance(directive, RootDirective) assert_equals(directive.name, 'root') assert_equals(directive.args, ['/var/www/html']) assert_equals(str(directive), 'root /var/www/html;') assert_true(directive.provide_variables) assert_equals(directive.path, 
def test_directive():
    # Generator test: every config snippet is parsed and its first child must
    # be an instance of each class listed at the same position in `expected`.
    configs = [
        'access_log syslog:server=127.0.0.1,tag=nginx_sentry toolsformat;',
        'user http;',
        'internal;',
        'set $foo "bar";',
        "set $foo 'bar';",
        'proxy_pass http://unix:/run/sock.socket;',
        'rewrite ^/([a-zA-Z0-9]+)$ /$1/${arg_v}.pb break;'
    ]

    # One entry per config, in the same order. Both `set` forms (double and
    # single quoted) must produce a SetDirective.
    expected = [
        [Directive],
        [Directive],
        [Directive],
        [Directive, SetDirective],
        [Directive, SetDirective],
        [Directive],
        [Directive, RewriteDirective]
    ]

    # zip() would silently drop trailing cases on a length mismatch, so fail
    # loudly instead of skipping checks.
    if len(configs) != len(expected):
        raise AssertionError('configs and expected must have the same length')

    # Must be `yield`, not `return`: with `return` the function is not a
    # generator, so the runner never executes assert_config for any case.
    for config, classes in zip(configs, expected):
        yield assert_config, config, classes
def test_directive():
    # Raw-parser check for plain (non-block) directives.
    # The config is a raw string so the regex backslashes in server_name are
    # literal characters instead of invalid escape sequences.
    config = r'''
access_log syslog:server=127.0.0.1,tag=nginx_sentry toolsformat;
user http;
internal;
set $foo "bar";
set $foo 'bar';
proxy_pass http://unix:/run/sock.socket;
rewrite ^/([a-zA-Z0-9]+)$ /$1/${arg_v}.pb break;
    server_name some.tld ~^(www\.)?podberi.(?:ru|com|ua)$ ~^(www\.)?guru.yandex.ru$;
    '''

    # Each directive is expected as [name, arg, ...]; quotes around quoted
    # arguments are stripped by the parser.
    expected = [
        ['access_log', 'syslog:server=127.0.0.1,tag=nginx_sentry', 'toolsformat'],
        ['user', 'http'],
        ['internal'],
        ['set', '$foo', 'bar'],
        ['set', '$foo', 'bar'],
        ['proxy_pass', 'http://unix:/run/sock.socket'],
        ['rewrite', '^/([a-zA-Z0-9]+)$', '/$1/${arg_v}.pb', 'break'],
        ['server_name', 'some.tld', r'~^(www\.)?podberi.(?:ru|com|ua)$', r'~^(www\.)?guru.yandex.ru$']
    ]

    assert_config(config, expected)
def test_location_simple():
    # Raw-parser check for `location` blocks with every modifier form.
    # The config is a raw string so the regex backslash in the last location
    # is a literal character instead of an invalid escape sequence.
    config = r'''
location / {
}
location = /foo {
}
location ~ ^/bar {
}
location ~* ^/baz$ {
}
location ^~ ^/bazz {
}
# Whitespace may be omitted:((
location ~\.(js|css)$ {
}
    '''

    # Blocks are expected as [name, [args], [children]]; the comment line is
    # expected as a single-element entry holding its text.
    expected = [['location', ['/'], []],
                ['location', ['=', '/foo'], []],
                ['location', ['~', '^/bar'], []],
                ['location', ['~*', '^/baz$'], []],
                ['location', ['^~', '^/bazz'], []],
                ['Whitespace may be omitted:(('],
                ['location', ['~', r'\.(js|css)$'], []]]

    assert_config(config, expected)
def test_if():
    # Raw-parser check for the various `if (...)` condition forms.
    # The config is a raw string so regex backslashes (\( \) \. \d) are
    # literal characters instead of invalid escape sequences.
    config = r'''
# http://nginx.org/ru/docs/http/ngx_http_rewrite_module.html#if

if ($http_user_agent ~ MSIE) {
    rewrite ^(.*)$ /msie/$1 break;
}

if ($http_cookie ~* "id=([^;]+)(?:;|$)") {
    set $id $1;
}

if ($request_method = POST) {
    return 405;
}

if ($slow) {
    limit_rate 10k;
}

if ($invalid_referer) {
    return 403;
}

if (!-e "/var/data/$dataset") {
    return 503;
}

if ($https_or_slb = (by_\(sl\)b|https)) {
}

if ($host ~* (lori|rage2)\.yandex\.(ru|ua|com|com\.tr)) {
    set $x_frame_options ALLOW;
}

if ($request_filename ~* ^.*?/(\d+_)([^/]+)$) {
}

if ($http_user_agent ~* "^WordPress.*; verifying pingback") {
}

if ($foo = "BAR") {
    rewrite ^(.*)$ /bar;
}
    '''

    # Blocks are expected as ['if', [condition tokens], [children]]; the
    # leading comment is expected as a single-element entry holding its text.
    expected = [
        ['http://nginx.org/ru/docs/http/ngx_http_rewrite_module.html#if'],
        ['if', ['$http_user_agent', '~', 'MSIE'], [
            ['rewrite', '^(.*)$', '/msie/$1', 'break']
        ]],
        ['if', ['$http_cookie', '~*', 'id=([^;]+)(?:;|$)'], [
            ['set', '$id', '$1']
        ]],
        ['if', ['$request_method', '=', 'POST'], [
            ['return', '405']
        ]],
        ['if', ['$slow'], [
            ['limit_rate', '10k']
        ]],
        ['if', ['$invalid_referer'], [
            ['return', '403']
        ]],
        ['if', ['!-e', '/var/data/$dataset'], [
            ['return', '503']
        ]],
        ['if', ['$https_or_slb', '=', r'(by_\(sl\)b|https)'], [
        ]],
        ['if', ['$host', '~*', r'(lori|rage2)\.yandex\.(ru|ua|com|com\.tr)'], [
            ['set', '$x_frame_options', 'ALLOW']
        ]],
        ['if', ['$request_filename', '~*', r'^.*?/(\d+_)([^/]+)$'], [
        ]],
        ['if', ['$http_user_agent', '~*', '^WordPress.*; verifying pingback'], [
        ]],
        ['if', ['$foo', '=', 'BAR'], [
            ['rewrite', '^(.*)$', '/bar']
        ]]
    ]

    assert_config(config, expected)
expected) def test_issue_11(): config = ''' init_by_lua_block { tvm = require "nginx.tvm" } ''' expected = [ ['init_by_lua_block', [], ['tvm', '=', 'require', '"nginx.tvm"']] ] assert_config(config, expected) def test_lua_block(): config = ''' # https://github.com/openresty/lua-nginx-module#typical-uses location = /lua { # MIME type determined by default_type: default_type 'text/plain'; content_by_lua_block { local res = ngx.location.capture("/some_other_location") if res then ngx.say("status: ", res.status) ngx.say("body:") ngx.print(res.body) end } } ''' expected = [ ['https://github.com/openresty/lua-nginx-module#typical-uses'], ['location', ['=', '/lua'], [ ['MIME type determined by default_type:'], ['default_type', 'text/plain'], ['content_by_lua_block', [], [ 'local', 'res', '=', 'ngx.location.capture(', '"/some_other_location"', ')', 'if', 'res', 'then', 'ngx.say(', '"status: "', ',', 'res.status)', 'ngx.say(', '"body:"', ')', 'ngx.print(res.body)', 'end']] ]] ] assert_config(config, expected) def test_lua_block_brackets(): config = ''' location = /foo { rewrite_by_lua_block { res = ngx.location.capture("/memc", { args = { cmd = "incr", key = ngx.var.uri } } ) } proxy_pass http://blah.blah.com; } ''' expected = [ ['location', ['=', '/foo'], [ ['rewrite_by_lua_block', [], [ 'res', '=', 'ngx.location.capture(', '"/memc"', ',', ['args', '=', ['cmd', '=', '"incr"', ',', 'key', '=', 'ngx.var.uri']], ')']], ['proxy_pass', 'http://blah.blah.com'] ]] ] assert_config(config, expected) def test_file_delims(): config = ''' # configuration file /etc/nginx/nginx.conf: http { include sites/*.conf; } # configuration file /etc/nginx/sites/default.conf: server { } ''' expected = [ ['/etc/nginx/nginx.conf'], ['http', [], [ ['include', 'sites/*.conf'] ]], ['/etc/nginx/sites/default.conf'], ['server', [], []] ] assert_config(config, expected) def test_comments(): config = ''' # Some comment add_header X-Some-Comment some; # # Comment with padding # add_header X-Padding-Comment 
padding; # add_header X-Blank-Comment blank; if (1) # Comment { add_header X-Inline blank; } ''' expected = [ ['Some comment'], ['add_header', 'X-Some-Comment', 'some'], [''], ['Comment with padding'], [''], ['add_header', 'X-Padding-Comment', 'padding'], [''], ['add_header', 'X-Blank-Comment', 'blank'], ['if', ['1'], [ ['add_header', 'X-Inline', 'blank'], ]], ] assert_config(config, expected) def test_upstream_dot(): config = ''' upstream test.mysite.com { server 127.0.0.1:9009; } ''' expected = [ ['upstream', ['test.mysite.com'], [ ['server', '127.0.0.1:9009'] ]], ] assert_config(config, expected) def test_empty_config(): config = ''' ''' expected = [] assert_config(config, expected) def test_utfbom_decoding(): config = b'''\xef\xbb\xbf add_header X-Test "Windows-1251"; ''' expected = [ ['add_header', 'X-Test', 'Windows-1251'] ] assert_config(config, expected) def test_national_comment_decoding(): config = b''' # \xeb\xff-\xeb\xff-\xeb\xff = Lya-lya-lya add_header X-Test "Windows-1251"; ''' actual = RawParser().parse(config) assert_equals(len(actual.asList()), 2) def assert_config(config, expected): actual = RawParser().parse(config) assert_equals(actual.asList(), expected) ================================================ FILE: tests/plugins/__init__.py ================================================ ================================================ FILE: tests/plugins/simply/add_header_multiline/add_header.conf ================================================ add_header Content-Security-Policy " default-src: 'none'; font-src data: https://yastatic.net;"; ================================================ FILE: tests/plugins/simply/add_header_multiline/add_header_fp.conf ================================================ add_header X-Foo foo; ================================================ FILE: tests/plugins/simply/add_header_multiline/config.json ================================================ { "severity": "LOW" } ================================================ 
FILE: tests/plugins/simply/add_header_multiline/more_set_headers.conf ================================================ more_set_headers -t 'text/html text/plain' 'X-Foo: Bar multiline'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_fp.conf ================================================ more_set_headers -t 'text/html text/plain' 'X-Foo: Bar multiline'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_multiple.conf ================================================ more_set_headers -t 'text/html text/plain' 'X-Foo: some multiline' 'X-Bar: some multiline' 'X-Baz: some multiline'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_replace.conf ================================================ more_set_headers -r 'Foo: multiline'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_replace_fp.conf ================================================ more_set_headers -r 'Foo: multiline'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_status_fp.conf ================================================ more_set_headers -s 404 -s '500 503' 'Foo: bar'; ================================================ FILE: tests/plugins/simply/add_header_multiline/more_set_headers_type_fp.conf ================================================ more_set_headers -t 'text/html text/plain' 'X-Foo: some'; ================================================ FILE: tests/plugins/simply/add_header_redefinition/config.json ================================================ { "severity": "MEDIUM" } ================================================ FILE: tests/plugins/simply/add_header_redefinition/duplicate_fp.conf ================================================ http { add_header X-Frame-Options 
"DENY" always; server { location /new-headers { add_header X-Frame-Options "DENY" always; add_header X-Foo foo; } } } ================================================ FILE: tests/plugins/simply/add_header_redefinition/if_replaces.conf ================================================ add_header X-Frame-Options "DENY" always; if (1) { add_header X-Foo foo; } ================================================ FILE: tests/plugins/simply/add_header_redefinition/location_replaces.conf ================================================ add_header X-Frame-Options "DENY" always; location /new-headers { add_header X-Foo foo; } ================================================ FILE: tests/plugins/simply/add_header_redefinition/nested_block.conf ================================================ server { add_header X-Frame-Options "DENY" always; location / { location /some { add_header X-Frame-Options "DENY" always; } location /another { add_header X-Foo foo; } } } ================================================ FILE: tests/plugins/simply/add_header_redefinition/non_block_fp.conf ================================================ add_header X-Frame-Options "DENY" always; server "some"; add_header X-Foo foo; ================================================ FILE: tests/plugins/simply/add_header_redefinition/not_secure_both_fp.conf ================================================ add_header X-Bar bar; location /new-headers { add_header X-Foo foo; } ================================================ FILE: tests/plugins/simply/add_header_redefinition/not_secure_outer_fp.conf ================================================ add_header X-Bar bar; location /new-headers { add_header X-Frame-Options "DENY" always; } ================================================ FILE: tests/plugins/simply/add_header_redefinition/step_replaces.conf ================================================ http { add_header X-Frame-Options "DENY" always; server { location /new-headers { add_header X-Foo foo; } } } 
================================================ FILE: tests/plugins/simply/alias_traversal/config.json ================================================ { "severity": ["MEDIUM", "HIGH"] } ================================================ FILE: tests/plugins/simply/alias_traversal/nested.conf ================================================ location /files/ { location /files/images { alias /home/; } } ================================================ FILE: tests/plugins/simply/alias_traversal/nested_fp.conf ================================================ location /files/ { location /files/images { } alias /home/; } ================================================ FILE: tests/plugins/simply/alias_traversal/not_slashed_alias.conf ================================================ location /files { alias /home; } ================================================ FILE: tests/plugins/simply/alias_traversal/not_slashed_alias_fp.conf ================================================ location /files/ { alias /home; } ================================================ FILE: tests/plugins/simply/alias_traversal/simple.conf ================================================ location /files { alias /home/; } ================================================ FILE: tests/plugins/simply/alias_traversal/simple_fp.conf ================================================ location /files/ { alias /home/; } ================================================ FILE: tests/plugins/simply/alias_traversal/slashed_alias.conf ================================================ location /files { alias /home/; } ================================================ FILE: tests/plugins/simply/alias_traversal/slashed_alias_fp.conf ================================================ location /files/ { alias /home/; } ================================================ FILE: tests/plugins/simply/host_spoofing/config.json ================================================ { "severity": "MEDIUM" } 
================================================ FILE: tests/plugins/simply/host_spoofing/http_fp.conf ================================================ proxy_set_header Host $host; ================================================ FILE: tests/plugins/simply/host_spoofing/http_host.conf ================================================ proxy_set_header Host $http_host; ================================================ FILE: tests/plugins/simply/host_spoofing/http_host_diff_case.conf ================================================ proxy_set_header HoSt $http_host; ================================================ FILE: tests/plugins/simply/host_spoofing/some_arg.conf ================================================ proxy_set_header host $arg_host; ================================================ FILE: tests/plugins/simply/http_splitting/add_header_uri.conf ================================================ add_header X-Uri $uri; ================================================ FILE: tests/plugins/simply/http_splitting/config.json ================================================ { "severity": "HIGH" } ================================================ FILE: tests/plugins/simply/http_splitting/dont_report_not_resolved_var_fp.conf ================================================ location ~ /proxy/(a|b)/(\W*)$ { proxy_pass http://storage/$some; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_from_location_var.conf ================================================ location ~ /proxy/(a|b)/(\W*)$ { proxy_pass http://storage/$2; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_from_location_var_var.conf ================================================ location ~ /proxy/(a|b)/(\W*)$ { set $p $2; proxy_pass http://storage/$p; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_from_location_var_var_fp.conf 
================================================ location ~ /proxy/(a|b)/(\W*)$ { set $p $1; proxy_pass http://storage/$p; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_from_location_var_var_var.conf ================================================ location ~ /proxy/(a|b)/(?<p>\W*)$ { set $upstream "http://$1/$p?"; proxy_pass $upstream; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_pass_cr_fp.conf ================================================ location ~* ^/test/(.*) { proxy_pass http://10.10.10.10/$1; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_pass_ducument_uri.conf ================================================ proxy_pass http://upstream$document_uri; ================================================ FILE: tests/plugins/simply/http_splitting/proxy_pass_lf.conf ================================================ location ~* ^/test/([^/]+)/ { proxy_pass http://10.10.10.10/$1; } ================================================ FILE: tests/plugins/simply/http_splitting/proxy_set_header_ducument_uri.conf ================================================ proxy_set_header "X-Original-Uri" $document_uri; ================================================ FILE: tests/plugins/simply/http_splitting/return_403_fp.conf ================================================ return 403; ================================================ FILE: tests/plugins/simply/http_splitting/return_request_uri_fp.conf ================================================ return 301 https://some$request_uri; ================================================ FILE: tests/plugins/simply/http_splitting/rewrite_extract_fp.conf ================================================ rewrite ^/proxy/(a|b)/(?<path>\W*)$ http://storage/$path redirect; ================================================ FILE: tests/plugins/simply/http_splitting/rewrite_uri.conf ================================================ rewrite ^ http://some$uri; ================================================ FILE: tests/plugins/simply/http_splitting/rewrite_uri_after_var.conf ================================================ return 301 https://$host$uri; ================================================ FILE:
tests/plugins/simply/origins/config.json ================================================ { "severity": ["MEDIUM", "HIGH"] } ================================================ FILE: tests/plugins/simply/origins/metrika.conf ================================================ if ($http_referer !~ "^https?://([^/]+metrika.*yandex\.(ru|ua|com|com\.tr|by|kz)|([^/]+\.)?webvisor\.com)/"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/origins/origin.conf ================================================ if ($http_origin !~ '^https?:\/\/yandex.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/origin_fp.conf ================================================ if ($http_origin !~ '^https?:\/\/yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/origin_https.conf ================================================ # Options: {"domains": ["yandex.ru"], "https_only": true} if ($http_origin !~ '^https?:\/\/yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/origin_https_fp.conf ================================================ # Options: {"domains": ["yandex.ru"], "https_only": true} if ($http_origin !~ '^https:\/\/yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/origin_w_slash_anchored_fp.conf ================================================ if ($http_origin !~ '^https?:\/\/yandex\.ru/$') { } ================================================ FILE: tests/plugins/simply/origins/origin_w_slash_fp.conf ================================================ if ($http_origin !~ '^https?:\/\/yandex\.ru/') { } ================================================ FILE: tests/plugins/simply/origins/origin_wo_slash.conf ================================================ # Options: {"domains": ["yandex.ru"]} http { if ($http_origin !~ 
'^https?:\/\/yandex\.ru') { } } ================================================ FILE: tests/plugins/simply/origins/referer.conf ================================================ if ($http_referer !~ '^https?:\/\/yandex.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/referer_fp.conf ================================================ if ($http_referer !~ '^https?:\/\/yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/referer_subdomain.conf ================================================ if ($http_referer !~ '^https?:\/\/some.yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/referer_subdomain_fp.conf ================================================ if ($http_referer !~ '^https?:\/\/some\.yandex\.ru\/') { } ================================================ FILE: tests/plugins/simply/origins/structure_dot.conf ================================================ if ($http_referer !~ "^https://example.com/"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/origins/structure_fp.conf ================================================ if ($http_referer !~ "^https://example\.com/"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/origins/structure_prefix.conf ================================================ if ($http_referer !~ "https://example\.com/"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/origins/structure_suffix.conf ================================================ if ($http_referer !~ "^https://example\.com"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/origins/webvisor.conf ================================================ # Options: {"domains": 
["webvisor.com", "yandex.com"]} if ($http_referer !~ "^https?://([^/]+\.)?yandex\.com/|([^/]+\.)?webvisor\.com/"){ add_header X-Frame-Options SAMEORIGIN; } ================================================ FILE: tests/plugins/simply/ssrf/config.json ================================================ { "severity": "HIGH" } ================================================ FILE: tests/plugins/simply/ssrf/have_internal_fp.conf ================================================ location /proxy/ { internal; proxy_pass $arg_some; } ================================================ FILE: tests/plugins/simply/ssrf/host_w_const_start.conf ================================================ location ~* ^/backend/(?<path>.*) { proxy_pass http://some$path; } ================================================ FILE: tests/plugins/simply/ssrf/host_w_const_start_arg.conf ================================================ location /backend/ { proxy_pass http://some${arg_la}.shit; } ================================================ FILE: tests/plugins/simply/ssrf/not_host_var_fp.conf ================================================ location ~ /proxy/(.*)$ { proxy_pass http://yastatic.net/$1; } ================================================ FILE: tests/plugins/simply/ssrf/request_uri_fp.conf ================================================ location /backend/ { proxy_pass http://some$request_uri; } ================================================ FILE: tests/plugins/simply/ssrf/request_uri_var_fp.conf ================================================ location / { set $upstream "http://some$request_uri"; proxy_pass $upstream; } ================================================ FILE: tests/plugins/simply/ssrf/scheme_var.conf ================================================ location ~ /proxy/$ { proxy_pass $http_proxy_scheme://some/file.conf; } ================================================ FILE: tests/plugins/simply/ssrf/single_var.conf ================================================ location ~
/proxy/(?P<proxy>.*)$ { proxy_pass $proxy; } ================================================ FILE: tests/plugins/simply/ssrf/used_arg.conf ================================================ location /proxy/ { proxy_pass $arg_some; } ================================================ FILE: tests/plugins/simply/ssrf/vars_from_loc.conf ================================================ location ~ /proxy/(.*)/(.*)/(.*)$ { set $scheme $1; set $host $2; set $path $3; proxy_pass $scheme://$host/$path; } ================================================ FILE: tests/plugins/simply/ssrf/with_const_scheme.conf ================================================ location ~* ^/internal-proxy/(https?)/(.*?)/(.*) { resolver 127.0.0.1; set $proxy_protocol $1; set $proxy_host $2; set $proxy_path $3; proxy_pass $proxy_protocol://$proxy_host/$proxy_path ; proxy_set_header Host $proxy_host; } ================================================ FILE: tests/plugins/simply/valid_referers/config.json ================================================ { "severity": "HIGH" } ================================================ FILE: tests/plugins/simply/valid_referers/none_first.conf ================================================ valid_referers none server_names *.webvisor.com; ================================================ FILE: tests/plugins/simply/valid_referers/none_last.conf ================================================ valid_referers server_names foo.com none; ================================================ FILE: tests/plugins/simply/valid_referers/none_middle.conf ================================================ valid_referers server_names foo.com none bar.com; ================================================ FILE: tests/plugins/simply/valid_referers/wo_none_fp.conf ================================================ valid_referers server_names foo.com bar.com *.none.com none.ru; ================================================ FILE: tests/plugins/test_simply.py
================================================ from nose.tools import assert_equals, assert_true from tests.asserts import assert_in import os from os import path import json from ..utils import * from gixy.core.manager import Manager as Gixy from gixy.core.plugins_manager import PluginsManager from gixy.core.config import Config def setup_module(): pass def teardown_module(): pass def test_from_config(): tested_plugins = set() tested_fp_plugins = set() conf_dir = path.join(path.dirname(__file__), 'simply') for plugin in os.listdir(conf_dir): if plugin in ('.', '..'): continue plugin_path = path.join(conf_dir, plugin) if not path.isdir(plugin_path): continue config = {} if path.exists(path.join(plugin_path, 'config.json')): with open(path.join(plugin_path, 'config.json'), 'r') as file: config = json.loads(file.read()) for test_case in os.listdir(plugin_path): if not test_case.endswith('.conf'): continue config_path = path.join(plugin_path, test_case) if not test_case.endswith('_fp.conf'): # Not False Positive test tested_plugins.add(plugin) test_func = check_configuration else: tested_fp_plugins.add(plugin) test_func = check_configuration_fp yield test_func, plugin, config_path, config manager = PluginsManager() for plugin in manager.plugins: plugin = plugin.name assert_true(plugin in tested_plugins, 'Plugin {name!r} should have at least one simple test config'.format(name=plugin)) assert_true(plugin in tested_fp_plugins, 'Plugin {name!r} should have at least one simple test config with false positive'.format(name=plugin)) def parse_plugin_options(config_path): with open(config_path, 'r') as f: config_line = f.readline() if config_line.startswith('# Options: '): return json.loads(config_line[10:]) return None def yoda_provider(plugin, plugin_options=None): config = Config( allow_includes=False, plugins=[plugin] ) if plugin_options: config.set_for(plugin, plugin_options) return Gixy(config=config) def check_configuration(plugin, config_path, test_config): 
plugin_options = parse_plugin_options(config_path) with yoda_provider(plugin, plugin_options) as yoda: yoda.audit(config_path, open(config_path, mode='r')) formatter = BaseFormatter() formatter.feed(config_path, yoda) _, results = formatter.reports.popitem() assert_equals(len(results), 1, 'Should have one report') result = results[0] if 'severity' in test_config: if not hasattr(test_config['severity'], '__iter__'): assert_equals(result['severity'], test_config['severity']) else: assert_in(result['severity'], test_config['severity']) assert_equals(result['plugin'], plugin) assert_true(result['summary']) assert_true(result['description']) assert_true(result['config']) assert_true(result['help_url'].startswith('https://'), 'help_url must starts with https://. It\'is URL!') def check_configuration_fp(plugin, config_path, test_config): with yoda_provider(plugin) as yoda: yoda.audit(config_path, open(config_path, mode='r')) assert_equals(len([x for x in yoda.results]), 0, 'False positive configuration must not trigger any plugins') ================================================ FILE: tests/utils.py ================================================ from logging.handlers import BufferingHandler class LogHandler(BufferingHandler): def __init__(self, matcher): # BufferingHandler takes a "capacity" argument # so as to know when to flush. As we're overriding # shouldFlush anyway, we can set a capacity of zero. # You can call flush() manually to clear out the # buffer. super(LogHandler, self).__init__(0) self.matcher = matcher def shouldFlush(self, **kwargs): return False def emit(self, record): self.buffer.append(record.__dict__) def matches(self, **kwargs): """ Look for a saved dict whose keys/values match the supplied arguments. 
""" result = False for d in self.buffer: if self.matcher.matches(d, **kwargs): result = True break return result class Matcher(object): _partial_matches = ('msg', 'message') def matches(self, d, **kwargs): """ Try to match a single dict with the supplied arguments. Keys whose values are strings and which are in self._partial_matches will be checked for partial (i.e. substring) matches. You can extend this scheme to (for example) do regular expression matching, etc. """ result = True for k in kwargs: v = kwargs[k] dv = d.get(k) if not self.match_value(k, dv, v): result = False break return result def match_value(self, k, dv, v): """ Try to match a single stored value (dv) with a supplied value (v). """ if type(v) != type(dv): result = False elif type(dv) is not str or k not in self._partial_matches: result = (v == dv) else: result = dv.find(v) >= 0 return result ================================================ FILE: tox.ini ================================================ [tox] envlist = py26, py27, py34, py35, py36, py37, flake8 skip_missing_interpreters = True [testenv] deps = -rrequirements.txt -rrequirements.dev.txt commands = nosetests -v [testenv:flake8] deps = flake8 basepython = python3 commands = flake8 setup.py gixy [flake8] max_line_length = 120