Repository: Jjschwartz/NetworkAttackSimulator Branch: master Commit: 4f26de37cfdc Files: 91 Total size: 357.3 KB Directory structure: gitextract_bolyar94/ ├── .github/ │ └── ISSUE_TEMPLATE/ │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.rst ├── LICENSE.md ├── README.rst ├── docs/ │ ├── Makefile │ ├── make.bat │ ├── requirements.txt │ └── source/ │ ├── community/ │ │ ├── acknowledgements.rst │ │ ├── contact.rst │ │ ├── development.rst │ │ ├── distributing.rst │ │ ├── index.rst │ │ └── license.rst │ ├── conf.py │ ├── explanations/ │ │ ├── index.rst │ │ ├── scenario_generation.rst │ │ └── sim_to_real.rst │ ├── index.rst │ ├── reference/ │ │ ├── agents/ │ │ │ └── index.rst │ │ ├── envs/ │ │ │ ├── actions.rst │ │ │ ├── environment.rst │ │ │ ├── host_vector.rst │ │ │ ├── index.rst │ │ │ ├── observation.rst │ │ │ └── state.rst │ │ ├── index.rst │ │ ├── load.rst │ │ └── scenarios/ │ │ ├── benchmark_scenarios.rst │ │ ├── benchmark_scenarios_agent_scores.csv │ │ ├── benchmark_scenarios_table.csv │ │ ├── generator.rst │ │ └── index.rst │ └── tutorials/ │ ├── creating_scenarios.rst │ ├── environment.rst │ ├── gym_load.rst │ ├── index.rst │ ├── installation.rst │ ├── loading.rst │ └── scenarios.rst ├── nasim/ │ ├── __init__.py │ ├── agents/ │ │ ├── __init__.py │ │ ├── bruteforce_agent.py │ │ ├── dqn_agent.py │ │ ├── keyboard_agent.py │ │ ├── policies/ │ │ │ └── dqn_tiny.pt │ │ ├── ql_agent.py │ │ ├── ql_replay_agent.py │ │ └── random_agent.py │ ├── demo.py │ ├── envs/ │ │ ├── __init__.py │ │ ├── action.py │ │ ├── environment.py │ │ ├── gym_env.py │ │ ├── host_vector.py │ │ ├── network.py │ │ ├── observation.py │ │ ├── render.py │ │ ├── state.py │ │ └── utils.py │ ├── scenarios/ │ │ ├── __init__.py │ │ ├── benchmark/ │ │ │ ├── __init__.py │ │ │ ├── generated.py │ │ │ ├── medium-multi-site.yaml │ │ │ ├── medium-single-site.yaml │ │ │ ├── medium.yaml │ │ │ ├── small-honeypot.yaml │ │ │ ├── small-linear.yaml 
│ │ │ ├── small.yaml │ │ │ ├── tiny-hard.yaml │ │ │ ├── tiny-small.yaml │ │ │ └── tiny.yaml │ │ ├── generator.py │ │ ├── host.py │ │ ├── loader.py │ │ ├── scenario.py │ │ └── utils.py │ └── scripts/ │ ├── describe_scenarios.py │ ├── run_dqn_policy.py │ ├── run_random_benchmarks.py │ ├── train_dqn.py │ └── visualize_graph.py ├── setup.py └── test/ ├── __init__.py ├── test_bruteforce.py ├── test_env.py ├── test_generator.py └── test_gym_bruteforce.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Smartphone (please complete the following information):** - Device: [e.g. iPhone6] - OS: [e.g. iOS8.1] - Browser [e.g. stock browser, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 
**Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .gitignore ================================================ *.cprof # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # Installer logs pip-log.txt pip-delete-this-directory.txt # Sphinx documentation docs/_build/ # mkdocs documentation /site # data storage from tensorboard nasim/agents/runs runs/ .ipynb_checkpoints/ *.ipynb ================================================ FILE: .readthedocs.yaml ================================================ # .readthedocs.yaml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-20.04 tools: python: "3.8" # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py builder: html fail_on_warning: false # Optionally declare the Python requirements required to build your docs python: install: - method: pip path: . 
- requirements: docs/requirements.txt ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at Jonathon.schwartz@anu.edu.au. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.rst ================================================ Development =========== NASim is a work in progress and contributions are welcome via pull request. For more information, you can check out this link : |how_to_contrib|. .. |how_to_contrib| raw:: html Contributing to an open source Project on github Guidelines ---------- Here are a few guidelines for this project. * Simplicity: Be easy to use but also easy to understand when one digs into the code. Any additional code should be justified by the usefulness of the feature. These guidelines come of course in addition to all good practices for open source development. .. _naming_conv: Code style ---------- This project follows the `PEP 8 `_ style guide, please follow this with your contributions. Additionally: * If a variable is intended to be 'private', it is prefixed by an underscore. Documentation ------------- All contributions should be accompanied with at least in code docstrings, when applicable. This project uses `Sphinx `_ for documentation generation and uses `Numpy style docstrings `_. Please see code in this project for example or check out this `example `_. 
================================================ FILE: LICENSE.md ================================================ The MIT License (MIT) Copyright (c) 2018 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.rst ================================================ **Status**: Stable release. No extra development is planned, but still being maintained (bug fixes, etc). Network Attack Simulator ======================== |docs| Network Attack Simulator (NASim) is a simulated computer network complete with vulnerabilities, scans and exploits designed to be used as a testing environment for AI agents and planning techniques applied to network penetration testing. 
Installation ------------ The easiest way to install the latest version of NASim hosted on PyPi is via pip:: $ pip install nasim To install dependencies for running the DQN test agent (this is needed to run the demo) run:: $ pip install nasim[dqn] To get the latest bleeding edge version and install in development mode see the `Install docs `_ Demo ---- To see NASim in action, you can run the provided demo to interact with an environment directly or see a pre-trained AI agent in action. To run the `tiny` benchmark scenario demo in interactive mode run:: $ python -m nasim.demo tiny This will then run an interactive console where the user can see the current state and choose the next action to take. The goal of the scenario is to *compromise* every host with a non-zero value. See `here `_ for the full list of scenarios. To run the `tiny` benchmark scenario demo using the pre-trained AI agent, first ensure the DQN dependencies are installed (see *Installation* section above), then run:: $ python -m nasim.demo tiny -ai **Note:** Currently you can only run the AI demo for the `tiny` scenario. Documentation ------------- The documentation is available at: https://networkattacksimulator.readthedocs.io/ Using with gymnasium --------------------- NASim implements the `Gymnasium `_ environment interface and so can be used with any algorithm that is developed for that interface. See `Starting NASim using gymnasium `_. Authors ------- **Jonathon Schwartz** - Jonathon.schwartz@anu.edu.au License ------- `MIT`_ © 2020, Jonathon Schwartz .. _MIT: LICENSE What's new ---------- - 2023-05-14 (v 0.12.0) (MINOR release) + Renamed `NASimEnv.get_minimum_actions -> NASimEnv.get_minumum_hops` to better reflect what it does (thanks @rzvnbr for the suggestion). - 2023-03-13 (v 0.11.0) (MINOR release) + Migrated to `gymnasium (formerly Open AI gym) `_ fromOpen AI gym (thanks @rzvnbr for the suggestion). 
+ Fixed bug with action string representation (thanks @rzvnbr for the bug report) + Added "sim to real considerations" explanation document to the docs (thanks @Tudyx for the suggestion) - 2023-02-27 (v 0.10.1) (MICRO release) + Fixed bug for host based actions (thanks @nguyen-thanh20 for the bug report) - 2022-07-30 (v 0.10.0) (MINOR release) + Fixed typos (thanks @francescoluciano) + Updates to be compatible with latest version of OpenAI gym API (v0.25) (see `Open AI gym API docs `_ for details), notable changes include * Updated naming convention when initializing environments using the ``gym.make`` API (see `gym load docs `_ for details.) * Updated reset function to match new gym API (shouldn't break any implementations using old API) * Updated step function to match new gym API. It now returns two bools, the first specifies if terminal/goal state has been reached and the other specifies if the episode is terminated due to the scenario step limit (if any exists) has been reached. This change may break implementations and you may need to specify (or not) when initializing the gym environment using ``gym.make(env_id, new_step_api=True)`` - 2022-05-19 (v 0.9.1) (MICRO release) + Fixed a few bugs and added some tests (thanks @simonsays1980 for the bug reports) - 2021-12-20 (v 0.9.0) (MINOR release) + The value of a host is now observed when any level of access is gained on a host. This makes it so that agents can learn to decide whether to invest time in gaining root access on a host or not, depending on the host's value (thanks @jaromiru for the proposal). + Initial observation of reachable hosts now contains the host's address (thanks @jaromiru). + Added some support for custom address space bounds in when using scenario generator (thanks @jaromiru for the suggestion). - 2021-3-15 (v 0.8.0) (MINOR release) + Added option of specifying a 'value' for each host when defining a custom network using the .YAML format (thanks @Joe-zsc for the suggestion). 
+ Added the 'small-honeypot' scenario to included scenarios. - 2020-12-24 (v 0.7.5) (MICRO release) + Added 'undefined error' to observation to fix issue with initial and later observations being indistinguishable. - 2020-12-17 (v 0.7.4) (MICRO release) + Fixed issues with incorrect observation of host 'value' and 'discovery_value'. Now, when in partially observable mode, the agent will correctly only observe these values on the step that they are recieved. + Some other minor code formatting fixes - 2020-09-23 (v 0.7.3) (MICRO release) + Fixed issue with scenario YAML files not being included with PyPi package + Added final policy visualisation option to DQN and Q-Learning agents - 2020-09-20 (v 0.7.2) (MICRO release) + Fixed bug with 're-registering' Gym environments when reloading modules + Added example implementations of Tabular Q-Learning: `agents/ql_agent.py` and `agents/ql_replay.py` + Added `Agents` section to docs, along with other minor doc updates - 2020-09-20 (v 0.7.1) (MICRO release) + Added some scripts for running random benchmarks and describing benchmark scenarios + Added some more docs (including for creating custom scenarios) and updated other docs - 2020-09-20 (v 0.7.0) (MINOR release) + Implemented host based firewalls + Added priviledge escalation + Added a demo script, including a pre-trained agent for the 'tiny' scenario + Fix to upper bound calculation (factored in reward for discovering a host) - 2020-08-02 (v 0.6.0) (MINOR release) + Implemented compatibility with gym.make() + Updated docs for loading and interactive with NASimEnv + Added extra functions to nasim.scenarios to make it easier to load scenarios seperately to a NASimEnv + Fixed bug to do with class attributes and creating different scenarios in same python session + Fixed up bruteforce agent and tests - 2020-07-31 (v 0.5.0) (MINOR release) + First official release on PyPi + Cleaned up dependencies, setup.py, etc and some small fixes .. 
|docs| image:: https://readthedocs.org/projects/networkattacksimulator/badge/ :target: https://networkattacksimulator.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status :scale: 100% ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/requirements.txt ================================================ nasim sphinx sphinx-autobuild sphinx-rtd-theme ================================================ FILE: docs/source/community/acknowledgements.rst ================================================ .. _acknowledgements: Acknowledgements ================ * Inspiration for the documentation was taken from the `DeeR `_ project. ================================================ FILE: docs/source/community/contact.rst ================================================ Contact ======= Questions? Please contact Jonathon.schwartz@anu.edu.au. ================================================ FILE: docs/source/community/development.rst ================================================ .. _dev: Development =========== NASim is a work in progress and contributions are welcome via pull request. For more information, you can check out this link : |how_to_contrib|. .. |how_to_contrib| raw:: html Contributing to an open source Project on github Guidelines ---------- Here are a few guidelines for this project. * Simplicity: Be easy to use but also easy to understand when one digs into the code. Any additional code should be justified by the usefulness of the feature. These guidelines come of course in addition to all good practices for open source development. .. _naming_conv: Code style ---------- This project follows the `PEP 8 `_ style guide, please follow this with your contributions. Additionally: * If a variable is intended to be 'private', it is prefixed by an underscore. Documentation ------------- All contributions should be accompanied with at least in code docstrings, when applicable. 
This project uses `Sphinx `_ for documentation generation and uses `Numpy style docstrings `_. Please see code in this project for example or check out this `example `_. ================================================ FILE: docs/source/community/distributing.rst ================================================ .. _distribution: Distribution ============ This document contains some notes on distributing NASim via PyPi. This is mainly as a reminder for the steps to take when releasing an update. .. note:: Unless specified otherwise, all bash commands are assumed to be executed from the root directory of the NASim package. Before pushing to master ~~~~~~~~~~~~~~~~~~~~~~~~ 1. Ensure all tests are passing by running: .. code-block:: bash cd test pytest 2. Ensure updates are included in the *What's new* section of the *README.rst* and *docs/source/index.rst* files (this step can be ignored for very small changes) 3. Ensure any necessary updates have been included in the documentation. 4. Make sure the documentation can be built by running: .. code-block:: bash cd docs make html 5. Ensure ``setup.py`` has been updated to reflect any version and/or dependency changes. After changes have been pushed ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If pushing a new version (MAJOR, MINOR, or MICRO), do the following: 1. Add a tag with the release number to the commit. 2. On github create a new release and link it to the tagged commit 3. Publish the new release to PyPi: .. code-block:: bash # build distributions python setup.py sdist bdist_wheel # upload latest distribution builds to pypi # this will ask for PyPi username and password python -m twine upload dist/* --skip-existing 4. Login to https://pypi.org/ and verify latest version is added correctly. 5. Visit https://networkattacksimulator.readthedocs.io/en/latest/index.html and check documentation has updated correctly (make sure to refresh browser cache to ensure your looking at the latest version.) 
================================================ FILE: docs/source/community/index.rst ================================================ .. _community: Community & Development ======================= .. toctree:: :maxdepth: 1 development license contact acknowledgements distributing ================================================ FILE: docs/source/community/license.rst ================================================ License ======= The MIT License (MIT) Copyright (c) 2018 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: docs/source/conf.py ================================================ # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. 
For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys import nasim sys.path.insert(0, os.path.abspath(os.path.join('..', '..'))) # -- Project information ----------------------------------------------------- project = 'NASim' copyright = '2020, Jonathon Schwartz' author = 'Jonathon Schwartz' # The full version, including alpha/beta/rc tags release = nasim.__version__ # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.napoleon' ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] # Explicitly assign the master document # This is required for the readthedocs.org build to work correctly master_doc = 'index' # -- to include special methods --------------------------------------------- def skip(app, what, name, obj, would_skip, options): if name == "__init__": return False return would_skip def setup(app): app.connect("autodoc-skip-member", skip) # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# # html_theme = 'alabaster' html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] ================================================ FILE: docs/source/explanations/index.rst ================================================ .. _explanations: Explanations ============ More technical explanations related to NASim. .. toctree:: :maxdepth: 1 scenario_generation sim_to_real ================================================ FILE: docs/source/explanations/scenario_generation.rst ================================================ .. _scenario_generation_explanation: Scenario Generation Explanation =============================== Generating the scenarios involves a number of design decisions that strongly determine the form of the network being generated. This document aims to explain some of the more technical details of generating the scenarios when using the :ref:`scenario_generator` class. The scenario generator is based heavily on prior work, specifically: - `Sarraute, Carlos, Olivier Buffet, and Jörg Hoffmann. "POMDPs make better hackers: Accounting for uncertainty in penetration testing." Twenty-Sixth AAAI Conference on Artificial Intelligence. 2012. `_ - `Speicher, Patrick, et al. "Towards Automated Network Mitigation Analysis (extended)." arXiv preprint arXiv:1705.05088 (2017). `_ Network Topology ---------------- Description to come. Till then we recommend reading the papers linked above, especially the appendix of Speicher et al (2017). .. _correlated_configurations: Correlated Configurations ------------------------- When generating a scenario with ``uniform=False`` the scenario will be generated with host configurations being correlated. 
This means that rather than the OS and services it is running being chosen uniformly at random from the available OSs and services, they are chosen randomly with increased probability given to OSs and services that are being run by other hosts whose configuration was generated earlier. Specifically, the distribution of configurations of each host in the network are generated using a Nested Dirichlet Process, so that across the network hosts will have corelated configurations (i.e. certain services/configurations will be more common across hosts on the network). The correlation can be controlled using three parameters: ``alpha_H``, ``alpha_V``, and ``lambda_V``. ``alpha_H`` and ``alpha_V`` control the degree of correlation, with lower values leading to greater correlation. ``lambda_V`` controls the average number of services running per host, with higher values will mean more services (so more vulnerable) hosts on average. All three parameters must have a positive value, with the defaults being ``alpha_H=2.0``, ``alpha_V=2.0``, and ``lambda_V=1.0``, which tends to generate networks with fairly correlated configurations where hosts have only a single vulnerability on average. .. _generated_exploit_probs: Generated Exploit Probabilities ------------------------------- Success probabilities of each exploit are determined based on the value of the ``exploit_probs`` argument, as follows: - ``exploit_probs=None`` - probabilities generated randomly from uniform distribution over the interval (0, 1). - ``exploit_probs=float`` - probability of each exploit is set to the float value, which must be a valid probability. - ``exploit_probs=list[float]`` - probability of each exploit is set to corresponding float value in list. This requires that the length of the list matches the number of exploits as specified by the ``num_exploits`` argument. 
- ``exploit_probs="mixed"`` - probabilities chosen from a set distribution which is based on the `CVSS attack complexity `_ distribution of `top 10 vulnerabilities in 2017 `_. Specifically, exploit probabilities are chosen from [0.3, 0.6, 0.9] which correspond to high, medium and low attack complexity, respectively, with probabilities [0.2, 0.4, 0.4]. For deterministic exploits set ``exploit_probs=1.0``. Firewall -------- The firewall restricts which services can be communicated with between hosts on different subnets. This is mostly done by selecting services at random to block between each subnet, with some contraints. Firstly, there exists no firewall between subnets in the user zone. So communication between hosts on different user subnets is allowed for all services. Secondly, the number of services blocked is controlled by the ``restrictiveness`` parameter. This controls the number of services to block between zones (i.e. between the internet, DMZ, sensitive, and user zones). Thirdly, to ensure that the goal can be reached, traffic from at least one service running on each subnet will be allowed between each zone. This may mean more services will be allowed than restrictiveness parameter. ================================================ FILE: docs/source/explanations/sim_to_real.rst ================================================ .. _sim_to_real_explanation: Sim-to-Real Gap Considerations ============================== NASim is a fairly simplified simulator of network penetration testing. It's main goal is to capture some of the key features of network pentesting in a easy-to-use and fast simulator so that it can be used for rapid testing and prototyping of algorithms before these algorithms are tested on more realistic environments. That is to say there is a bit of gap between the scenarios in NASim and the real world. In this document we wanted to lay down some considerations to think about when trying to extend your algorithm beyond NASim. 
This is by no means an exhaustive list, but will hopefully give you something to think about for the next steps, and also give an explanation of some of the design decisions made in NASim. .. note:: This document is a work in progress so if you have any thoughts, useful references, etc on the topic of applying autonomous penetration testing in the real-world please reach out via email or open an issue on github. Handling Partial Observability ------------------------------ One of the big assumptions made by NASim is that the pentester agent has access to the network addresses of every host in the network, even in partially observable mode. This information is given to the agent in its list of actions. In practice in the real-world, depending on the scenario, this assumption may be invalid, and part of the challenge for the pentester is to be able to discover new hosts as they navigate through the network. The main reason NASim is implemented with the network addresses being known is so that the action space size could be fixed, making it simpler to use with typical Deep Reinforcement Learning algorithms (i.e. with neural nets with fixed size input and output layers). One of the research challenges is to develop algorithms that can handle action spaces that change as the pentester discovers more network addresses, or perhaps more realistic would be that the pentester's action space is multi-dimensional and includes choosing an address and exploit/scan/etc separately. There actually is some support for this built into NASim with the nasim.envs.action.ParameterisedActionSpace action space (see :ref:`actions`), but even using that action space some information about the size of the network is given to the pentester. At this stage there are no plans to update NASim to support the no-information action space. 
This is partially due to time, but also to keep NASim simple and stable and because there are a lot of even better and more realistic environments being developed now (e.g. `CybORG `_.) One avenue for handling changing action space is to use auto-regressive actions as was done by `AlphaStar `_. ================================================ FILE: docs/source/index.rst ================================================ Welcome to Network Attack Simulator's documentation! ==================================================== Network Attack Simulator (NASim) is a lightweight, high-level network attack simulator written in python. It is designed to be used for rapid testing of autonomous pen-testing agents using reinforcement learning and planning. It is a simulator by definition so does not replicate all details of attacking a real system but it instead aims to capture some of the more salient features of network pen-testing such as the large and changing sizes of the state and action spaces, partial observability and varied network topology. The environment is modelled after the `gymnasium (formerly Open AI gym) `_ interface. What's new ---------- Version 0.12.0 ************** + Renamed `NASimEnv.get_minimum_actions -> NASimEnv.get_minimum_hops` to better reflect what it does (thanks @rzvnbr for the suggestion). Version 0.11.0 ************** + Migrated to `gymnasium (formerly Open AI gym) `_ from Open AI gym (thanks @rzvnbr for the suggestion). 
+ Fixed bug with action string representation (thanks @rzvnbr for the bug report) + Added "sim to real considerations" explanation document to the docs (thanks @Tudyx for the suggestion) Version 0.10.1 ************** + Fixed bug for host based actions (thanks @nguyen-thanh20 for the bug report) Version 0.10.0 ************** + Fixed typos (thanks @francescoluciano) + Updates to be compatible with latest version of OpenAI gym API (v0.25) (see `Open AI gym API docs `_ for details), notable changes include * Updated naming convention when initializing environments using the ``gym.make`` API (see `gym load docs `_ for details.) * Updated reset function to match new gym API (shouldn't break any implementations using old API) * Updated step function to match new gym API. It now returns two bools, the first specifies if terminal/goal state has been reached and the other specifies if the episode is terminated due to the scenario step limit (if any exists) has been reached. This change may break implementations and you may need to specify (or not) when initializing the gym environment using ``gym.make(env_id, new_step_api=True)`` Version 0.9.1 ************* - Fixed a few bugs and added some tests (thanks @simonsays1980 for the bug reports) Version 0.9.0 ************* - The value of a host is now observed when any level of access is gained on a host. This makes it so that agents can learn to decide whether to invest time in gaining root access on a host or not, depending on the host's value (thanks @jaromiru for the proposal). - Initial observation of reachable hosts now contains the host's address (thanks @jaromiru). - Added some support for custom address space bounds in when using scenario generator (thanks @jaromiru for the suggestion). Version 0.8.0 ************* - Added option of specifying a 'value' for each host when defining a custom network using the .YAML format (thanks @Joe-zsc for the suggestion). - Added the 'small-honeypot' scenario to included scenarios. 
Version 0.7.5 ************* - Added 'undefined error' to observation to fix issue with initial and later observations being indistinguishable. Version 0.7.4 ************* - Fixed issues with incorrect observation of host 'value' and 'discovery_value'. Now, when in partially observable mode, the agent will correctly only observe these values on the step that they are received - Some other minor code formatting fixes Version 0.7.3 ************* - Fixed issue with scenario YAML files not being included with PyPi package - Added final policy visualisation option to DQN and Q-Learning agents Version 0.7.2 ************* - Fixed bug with 're-registering' Gym environments when reloading modules - Added example implementations of Tabular Q-Learning: `agents/ql_agent.py` and `agents/ql_replay.py` - Added `Agents` section to docs, along with other minor doc updates Version 0.7.1 ************* - Added some scripts for running random benchmarks and describing benchmark scenarios - Added some more docs (including for creating custom scenarios) and updated other docs Version 0.7 *********** - Implemented host based firewalls - Added privilege escalation - Added a demo script, including a pre-trained agent for the 'tiny' scenario - Fix to upper bound calculation (factored in reward for discovering a host) Version 0.6 *********** - Implemented compatibility with gym.make() - Updated docs for loading and interacting with NASimEnv - Added extra functions to nasim.scenarios to make it easier to load scenarios separately to a NASimEnv - Fixed bug to do with class attributes and creating different scenarios in same python session - Fixed up bruteforce agent and tests Version 0.5 *********** - First official release on PyPi - Cleaned up dependencies, setup.py, etc and some small fixes - First stable version The Docs -------- .. toctree:: :maxdepth: 2 tutorials/index reference/index explanations/index community/index How should I cite NASim? 
------------------------ Please cite NASim in your publications if you use it in your research. Here is an example BibTeX entry: .. code-block:: bash @misc{schwartz2019nasim, title={NASim: Network Attack Simulator}, author={Schwartz, Jonathon and Kurniawatti, Hanna}, year={2019}, howpublished={\url{https://networkattacksimulator.readthedocs.io/}}, } Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` .. _GitHub: https://github.com/Jjschwartz/NetworkAttackSimulator ================================================ FILE: docs/source/reference/agents/index.rst ================================================ .. _agents_reference: Agents Reference ================ This page provides a short summary of the agents that come with the NASim library. Available Agents ---------------- The agent implementations that come with NASim include: * **keyboard_agent.py**: An agent that is controlled by the user via terminal inputs. * **random_agent.py**: A random agent that selects an action randomly from all available actions at each time step. * **bruteforce_agent.py**: An agent that repeatedly cycles through all available actions in order. * **ql_agent.py**: A Tabular, epsilon-greedy Q-Learning reinforcement learning agent. * **ql_replay_agent.py**: A Tabular, epsilon-greedy Q-Learning reinforcement learning agent (same as above) that incorporates an experience replay. * **dqn_agent.py**: A Deep Q-Network reinforcement learning agent using experience replay and a target Q-Network. Running Agents -------------- Each agent file defines a main function so can be run in python via the terminal, with the specific scenario and settings specified as command line arguments: .. code-block:: bash cd nasim/agents # to run a different agent, simply replace .py file with desired file # to run a different scenario, simply replace 'tiny' with desired scenario python bruteforce_agent.py tiny # to get details on command line arguments available (e.g. 
hyperparameters for Q-Learning and DQN agents) python bruteforce_agent.py --help A description and details of how to run each agent can be found at the top of each agent file. Viewing Agent Policies ---------------------- For the DQN and Tabular Q-Learning agents you can optionally also view the final policies learned by the agents after training has finished: .. code-block:: bash # simply include the --render_eval flag with the DQN and Q-Learning agents python ql_agent.py tiny --render_eval This will show a single episode of the agent, displaying the actions the agent performs along with the observations and rewards the agent recieves. ================================================ FILE: docs/source/reference/envs/actions.rst ================================================ .. _`actions`: Actions ======= .. automodule:: nasim.envs.action :members: ================================================ FILE: docs/source/reference/envs/environment.rst ================================================ .. _`environment`: Environment =========== .. automodule:: nasim.envs.environment :members: ================================================ FILE: docs/source/reference/envs/host_vector.rst ================================================ .. _`host_vector`: HostVector ========== .. automodule:: nasim.envs.host_vector :members: ================================================ FILE: docs/source/reference/envs/index.rst ================================================ .. _env_reference: Environment Reference ===================== Technical reference material for classes and functions used to interact with the NASim Environment. .. toctree:: :maxdepth: 1 actions environment host_vector observation state ================================================ FILE: docs/source/reference/envs/observation.rst ================================================ .. _`observation`: Observation =========== .. 
automodule:: nasim.envs.observation :members: ================================================ FILE: docs/source/reference/envs/state.rst ================================================ .. _`state`: State ===== .. automodule:: nasim.envs.state :members: ================================================ FILE: docs/source/reference/index.rst ================================================ .. _reference: Reference ========= Technical reference material. .. toctree:: :maxdepth: 2 load agents/index envs/index scenarios/index ================================================ FILE: docs/source/reference/load.rst ================================================ .. _nasim_init: NASimEnv load reference ======================= Technical reference material for different functions for creating a new NASim Environment. .. automodule:: nasim :members: ================================================ FILE: docs/source/reference/scenarios/benchmark_scenarios.rst ================================================ .. _benchmark_scenarios: Benchmark Scenarios =================== There are a number of existing scenarios that come with NASim. They cover a range of complexities and sizes and are intended to be used to help with benchmarking algorithms. Additionally, there are two flavours of existing scenarios: **static** and **generated**. .. note:: For full list of benchmark scenarios see :ref:`all_benchmark_scenarios`. **Static** scenarios are predefined and will be exactly the same every time they are loaded. They are defined in .yaml files in the `nasim/scenarios/benchmark/` directory. **Generated** are scenario generated using the :ref:`scenario_generator` based on some parameters. While certain features of the each scenario will remain constant between generations (e.g. number of hosts, services, exploits), other features may change (e.g. specific host configurations, firewall settings, exploit probabilities) depending on the random seed. .. 
_all_benchmark_scenarios: All benchmark scenarios ----------------------- The following table provides details of each benchmark scenario currently available in NASim. .. csv-table:: NASim Benchmark scenarios :file: benchmark_scenarios_table.csv :header-rows: 1 The number of actions is calculated as *Hosts X (Exploits + PrivEscs + 4)*. The +4 is for the 4 scans available for each host (OSScan, ServiceScan, ProcessScan, and SubnetScan). The number of states is calculated as *Hosts X 2^(3 + OS + Services) X 3 *. Here the first 3 comes from the *compromised*, *reachable* and *discovered* features of the state and the base of 2 is due to all state features being boolean (present/absent). The second 3 comes from the number of possible access levels possible on a host. The table below provides mean steps to reach the goal and reward (+/- stdev) for a uniform random agent, with scores averaged over 100 runs. .. csv-table:: NASim Benchmark scenarios Agent scores :file: benchmark_scenarios_agent_scores.csv :header-rows: 2 Notes on the scenarios ---------------------- The *tiny*, *small*, *medium*, *large*, and *huge* (and their generated versions) are all based on the network scenarios first used by: - `Sarraute, Carlos, Olivier Buffet, and Jörg Hoffmann. "POMDPs make better hackers: Accounting for uncertainty in penetration testing." Twenty-Sixth AAAI Conference on Artificial Intelligence. 2012. `_ - `Speicher, Patrick, et al. "Towards Automated Network Mitigation Analysis (extended)." arXiv preprint arXiv:1705.05088 (2017). `_ The *pocp-1-gen* and *pocp-2-gen* scenarios are based on the work by: - `Shmaryahu, D., Shani, G., Hoffmann, J., & Steinmetz, M. (2018, June). Simulated penetration testing as contingent planning. In Twenty-Eighth International Conference on Automated Planning and Scheduling. `_ The other scenarios were made up by author after looking at some random google images of network layouts, and playing around with different interesting network topologies. 
================================================ FILE: docs/source/reference/scenarios/benchmark_scenarios_agent_scores.csv ================================================ Scenario Name,Steps,Total Reward tiny,108.02 +/- 43.82,91.98 +/- 43.82 tiny-hard,135.31 +/- 65.56,21.05 +/- 85.45 tiny-small,319.56 +/- 124.26,-225.86 +/- 167.14 small,501.94 +/- 181.40,-469.80 +/- 241.99 small-honeypot,448.72 +/- 151.62,-476.08 +/- 222.41 small-linear,566.00 +/- 177.08,-555.08 +/- 241.06 medium,1371.45 +/- 420.41,-1875.29 +/- 660.62 medium-single-site,654.89 +/- 385.76,-782.17 +/- 581.14 medium-multi-site,1060.94 +/- 389.86,-1394.71 +/- 590.89 tiny-gen,86.56 +/- 40.16,116.43 +/- 40.15 tiny-gen-rgoal,98.94 +/- 47.83,104.02 +/- 47.80 small-gen,435.73 +/- 205.61,-228.53 +/- 214.34 small-gen-rgoal,423.52 +/- 226.68,-218.62 +/- 240.20 medium-gen,1002.94 +/- 468.10,-788.64 +/- 481.86 large-gen,2548.62 +/- 1224.08,-2327.34 +/- 1241.92 huge-gen,6303.86 +/- 2403.40,-6075.69 +/- 2434.77 pocp-1-gen,15189.46 +/- 6879.75,-14947.80 +/- 6887.43 pocp-2-gen,17211.38 +/- 5855.83,-16871.05 +/- 5864.58 ================================================ FILE: docs/source/reference/scenarios/benchmark_scenarios_table.csv ================================================ Name,Type,Subnets,Hosts,OS,Services,Processes,Exploits,PrivEscs,Actions,Observation Dims,States,Step Limit tiny,static,4,3,1,1,1,1,1,18,4X14,576,1000 tiny-hard,static,4,3,2,3,2,3,2,27,4X18,9216,1000 tiny-small,static,5,5,2,3,2,3,2,45,6X20,15360,1000 small,static,5,8,2,3,2,3,2,72,9X23,24576,1000 small-honeypot,static,5,8,2,3,2,3,2,72,9X23,24576,1000 small-linear,static,7,8,2,3,2,3,2,72,9X22,24576,1000 medium,static,6,16,2,5,3,5,3,192,17X27,393216,2000 medium-single-site,static,2,16,2,5,3,5,3,192,17x34,393216,2000 medium-multi-site,static,7,16,2,5,3,5,3,192,17X29,393216,2000 tiny-gen,generated,4,3,1,1,1,1,1,18,4X14,576,1000 tiny-gen-rangoal,generated,4,3,1,1,1,1,1,18,4X14,576,1000 small-gen,generated,5,8,2,3,2,3,2,72,9X23,24576,1000 
small-gen-rangoal,generated,5,8,2,3,2,3,2,72,9X23,24576,1000 medium-gen,generated,6,16,2,5,2,5,2,176,17X26,196608,2000 large-gen,generated,8,23,3,7,3,7,3,322,24X32,4521984,5000 huge-gen,generated,11,38,4,10,4,10,4,684,39X40,2.39E+08,10000 pocp-1-gen,generated,10,35,2,50,2,60,2,2310,36X75,1.51E+19,30000 pocp-2-gen,generated,21,95,3,10,3,30,3,3515,96X48,1.49E+08,30000 ================================================ FILE: docs/source/reference/scenarios/generator.rst ================================================ .. _scenario_generator: Scenario Generator =================== .. automodule:: nasim.scenarios.generator :members: ================================================ FILE: docs/source/reference/scenarios/index.rst ================================================ .. _scenario_reference: Scenario Reference ================== Technical reference material for classes and functions used to generate and load Scenarios to use with the NASim Environment. .. toctree:: :maxdepth: 1 benchmark_scenarios generator ================================================ FILE: docs/source/tutorials/creating_scenarios.rst ================================================ .. _`creating_scenarios_tute`: Creating Custom Scenarios ========================= With NASim it is possible to use custom scenarios defined in a valid YAML file. In this tutorial we will cover how to create and run your own custom scenario. .. _'defining_custom_yaml': Defining a custom scenario using YAML ------------------------------------- Before we dive into writing a new custom YAML scenario it is worth having a look at some examples. NASim comes with a number of benchmark YAML scenarios which can be found in the ``nasim/scenarios/benchmark`` directory (or view on github `here `_). For this tutorial we will be using the ``tiny.yaml`` scenario as an example. A custom scenario in NASim requires defining two components: the network and the pen-tester. 
Defining the network ^^^^^^^^^^^^^^^^^^^^ The network is defined by the following sections: 1. **subnets**: size of each subnet in network 2. **topology**: an adjacency matrix defining which subnets are connected 3. **os**: names of available operating systems on network 4. **services**: names of available services on network 5. **processes**: names of available processes on network 6. **hosts**: a dictionary of hosts on the network and their configurations 7. **firewall**: definition of the subnet firewalls Subnets """"""" This property defines the number of subnets on the network and the size of each. It is simply defined as an ordered list of integers. The address of the first subnet in the list is *1*, the second subnet is *2*, and so on. The address of *0* is reserved for the "internet" subnet (see topology section below). For example, the ``tiny`` network contains 3 subnets all of size 1: .. code-block:: yaml subnets: [1, 1, 1] # or alternatively subnets: - 1 - 1 - 1 Topology """""""" The topology is defined by an adjacency matrix with a row and column for every subnet in the network along with an additional row and column designating the "internet" subnet, i.e. connection to outside of the network. The first row and column is reserved for the "internet" subnet. A connection between subnets is indicated with a ``1`` while not connection is indicated with a ``0``. Note that we assume that connections are symmetric and that a subnet is connected with itself. For the ``tiny`` network, subnet *1* is a public subnet so is connected to the internet, indicated by a ``1`` in row 1, column 2 and row 2, column 1. Subnet *1* is also connected with subnets *2* and *3*, indicated by ``1`` in relevant cells, meanwhile subnets *2* and *3* are private and not connected directly to the internet, indicated by the ``0`` values. .. 
code-block:: yaml topology: [[ 1, 1, 0, 0], [ 1, 1, 1, 1], [ 0, 1, 1, 1], [ 0, 1, 1, 1]] OS, services, processes """"""""""""""""""""""" Similar to how we defined the subnet list, the **os**, **services** and **processes** are defined by a simple list. The names of any of the items in each list can be anything, but note that they will be used for validating the host configurations, exploits, etc, so just need to match-up with those values as desired. Continuing our example, the ``tiny`` scenario includes one OS: *linux*, one service: *ssh*, and one process: *tomcat*: .. code-block:: yaml os: - linux services: - ssh processes: - tomcat Host Configurations """"""""""""""""""" The host configuration section is a mapping from host address to their configuration, where the address is a ``(subnet number, host number)`` tuple and the configuration must include the hosts OS, services running, processes running, and optional host firewall settings. There are a few things to note when defining a host: 1. The number of hosts defined for each subnet needs to match the size of each subnet 2. Host addresses within a subnet must start from ``0`` and count up from there (i.e. three hosts in subnet *1* would have addresses ``(1, 0)``, ``(1, 1)``, and ``(1, 2)``) 3. The names of any OS, service, and process must match values provided in the **os**, **services** and **processes** sections of the YAML file. 4. Each host must have an OS and at least one service running. It is okay for hosts to have no processes running (which can be indicated using an empty list ``[]``). **Host firewalls** are defined as a mapping from host address to the list of services to deny from that host. Host addresses must be a valid address of a host in the network and any services must also match services defined in the services section. 
Finally, if a host address is not part of the firewall then it is assumed all traffic is allowed from that host, at the host level (it may still be blocked by subnet firewall). **Host Value** is the optional value the agent will receive when compromising the host. Unlike for the *sensitive_hosts* section this value can be negative as well as zero and positive. This makes it possible to set additional host specific rewards or penalties, for example setting a negative reward for a 'honeypot' host on the network. A couple of things to note: 1. Host value is optional and will default to 0. 2. For any *sensitive hosts* the value must either not be specified or it must match the value specified in the *sensitive_hosts* section of the file. 3. Same as for *sensitive hosts*, agent will only receive the value as a reward when they compromise the host. Here is the example host configurations section for the ``tiny`` scenario, where a host firewall is defined only for host ``(1, 0)`` and the host ``(1, 0)`` has a value of ``0`` (noting we could leave value unspecified in this case for the same result, we include it here as an example): .. code-block:: yaml host_configurations: (1, 0): os: linux services: [ssh] processes: [tomcat] # which services to deny between individual hosts firewall: (3, 0): [ssh] value: 0 (2, 0): os: linux services: [ssh] processes: [tomcat] firewall: (1, 0): [ssh] (3, 0): os: linux services: [ssh] processes: [tomcat] Firewall """""""" The final section for defining the network is the firewall, which is defined as a mapping from ``(subnet number, subnet number)`` tuples to list of services to allow. Some things to note about defining firewalls: 1. A firewall rule can only be defined between subnets that are connected in the topology adjacency matrix. 2. Each rule defines which services are allowed in a single direction, from the first subnet in the tuple to the second subnet in the tuple (i.e. (source subnet, destination subnet)) 3. 
An empty list means all traffic will be blocked from source to destination Here is the firewall definition for the ``tiny`` scenario where SSH traffic is allowed between all subnets, except from subnet 1 to 0 and from 1 to 2. .. code-block:: yaml # two rows for each connection between subnets as defined by topology # one for each direction of connection # lists which services to allow firewall: (0, 1): [ssh] (1, 0): [] (1, 2): [] (2, 1): [ssh] (1, 3): [ssh] (3, 1): [ssh] (2, 3): [ssh] (3, 2): [ssh] And with that we have covered everything needed to define the scenario's network. Next up is defining the pen-tester. Defining the pen-tester ^^^^^^^^^^^^^^^^^^^^^^^ The pen-tester is defined by these sections: 1. **sensitive_hosts**: a dictionary containing the address of sensitive/target hosts and their value 2. **exploits**: a dictionary of exploits 3. **privilege_escalation**: a dictionary of privilege escalation actions 4. **os_scan_cost**: cost of using OS scan 5. **service_scan_cost**: cost of using service scan 6. **process_scan_cost**: cost of using process scan 7. **subnet_scan_cost**: cost of using subnet scan 8. **step_limit**: the maximum number of actions pen-tester can perform in a single episode Sensitive hosts """"""""""""""" This section specifies the addresses and values of the target hosts in the network. When the pen-tester gains root access on these hosts they will recieve the specified value as a reward. The *sensitive_hosts* section is a dictionary where the entries are address, value pairs. Where the address is a ``(subnet number, host number)`` tuple and the value is a non-negative float or integer. In the ``tiny`` scenario the pen-tester is aiming to get root access on the hosts ``(2, 0)`` and ``(3, 0)``, both of which have a value of 100: .. code-block:: yaml sensitive_hosts: (2, 0): 100 (3, 0): 100 Exploits """""""" The exploits section is a dictionary which maps exploit names to exploit definitions. 
Every scenario requires at least one exploit. An exploit definition is a dictionary which must include the following entries: 1. **service**: the name of the service the exploit targets. - Note, the value must match the name of a service defined in the **services** section of the network definition. 2. **os**: the name of the operating system the exploit targets or ``none`` if the exploit works on all OSs. - If the value is not ``none`` it must match the name of an OS defined in the **os** section of the network definition 3. **prob**: the probability that the exploit succeeds given all preconditions are met (i.e. target host is discovered and reachable, and the host is running the target service and OS) 4. **cost**: the cost of performing the action. This should be a non-negative int or float and can represent the cost of the action in any sense desired (financial, time, traffic generated, etc) 5. **access**: the resulting access the pen-tester will get on the target host if the exploit succeeds. This can be either *user* or *root*. The name of the exploits can be anything you desire, so long as they are immutable and hashable (i.e. strings, ints, tuples) and unique. The ``tiny`` example scenario has only a single exploit ``e_ssh`` which targets the SSH service running on linux hosts, has a cost of 1 and results in user level access: .. code-block:: yaml exploits: e_ssh: service: ssh os: linux prob: 0.8 cost: 1 access: user Privilege Escalation """"""""""""""""""""" Similar to the exploits section, the privilege escalation section is a dictionary which maps privilege escalation action names to their definitions. A privilege escalation action definition is a dictionary which must include the following entries: 1. **process**: the name of the process the action targets. - The value must match the name of a process defined in the **processes** section of the network definition. 2. 
**os**: the name of the operating system the action targets or ``none`` if the exploit works on all OSs. - If the value is not ``none`` it must match the name of an OS defined in the **os** section of the network definition. 3. **prob**: the probability that the action succeeds given all preconditions are met (i.e. pen-tester has access to target host, and the host is running target process and OS) 4. **cost**: the cost of performing the action. This should be a non-negative int or float and can represent the cost of the action in any sense desired (financial, time, traffic generated, etc) 5. **access**: the resulting access the pen-tester will get on the target host if the action succeeds. This can be either *user* or *root*. Similar to exploits, the name of each privilege escalation action can be anything you desire, so long as they are immutable and hashable (i.e. strings, ints, tuples) and unique. .. note:: It is not required that a scenario has any privilege escalation actions defined. In this case define the privilege escalation section to be empty: ``privilege_escalation: {}``. Note however that you will need to make sure that it is possible to get root access on the sensitive hosts using only exploits, otherwise the pen-tester will never be able to reach the goal. The ``tiny`` example scenario has a single privilege escalation action ``pe_tomcat`` which targets the tomcat process running on linux hosts, has a cost of 1 and results in root level access: .. code-block:: yaml privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root Scan costs """""""""" Each scan must have a non-negative cost associated with it. This cost can represent whatever you wish and will be factored in to the reward the agent receives each time a scan is performed. Scan costs are easy to define, requiring only a non-negative float or integer value. You must specify the cost of all scans. 
Here, in the example ``tiny`` scenario, we define a cost of 1 for all scans: .. code-block:: yaml service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 Step limit """""""""" The step limit defines the maximum number of steps (i.e. actions) the pen-tester has to reach the goal within a single episode. During simulation once the step limit is reached the episode is considered done, with the agent having failed to reach the goal. Defining the step limit is easy since it requires only a positive integer value. For example, here we define a step limit of 1000 for the ``tiny`` scenario: .. code-block:: yaml step_limit: 1000 With that we have everything we need to define a custom scenario. Running the scenario is even easier! .. _'running_custom_yaml': Running a custom YAML scenario ------------------------------ To create a ``NASimEnv`` from a custom YAML scenario file we use the ``nasim.load()`` function: .. code-block:: python import nasim env = nasim.load('path/to/custom/scenario.yaml') The load function also takes some additional parameters to control the observation mode and observation and action spaces for the environment, see :ref:`nasim_init` for reference and :ref:`env_params` for explanation. If there are any issues with the format of your file you should receive some, hopefully, helpful error messages when attempting to load it. Once the environment is loaded successfully you can interact with it as per normal (see :ref:`env_tute` for more details). ================================================ FILE: docs/source/tutorials/environment.rst ================================================ .. _`env_tute`: Interacting with NASim Environment ================================== Assuming you are comfortable loading an environment from a scenario (see :ref:`loading_tute` or :ref:`gym_load_tute`), then interacting with a NASim Environment is very easy and follows the same interface as `gymnasium `_. 
Starting the environment ------------------------ First thing is simply loading the environment:: import nasim # load my environment in the desired way (make_benchmark, load, generate) env = nasim.make_benchmark("tiny") # or using gym import gymnasium as gym env = gym.make("nasim:Tiny-PO-v0") Here we are using the default environment parameters: ``fully_obs=False``, ``flat_actions=True``, and ``flat_obs=True``. The number of actions can be retrieved from the environment ``action_space`` attribute as follows:: # When flat_actions=True num_actions = env.action_space.n # When flat_actions=False nvec_actions = env.action_space.nvec The shape of the observations can be retrieved from the environment ``observation_space`` attribute as follows:: obs_shape = env.observation_space.shape Getting the initial observation and resetting the environment ------------------------------------------------------------- To reset the environment and get the initial observation, use the ``reset()`` function:: o, info = env.reset() The ``info`` return value contains optional auxiliary information. Performing a single step ------------------------ A step in the environment can be taken using the ``step(action)`` function. Here ``action`` can take a few different forms depending on if using ``flat_actions=True`` or ``flat_actions=False``, for our example we can simply pass an integer with 0 <= action < N, which specifies the index of the action in the action space. The ``step`` function then returns a ``(Observation, float, bool, bool, dict)`` tuple corresponding to observation, reward, done, step limit reached, auxiliary info, respectively:: action = # integer in range [0, env.action_space.n] o, r, done, step_limit_reached, info = env.step(action) if ``done=True`` then the goal has been reached, and the episode is over. Alternatively, if the current scenario has a step limit and ``step_limit_reached=True`` then, well, the step limit has been reached. 
Following both cases, it is then recommended to stop or reset the environment, otherwise there's no guarantee of what will happen (especially the first case). Visualizing the environment --------------------------- You can use the ``render()`` function to get a human readable visualization of the state of the environment. To use render correctly make sure to pass ``render_mode="human"`` to the environment initialization function:: import nasim # load my environment in the desired way (make_benchmark, load, generate) env = nasim.make_benchmark("tiny", render_mode="human") # or using gym import gymnasium as gym env = gym.make("nasim:Tiny-PO-v0", render_mode="human") env.reset() # render the environment # (if render_mode="human" is not passed during initialization this will do nothing) env.render() An example agent ---------------- Some example agents are provided in the ``nasim/agents`` directory. Here is a quick example of a hypothetical agent interacting with the environment:: import nasim env = nasim.make_benchmark("tiny") agent = AnAgent(...) o, info = env.reset() total_reward = 0 done = False step_limit_reached = False while not done and not step_limit_reached: a = agent.choose_action(o) o, r, done, step_limit_reached, info = env.step(a) total_reward += r print("Done") print("Total reward =", total_reward) It's as simple as that. ================================================ FILE: docs/source/tutorials/gym_load.rst ================================================ .. _`gym_load_tute`: Starting NASim using OpenAI gym =============================== On startup NASim also registers each benchmark scenario as a `Gymnasium `_ environment, allowing NASim benchmark environments to be loaded using ``gymnasium.make()``. :ref:`all_benchmark_scenarios` can be loaded using ``gymnasium.make()``. .. note:: Custom scenarios must be loaded using the nasim library directly, see :ref:`loading_tute`.
Environment Naming ------------------ Unlike when starting an environment using the ``nasim`` library directly, where environment modes are specified as arguments to the ``nasim.make_benchmark()`` function, when using ``gymnasium.make()`` the scenario and mode are specified in a single name. When using ``gymnasium.make()`` each environment has the following mode and naming convention: ``ScenarioName[PO][2D][VA]-vX`` Where: - ``ScenarioName`` is the name of the benchmark scenario in Camel Casing - ``[PO]`` is optional and specifies the environment is in partially observable mode, if it is not included the environment is in fully observable mode. - ``[2D]`` is optional and specifies the environment is to return 2D observations, if it is not included the environment returns 1D observations. - ``[VA]`` is optional and specifies the environment is to accept Vector actions (parametrised actions), if it is not included the environment expects integer (flat) actions. - ``vX`` is the environment version. Currently (as of version ``0.10.0``) all environments are on ``v0`` For example, the 'tiny' benchmark scenario in partially observable mode with flat action-space and flat observation space has the name: ``TinyPO-v0`` Or the 'small-gen' benchmark scenario in fully observable mode with parametrised action-space and flat observation-space has the name: ``SmallGenVA-v0`` Or the 'medium-single-site' benchmark scenario in partially observable mode with parametrised action-space and 2D observation-space has the name: ``MediumSingleSitePO2DVA-v0`` .. note:: See :ref:`env_params` for more explanation on the different modes. Usage ----- Now we understand the naming of environments, making a new environment using ``gym.make()`` is easy. For example to create a new ``TinyPO-v0`` environment: .. 
code:: python import gymnasium as gym env = gym.make("nasim:TinyPO-v0") # to specify render mode env = gym.make("nasim:TinyPO-v0", render_mode="human") ================================================ FILE: docs/source/tutorials/index.rst ================================================ .. _tutorials: Tutorials ========= .. toctree:: :maxdepth: 1 installation loading gym_load environment scenarios creating_scenarios ================================================ FILE: docs/source/tutorials/installation.rst ================================================ .. _installation: Installation ============== Dependencies -------------- This framework is tested to work under Python 3.7 or later. The required dependencies: * Python >= 3.7 * Gym >= 0.17 * NumPy >= 1.18 * PyYaml >= 5.3 For rendering: * NetworkX >= 2.4 * prettytable >= 0.7.2 * Matplotlib >= 3.1.3 We recommend to use the bleeding-edge version and to install it by following the :ref:`dev-install`. If you want a simpler installation procedure and do not intend to modify yourself the learning algorithms etc., you can look at the :ref:`user-install`. .. _user-install: User install instructions -------------------------- NASIm is available on PyPi for and can be installed with ``pip`` with the following command: .. code-block:: bash pip install nasim This will install the base level, which includes all dependencies needed to use NASim. You can also install the dependencies for building the docs, running tests, and running the DQN example agent seperately or all together, as follows: .. code-block:: bash # install dependencies for building docs pip install nasim[docs] # install dependencies for running tests pip install nasim[test] # install dependencies for running dqn_agent pip install nasim[dqn] # install all dependencies pip install nasim[all] .. _dev-install: Developer install instructions ------------------------------- As a developer, you can set you up with the bleeding-edge version of NASim with: .. 
code-block:: bash git clone -b master https://github.com/Jjschwartz/NetworkAttackSimulator.git You can install the framework as a package along with all dependencies with (you can remove the '[all]' if you just want base level install): .. code-block:: bash pip install -e .[all] ================================================ FILE: docs/source/tutorials/loading.rst ================================================ .. _`loading_tute`: Starting a NASim Environment ============================ Interaction with NASim is done primarily via the :class:`~nasim.envs.environment.NASimEnv` class, which handles a simulated network environment as defined by the chosen scenario. There are two ways to start a new environment: (i) via the nasim library directly, or (ii) using the `gym.make()` function of the gymnasium library. In this tutorial we will be covering the first method. For the second method check out :ref:`gym_load_tute`. .. _`env_params`: Environment Settings -------------------- For initialization the NASimEnv class takes a scenario definition and three optional arguments. The scenario defines the network properties and the pen-tester specific information (e.g. exploits available, etc). For this tutorial we are going to stick to how to start a new environment, details on scenarios is covered in :ref:`scenarios_tute`. The three optional arguments control the environment modes: - ``fully_obs`` : The observability mode of environment, if True then uses fully observable mode, otherwise is partially observable (default=False) - ``flat_actions`` : If true then uses a flat action space, otherwise will uses a parameterised action space (default=True). - ``flat_obs`` : If true then uses a 1D observation space, otherwise uses a 2D observation space (default=True) If using fully observable mode (``fully_obs=True``) then the entire state of the network and the attack is observed after each step. 
This is 'easy' mode and does not reflect the reality of pen-testing, but it is useful for getting started and sanity checking algorithms and environments. When using partially observable mode (``fully_obs=False``) the agent starts with no knowledge of the location, configuration and value of every host on the network and receives only observations of the features directly related to the action performed at each step. This is 'hard' mode and reflects the reality of pen-testing more accurately. Whether the environment is fully or partially observable has no effect on the size and shape of the action and observation spaces or how the agent interacts with the environment. It will have significant implications for the algorithms used to solve the environment, but that is beyond the scope of this tutorial. Using ``flat_actions=True`` means our action space is made up of N discrete actions, where N is based on the number of hosts in the network and the number of exploits and scans available. For our example there are 3 hosts, 1 exploit and 3 scans (OS, Service, and Subnet), for a total of 3 * (1 + 3) = 12 actions. If ``flat_actions=False`` then each action is a vector with each element of the vector specifying a parameter of the action. For more info see :ref:`actions`. Using ``flat_obs=True`` means the observations returned will be a 1D vector. Otherwise if ``flat_obs=False`` observations will be a 2D matrix. For explanation of the features of this vector see :ref:`observation`. .. _`loading_env`: Loading an Environment from a Scenario -------------------------------------- NASim Environments can be constructed from scenarios in three ways: making an existing scenario, loading from a .yaml file, and generating from parameters. .. note:: Each of the methods described below also accept `fully_obs`, `flat_actions` and `flat_obs` boolean arguments. ..
_`make_existing`: Making an existing scenario ^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is the easiest method for loading a new environment and closely matches the `OpenAI gym `_ way of doing things. Loading an existing scenario is as easy as:: import nasim env = nasim.make_benchmark("tiny") And you are done. You can also pass in a a random seed using the `seed` argument, which will have an effect when using a generated scenario. .. note:: This method only works with the benchmark scenarios that come with NASim (for the full list see the :ref:`benchmark_scenarios`). Loading a scenario from a YAML file ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you wish to load an existing or custom scenario defined in a YAML file, this is also very straight forward:: import nasim env = nasim.load("path/to/scenario.yaml") And once again, you are done (given your file is in a valid format)! Generating a scenario ^^^^^^^^^^^^^^^^^^^^^ The final method for loading a new environment is to generate it using the NASim scenario generator. There are quite a number of parameters that can be used to control the what scenario is generated (for a full list see the :ref:`scenario_generator` class), but the two key parameters are the number of hosts in the network and the number of services running (which also controls number of exploits, unless otherwise specified). To generate a new environment with 5 hosts running a possible 3 services:: import nasim env = nasim.generate(5, 3) And your done! If you want to pass in some other parameters (say the number of possible operating systems) these can be passed in as keyword arguments:: env = nasim.generate(5, 3, num_os=3) Once again, for a full list of available parameters refer to the :ref:`scenario_generator` documentation. ================================================ FILE: docs/source/tutorials/scenarios.rst ================================================ .. 
_`scenarios_tute`: Understanding Scenarios ======================= A scenario in NASim defines all the necessary properties for creating a network environment. Each scenario definition can be broken down into two components: the network configuration and the pen-tester. Network Configuration --------------------- The network configuration is defined by a the following properties: - *subnets*: the number and size of the subnets in the network. - *topology*: how the different subnets in the network are connected - *host configurations*: the address, OS, services, processes, and firewalls for each host in the network - *firewall*: which communication is prevented between subnets *Note*, for the host configurations we are, in general, only interested in services and processes that the pen-tester has exploits for, so we will typically ignore any non-vulnerable services and processes in order to reduce the problem size. Pen-Tester ---------- The pen-tester is defined by: - *exploits*: the set of exploits available to the pen-tester - *privescs*: the set of priviledge escalation actions available to the pen-tester - *scan costs*: the cost of performing each type of scan (service, OS, process, and subnet) - *sensitive hosts*: the target hosts on the network and their value Example Scenario ---------------- To illustrate these properties here we show an example scenario, where the aim of the pen-tester is to gain root access to the server in the sensitive subnet and one of the hosts in the user subnet. The figure below shows the the layout of our example network. .. image:: example_network.png :width: 700 From the figure we can see that this network has the following properties: - *subnets*: three subnets: DMZ with a single server, Sensitive with a single server and User with three user machines. - *topology*: Only the DMZ is connected to the internet, while all subnets in network are interconnected. 
- *host configurations*: The address, OS, services, and processes running on each host are shown next to each host (e.g. the server in the DMZ subnet has address (1, 0), has a linux OS, is running http and ssh services, and the tomcat process). The host firewall settings are shown in the table in the top-right of the figure. Here only host *(1, 0)* has a firewall configured which blocks any SSH connections from hosts *(3, 0)* and *(3, 1)*. - *firewall*: The arrows above and below the firewalls indicate which services can be communicated with in each direction between subnets and between the DMZ subnet and the internet (e.g. the internet can communicate with http services running on hosts in the DMZ, while the firewall blocks no communication from the DMZ to the internet). Next we need to define our pen-tester, which we specify based on the scenario we wish to simulate. - *exploits*: for this scenario the pen-tester has access to three exploits 1. *ssh_exploit*: which exploits the ssh service running on a windows machine, has a cost of 2, a success probability of 0.6, and results in user level access if successful. 2. *ftp_exploit*: which exploits the ftp service running on a linux machine, has a cost of 1, a success probability of 0.9, and results in root level access if successful. 3. *http_exploit*: which exploits the http service running on any OS, has a cost of 3, a success probability of 1.0, and results in user level access if successful. - *privescs*: for this scenario the pen-tester has access to two privilege escalation actions 1. *pe_tomcat*: exploits the tomcat process running on a linux machine to gain root access. It has a cost of 1 and success probability of 1.0. 2. *pe_daclsvc*: exploits the daclsvc process running on a windows machine to gain root access. It has a cost of 1 and success probability of 1.0. - *scan costs*: here we need to specify the cost of each type of scan 1. *service_scan*: 1 2. *os_scan*: 2 3. *process_scan*: 1 4.
*subnet_scan*: 1 - *sensitive hosts*: here we have two target hosts 1. *(2, 0), 1000* : the server running on sensitive subnet, which has a value of 1000. 2. *(3, 2), 1000* : the last host running on user subnet, which has a value of 1000. And with that our scenario is fully defined and we have everything we need to run an attack simulation.

================================================
FILE: nasim/__init__.py
================================================
import gymnasium as gym
from gymnasium.envs.registration import register

from nasim.envs import NASimEnv
from nasim.scenarios.benchmark import AVAIL_BENCHMARKS
from nasim.scenarios import \
    make_benchmark_scenario, load_scenario, generate_scenario

# Public API of the nasim package: the three environment constructors.
__all__ = ['make_benchmark', 'load', 'generate']


def make_benchmark(scenario_name,
                   seed=None,
                   fully_obs=False,
                   flat_actions=True,
                   flat_obs=True,
                   render_mode=None):
    """Make a new benchmark NASim environment.

    Parameters
    ----------
    scenario_name : str
        the name of the benchmark environment
    seed : int, optional
        random seed to use to generate environment (default=None)
    fully_obs : bool, optional
        the observability mode of environment, if True then uses fully
        observable mode, otherwise partially observable (default=False)
    fl at_actions : bool, optional
        if true then uses a flat action space, otherwise will use
        parameterised action space (default=True).
    flat_obs : bool, optional
        if true then uses a 1D observation space. If False will use a
        2D observation space (default=True)
    render_mode : str, optional
        The render mode to use for the environment.

    Returns
    -------
    NASimEnv
        a new environment instance

    Raises
    ------
    NotImplementedError
        if scenario_name does not match any implemented benchmark
        scenarios.
    """
    env_kwargs = {"fully_obs": fully_obs,
                  "flat_actions": flat_actions,
                  "flat_obs": flat_obs,
                  "render_mode": render_mode}
    scenario = make_benchmark_scenario(scenario_name, seed)
    return NASimEnv(scenario, **env_kwargs)


def load(path,
         fully_obs=False,
         flat_actions=True,
         flat_obs=True,
         name=None,
         render_mode=None):
    """Load NASim Environment from a .yaml scenario file.

    Parameters
    ----------
    path : str
        path to the .yaml scenario file
    fully_obs : bool, optional
        The observability mode of environment, if True then uses fully
        observable mode, otherwise partially observable (default=False)
    flat_actions : bool, optional
        if true then uses a flat action space, otherwise will use
        parameterised action space (default=True).
    flat_obs : bool, optional
        if true then uses a 1D observation space. If False will use a
        2D observation space (default=True)
    name : str, optional
        the scenarios name, if None name will be generated from path
        (default=None)
    render_mode : str, optional
        The render mode to use for the environment.

    Returns
    -------
    NASimEnv
        a new environment object
    """
    env_kwargs = {"fully_obs": fully_obs,
                  "flat_actions": flat_actions,
                  "flat_obs": flat_obs,
                  "render_mode": render_mode}
    scenario = load_scenario(path, name=name)
    return NASimEnv(scenario, **env_kwargs)


def generate(num_hosts,
             num_services,
             fully_obs=False,
             flat_actions=True,
             flat_obs=True,
             render_mode=None,
             **params):
    """Construct Environment from an auto generated network.

    Parameters
    ----------
    num_hosts : int
        number of hosts to include in network (minimum is 3)
    num_services : int
        number of services to use in environment (minimum is 1)
    fully_obs : bool, optional
        The observability mode of environment, if True then uses fully
        observable mode, otherwise partially observable (default=False)
    flat_actions : bool, optional
        if true then uses a flat action space, otherwise will use
        parameterised action space (default=True).
    flat_obs : bool, optional
        if true then uses a 1D observation space. If False will use a
        2D observation space (default=True)
    render_mode : str, optional
        The render mode to use for the environment.
    params : dict, optional
        generator params (see :class:`ScenarioGenerator` for full list)

    Returns
    -------
    NASimEnv
        a new environment object
    """
    env_kwargs = {"fully_obs": fully_obs,
                  "flat_actions": flat_actions,
                  "flat_obs": flat_obs,
                  "render_mode": render_mode}
    scenario = generate_scenario(num_hosts, num_services, **params)
    return NASimEnv(scenario, **env_kwargs)


def _register(id, entry_point, kwargs, nondeterministic, force=True):
    """Registers NASim as a Gymnasium Environment.

    Handles issues with re-registering gym environments.
    """
    # Gymnasium errors on duplicate ids, so when force=True any existing
    # registration is removed before registering again (this happens when
    # the nasim package is imported more than once in a process).
    if id in gym.envs.registry:
        if not force:
            return
        del gym.envs.registry[id]
    register(
        id=id,
        entry_point=entry_point,
        kwargs=kwargs,
        nondeterministic=nondeterministic
    )


# Register every benchmark scenario with Gymnasium under each combination of
# observation/action modes, using the naming convention:
#   ScenarioName[PO][2D][VA]-v0
# PO - partially observable
# 2D - use 2D Obs
# VA - use param actions
# tiny should yield Tiny and tiny-small should yield TinySmall
for benchmark in AVAIL_BENCHMARKS:
    for fully_obs in [True, False]:
        # CamelCase the hyphenated scenario name
        name = ''.join([g.capitalize() for g in benchmark.split("-")])
        if not fully_obs:
            name = f"{name}PO"
        # flat actions, flat (1D) observations
        _register(
            id=f"{name}-v0",
            entry_point='nasim.envs:NASimGymEnv',
            kwargs={
                "scenario": benchmark,
                "fully_obs": fully_obs,
                "flat_actions": True,
                "flat_obs": True
            },
            nondeterministic=True
        )
        # flat actions, 2D observations
        _register(
            id=f"{name}2D-v0",
            entry_point='nasim.envs:NASimGymEnv',
            kwargs={
                "scenario": benchmark,
                "fully_obs": fully_obs,
                "flat_actions": True,
                "flat_obs": False
            },
            nondeterministic=True
        )
        # parameterised (vector) actions, flat observations
        _register(
            id=f"{name}VA-v0",
            entry_point='nasim.envs:NASimGymEnv',
            kwargs={
                "scenario": benchmark,
                "fully_obs": fully_obs,
                "flat_actions": False,
                "flat_obs": True
            },
            nondeterministic=True
        )
        # parameterised (vector) actions, 2D observations
        _register(
            id=f"{name}2DVA-v0",
            entry_point='nasim.envs:NASimGymEnv',
            kwargs={
                "scenario": benchmark,
                "fully_obs": fully_obs,
                "flat_actions": False,
                "flat_obs": False
            },
            nondeterministic=True
        )


__version__ = "0.12.0"


================================================
FILE:
nasim/agents/__init__.py
================================================


================================================
FILE: nasim/agents/bruteforce_agent.py
================================================
"""A bruteforce agent that repeatedly cycles through all available actions
in order.

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python bruteforce_agent.py tiny

This will run the agent and display progress and final results to stdout.

To see available running arguments:

$ python bruteforce_agent.py --help
"""
from itertools import product

import nasim

LINE_BREAK = "-"*60


def run_bruteforce_agent(env, step_limit=1e6, verbose=True):
    """Run bruteforce agent on nasim environment.

    Parameters
    ----------
    env : nasim.NASimEnv
        the nasim environment to run agent on
    step_limit : int, optional
        the maximum number of steps to run agent for (default=1e6)
    verbose : bool, optional
        whether to print out progress messages or not (default=True)

    Returns
    -------
    int
        timesteps agent ran for
    float
        the total reward received by agent
    bool
        whether the goal was reached or not
    """
    if verbose:
        print(LINE_BREAK)
        print("STARTING EPISODE")
        print(LINE_BREAK)
        print("t: Reward")

    env.reset()
    total_reward = 0
    done = False
    env_step_limit_reached = False
    steps = 0
    cycle_complete = False

    # Flat action spaces are cycled with a single integer counter; vector
    # (parameterised) action spaces are cycled via the cartesian product
    # over every action dimension.
    if env.flat_actions:
        act = 0
    else:
        act_iter = product(*[range(n) for n in env.action_space.nvec])

    while not done and not env_step_limit_reached and steps < step_limit:
        if env.flat_actions:
            # NOTE(review): the counter is incremented before stepping, so
            # the first action executed is index 1 and index 0 is executed
            # at the wrap-around — every action is still covered each cycle.
            act = (act + 1) % env.action_space.n
            # a cycle is complete when the counter wraps back to 0
            cycle_complete = (steps > 0 and act == 0)
        else:
            try:
                act = next(act_iter)
                cycle_complete = False
            except StopIteration:
                # exhausted all action combinations; start a fresh cycle
                act_iter = product(*[range(n) for n in env.action_space.nvec])
                act = next(act_iter)
                cycle_complete = True

        _, rew, done, env_step_limit_reached, _ = env.step(act)
        total_reward += rew

        # report cumulative reward once per full pass over the action space
        if cycle_complete and verbose:
            print(f"{steps}: {total_reward}")
        steps += 1

    if done and verbose:
        print(LINE_BREAK)
        print("EPISODE FINISHED")
        print(LINE_BREAK)
        print(f"Goal reached = {env.goal_reached()}")
        print(f"Total steps = {steps}")
        print(f"Total reward = {total_reward}")
    elif verbose:
        print(LINE_BREAK)
        print("STEP LIMIT REACHED")
        print(LINE_BREAK)

    # an episode can end without the goal being reached, so only report
    # success if the environment confirms the goal
    if done:
        done = env.goal_reached()

    return steps, total_reward, done


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str,
                        help="benchmark scenario name")
    parser.add_argument("-s", "--seed", type=int, default=0,
                        help="random seed")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("-p", "--param_actions", action="store_true",
                        help="Use Parameterised action space")
    parser.add_argument("-f", "--box_obs", action="store_true",
                        help="Use 2D observation space")
    args = parser.parse_args()

    # flags are inverted: make_benchmark expects fully_obs/flat_actions/
    # flat_obs, the CLI exposes the opposite options
    nasimenv = nasim.make_benchmark(
        args.env_name,
        args.seed,
        not args.partially_obs,
        not args.param_actions,
        not args.box_obs
    )

    if not args.param_actions:
        print(nasimenv.action_space.n)
    else:
        print(nasimenv.action_space.nvec)

    run_bruteforce_agent(nasimenv)


================================================
FILE: nasim/agents/dqn_agent.py
================================================
"""An example DQN Agent.

It uses pytorch 1.5+ and tensorboard libraries (HINT: these dependencies
can be installed by running pip install nasim[dqn])

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python dqn_agent.py tiny

To see detailed results using tensorboard:

$ tensorboard --logdir runs/

To see available hyperparameters:

$ python dqn_agent.py --help

Notes
-----

This is by no means a state of the art implementation of DQN, but
is designed to be an example implementation that can be used as
a reference for building your own agents.
""" import random from pprint import pprint from gymnasium import error import numpy as np import nasim try: import torch import torch.nn as nn import torch.optim as optim import torch.nn.functional as F from torch.utils.tensorboard import SummaryWriter except ImportError as e: raise error.DependencyNotInstalled( f"{e}. (HINT: you can install dqn_agent dependencies by running " "'pip install nasim[dqn]'.)" ) class ReplayMemory: def __init__(self, capacity, s_dims, device="cpu"): self.capacity = capacity self.device = device self.s_buf = np.zeros((capacity, *s_dims), dtype=np.float32) self.a_buf = np.zeros((capacity, 1), dtype=np.int64) self.next_s_buf = np.zeros((capacity, *s_dims), dtype=np.float32) self.r_buf = np.zeros(capacity, dtype=np.float32) self.done_buf = np.zeros(capacity, dtype=np.float32) self.ptr, self.size = 0, 0 def store(self, s, a, next_s, r, done): self.s_buf[self.ptr] = s self.a_buf[self.ptr] = a self.next_s_buf[self.ptr] = next_s self.r_buf[self.ptr] = r self.done_buf[self.ptr] = done self.ptr = (self.ptr + 1) % self.capacity self.size = min(self.size+1, self.capacity) def sample_batch(self, batch_size): sample_idxs = np.random.choice(self.size, batch_size) batch = [self.s_buf[sample_idxs], self.a_buf[sample_idxs], self.next_s_buf[sample_idxs], self.r_buf[sample_idxs], self.done_buf[sample_idxs]] return [torch.from_numpy(buf).to(self.device) for buf in batch] class DQN(nn.Module): """A simple Deep Q-Network """ def __init__(self, input_dim, layers, num_actions): super().__init__() self.layers = nn.ModuleList([nn.Linear(input_dim[0], layers[0])]) for l in range(1, len(layers)): self.layers.append(nn.Linear(layers[l-1], layers[l])) self.out = nn.Linear(layers[-1], num_actions) def forward(self, x): for layer in self.layers: x = F.relu(layer(x)) x = self.out(x) return x def save_DQN(self, file_path): torch.save(self.state_dict(), file_path) def load_DQN(self, file_path): self.load_state_dict(torch.load(file_path)) def get_action(self, x): with 
class DQNAgent:
    """A simple Deep Q-Network Agent.

    Trains a DQN policy on a NASim environment using an epsilon-greedy
    behaviour policy (linearly decaying epsilon), experience replay, and
    a periodically synchronised target network.
    """

    def __init__(self,
                 env,
                 seed=None,
                 lr=0.001,
                 training_steps=20000,
                 batch_size=32,
                 replay_size=10000,
                 final_epsilon=0.05,
                 exploration_steps=10000,
                 gamma=0.99,
                 hidden_sizes=None,
                 target_update_freq=1000,
                 verbose=True,
                 **kwargs):
        """
        Parameters
        ----------
        env : NASimEnv
            environment to train on (must use flat actions)
        seed : int, optional
            random seed (default=None)
        lr : float, optional
            Adam learning rate (default=0.001)
        training_steps : int, optional
            total environment steps to train for (default=20000)
        batch_size : int, optional
            replay sample batch size (default=32)
        replay_size : int, optional
            replay memory capacity (default=10000)
        final_epsilon : float, optional
            epsilon value after decay finishes (default=0.05)
        exploration_steps : int, optional
            steps over which epsilon is linearly decayed (default=10000)
        gamma : float, optional
            discount factor (default=0.99)
        hidden_sizes : list[int], optional
            hidden layer sizes of Q-network; None means [64, 64]
            (default=None; avoids a mutable default argument)
        target_update_freq : int, optional
            steps between target network syncs (default=1000)
        verbose : bool, optional
            whether to print progress to stdout (default=True)
        """
        # This DQN implementation only works for flat actions
        assert env.flat_actions
        self.verbose = verbose
        if self.verbose:
            print("\nRunning DQN with config:")
            pprint(locals())

        # None sentinel instead of a mutable [64, 64] default argument
        if hidden_sizes is None:
            hidden_sizes = [64, 64]

        # set seeds
        self.seed = seed
        if self.seed is not None:
            np.random.seed(self.seed)

        # environment setup
        self.env = env
        self.num_actions = self.env.action_space.n
        self.obs_dim = self.env.observation_space.shape

        # logger setup
        self.logger = SummaryWriter()

        # Training related attributes
        self.lr = lr
        self.exploration_steps = exploration_steps
        self.final_epsilon = final_epsilon
        self.epsilon_schedule = np.linspace(
            1.0, self.final_epsilon, self.exploration_steps
        )
        self.batch_size = batch_size
        self.discount = gamma
        self.training_steps = training_steps
        self.steps_done = 0

        # Neural Network related attributes
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        self.dqn = DQN(
            self.obs_dim, hidden_sizes, self.num_actions
        ).to(self.device)
        if self.verbose:
            print(f"\nUsing Neural Network running on device={self.device}:")
            print(self.dqn)

        self.target_dqn = DQN(
            self.obs_dim, hidden_sizes, self.num_actions
        ).to(self.device)
        self.target_update_freq = target_update_freq

        self.optimizer = optim.Adam(self.dqn.parameters(), lr=self.lr)
        self.loss_fn = nn.SmoothL1Loss()

        # replay setup
        self.replay = ReplayMemory(replay_size, self.obs_dim, self.device)

    def save(self, save_path):
        """Save policy network weights to file."""
        self.dqn.save_DQN(save_path)

    def load(self, load_path):
        """Load policy network weights from file."""
        self.dqn.load_DQN(load_path)

    def get_epsilon(self):
        """Return current epsilon per the linear decay schedule."""
        if self.steps_done < self.exploration_steps:
            return self.epsilon_schedule[self.steps_done]
        return self.final_epsilon

    def get_egreedy_action(self, o, epsilon):
        """Select an action epsilon-greedily w.r.t. the policy network."""
        if random.random() > epsilon:
            o = torch.from_numpy(o).float().to(self.device)
            return self.dqn.get_action(o).cpu().item()
        return random.randint(0, self.num_actions-1)

    def optimize(self):
        """Perform a single DQN update on a sampled replay batch.

        Returns
        -------
        (float, float)
            batch loss and mean max-Q value over batch states
        """
        batch = self.replay.sample_batch(self.batch_size)
        s_batch, a_batch, next_s_batch, r_batch, d_batch = batch

        # get q_vals for each state and the action performed in that state
        q_vals_raw = self.dqn(s_batch)
        q_vals = q_vals_raw.gather(1, a_batch).squeeze()

        # get target q val = max val of next state (no grad through target)
        with torch.no_grad():
            target_q_val_raw = self.target_dqn(next_s_batch)
            target_q_val = target_q_val_raw.max(1)[0]
            target = r_batch + self.discount*(1-d_batch)*target_q_val

        # calculate loss
        loss = self.loss_fn(q_vals, target)

        # optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # periodically sync target network with policy network
        if self.steps_done % self.target_update_freq == 0:
            self.target_dqn.load_state_dict(self.dqn.state_dict())

        q_vals_max = q_vals_raw.max(1)[0]
        mean_v = q_vals_max.mean().item()
        return loss.item(), mean_v

    def train(self):
        """Run the full training loop for ``self.training_steps`` steps."""
        if self.verbose:
            print("\nStarting training")

        num_episodes = 0
        training_steps_remaining = self.training_steps

        while self.steps_done < self.training_steps:
            ep_results = self.run_train_episode(training_steps_remaining)
            ep_return, ep_steps, goal = ep_results
            num_episodes += 1
            training_steps_remaining -= ep_steps

            self.logger.add_scalar("episode", num_episodes, self.steps_done)
            self.logger.add_scalar(
                "epsilon", self.get_epsilon(), self.steps_done
            )
            self.logger.add_scalar(
                "episode_return", ep_return, self.steps_done
            )
            self.logger.add_scalar(
                "episode_steps", ep_steps, self.steps_done
            )
            self.logger.add_scalar(
                "episode_goal_reached", int(goal), self.steps_done
            )

            if num_episodes % 10 == 0 and self.verbose:
                print(f"\nEpisode {num_episodes}:")
                print(f"\tsteps done = {self.steps_done} / "
                      f"{self.training_steps}")
                print(f"\treturn = {ep_return}")
                print(f"\tgoal = {goal}")

        self.logger.close()
        if self.verbose:
            print("Training complete")
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / {self.training_steps}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")

    def run_train_episode(self, step_limit):
        """Run a single training episode.

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        o, _ = self.env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        while not done and not env_step_limit_reached and steps < step_limit:
            a = self.get_egreedy_action(o, self.get_epsilon())
            next_o, r, done, env_step_limit_reached, _ = self.env.step(a)
            self.replay.store(o, a, next_o, r, done)
            self.steps_done += 1
            loss, mean_v = self.optimize()
            self.logger.add_scalar("loss", loss, self.steps_done)
            self.logger.add_scalar("mean_v", mean_v, self.steps_done)

            o = next_o
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()

    def run_eval_episode(self,
                         env=None,
                         render=False,
                         eval_epsilon=0.05,
                         render_mode="human"):
        """Run a single evaluation episode.

        Parameters
        ----------
        env : NASimEnv, optional
            environment to evaluate on (default=None, uses training env)
        render : bool, optional
            whether to render each step (default=False)
        eval_epsilon : float, optional
            exploration rate during evaluation (default=0.05)
        render_mode : str, optional
            render mode used during evaluation (default="human")

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        if env is None:
            env = self.env

        original_render_mode = env.render_mode
        env.render_mode = render_mode

        o, _ = env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        line_break = "="*60
        if render:
            print("\n" + line_break)
            print(f"Running EVALUATION using epsilon = {eval_epsilon:.4f}")
            print(line_break)
            env.render()
            input("Initial state. Press enter to continue..")

        while not done and not env_step_limit_reached:
            a = self.get_egreedy_action(o, eval_epsilon)
            next_o, r, done, env_step_limit_reached, _ = env.step(a)
            o = next_o
            episode_return += r
            steps += 1
            if render:
                print("\n" + line_break)
                print(f"Step {steps}")
                print(line_break)
                print(f"Action Performed = {env.action_space.get_action(a)}")
                env.render()
                print(f"Reward = {r}")
                print(f"Done = {done}")
                print(f"Step limit reached = {env_step_limit_reached}")
                input("Press enter to continue..")

                # NOTE(review): end-of-episode summary assumed nested under
                # render (matches other agents' eval output) — confirm
                if done or env_step_limit_reached:
                    print("\n" + line_break)
                    print("EPISODE FINISHED")
                    print(line_break)
                    print(f"Goal reached = {env.goal_reached()}")
                    print(f"Total steps = {steps}")
                    print(f"Total reward = {episode_return}")

        env.render_mode = original_render_mode
        return episode_return, steps, env.goal_reached()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("--render_eval", action="store_true",
                        help="Renders final policy")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("--hidden_sizes", type=int, nargs="*",
                        default=[64, 64],
                        help="(default=[64, 64])")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="Learning rate (default=0.001)")
    parser.add_argument("-t", "--training_steps", type=int, default=20000,
                        help="training steps (default=20000)")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="(default=32)")
    parser.add_argument("--target_update_freq", type=int, default=1000,
                        help="(default=1000)")
    parser.add_argument("--seed", type=int, default=0,
                        help="(default=0)")
    parser.add_argument("--replay_size", type=int, default=100000,
                        help="(default=100000)")
    parser.add_argument("--final_epsilon", type=float, default=0.05,
                        help="(default=0.05)")
    parser.add_argument("--init_epsilon", type=float, default=1.0,
                        help="(default=1.0)")
    parser.add_argument("--exploration_steps", type=int, default=10000,
                        help="(default=10000)")
    parser.add_argument("--gamma", type=float, default=0.99,
                        help="(default=0.99)")
    # flag spelling kept for backwards compatibility (dest is args.quite);
    # store_false => verbose by default
    parser.add_argument("--quite", action="store_false",
                        help="Run in Quiet mode")
    args = parser.parse_args()

    env = nasim.make_benchmark(args.env_name,
                               args.seed,
                               fully_obs=not args.partially_obs,
                               flat_actions=True,
                               flat_obs=True)
    dqn_agent = DQNAgent(env, verbose=args.quite, **vars(args))
    dqn_agent.train()
    dqn_agent.run_eval_episode(render=args.render_eval)
# ===== FILE: nasim/agents/keyboard_agent.py =====
"""An agent that lets the user interact with NASim using the keyboard.

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python keyboard_agent.py tiny

This will run the agent and display the game in stdout.

To see available running arguments:

$ python keyboard_agent.py --help
"""
import nasim
from nasim.envs.action import Exploit, PrivilegeEscalation

LINE_BREAK = "-"*60
LINE_BREAK2 = "="*60


def print_actions(action_space):
    """Print every flat action index with its corresponding action."""
    for a in range(action_space.n):
        print(f"{a} {action_space.get_action(a)}")
    print(LINE_BREAK)


def choose_flat_action(env):
    """Prompt user to select a flat action by index; retry on bad input."""
    print_actions(env.action_space)
    while True:
        try:
            idx = int(input("Choose action number: "))
            action = env.action_space.get_action(idx)
            print(f"Performing: {action}")
            return action
        except Exception:
            print("Invalid choice. Try again.")


def display_actions(actions):
    """Print each named action definition with its parameters."""
    action_names = list(actions)
    for i, name in enumerate(action_names):
        a_def = actions[name]
        output = [f"{i} {name}:"]
        output.extend([f"{k}={v}" for k, v in a_def.items()])
        print(" ".join(output))


def choose_item(items):
    """Prompt user to pick an item from a list by index; retry on bad input."""
    while True:
        try:
            idx = int(input("Choose number: "))
            return items[idx]
        except Exception:
            print("Invalid choice. Try again.")


def choose_param_action(env):
    """Interactively construct a parameterised action.

    Walks the user through choosing action type, target subnet, target
    host and (for exploits/privescs) the specific definition.

    Returns
    -------
    Action
        the chosen action instance
    """
    print("1. Choose Action Type:")
    print("----------------------")
    for i, atype in enumerate(env.action_space.action_types):
        print(f"{i} {atype.__name__}")
    while True:
        try:
            atype_idx = int(input("Choose index: "))
            # check idx valid
            atype = env.action_space.action_types[atype_idx]
            break
        except Exception:
            print("Invalid choice. Try again.")

    print("------------------------")
    print("2. Choose Target Subnet:")
    print("------------------------")
    num_subnets = env.action_space.nvec[1]
    while True:
        try:
            subnet = int(input(f"Choose subnet in [1, {num_subnets}]: "))
            if subnet < 1 or subnet > num_subnets:
                raise ValueError()
            break
        except Exception:
            print("Invalid choice. Try again.")

    print("----------------------")
    print("3. Choose Target Host:")
    print("----------------------")
    num_hosts = env.scenario.subnets[subnet]
    while True:
        try:
            host = int(input(f"Choose host in [0, {num_hosts-1}]: "))
            if host < 0 or host > num_hosts-1:
                raise ValueError()
            break
        except Exception:
            print("Invalid choice. Try again.")

    # subnet-1, since action_space handles exclusion of internet subnet
    avec = [atype_idx, subnet-1, host, 0, 0]
    if atype not in (Exploit, PrivilegeEscalation):
        action = env.action_space.get_action(avec)
        print("----------------")
        print(f"ACTION SELECTED: {action}")
        return action

    target = (subnet, host)
    if atype == Exploit:
        print("------------------")
        print("4. Choose Exploit:")
        print("------------------")
        exploits = env.scenario.exploits
        display_actions(exploits)
        e_name = choose_item(list(exploits))
        action = Exploit(name=e_name, target=target, **exploits[e_name])
    else:
        print("------------------")
        print("4. Choose Privilege Escalation:")
        print("------------------")
        privescs = env.scenario.privescs
        display_actions(privescs)
        pe_name = choose_item(list(privescs))
        action = PrivilegeEscalation(
            name=pe_name, target=target, **privescs[pe_name]
        )

    print("----------------")
    print(f"ACTION SELECTED: {action}")
    return action


def choose_action(env):
    """Prompt user for the next action (flat or parameterised)."""
    input("Press enter to choose next action..")
    print("\n" + LINE_BREAK2)
    print("CHOOSE ACTION")
    print(LINE_BREAK2)
    if env.flat_actions:
        return choose_flat_action(env)
    return choose_param_action(env)


def run_keyboard_agent(env):
    """Run Keyboard agent

    Parameters
    ----------
    env : NASimEnv
        the environment

    Returns
    -------
    int
        final return
    int
        steps taken
    bool
        whether goal reached or not
    """
    print(LINE_BREAK2)
    print("STARTING EPISODE")
    print(LINE_BREAK2)

    o, _ = env.reset()
    env.render()
    total_reward = 0
    total_steps = 0
    done = False
    step_limit_reached = False
    while not done and not step_limit_reached:
        a = choose_action(env)
        o, r, done, step_limit_reached, _ = env.step(a)
        total_reward += r
        total_steps += 1
        print("\n" + LINE_BREAK2)
        print("OBSERVATION RECEIVED")
        print(LINE_BREAK2)
        env.render()
        print(f"Reward={r}")
        print(f"Done={done}")
        print(f"Step limit reached={step_limit_reached}")
        print(LINE_BREAK)

    return total_reward, total_steps, done


def run_generative_keyboard_agent(env, render_mode="human"):
    """Run Keyboard agent in generative mode.

    The experience is the same as the normal mode, this is mainly useful
    for testing.

    Parameters
    ----------
    env : NASimEnv
        the environment
    render_mode : str, optional
        display mode for environment (default="human")

    Returns
    -------
    int
        final return
    int
        steps taken
    bool
        whether goal reached or not
    """
    print(LINE_BREAK2)
    print("STARTING EPISODE")
    print(LINE_BREAK2)

    o, _ = env.reset()
    s = env.current_state
    env.render_state(render_mode, s)
    env.render_obs(render_mode, o)

    total_reward = 0
    total_steps = 0
    done = False
    while not done:
        a = choose_action(env)
        ns, o, r, done, _ = env.generative_step(s, a)
        total_reward += r
        total_steps += 1
        print(LINE_BREAK2)
        print("NEXT STATE")
        print(LINE_BREAK2)
        env.render_state(render_mode, ns)
        print("\n" + LINE_BREAK2)
        print("OBSERVATION RECEIVED")
        print(LINE_BREAK2)
        env.render_obs(render_mode, o)
        print(f"Reward={r}")
        print(f"Done={done}")
        print(LINE_BREAK)
        s = ns

    if done:
        done = env.goal_reached()

    return total_reward, total_steps, done


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("-s", "--seed", type=int, default=None,
                        help="random seed (default=None)")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("-p", "--param_actions", action="store_true",
                        help="Use Parameterised action space")
    parser.add_argument("-g", "--use_generative", action="store_true",
                        help=("Generative environment mode. This makes no"
                              " difference for the player, but is useful"
                              " for testing."))
    args = parser.parse_args()

    env = nasim.make_benchmark(args.env_name,
                               args.seed,
                               fully_obs=not args.partially_obs,
                               flat_actions=not args.param_actions,
                               flat_obs=True,
                               render_mode="human")
    if args.use_generative:
        total_reward, steps, goal = run_generative_keyboard_agent(
            env, render_mode="human"
        )
    else:
        total_reward, steps, goal = run_keyboard_agent(env)

    print(LINE_BREAK2)
    print("EPISODE FINISHED")
    print(LINE_BREAK)
    print(f"Goal reached = {goal}")
    print(f"Total reward = {total_reward}")
    print(f"Steps taken = {steps}")


# ===== FILE: nasim/agents/ql_agent.py =====
"""An example Tabular, epsilon greedy Q-Learning Agent.

This agent does not use an Experience replay (see the 'ql_replay_agent.py')

It uses pytorch 1.5+ tensorboard library for logging (HINT: these
dependencies can be installed by running pip install nasim[dqn])

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python ql_agent.py tiny

To see detailed results using tensorboard:

$ tensorboard --logdir runs/

To see available hyperparameters:

$ python ql_agent.py --help

Notes
-----
This is by no means a state of the art implementation of Tabular
Q-Learning. It is designed to be an example implementation that can be
used as a reference for building your own agents and for simple
experimental comparisons.
"""
import random
import numpy as np
from pprint import pprint

import nasim

try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError as e:
    from gymnasium import error
    raise error.DependencyNotInstalled(
        f"{e}. (HINT: you can install tabular_q_learning_agent dependencies "
        "by running 'pip install nasim[dqn]'.)"
    )
class TabularQFunction:
    """Tabular Q-Function.

    Q-values are stored in a dict keyed by the string form of the state
    array; unseen states are lazily initialised to zero vectors.
    """

    def __init__(self, num_actions):
        self.q_func = dict()
        self.num_actions = num_actions

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """Return the Q-value array for state ``x`` (created on demand)."""
        if isinstance(x, np.ndarray):
            # np.int alias was removed in NumPy 1.24; builtin int is
            # identical in behaviour here
            x = str(x.astype(int))
        if x not in self.q_func:
            self.q_func[x] = np.zeros(self.num_actions, dtype=np.float32)
        return self.q_func[x]

    def forward_batch(self, x_batch):
        """Return Q-value arrays for a batch of states."""
        return np.asarray([self.forward(x) for x in x_batch])

    def update_batch(self, s_batch, a_batch, delta_batch):
        """Apply per-sample TD deltas to a batch of (state, action) pairs."""
        for s, a, delta in zip(s_batch, a_batch, delta_batch):
            q_vals = self.forward(s)
            q_vals[a] += delta

    def update(self, s, a, delta):
        """Apply a TD delta to a single (state, action) value."""
        q_vals = self.forward(s)
        q_vals[a] += delta

    def get_action(self, x):
        """Return the greedy action for state ``x``."""
        return int(self.forward(x).argmax())

    def display(self):
        """Pretty-print the full Q-table."""
        pprint(self.q_func)


class TabularQLearningAgent:
    """A Tabular, epsilon greedy Q-Learning Agent.

    This variant does NOT use experience replay (see ql_replay_agent.py
    for the replay version).
    """

    def __init__(self,
                 env,
                 seed=None,
                 lr=0.001,
                 training_steps=10000,
                 final_epsilon=0.05,
                 exploration_steps=10000,
                 gamma=0.99,
                 verbose=True,
                 **kwargs):
        """
        Parameters
        ----------
        env : NASimEnv
            environment to train on (must use flat actions)
        seed : int, optional
            random seed (default=None)
        lr : float, optional
            Q-learning step size (default=0.001)
        training_steps : int, optional
            total environment steps to train for (default=10000)
        final_epsilon : float, optional
            epsilon value after decay finishes (default=0.05)
        exploration_steps : int, optional
            steps over which epsilon is linearly decayed (default=10000)
        gamma : float, optional
            discount factor (default=0.99)
        verbose : bool, optional
            whether to print progress to stdout (default=True)
        """
        # This implementation only works for flat actions
        assert env.flat_actions
        self.verbose = verbose
        if self.verbose:
            print("\nRunning Tabular Q-Learning with config:")
            pprint(locals())

        # set seeds
        self.seed = seed
        if self.seed is not None:
            np.random.seed(self.seed)

        # environment setup
        self.env = env
        self.num_actions = self.env.action_space.n
        self.obs_dim = self.env.observation_space.shape

        # logger setup
        self.logger = SummaryWriter()

        # Training related attributes
        self.lr = lr
        self.exploration_steps = exploration_steps
        self.final_epsilon = final_epsilon
        self.epsilon_schedule = np.linspace(
            1.0, self.final_epsilon, self.exploration_steps
        )
        self.discount = gamma
        self.training_steps = training_steps
        self.steps_done = 0

        # Q-Function
        self.qfunc = TabularQFunction(self.num_actions)

    def get_epsilon(self):
        """Return current epsilon per the linear decay schedule."""
        if self.steps_done < self.exploration_steps:
            return self.epsilon_schedule[self.steps_done]
        return self.final_epsilon

    def get_egreedy_action(self, o, epsilon):
        """Select an action epsilon-greedily w.r.t. the Q-function."""
        if random.random() > epsilon:
            return self.qfunc.get_action(o)
        return random.randint(0, self.num_actions-1)

    def optimize(self, s, a, next_s, r, done):
        """Perform a single Q-learning update for one transition.

        Returns
        -------
        (float, float)
            the TD error and the value of state ``s``
        """
        # get q_val for state and action performed in that state
        q_vals_raw = self.qfunc.forward(s)
        q_val = q_vals_raw[a]

        # get target q val = max val of next state
        target_q_val = self.qfunc.forward(next_s).max()
        target = r + self.discount * (1-done) * target_q_val

        # calculate error and update
        td_error = target - q_val
        td_delta = self.lr * td_error

        # optimize the model
        self.qfunc.update(s, a, td_delta)

        s_value = q_vals_raw.max()
        return td_error, s_value

    def train(self):
        """Run the full training loop for ``self.training_steps`` steps."""
        if self.verbose:
            print("\nStarting training")

        num_episodes = 0
        training_steps_remaining = self.training_steps

        while self.steps_done < self.training_steps:
            ep_results = self.run_train_episode(training_steps_remaining)
            ep_return, ep_steps, goal = ep_results
            num_episodes += 1
            training_steps_remaining -= ep_steps

            self.logger.add_scalar("episode", num_episodes, self.steps_done)
            self.logger.add_scalar(
                "epsilon", self.get_epsilon(), self.steps_done
            )
            self.logger.add_scalar(
                "episode_return", ep_return, self.steps_done
            )
            self.logger.add_scalar(
                "episode_steps", ep_steps, self.steps_done
            )
            self.logger.add_scalar(
                "episode_goal_reached", int(goal), self.steps_done
            )

            if num_episodes % 10 == 0 and self.verbose:
                print(f"\nEpisode {num_episodes}:")
                print(f"\tsteps done = {self.steps_done} / "
                      f"{self.training_steps}")
                print(f"\treturn = {ep_return}")
                print(f"\tgoal = {goal}")

        self.logger.close()
        if self.verbose:
            print("Training complete")
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / {self.training_steps}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")

    def run_train_episode(self, step_limit):
        """Run a single training episode.

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        s, _ = self.env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        while not done and not env_step_limit_reached and steps < step_limit:
            a = self.get_egreedy_action(s, self.get_epsilon())
            next_s, r, done, env_step_limit_reached, _ = self.env.step(a)
            self.steps_done += 1
            td_error, s_value = self.optimize(s, a, next_s, r, done)
            self.logger.add_scalar("td_error", td_error, self.steps_done)
            self.logger.add_scalar("s_value", s_value, self.steps_done)

            s = next_s
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()

    def run_eval_episode(self,
                         env=None,
                         render=False,
                         eval_epsilon=0.05,
                         render_mode="human"):
        """Run a single evaluation episode.

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        if env is None:
            env = self.env

        original_render_mode = env.render_mode
        env.render_mode = render_mode

        s, _ = env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        line_break = "="*60
        if render:
            print("\n" + line_break)
            print(f"Running EVALUATION using epsilon = {eval_epsilon:.4f}")
            print(line_break)
            env.render()
            input("Initial state. Press enter to continue..")

        while not done and not env_step_limit_reached:
            a = self.get_egreedy_action(s, eval_epsilon)
            next_s, r, done, env_step_limit_reached, _ = env.step(a)
            s = next_s
            episode_return += r
            steps += 1
            if render:
                print("\n" + line_break)
                print(f"Step {steps}")
                print(line_break)
                print(f"Action Performed = {env.action_space.get_action(a)}")
                env.render()
                print(f"Reward = {r}")
                print(f"Done = {done}")
                print(f"Step limit reached = {env_step_limit_reached}")
                input("Press enter to continue..")

                if done or env_step_limit_reached:
                    print("\n" + line_break)
                    print("EPISODE FINISHED")
                    print(line_break)
                    print(f"Goal reached = {env.goal_reached()}")
                    print(f"Total steps = {steps}")
                    print(f"Total reward = {episode_return}")

        env.render_mode = original_render_mode
        return episode_return, steps, env.goal_reached()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("--render_eval", action="store_true",
                        help="Renders final policy")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="Learning rate (default=0.001)")
    parser.add_argument("-t", "--training_steps", type=int, default=10000,
                        help="training steps (default=10000)")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="(default=32)")
    parser.add_argument("--seed", type=int, default=0, help="(default=0)")
    parser.add_argument("--replay_size", type=int, default=100000,
                        help="(default=100000)")
    parser.add_argument("--final_epsilon", type=float, default=0.05,
                        help="(default=0.05)")
    parser.add_argument("--init_epsilon", type=float, default=1.0,
                        help="(default=1.0)")
    parser.add_argument("-e", "--exploration_steps", type=int, default=10000,
                        help="(default=10000)")
    parser.add_argument("--gamma", type=float, default=0.99,
                        help="(default=0.99)")
    # flag spelling kept for backwards compatibility (dest is args.quite)
    parser.add_argument("--quite", action="store_false",
                        help="Run in Quiet mode")
    args = parser.parse_args()

    env = nasim.make_benchmark(
        args.env_name,
        args.seed,
        fully_obs=True,
        flat_actions=True,
        flat_obs=True
    )
    ql_agent = TabularQLearningAgent(
        env, verbose=args.quite, **vars(args)
    )
    ql_agent.train()
    ql_agent.run_eval_episode(render=args.render_eval)
# ===== FILE: nasim/agents/ql_replay_agent.py =====
"""An example Tabular, epsilon greedy Q-Learning Agent using experience
replay.

The replay can help improve learning stability and speed (in terms of
learning per training step), at the cost of increased memory and
computation use.

It uses pytorch 1.5+ tensorboard library for logging (HINT: these
dependencies can be installed by running pip install nasim[dqn])

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python ql_replay_agent.py tiny

To see detailed results using tensorboard:

$ tensorboard --logdir runs/

To see available hyperparameters:

$ python ql_replay_agent.py --help

Notes
-----
This is by no means a state of the art implementation of Tabular
Q-Learning. It is designed to be an example implementation that can be
used as a reference for building your own agents and for simple
experimental comparisons.
"""
import random
from pprint import pprint

import numpy as np

import nasim

try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError as e:
    from gymnasium import error
    raise error.DependencyNotInstalled(
        f"{e}. (HINT: you can install tabular_q_learning_agent dependencies "
        "by running 'pip install nasim[dqn]'.)"
    )


class ReplayMemory:
    """Experience Replay for Tabular Q-Learning agent.

    Fixed-capacity circular buffer of (s, a, s', r, done) transitions.
    """

    def __init__(self, capacity, s_dims):
        self.capacity = capacity
        self.s_buf = np.zeros((capacity, *s_dims), dtype=np.float32)
        self.a_buf = np.zeros((capacity, 1), dtype=np.int32)
        self.next_s_buf = np.zeros((capacity, *s_dims), dtype=np.float32)
        self.r_buf = np.zeros(capacity, dtype=np.float32)
        self.done_buf = np.zeros(capacity, dtype=np.float32)
        self.ptr, self.size = 0, 0

    def store(self, s, a, next_s, r, done):
        """Store a single transition, overwriting oldest when full."""
        self.s_buf[self.ptr] = s
        self.a_buf[self.ptr] = a
        self.next_s_buf[self.ptr] = next_s
        self.r_buf[self.ptr] = r
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr + 1) % self.capacity
        self.size = min(self.size+1, self.capacity)

    def sample_batch(self, batch_size):
        """Sample a batch of stored transitions (with replacement)."""
        sample_idxs = np.random.choice(self.size, batch_size)
        batch = [self.s_buf[sample_idxs],
                 self.a_buf[sample_idxs],
                 self.next_s_buf[sample_idxs],
                 self.r_buf[sample_idxs],
                 self.done_buf[sample_idxs]]
        return batch


class TabularQFunction:
    """Tabular Q-Function.

    Q-values are stored in a dict keyed by the string form of the state
    array; unseen states are lazily initialised to zero vectors.
    """

    def __init__(self, num_actions):
        self.q_func = dict()
        self.num_actions = num_actions

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """Return the Q-value array for state ``x`` (created on demand)."""
        if isinstance(x, np.ndarray):
            # np.int alias was removed in NumPy 1.24; builtin int is
            # identical in behaviour here
            x = str(x.astype(int))
        if x not in self.q_func:
            self.q_func[x] = np.zeros(self.num_actions, dtype=np.float32)
        return self.q_func[x]

    def forward_batch(self, x_batch):
        """Return Q-value arrays for a batch of states."""
        return np.asarray([self.forward(x) for x in x_batch])

    def update(self, s_batch, a_batch, delta_batch):
        """Apply per-sample TD deltas to a batch of (state, action) pairs."""
        for s, a, delta in zip(s_batch, a_batch, delta_batch):
            q_vals = self.forward(s)
            q_vals[a] += delta

    def get_action(self, x):
        """Return the greedy action for state ``x``."""
        return int(self.forward(x).argmax())

    def display(self):
        """Pretty-print the full Q-table."""
        pprint(self.q_func)


class TabularQLearningAgent:
    """A Tabular, epsilon greedy Q-Learning Agent using Experience Replay."""

    def __init__(self,
                 env,
                 seed=None,
                 lr=0.001,
                 training_steps=10000,
                 batch_size=32,
                 replay_size=10000,
                 final_epsilon=0.05,
                 exploration_steps=10000,
                 gamma=0.99,
                 verbose=True,
                 **kwargs):
        """
        Parameters
        ----------
        env : NASimEnv
            environment to train on (must use flat actions)
        seed : int, optional
            random seed (default=None)
        lr : float, optional
            Q-learning step size (default=0.001)
        training_steps : int, optional
            total environment steps to train for (default=10000)
        batch_size : int, optional
            replay sample batch size (default=32)
        replay_size : int, optional
            replay memory capacity (default=10000)
        final_epsilon : float, optional
            epsilon value after decay finishes (default=0.05)
        exploration_steps : int, optional
            steps over which epsilon is linearly decayed (default=10000)
        gamma : float, optional
            discount factor (default=0.99)
        verbose : bool, optional
            whether to print progress to stdout (default=True)
        """
        # This implementation only works for flat actions
        assert env.flat_actions
        self.verbose = verbose
        if self.verbose:
            print("\nRunning Tabular Q-Learning with config:")
            pprint(locals())

        # set seeds
        self.seed = seed
        if self.seed is not None:
            np.random.seed(self.seed)

        # environment setup
        self.env = env
        self.num_actions = self.env.action_space.n
        self.obs_dim = self.env.observation_space.shape

        # logger setup
        self.logger = SummaryWriter()

        # Training related attributes
        self.lr = lr
        self.exploration_steps = exploration_steps
        self.final_epsilon = final_epsilon
        self.epsilon_schedule = np.linspace(
            1.0, self.final_epsilon, self.exploration_steps
        )
        self.batch_size = batch_size
        self.discount = gamma
        self.training_steps = training_steps
        self.steps_done = 0

        # Q-Function
        self.qfunc = TabularQFunction(self.num_actions)

        # replay setup
        self.replay = ReplayMemory(replay_size, self.obs_dim)

    def get_epsilon(self):
        """Return current epsilon per the linear decay schedule."""
        if self.steps_done < self.exploration_steps:
            return self.epsilon_schedule[self.steps_done]
        return self.final_epsilon

    def get_egreedy_action(self, o, epsilon):
        """Select an action epsilon-greedily w.r.t. the Q-function."""
        if random.random() > epsilon:
            return self.qfunc.get_action(o)
        return random.randint(0, self.num_actions-1)

    def optimize(self):
        """Perform a batched Q-learning update on a sampled replay batch.

        Returns
        -------
        (float, float)
            mean absolute TD error and mean max-Q value of the batch
        """
        batch = self.replay.sample_batch(self.batch_size)
        s_batch, a_batch, next_s_batch, r_batch, d_batch = batch

        # get q_vals for each state and the action performed in that state
        q_vals_raw = self.qfunc.forward_batch(s_batch)
        q_vals = np.take_along_axis(q_vals_raw, a_batch, axis=1).squeeze()

        # get target q val = max val of next state
        target_q_val_raw = self.qfunc.forward_batch(next_s_batch)
        target_q_val = target_q_val_raw.max(axis=1)
        target = r_batch + self.discount*(1-d_batch)*target_q_val

        # calculate error and update
        td_error = target - q_vals
        td_delta = self.lr * td_error

        # optimize the model
        self.qfunc.update(s_batch, a_batch, td_delta)

        q_vals_max = q_vals_raw.max(axis=1)
        mean_v = q_vals_max.mean().item()
        mean_td_error = np.absolute(td_error).mean().item()
        return mean_td_error, mean_v

    def train(self):
        """Run the full training loop for ``self.training_steps`` steps."""
        if self.verbose:
            print("\nStarting training")

        num_episodes = 0
        training_steps_remaining = self.training_steps

        while self.steps_done < self.training_steps:
            ep_results = self.run_train_episode(training_steps_remaining)
            ep_return, ep_steps, goal = ep_results
            num_episodes += 1
            training_steps_remaining -= ep_steps

            self.logger.add_scalar("episode", num_episodes, self.steps_done)
            self.logger.add_scalar(
                "epsilon", self.get_epsilon(), self.steps_done
            )
            self.logger.add_scalar(
                "episode_return", ep_return, self.steps_done
            )
            self.logger.add_scalar(
                "episode_steps", ep_steps, self.steps_done
            )
            self.logger.add_scalar(
                "episode_goal_reached", int(goal), self.steps_done
            )

            if num_episodes % 10 == 0 and self.verbose:
                print(f"\nEpisode {num_episodes}:")
                print(f"\tsteps done = {self.steps_done} / "
                      f"{self.training_steps}")
                print(f"\treturn = {ep_return}")
                print(f"\tgoal = {goal}")

        self.logger.close()
        if self.verbose:
            print("Training complete")
            print(f"\nEpisode {num_episodes}:")
            print(f"\tsteps done = {self.steps_done} / {self.training_steps}")
            print(f"\treturn = {ep_return}")
            print(f"\tgoal = {goal}")

    def run_train_episode(self, step_limit):
        """Run a single training episode.

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        # gymnasium reset() returns (obs, info); the original dropped the
        # tuple unpack used by the sibling agents, storing a tuple as obs
        o, _ = self.env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        while not done and not env_step_limit_reached and steps < step_limit:
            a = self.get_egreedy_action(o, self.get_epsilon())
            next_o, r, done, env_step_limit_reached, _ = self.env.step(a)
            self.replay.store(o, a, next_o, r, done)
            self.steps_done += 1
            mean_td_error, mean_v = self.optimize()
            self.logger.add_scalar(
                "mean_td_error", mean_td_error, self.steps_done
            )
            self.logger.add_scalar("mean_v", mean_v, self.steps_done)

            o = next_o
            episode_return += r
            steps += 1

        return episode_return, steps, self.env.goal_reached()

    def run_eval_episode(self,
                         env=None,
                         render=False,
                         eval_epsilon=0.05,
                         render_mode="readable"):
        """Run a single evaluation episode.

        Returns
        -------
        (float, int, bool)
            episode return, steps taken, whether goal was reached
        """
        if env is None:
            env = self.env

        # gymnasium reset() returns (obs, info)
        o, _ = env.reset()
        done = False
        env_step_limit_reached = False

        steps = 0
        episode_return = 0

        line_break = "="*60
        if render:
            print("\n" + line_break)
            print(f"Running EVALUATION using epsilon = {eval_epsilon:.4f}")
            print(line_break)
            env.render(render_mode)
            input("Initial state. Press enter to continue..")

        while not done and not env_step_limit_reached:
            a = self.get_egreedy_action(o, eval_epsilon)
            next_o, r, done, env_step_limit_reached, _ = env.step(a)
            o = next_o
            episode_return += r
            steps += 1
            if render:
                print("\n" + line_break)
                print(f"Step {steps}")
                print(line_break)
                print(f"Action Performed = {env.action_space.get_action(a)}")
                env.render(render_mode)
                print(f"Reward = {r}")
                print(f"Done = {done}")
                print(f"Step limit reached = {env_step_limit_reached}")
                input("Press enter to continue..")

                if done or env_step_limit_reached:
                    print("\n" + line_break)
                    print("EPISODE FINISHED")
                    print(line_break)
                    print(f"Goal reached = {env.goal_reached()}")
                    print(f"Total steps = {steps}")
                    print(f"Total reward = {episode_return}")

        return episode_return, steps, env.goal_reached()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("--render_eval", action="store_true",
                        help="Renders final policy")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="Learning rate (default=0.001)")
    parser.add_argument("-t", "--training_steps", type=int, default=10000,
                        help="training steps (default=10000)")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="(default=32)")
    parser.add_argument("--seed", type=int, default=0, help="(default=0)")
    parser.add_argument("--replay_size", type=int, default=100000,
                        help="(default=100000)")
    parser.add_argument("--final_epsilon", type=float, default=0.05,
                        help="(default=0.05)")
    parser.add_argument("--init_epsilon", type=float, default=1.0,
                        help="(default=1.0)")
    parser.add_argument("--exploration_steps", type=int, default=10000,
                        help="(default=10000)")
    parser.add_argument("--gamma", type=float, default=0.99,
                        help="(default=0.99)")
    # flag spelling kept for backwards compatibility (dest is args.quite)
    parser.add_argument("--quite", action="store_false",
                        help="Run in Quiet mode")
    args = parser.parse_args()

    env = nasim.make_benchmark(args.env_name,
                               args.seed,
                               fully_obs=True,
                               flat_actions=True,
                               flat_obs=True)
    ql_agent = TabularQLearningAgent(
        env, verbose=args.quite, **vars(args)
    )
    ql_agent.train()
    ql_agent.run_eval_episode(render=args.render_eval)


# ===== FILE: nasim/agents/random_agent.py =====
"""A random agent that selects a random action at each step

To run 'tiny' benchmark scenario with default settings, run the following
from the nasim/agents dir:

$ python random_agent.py tiny

This will run the agent and display progress and final results to stdout.

To see available running arguments:

$ python random_agent.py --help
"""
import numpy as np

import nasim

LINE_BREAK = "-"*60


def run_random_agent(env, step_limit=1e6, verbose=True):
    """Run a single episode selecting uniformly random actions.

    Parameters
    ----------
    env : NASimEnv
        the environment
    step_limit : int, optional
        max steps to take before aborting (default=1e6)
    verbose : bool, optional
        whether to print progress to stdout (default=True)

    Returns
    -------
    (int, float, bool)
        steps taken, total reward, whether goal was reached
    """
    if verbose:
        print(LINE_BREAK)
        print("STARTING EPISODE")
        print(LINE_BREAK)
        print("t: Reward")

    env.reset()
    total_reward = 0
    done = False
    env_step_limit_reached = False
    t = 0
    a = 0

    while not done and not env_step_limit_reached and t < step_limit:
        a = env.action_space.sample()
        _, r, done, env_step_limit_reached, _ = env.step(a)
        total_reward += r
        if (t+1) % 100 == 0 and verbose:
            print(f"{t}: {total_reward}")
        t += 1

    if (done or env_step_limit_reached) and verbose:
        print(LINE_BREAK)
        print("EPISODE FINISHED")
        print(LINE_BREAK)
        print(f"Total steps = {t}")
        print(f"Total reward = {total_reward}")
    elif verbose:
        print(LINE_BREAK)
        print("STEP LIMIT REACHED")
        print(LINE_BREAK)

    if done:
        done = env.goal_reached()

    return t, total_reward, done


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("-s", "--seed", type=int, default=0,
                        help="random seed")
    parser.add_argument("-r", "--runs", type=int, default=1,
                        help="number of random runs to perform (default=1)")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("-p", "--param_actions", action="store_true",
                        help="Use Parameterised action space")
    parser.add_argument("-f", "--box_obs", action="store_true",
                        help="Use 2D observation space")
    args = parser.parse_args()

    seed = args.seed
    run_steps = []
    run_rewards = []
    run_goals = 0
    for i in range(args.runs):
        env = nasim.make_benchmark(args.env_name,
                                   seed,
                                   not args.partially_obs,
                                   not args.param_actions,
                                   not args.box_obs)
        steps, reward, done = run_random_agent(env, verbose=False)
        run_steps.append(steps)
        run_rewards.append(reward)
        run_goals += int(done)
        seed += 1
        if args.runs > 1:
            print(f"Run {i}:")
            print(f"\tSteps = {steps}")
            print(f"\tReward = {reward}")
            print(f"\tGoal reached = {done}")

    run_steps = np.array(run_steps)
    run_rewards = np.array(run_rewards)
    print(LINE_BREAK)
    print("Random Agent Runs Complete")
    print(LINE_BREAK)
    print(f"Mean steps = {run_steps.mean():.2f} +/- {run_steps.std():.2f}")
    print(f"Mean rewards = {run_rewards.mean():.2f} "
          f"+/- {run_rewards.std():.2f}")
    print(f"Goals reached = {run_goals} / {args.runs}")
# ===== FILE: nasim/demo.py =====
"""Script for running NASim demo

Usage
-----

$ python demo [-ai] [-h] env_name
"""
import os.path as osp

import nasim
from nasim.agents.dqn_agent import DQNAgent
from nasim.agents.keyboard_agent import run_keyboard_agent

# directory containing pre-trained DQN policy weights
DQN_POLICY_DIR = osp.join(
    osp.dirname(osp.abspath(__file__)), "agents", "policies"
)
# map of scenario name -> pre-trained policy file
DQN_POLICIES = {
    "tiny": osp.join(DQN_POLICY_DIR, "dqn_tiny.pt"),
    "small": osp.join(DQN_POLICY_DIR, "dqn_small.pt")
}

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description=(
            "NASim demo. Play as the hacker, trying to gain access"
            " to sensitive information on the network, or run a pre-trained"
            " AI hacker."
        )
    )
    parser.add_argument("env_name", type=str, help="benchmark scenario name")
    parser.add_argument("-ai", "--run_ai", action="store_true",
                        help=("Run AI policy (currently only supported for"
                              " 'tiny' and 'small' environments"))
    args = parser.parse_args()

    if args.run_ai:
        assert args.env_name in DQN_POLICIES, \
            ("AI demo only supported for the following environments:"
             f" {list(DQN_POLICIES)}")

    env = nasim.make_benchmark(
        args.env_name,
        fully_obs=True,
        flat_actions=True,
        flat_obs=True,
        render_mode="human"
    )

    line_break = f"\n{'-'*60}"
    print(line_break)
    print(f"Running Demo on {args.env_name} environment")
    if args.run_ai:
        print("Using AI policy")
        print(line_break)
        dqn_agent = DQNAgent(env, verbose=False, **vars(args))
        dqn_agent.load(DQN_POLICIES[args.env_name])
        ret, steps, goal = dqn_agent.run_eval_episode(
            env, True, 0.01, "human"
        )
    else:
        print("Player controlled")
        print(line_break)
        ret, steps, goal = run_keyboard_agent(env)

    print(line_break)
    print("Episode Complete")
    print(line_break)
    if goal:
        print("Goal accomplished. Sensitive data retrieved!")
    print(f"Final Score={ret}")
    print(f"Steps taken={steps}")


# ===== FILE: nasim/envs/__init__.py =====
from nasim.envs.gym_env import NASimGymEnv
from nasim.envs.environment import NASimEnv
Action types implemented: - :class:`Exploit` - :class:`PrivilegeEscalation` - :class:`ServiceScan` - :class:`OSScan` - :class:`SubnetScan` - :class:`ProcessScan` - :class:`NoOp` **Action Spaces:** There are two types of action spaces, depending on if you are using flat actions or not: - :class:`FlatActionSpace` - :class:`ParameterisedActionSpace` """ import math import numpy as np from gymnasium import spaces from nasim.envs.utils import AccessLevel def load_action_list(scenario): """Load list of actions for environment for given scenario Parameters ---------- scenario : Scenario the scenario Returns ------- list list of all actions in environment """ action_list = [] for address in scenario.address_space: action_list.append( ServiceScan(address, scenario.service_scan_cost) ) action_list.append( OSScan(address, scenario.os_scan_cost) ) action_list.append( SubnetScan(address, scenario.subnet_scan_cost) ) action_list.append( ProcessScan(address, scenario.process_scan_cost) ) for e_name, e_def in scenario.exploits.items(): exploit = Exploit(e_name, address, **e_def) action_list.append(exploit) for pe_name, pe_def in scenario.privescs.items(): privesc = PrivilegeEscalation(pe_name, address, **pe_def) action_list.append(privesc) return action_list class Action: """The base abstract action class in the environment There are multiple types of actions (e.g. exploit, scan, etc.), but every action has some common attributes. ... Attributes ---------- name : str the name of action target : (int, int) the (subnet, host) address of target of the action. The target of the action could be the address of a host that the action is being used against (e.g. for exploits or targeted scans) or could be the host that the action is being executed on (e.g. for subnet scans). cost : float the cost of performing the action prob : float the success probability of the action. This is the probability that the action works given that it's preconditions are met. E.g. 
a remote exploit targeting a host that you cannot communicate with will always fail. For deterministic actions this will be 1.0. req_access : AccessLevel, the required access level to perform action. For for on host actions (i.e. subnet scan, process scan, and privilege escalation) this will be the access on the target. For remote actions (i.e. service scan, os scan, and exploits) this will be the access on a pivot host (i.e. a compromised host that can reach the target). """ def __init__(self, name, target, cost, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- name : str name of action target : (int, int) address of target cost : float cost of performing action prob : float, optional probability of success for a given action (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ assert 0 <= prob <= 1.0 self.name = name self.target = target self.cost = cost self.prob = prob self.req_access = req_access def is_exploit(self): """Check if action is an exploit Returns ------- bool True if action is exploit, otherwise False """ return isinstance(self, Exploit) def is_privilege_escalation(self): """Check if action is privilege escalation action Returns ------- bool True if action is privilege escalation action, otherwise False """ return isinstance(self, PrivilegeEscalation) def is_scan(self): """Check if action is a scan Returns ------- bool True if action is scan, otherwise False """ return isinstance(self, (ServiceScan, OSScan, SubnetScan, ProcessScan)) def is_remote(self): """Check if action is a remote action A remote action is one where the target host is a remote host (i.e. 
the action is not performed locally on the target) Returns ------- bool True if action is remote, otherwise False """ return isinstance(self, (ServiceScan, OSScan, Exploit)) def is_service_scan(self): """Check if action is a service scan Returns ------- bool True if action is service scan, otherwise False """ return isinstance(self, ServiceScan) def is_os_scan(self): """Check if action is an OS scan Returns ------- bool True if action is an OS scan, otherwise False """ return isinstance(self, OSScan) def is_subnet_scan(self): """Check if action is a subnet scan Returns ------- bool True if action is a subnet scan, otherwise False """ return isinstance(self, SubnetScan) def is_process_scan(self): """Check if action is a process scan Returns ------- bool True if action is a process scan, otherwise False """ return isinstance(self, ProcessScan) def is_noop(self): """Check if action is a do nothing action. Returns ------- bool True if action is a noop action, otherwise False """ return isinstance(self, NoOp) def __str__(self): return (f"{self.__class__.__name__}: " f"target={self.target}, " f"cost={self.cost:.2f}, " f"prob={self.prob:.2f}, " f"req_access={self.req_access}") def __hash__(self): return hash(self.__str__()) def __eq__(self, other): if self is other: return True if not isinstance(other, type(self)): return False if self.target != other.target: return False if not (math.isclose(self.cost, other.cost) and math.isclose(self.prob, other.prob)): return False return self.req_access == other.req_access class Exploit(Action): """An Exploit action in the environment Inherits from the base Action Class. ... Attributes ---------- service : str the service targeted by exploit os : str the OS targeted by exploit. If None then exploit works for all OSs. access : int the access level gained on target if exploit succeeds. 
""" def __init__(self, name, target, cost, service, os=None, access=0, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action service : str the target service os : str, optional the target OS of exploit, if None then exploit works for all OS (default=None) access : int, optional the access level gained on target if exploit succeeds (default=0) prob : float, optional probability of success (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__(name=name, target=target, cost=cost, prob=prob, req_access=req_access) self.os = os self.service = service self.access = access def __str__(self): return (f"{super().__str__()}, os={self.os}, " f"service={self.service}, access={self.access}") def __eq__(self, other): if not super().__eq__(other): return False return self.service == other.service \ and self.os == other.os \ and self.access == other.access class PrivilegeEscalation(Action): """A privilege escalation action in the environment Inherits from the base Action Class. ... Attributes ---------- process : str the process targeted by the privilege escalation. If None the action works independent of a process os : str the OS targeted by privilege escalation. If None then action works for all OSs. 
access : int the access level resulting from privilege escalation action """ def __init__(self, name, target, cost, access, process=None, os=None, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action access : int the access level resulting from the privilege escalation process : str, optional the target process, if None the action does not require a process to work (default=None) os : str, optional the target OS of privilege escalation action, if None then action works for all OS (default=None) prob : float, optional probability of success (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__(name=name, target=target, cost=cost, prob=prob, req_access=req_access) self.access = access self.os = os self.process = process def __str__(self): return (f"{super().__str__()}, os={self.os}, " f"process={self.process}, access={self.access}") def __eq__(self, other): if not super().__eq__(other): return False return self.process == other.process \ and self.os == other.os \ and self.access == other.access class ServiceScan(Action): """A Service Scan action in the environment Inherits from the base Action Class. """ def __init__(self, target, cost, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action prob : float, optional probability of success for a given action (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__("service_scan", target=target, cost=cost, prob=prob, req_access=req_access, **kwargs) class OSScan(Action): """An OS Scan action in the environment Inherits from the base Action Class. 
""" def __init__(self, target, cost, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action prob : float, optional probability of success for a given action (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__("os_scan", target=target, cost=cost, prob=prob, req_access=req_access, **kwargs) class SubnetScan(Action): """A Subnet Scan action in the environment Inherits from the base Action Class. """ def __init__(self, target, cost, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action prob : float, optional probability of success for a given action (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__("subnet_scan", target=target, cost=cost, prob=prob, req_access=req_access, **kwargs) class ProcessScan(Action): """A Process Scan action in the environment Inherits from the base Action Class. """ def __init__(self, target, cost, prob=1.0, req_access=AccessLevel.USER, **kwargs): """ Parameters --------- target : (int, int) address of target cost : float cost of performing action prob : float, optional probability of success for a given action (default=1.0) req_access : AccessLevel, optional the required access level to perform action (default=AccessLevel.USER) """ super().__init__("process_scan", target=target, cost=cost, prob=prob, req_access=req_access, **kwargs) class NoOp(Action): """A do nothing action in the environment Inherits from the base Action Class """ def __init__(self, *args, **kwargs): super().__init__(name="noop", target=(1, 0), cost=0, prob=1.0, req_access=AccessLevel.NONE) class ActionResult: """A dataclass for storing the results of an Action. 
These results are then used to update the full state and observation. ... Attributes ---------- success : bool True if exploit/scan was successful, False otherwise value : float value gained from action. Is the value of the host if successfuly exploited, otherwise 0 services : dict services identified by action. os : dict OS identified by action processes : dict processes identified by action access : dict access gained by action discovered : dict host addresses discovered by action connection_error : bool True if action failed due to connection error (e.g. could not reach target) permission_error : bool True if action failed due to a permission error (e.g. incorrect access level to perform action) undefined_error : bool True if action failed due to an undefined error (e.g. random exploit failure) newly_discovered : dict host addresses discovered for the first time by action """ def __init__(self, success, value=0.0, services=None, os=None, processes=None, access=None, discovered=None, connection_error=False, permission_error=False, undefined_error=False, newly_discovered=None): """ Parameters ---------- success : bool True if exploit/scan was successful, False otherwise value : float, optional value gained from action (default=0.0) services : dict, optional services identified by action (default=None={}) os : dict, optional OS identified by action (default=None={}) processes : dict, optional processes identified by action (default=None={}) access : dict, optional access gained by action (default=None={}) discovered : dict, optional host addresses discovered by action (default=None={}) connection_error : bool, optional True if action failed due to connection error (default=False) permission_error : bool, optional True if action failed due to a permission error (default=False) undefined_error : bool, optional True if action failed due to an undefined error (default=False) newly_discovered : dict, optional host addresses discovered for first time by action 
(default=None) """ self.success = success self.value = value self.services = {} if services is None else services self.os = {} if os is None else os self.processes = {} if processes is None else processes self.access = {} if access is None else access self.discovered = {} if discovered is None else discovered self.connection_error = connection_error self.permission_error = permission_error self.undefined_error = undefined_error if newly_discovered is not None: self.newly_discovered = newly_discovered else: self.newly_discovered = {} def info(self): """Get results as dict Returns ------- dict action results information """ return dict( success=self.success, value=self.value, services=self.services, os=self.os, processes=self.processes, access=self.access, discovered=self.discovered, connection_error=self.connection_error, permission_error=self.permission_error, undefined_error=self.undefined_error, newly_discovered=self.newly_discovered ) def __str__(self): output = ["ActionObservation:"] for k, val in self.info().items(): output.append(f" {k}={val}") return "\n".join(output) class FlatActionSpace(spaces.Discrete): """Flat Action space for NASim environment. Inherits and implements the gym.spaces.Discrete action space ... 
Attributes ---------- n : int the number of actions in the action space actions : list of Actions the list of the Actions in the action space """ def __init__(self, scenario): """ Parameters --------- scenario : Scenario scenario description """ self.actions = load_action_list(scenario) super().__init__(len(self.actions)) def get_action(self, action_idx): """Get Action object corresponding to action idx Parameters ---------- action_idx : int the action idx Returns ------- Action Corresponding Action object """ assert isinstance(action_idx, int), \ ("When using flat action space, action must be an integer" f" or an Action object: {action_idx} is invalid") return self.actions[action_idx] class ParameterisedActionSpace(spaces.MultiDiscrete): """A parameterised action space for NASim environment. Inherits and implements the gym.spaces.MultiDiscrete action space, where each dimension corresponds to a different action parameter. The action parameters (in order) are: 0. Action Type = [0, 5] Where: 0=Exploit, 1=PrivilegeEscalation, 2=ServiceScan, 3=OSScan, 4=SubnetScan, 5=ProcessScan, 1. Subnet = [0, #subnets-1] -1 since we don't include the internet subnet 2. Host = [0, max subnets size-1] 3. OS = [0, #OS] Where 0=None. 4. Service = [0, #services - 1] 5. Process = [0, #processes] Where 0=None. Note that OS, Service and Process are only important for exploits and privilege escalation actions. ... 
Attributes ---------- nvec : Numpy.Array vector of the of the size of each parameter actions : list of Actions the list of all the Actions in the action space """ action_types = [ Exploit, PrivilegeEscalation, ServiceScan, OSScan, SubnetScan, ProcessScan ] def __init__(self, scenario): """ Parameters ---------- scenario : Scenario scenario description """ self.scenario = scenario self.actions = load_action_list(scenario) nvec = [ len(self.action_types), len(self.scenario.subnets)-1, max(self.scenario.subnets), self.scenario.num_os+1, self.scenario.num_services, self.scenario.num_processes ] super().__init__(nvec) def get_action(self, action_vec): """Get Action object corresponding to action vector. Parameters ---------- action_vector : list of ints or tuple of ints or Numpy.Array the action vector Returns ------- Action Corresponding Action object Notes ----- 1. if host# specified in action vector is greater than the number of hosts in the specified subnet, then host# will be changed to host# % subnet size. 2. if action is an exploit and parameters do not match any exploit definition in the scenario description then a NoOp action is returned with 0 cost. 
""" assert isinstance(action_vec, (list, tuple, np.ndarray)), \ ("When using parameterised action space, action must be an Action" f" object, a list or a numpy array: {action_vec} is invalid") a_class = self.action_types[action_vec[0]] # need to add one to subnet to account for Internet subnet subnet = action_vec[1]+1 host = action_vec[2] % self.scenario.subnets[subnet] target = (subnet, host) if a_class not in (Exploit, PrivilegeEscalation): # can ignore other action parameters kwargs = self._get_scan_action_def(a_class) return a_class(target=target, **kwargs) os = None if action_vec[3] == 0 else self.scenario.os[action_vec[3]-1] if a_class == Exploit: # have to make sure it is valid choice # and also get constant params (name, cost, prob, access) service = self.scenario.services[action_vec[4]] a_def = self._get_exploit_def(service, os) else: # privilege escalation # have to make sure it is valid choice # and also get constant params (name, cost, prob, access) proc = self.scenario.processes[action_vec[5]] a_def = self._get_privesc_def(proc, os) if a_def is None: return NoOp() return a_class(target=target, **a_def) def _get_scan_action_def(self, a_class): """Get the constants for scan actions definitions """ if a_class == ServiceScan: cost = self.scenario.service_scan_cost elif a_class == OSScan: cost = self.scenario.os_scan_cost elif a_class == SubnetScan: cost = self.scenario.subnet_scan_cost elif a_class == ProcessScan: cost = self.scenario.process_scan_cost else: raise TypeError(f"Not implemented for Action class {a_class}") return {"cost": cost} def _get_exploit_def(self, service, os): """Check if exploit parameters are valid """ e_map = self.scenario.exploit_map if service not in e_map: return None if os not in e_map[service]: return None return e_map[service][os] def _get_privesc_def(self, proc, os): """Check if privilege escalation parameters are valid """ pe_map = self.scenario.privesc_map if proc not in pe_map: return None if os not in pe_map[proc]: 
return None return pe_map[proc][os] ================================================ FILE: nasim/envs/environment.py ================================================ """ The main Environment class for NASim: NASimEnv. The NASimEnv class is the main interface for agents interacting with NASim. """ import gymnasium as gym from gymnasium import spaces import numpy as np from nasim.envs.state import State from nasim.envs.render import Viewer from nasim.envs.network import Network from nasim.envs.observation import Observation from nasim.envs.action import Action, FlatActionSpace, ParameterisedActionSpace class NASimEnv(gym.Env): """ A simulated computer network environment for pen-testing. Implements the gymnasium interface. ... Attributes ---------- name : str the environment scenario name scenario : Scenario Scenario object, defining the properties of the environment action_space : FlatActionSpace or ParameterisedActionSpace Action space for environment. If *flat_action=True* then this is a discrete action space (which subclasses gymnasium.spaces.Discrete), so each action is represented by an integer. If *flat_action=False* then this is a parameterised action space (which subclasses gymnasium.spaces.MultiDiscrete), so each action is represented using a list of parameters. observation_space : gymnasium.spaces.Box observation space for environment. If *flat_obs=True* then observations are represented by a 1D vector, otherwise observations are represented as a 2D matrix. 
current_state : State the current state of the environment last_obs : Observation the last observation that was generated by environment steps : int the number of steps performed since last reset (this does not include generative steps) """ metadata = {'render_modes': ["human", "ansi"]} render_mode = None reward_range = (-float('inf'), float('inf')) action_space = None observation_space = None current_state = None last_obs = None def __init__(self, scenario, fully_obs=False, flat_actions=True, flat_obs=True, render_mode=None): """ Parameters ---------- scenario : Scenario Scenario object, defining the properties of the environment fully_obs : bool, optional The observability mode of environment, if True then uses fully observable mode, otherwise is partially observable (default=False) flat_actions : bool, optional If true then uses a flat action space, otherwise will uses a parameterised action space (default=True). flat_obs : bool, optional If true then uses a 1D observation space, otherwise uses a 2D observation space (default=True) render_mode : str, optional The render mode to use for the environment. """ self.name = scenario.name self.scenario = scenario self.fully_obs = fully_obs self.flat_actions = flat_actions self.flat_obs = flat_obs self.render_mode = render_mode self.network = Network(scenario) self.current_state = State.generate_initial_state(self.network) self._renderer = None self.reset() if self.flat_actions: self.action_space = FlatActionSpace(self.scenario) else: self.action_space = ParameterisedActionSpace(self.scenario) if self.flat_obs: obs_shape = self.last_obs.shape_flat() else: obs_shape = self.last_obs.shape() obs_low, obs_high = Observation.get_space_bounds(self.scenario) self.observation_space = spaces.Box( low=obs_low, high=obs_high, shape=obs_shape ) self.steps = 0 def reset(self, *, seed=None, options=None): """Reset the state of the environment and returns the initial state. Implements gymnasium.Env.reset(). 
Parameters ---------- seed : int, optional the optional seed for the environments RNG options : dict, optional optional environment options (does nothing in NASim at the moment) Returns ------- numpy.Array the initial observation of the environment dict auxiliary information regarding reset """ super().reset(seed=seed, options=options) self.steps = 0 self.current_state = self.network.reset(self.current_state) self.last_obs = self.current_state.get_initial_observation( self.fully_obs ) if self.flat_obs: obs = self.last_obs.numpy_flat() else: obs = self.last_obs.numpy() return obs, {} def step(self, action): """Run one step of the environment using action. Implements gymnasium.Env.step(). Parameters ---------- action : Action or int or list or NumpyArray Action to perform. If not Action object, then if using flat actions this should be an int and if using non-flat actions this should be an indexable array. Returns ------- numpy.Array observation from performing action float reward from performing action bool whether the episode reached a terminal state or not (i.e. all target machines have been successfully compromised) bool whether the episode has reached the step limit (if one exists) dict auxiliary information regarding step (see :func:`nasim.env.action.ActionResult.info`) """ next_state, obs, reward, done, info = self.generative_step( self.current_state, action ) self.current_state = next_state self.last_obs = obs if self.flat_obs: obs = obs.numpy_flat() else: obs = obs.numpy() self.steps += 1 step_limit_reached = ( self.scenario.step_limit is not None and self.steps >= self.scenario.step_limit ) return obs, reward, done, step_limit_reached, info def generative_step(self, state, action): """Run one step of the environment using action in given state. Parameters ---------- state : State The state to perform the action in action : Action, int, list, NumpyArray Action to perform. 
If not Action object, then if using flat actions this should be an int and if using non-flat actions this should be an indexable array. Returns ------- State the next state after action was performed Observation observation from performing action float reward from performing action bool whether a terminal state has been reached or not dict auxiliary information regarding step (see :func:`nasim.env.action.ActionResult.info`) """ if not isinstance(action, Action): action = self.action_space.get_action(action) next_state, action_obs = self.network.perform_action( state, action ) obs = next_state.get_observation( action, action_obs, self.fully_obs ) done = self.goal_reached(next_state) reward = action_obs.value - action.cost return next_state, obs, reward, done, action_obs.info() def generate_random_initial_state(self): """Generates a random initial state for environment. This only randomizes the host configurations (os, services) using a uniform distribution, so may result in networks where it is not possible to reach the goal. Returns ------- State A random initial state """ return State.generate_random_initial_state(self.network) def generate_initial_state(self): """Generate the initial state for the environment. Returns ------- State The initial state Notes ----- This does not reset the current state of the environment (use :func:`reset` for that). """ return State.generate_initial_state(self.network) def render(self): """Render environment. Implements gymnasium.Env.render(). See render module for more details on modes and symbols. """ if self.render_mode is None: return return self.render_obs(mode=self.render_mode, obs=self.last_obs) def render_obs(self, mode="human", obs=None): """Render observation. See render module for more details on modes and symbols. Parameters ---------- mode : str rendering mode obs : Observation or numpy.ndarray, optional the observation to render, if None will render last observation. 
If numpy.ndarray it must be in format that matches Observation (i.e. ndarray returned by step method) (default=None) """ if mode is None: return if obs is None: obs = self.last_obs if not isinstance(obs, Observation): obs = Observation.from_numpy(obs, self.current_state.shape()) if self._renderer is None: self._renderer = Viewer(self.network) if mode in ("human", "ansi"): return self._renderer.render_readable(obs) else: raise NotImplementedError( "Please choose correct render mode from :" f"{self.metadata['render_modes']}" ) def render_state(self, mode="human", state=None): """Render state. See render module for more details on modes and symbols. If mode = ASCI: Machines displayed in rows, with one row for each subnet and hosts displayed in order of id within subnet Parameters ---------- mode : str rendering mode state : State or numpy.ndarray, optional the State to render, if None will render current state If numpy.ndarray it must be in format that matches State (i.e. ndarray returned by generative_step method) (default=None) """ if mode is None: return if state is None: state = self.current_state if not isinstance(state, State): state = State.from_numpy(state, self.current_state.shape(), self.current_state.host_num_map) if self._renderer is None: self._renderer = Viewer(self.network) if mode in ("human", "ansi"): return self._renderer.render_readable_state(state) else: raise NotImplementedError( "Please choose correct render mode from : " f"{self.metadata['render_modes']}" ) def render_action(self, action): """Renders human readable version of action. This is mainly useful for getting a text description of the action that corresponds to a given integer. Parameters ---------- action : Action or int or list or NumpyArray Action to render. If not Action object, then if using flat actions this should be an int and if using non-flat actions this should be an indexable array. 
""" if not isinstance(action, Action): action = self.action_space.get_action(action) print(action) def render_episode(self, episode, width=7, height=7): """Render an episode as sequence of network graphs, where an episode is a sequence of (state, action, reward, done) tuples generated from interactions with environment. Parameters ---------- episode : list list of (State, Action, reward, done) tuples width : int width of GUI window height : int height of GUI window """ if self._renderer is None: self._renderer = Viewer(self.network) self._renderer.render_episode(episode, width, height) def render_network_graph(self, ax=None, show=False): """Render a plot of network as a graph with hosts as nodes arranged into subnets and showing connections between subnets. Renders current state of network. Parameters ---------- ax : Axes matplotlib axis to plot graph on, or None to plot on new axis show : bool whether to display plot, or simply setup plot and showing plot can be handled elsewhere by user """ if self._renderer is None: self._renderer = Viewer(self.network) state = self.current_state self._renderer.render_graph(state, ax, show) def get_minimum_hops(self): """Get the minimum number of network hops required to reach targets. That is minimum number of hosts that must be traversed in the network in order to reach all sensitive hosts on the network starting from the initial state Returns ------- int minumum possible number of network hops to reach target hosts """ return self.network.get_minimal_hops() def get_action_mask(self): """Get a vector mask for valid actions. Returns ------- ndarray numpy vector of 1's and 0's, one for each action. Where an index will be 1 if action is valid given current state, or 0 if action is invalid. 
""" assert isinstance(self.action_space, FlatActionSpace), \ "Can only use action mask function when using flat action space" mask = np.zeros(self.action_space.n, dtype=np.int64) for a_idx in range(self.action_space.n): action = self.action_space.get_action(a_idx) if self.network.host_discovered(action.target): mask[a_idx] = 1 return mask def get_score_upper_bound(self): """Get the theoretical upper bound for total reward for scenario. The theoretical upper bound score is where the agent exploits only a single host in each subnet that is required to reach sensitive hosts along the shortest bath in network graph, and exploits the all sensitive hosts (i.e. the minimum network hops). Assuming action cost of 1 and each sensitive host is exploitable from any other connected subnet (which may not be true, hence being an upper bound). Returns ------- float theoretical max score """ max_reward = self.network.get_total_sensitive_host_value() max_reward += self.network.get_total_discovery_value() max_reward -= self.network.get_minimal_hops() return max_reward def goal_reached(self, state=None): """Check if the state is the goal state. The goal state is when all sensitive hosts have been compromised. Parameters ---------- state : State, optional a state, if None will use current_state of environment (default=None) Returns ------- bool True if state is goal state, otherwise False. 
""" if state is None: state = self.current_state return self.network.all_sensitive_hosts_compromised(state) def __str__(self): output = [ "NASimEnv:", f"name={self.name}", f"fully_obs={self.fully_obs}", f"flat_actions={self.flat_actions}", f"flat_obs={self.flat_obs}" ] return "\n ".join(output) def close(self): if self._renderer is not None: self._renderer.close() self._renderer = None ================================================ FILE: nasim/envs/gym_env.py ================================================ from nasim.envs.environment import NASimEnv from nasim.scenarios import Scenario, make_benchmark_scenario class NASimGymEnv(NASimEnv): """A wrapper around the NASimEnv compatible with gymnasium.make() See nasim.NASimEnv for details. """ def __init__(self, scenario, fully_obs=False, flat_actions=True, flat_obs=True, render_mode=None): """ Parameters ---------- scenario : str or or nasim.scenarios.Scenario either the name of benchmark environment (str) or a nasim Scenario instance fully_obs : bool, optional the observability mode of environment, if True then uses fully observable mode, otherwise partially observable (default=False) flat_actions : bool, optional if true then uses a flat action space, otherwise will use parameterised action space (default=True). flat_obs : bool, optional if true then uses a 1D observation space. If False will use a 2D observation space (default=True) render_mode : str, optional The render mode to use for the environment. """ if not isinstance(scenario, Scenario): scenario = make_benchmark_scenario(scenario) super().__init__(scenario, fully_obs=fully_obs, flat_actions=flat_actions, flat_obs=flat_obs, render_mode=render_mode) ================================================ FILE: nasim/envs/host_vector.py ================================================ """ This module contains the HostVector class. This is the main class for storing and updating the state of a single host in the NASim environment. 
""" import numpy as np from nasim.envs.utils import AccessLevel from nasim.envs.action import ActionResult class HostVector: """ A Vector representation of a single host in NASim. Each host is represented as a vector (1D numpy array) for efficiency and to make it easier to use with deep learning agents. The vector is made up of multiple features arranged in a consistent way. Features in the vector, listed in order, are: 1. subnet address - one-hot encoding with length equal to the number of subnets 2. host address - one-hot encoding with length equal to the maximum number of hosts in any subnet 3. compromised - bool 4. reachable - bool 5. discovered - bool 6. value - float 7. discovery value - float 8. access - int 9. OS - bool for each OS in scenario (only one OS has value of true) 10. services running - bool for each service in scenario 11. processes running - bool for each process in scenario Notes ----- - The size of the vector is equal to: #subnets + max #hosts in any subnet + 6 + #OS + #services + #processes. - Where the +6 is for compromised, reachable, discovered, value, discovery_value, and access features - The vector is a float vector so True/False is actually represented as 1.0/0.0. """ # class properties that are the same for all hosts # these are set when calling vectorize method # the bounds on address space (used for one hot encoding of host address) address_space_bounds = None # number of OS in scenario num_os = None # map from OS name to its index in host vector os_idx_map = {} # number of services in scenario num_services = None # map from service name to its index in host vector service_idx_map = {} # number of processes in scenario num_processes = None # map from process name to its index in host vector process_idx_map = {} # size of state for host vector (i.e. 
len of vector) state_size = None # vector position constants # to be initialized _subnet_address_idx = 0 _host_address_idx = None _compromised_idx = None _reachable_idx = None _discovered_idx = None _value_idx = None _discovery_value_idx = None _access_idx = None _os_start_idx = None _service_start_idx = None _process_start_idx = None def __init__(self, vector): self.vector = vector @classmethod def vectorize(cls, host, address_space_bounds, vector=None): if cls.address_space_bounds is None: cls._initialize( address_space_bounds, host.services, host.os, host.processes ) if vector is None: vector = np.zeros(cls.state_size, dtype=np.float32) else: assert len(vector) == cls.state_size vector[cls._subnet_address_idx + host.address[0]] = 1 vector[cls._host_address_idx + host.address[1]] = 1 vector[cls._compromised_idx] = int(host.compromised) vector[cls._reachable_idx] = int(host.reachable) vector[cls._discovered_idx] = int(host.discovered) vector[cls._value_idx] = host.value vector[cls._discovery_value_idx] = host.discovery_value vector[cls._access_idx] = host.access for os_num, (os_key, os_val) in enumerate(host.os.items()): vector[cls._get_os_idx(os_num)] = int(os_val) for srv_num, (srv_key, srv_val) in enumerate(host.services.items()): vector[cls._get_service_idx(srv_num)] = int(srv_val) host_procs = host.processes.items() for proc_num, (proc_key, proc_val) in enumerate(host_procs): vector[cls._get_process_idx(proc_num)] = int(proc_val) return cls(vector) @classmethod def vectorize_random(cls, host, address_space_bounds, vector=None): hvec = cls.vectorize(host, vector) # random variables for srv_num in cls.service_idx_map.values(): srv_val = np.random.randint(0, 2) hvec.vector[cls._get_service_idx(srv_num)] = srv_val chosen_os = np.random.choice(list(cls.os_idx_map.values())) for os_num in cls.os_idx_map.values(): hvec.vector[cls._get_os_idx(os_num)] = int(os_num == chosen_os) for proc_num in cls.process_idx_map.values(): proc_val = np.random.randint(0, 2) 
hvec.vector[cls._get_process_idx(proc_num)] = proc_val return hvec @property def compromised(self): return self.vector[self._compromised_idx] @compromised.setter def compromised(self, val): self.vector[self._compromised_idx] = int(val) @property def discovered(self): return self.vector[self._discovered_idx] @discovered.setter def discovered(self, val): self.vector[self._discovered_idx] = int(val) @property def reachable(self): return self.vector[self._reachable_idx] @reachable.setter def reachable(self, val): self.vector[self._reachable_idx] = int(val) @property def address(self): return ( self.vector[self._subnet_address_idx_slice()].argmax(), self.vector[self._host_address_idx_slice()].argmax() ) @property def value(self): return self.vector[self._value_idx] @property def discovery_value(self): return self.vector[self._discovery_value_idx] @property def access(self): return self.vector[self._access_idx] @access.setter def access(self, val): self.vector[self._access_idx] = int(val) @property def services(self): services = {} for srv, srv_num in self.service_idx_map.items(): services[srv] = self.vector[self._get_service_idx(srv_num)] return services @property def os(self): os = {} for os_key, os_num in self.os_idx_map.items(): os[os_key] = self.vector[self._get_os_idx(os_num)] return os @property def processes(self): processes = {} for proc, proc_num in self.process_idx_map.items(): processes[proc] = self.vector[self._get_process_idx(proc_num)] return processes def is_running_service(self, srv): srv_num = self.service_idx_map[srv] return bool(self.vector[self._get_service_idx(srv_num)]) def is_running_os(self, os): os_num = self.os_idx_map[os] return bool(self.vector[self._get_os_idx(os_num)]) def is_running_process(self, proc): proc_num = self.process_idx_map[proc] return bool(self.vector[self._get_process_idx(proc_num)]) def perform_action(self, action): """Perform given action against this host Arguments --------- action : Action the action to perform Returns 
        -------
        HostVector
            the resulting state of host after action
        ActionObservation
            the result from the action
        """
        next_state = self.copy()
        if action.is_service_scan():
            # service scans always succeed and reveal the service map
            result = ActionResult(True, 0, services=self.services)
            return next_state, result

        if action.is_os_scan():
            # OS scans always succeed and reveal the OS map
            return next_state, ActionResult(True, 0, os=self.os)

        if action.is_exploit():
            if self.is_running_service(action.service) and \
               (action.os is None or self.is_running_os(action.os)):
                # service and os is present so exploit is successful
                value = 0
                next_state.compromised = True
                if not self.access == AccessLevel.ROOT:
                    # ensure a machine is not rewarded twice
                    # and access doesn't decrease
                    next_state.access = action.access
                    if action.access == AccessLevel.ROOT:
                        value = self.value
                result = ActionResult(
                    True,
                    value=value,
                    services=self.services,
                    os=self.os,
                    access=action.access
                )
                return next_state, result
            # NOTE: a failed exploit falls through to the access check
            # below, and ultimately to the generic failure result

        # following actions are on host so require correct access
        if not (self.compromised
                and action.req_access <= self.access):
            result = ActionResult(False, 0, permission_error=True)
            return next_state, result

        if action.is_process_scan():
            result = ActionResult(
                True, 0, access=self.access, processes=self.processes
            )
            return next_state, result

        if action.is_privilege_escalation():
            has_proc = (
                action.process is None
                or self.is_running_process(action.process)
            )
            has_os = (
                action.os is None or self.is_running_os(action.os)
            )
            if has_proc and has_os:
                # host compromised and proc and os is present
                # so privesc is successful
                value = 0.0
                if not self.access == AccessLevel.ROOT:
                    # ensure a machine is not rewarded twice
                    # and access doesn't decrease
                    next_state.access = action.access
                    if action.access == AccessLevel.ROOT:
                        value = self.value
                result = ActionResult(
                    True,
                    value=value,
                    processes=self.processes,
                    os=self.os,
                    access=action.access
                )
                return next_state, result

        # action failed due to host config not meeting preconditions
        return next_state, ActionResult(False, 0)

    def observe(self,
                address=False,
                compromised=False,
                reachable=False,
                discovered=False,
                access=False,
                value=False,
                discovery_value=False,
                services=False,
                processes=False,
                os=False):
        # Build an observation vector containing only the requested
        # features; every other position remains zero.
        obs = np.zeros(self.state_size, dtype=np.float32)
        if address:
            subnet_slice = self._subnet_address_idx_slice()
            host_slice = self._host_address_idx_slice()
            obs[subnet_slice] = self.vector[subnet_slice]
            obs[host_slice] = self.vector[host_slice]
        if compromised:
            obs[self._compromised_idx] = self.vector[self._compromised_idx]
        if reachable:
            obs[self._reachable_idx] = self.vector[self._reachable_idx]
        if discovered:
            obs[self._discovered_idx] = self.vector[self._discovered_idx]
        if value:
            obs[self._value_idx] = self.vector[self._value_idx]
        if discovery_value:
            v = self.vector[self._discovery_value_idx]
            obs[self._discovery_value_idx] = v
        if access:
            obs[self._access_idx] = self.vector[self._access_idx]
        if os:
            idxs = self._os_idx_slice()
            obs[idxs] = self.vector[idxs]
        if services:
            idxs = self._service_idx_slice()
            obs[idxs] = self.vector[idxs]
        if processes:
            idxs = self._process_idx_slice()
            obs[idxs] = self.vector[idxs]
        return obs

    def readable(self):
        # human-readable dict view of this host's vector
        return self.get_readable(self.vector)

    def copy(self):
        vector_copy = np.copy(self.vector)
        return HostVector(vector_copy)

    def numpy(self):
        return self.vector

    @classmethod
    def _initialize(cls, address_space_bounds, services, os_info, processes):
        # (Re)builds the shared class-level vector layout from the scenario
        # configuration of the first host that is vectorized.
        cls.os_idx_map = {}
        cls.service_idx_map = {}
        cls.process_idx_map = {}
        cls.address_space_bounds = address_space_bounds
        cls.num_os = len(os_info)
        cls.num_services = len(services)
        cls.num_processes = len(processes)
        cls._update_vector_idxs()
        for os_num, (os_key, os_val) in enumerate(os_info.items()):
            cls.os_idx_map[os_key] = os_num
        for srv_num, (srv_key, srv_val) in enumerate(services.items()):
            cls.service_idx_map[srv_key] = srv_num
        for proc_num, (proc_key, proc_val) in enumerate(processes.items()):
            cls.process_idx_map[proc_key] = proc_num

    @classmethod
    def _update_vector_idxs(cls):
        # Lay out feature positions back-to-back; see the class docstring
        # for the feature ordering.
        cls._subnet_address_idx = 0
        cls._host_address_idx = cls.address_space_bounds[0]
        cls._compromised_idx = (
            cls._host_address_idx + cls.address_space_bounds[1]
        )
        cls._reachable_idx = cls._compromised_idx + 1
        cls._discovered_idx = cls._reachable_idx + 1
        cls._value_idx = cls._discovered_idx + 1
        cls._discovery_value_idx = cls._value_idx + 1
        cls._access_idx = cls._discovery_value_idx + 1
        cls._os_start_idx = cls._access_idx + 1
        cls._service_start_idx = cls._os_start_idx + cls.num_os
        cls._process_start_idx = cls._service_start_idx + cls.num_services
        cls.state_size = cls._process_start_idx + cls.num_processes

    @classmethod
    def _subnet_address_idx_slice(cls):
        return slice(cls._subnet_address_idx, cls._host_address_idx)

    @classmethod
    def _host_address_idx_slice(cls):
        return slice(cls._host_address_idx, cls._compromised_idx)

    @classmethod
    def _get_service_idx(cls, srv_num):
        return cls._service_start_idx+srv_num

    @classmethod
    def _service_idx_slice(cls):
        return slice(cls._service_start_idx, cls._process_start_idx)

    @classmethod
    def _get_os_idx(cls, os_num):
        return cls._os_start_idx+os_num

    @classmethod
    def _os_idx_slice(cls):
        return slice(cls._os_start_idx, cls._service_start_idx)

    @classmethod
    def _get_process_idx(cls, proc_num):
        return cls._process_start_idx+proc_num

    @classmethod
    def _process_idx_slice(cls):
        return slice(cls._process_start_idx, cls.state_size)

    @classmethod
    def get_readable(cls, vector):
        # Convert a raw host vector into a human-readable dict
        readable_dict = dict()
        hvec = cls(vector)
        readable_dict["Address"] = hvec.address
        readable_dict["Compromised"] = bool(hvec.compromised)
        readable_dict["Reachable"] = bool(hvec.reachable)
        readable_dict["Discovered"] = bool(hvec.discovered)
        readable_dict["Value"] = hvec.value
        readable_dict["Discovery Value"] = hvec.discovery_value
        readable_dict["Access"] = hvec.access
        for os_name in cls.os_idx_map:
            readable_dict[f"{os_name}"] = hvec.is_running_os(os_name)
        for srv_name in cls.service_idx_map:
            readable_dict[f"{srv_name}"] = hvec.is_running_service(srv_name)
        for proc_name in cls.process_idx_map:
            readable_dict[f"{proc_name}"] = hvec.is_running_process(proc_name)
        return readable_dict

    @classmethod
    def reset(cls):
        """Resets any class variables.

        This is used to avoid errors when changing scenarios within a single
        python session
        """
        cls.address_space_bounds = None

    def __repr__(self):
        return f"Host: {self.address}"

    def __hash__(self):
        return hash(str(self.vector))

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, HostVector):
            return False
        return np.array_equal(self.vector, other.vector)


================================================
FILE: nasim/envs/network.py
================================================
import numpy as np

from nasim.envs.action import ActionResult
from nasim.envs.utils import get_minimal_hops_to_goal, min_subnet_depth, \
    AccessLevel

# column in topology adjacency matrix that represents connection between
# subnet and public
INTERNET = 0


class Network:
    """A computer network """

    def __init__(self, scenario):
        # copy the relevant scenario attributes; the scenario object itself
        # is NOT kept as an attribute
        self.hosts = scenario.hosts
        self.host_num_map = scenario.host_num_map
        self.subnets = scenario.subnets
        self.topology = scenario.topology
        self.firewall = scenario.firewall
        self.address_space = scenario.address_space
        self.address_space_bounds = scenario.address_space_bounds
        self.sensitive_addresses = scenario.sensitive_addresses
        self.sensitive_hosts = scenario.sensitive_hosts

    def reset(self, state):
        """Reset the network state to initial state """
        next_state = state.copy()
        for host_addr in self.address_space:
            host = next_state.get_host(host_addr)
            host.compromised = False
            host.access = AccessLevel.NONE
            # only hosts on public (internet facing) subnets start
            # reachable, and hence discovered
            host.reachable = self.subnet_public(host_addr[0])
            host.discovered = host.reachable
        return next_state

    def perform_action(self, state, action):
        """Perform the given Action against the network.

        Arguments
        ---------
        state : State
            the current state
        action : Action
            the action to perform

        Returns
        -------
        State
            the state after the action is performed
        ActionObservation
            the result from the action
        """
        tgt_subnet, tgt_id = action.target
        assert 0 < tgt_subnet < len(self.subnets)
        assert tgt_id <= self.subnets[tgt_subnet]

        next_state = state.copy()

        if action.is_noop():
            return next_state, ActionResult(True)

        if not state.host_reachable(action.target) \
           or not state.host_discovered(action.target):
            result = ActionResult(False, 0.0, connection_error=True)
            return next_state, result

        has_req_permission = self.has_required_remote_permission(state, action)
        if action.is_remote() and not has_req_permission:
            result = ActionResult(False, 0.0, permission_error=True)
            return next_state, result

        if action.is_exploit() \
           and not self.traffic_permitted(
                   state, action.target, action.service
           ):
            result = ActionResult(False, 0.0, connection_error=True)
            return next_state, result

        host_compromised = state.host_compromised(action.target)
        if action.is_privilege_escalation() and not host_compromised:
            result = ActionResult(False, 0.0, connection_error=True)
            return next_state, result

        if action.is_exploit() and host_compromised:
            # host already compromised so exploits don't fail due to randomness
            pass
        elif np.random.rand() > action.prob:
            # action failed stochastically
            return next_state, ActionResult(False, 0.0, undefined_error=True)

        if action.is_subnet_scan():
            return self._perform_subnet_scan(next_state, action)

        # delegate host-level actions to the target host's vector
        t_host = state.get_host(action.target)
        next_host_state, action_obs = t_host.perform_action(action)
        next_state.update_host(action.target, next_host_state)
        self._update(next_state, action, action_obs)
        return next_state, action_obs

    def _perform_subnet_scan(self, next_state, action):
        # subnet scans require a compromised target with sufficient access
        if not next_state.host_compromised(action.target):
            result = ActionResult(False, 0.0, connection_error=True)
            return next_state, result
        if not next_state.host_has_access(action.target, action.req_access):
            result = ActionResult(False, 0.0, permission_error=True)
            return next_state, result

        discovered = {}
        newly_discovered = {}
        discovery_reward = 0
        target_subnet = action.target[0]
        for h_addr in self.address_space:
            newly_discovered[h_addr] = False
            discovered[h_addr] = False
            if self.subnets_connected(target_subnet, h_addr[0]):
                host = next_state.get_host(h_addr)
                discovered[h_addr] = True
                # discovery reward is only collected once per host
                if not host.discovered:
                    newly_discovered[h_addr] = True
                    host.discovered = True
                    discovery_reward += host.discovery_value

        obs = ActionResult(
            True,
            discovery_reward,
            discovered=discovered,
            newly_discovered=newly_discovered
        )
        return next_state, obs

    def _update(self, state, action, action_obs):
        # successful exploits may open up new reachable hosts
        if action.is_exploit() and action_obs.success:
            self._update_reachable(state, action.target)

    def _update_reachable(self, state, compromised_addr):
        """Updates the reachable status of hosts on network, based on current
        state and newly exploited host
        """
        comp_subnet = compromised_addr[0]
        for addr in self.address_space:
            if state.host_reachable(addr):
                continue
            if self.subnets_connected(comp_subnet, addr[0]):
                state.set_host_reachable(addr)

    def get_sensitive_hosts(self):
        return self.sensitive_addresses

    def is_sensitive_host(self, host_address):
        return host_address in self.sensitive_addresses

    def subnets_connected(self, subnet_1, subnet_2):
        return self.topology[subnet_1][subnet_2] == 1

    def subnet_traffic_permitted(self, src_subnet, dest_subnet, service):
        if src_subnet == dest_subnet:
            # in same subnet so permitted
            return True
        if not self.subnets_connected(src_subnet, dest_subnet):
            return False
        return service in self.firewall[(src_subnet, dest_subnet)]

    def host_traffic_permitted(self, src_addr, dest_addr, service):
        dest_host = self.hosts[dest_addr]
        return dest_host.traffic_permitted(src_addr, service)

    def has_required_remote_permission(self, state, action):
        """Checks attacker has necessary permissions for remote action """
        if self.subnet_public(action.target[0]):
            return True

        for src_addr in self.address_space:
            if not
state.host_compromised(src_addr): continue if action.is_scan() and \ not self.subnets_connected(src_addr[0], action.target[0]): continue if action.is_exploit() and \ not self.subnet_traffic_permitted( src_addr[0], action.target[0], action.service ): continue if state.host_has_access(src_addr, action.req_access): return True return False def traffic_permitted(self, state, host_addr, service): """Checks whether the subnet and host firewalls permits traffic to a given host and service, based on current set of compromised hosts on network. """ for src_addr in self.address_space: if not state.host_compromised(src_addr) and \ not self.subnet_public(src_addr[0]): continue if not self.subnet_traffic_permitted( src_addr[0], host_addr[0], service ): continue if self.host_traffic_permitted(src_addr, host_addr, service): return True return False def subnet_public(self, subnet): return self.topology[subnet][INTERNET] == 1 def get_number_of_subnets(self): return len(self.subnets) def all_sensitive_hosts_compromised(self, state): for host_addr in self.sensitive_addresses: if not state.host_has_access(host_addr, AccessLevel.ROOT): return False return True def get_total_sensitive_host_value(self): total = 0 for host_value in self.sensitive_hosts.values(): total += host_value return total def get_total_discovery_value(self): total = 0 for host in self.hosts.values(): total += host.discovery_value return total def get_minimal_hops(self): return get_minimal_hops_to_goal( self.topology, self.sensitive_addresses ) def get_subnet_depths(self): return min_subnet_depth(self.topology) def __str__(self): output = "\n--- Network ---\n" output += "Subnets: " + str(self.subnets) + "\n" output += "Topology:\n" for row in self.topology: output += f"\t{row}\n" output += "Sensitive hosts: \n" for addr, value in self.sensitive_hosts.items(): output += f"\t{addr}: {value}\n" output += "Num_services: {self.scenario.num_services}\n" output += "Hosts:\n" for m in self.hosts.values(): output += str(m) + 
"\n" output += "Firewall:\n" for c, a in self.firewall.items(): output += f"\t{c}: {a}\n" return output ================================================ FILE: nasim/envs/observation.py ================================================ import numpy as np from nasim.envs.utils import AccessLevel from nasim.envs.host_vector import HostVector class Observation: """An observation for NASim. Each observation is a 2D tensor with a row for each host and an additional row containing auxiliary observations. Each host row is a host_vector (for details see :class:`HostVector`) while the auxiliary row contains non-host specific observations (see Notes section). ... Attributes ---------- obs_shape : (int, int) the shape of the observation aux_row : int the row index for the auxiliary row tensor : numpy.ndarray 2D Numpy array storing the observation Notes ----- The auxiliary row is the final row in the observation tensor and has the following features (in order): 1. Action success - True (1) or False (0) indicates whether the action succeeded or failed 2. Connection error - True (1) or False (0) indicates whether there was a connection error or not 3. Permission error - True (1) or False (0) indicates whether there was a permission error or not 4. Undefined error - True (1) or False (0) indicates whether there was an undefined error or not (e.g. failure due to stochastic nature of exploits) Since the number of features in the auxiliary row is less than the number of features in each host row, the remainder of the row is all zeros. """ # obs vector positions for auxiliary observations _success_idx = 0 _conn_error_idx = _success_idx + 1 _perm_error_idx = _conn_error_idx + 1 _undef_error_idx = _perm_error_idx + 1 def __init__(self, state_shape): """ Parameters ---------- state_shape : (int, int) 2D shape of the state (i.e. 
num_hosts, host_vector_size) """ self.obs_shape = (state_shape[0]+1, state_shape[1]) self.aux_row = self.obs_shape[0]-1 self.tensor = np.zeros(self.obs_shape, dtype=np.float32) @staticmethod def get_space_bounds(scenario): value_bounds = scenario.host_value_bounds discovery_bounds = scenario.host_discovery_value_bounds obs_low = min( 0, value_bounds[0], discovery_bounds[0] ) obs_high = max( 1, value_bounds[1], discovery_bounds[1], AccessLevel.ROOT, scenario.address_space_bounds[0], scenario.address_space_bounds[1] ) return (obs_low, obs_high) @classmethod def from_numpy(cls, o_array, state_shape): obs = cls(state_shape) if o_array.shape != (state_shape[0]+1, state_shape[1]): o_array = o_array.reshape(state_shape[0]+1, state_shape[1]) obs.tensor = o_array return obs def from_state(self, state): self.tensor[:self.aux_row] = state.tensor def from_action_result(self, action_result): success = int(action_result.success) self.tensor[self.aux_row][self._success_idx] = success con_err = int(action_result.connection_error) self.tensor[self.aux_row][self._conn_error_idx] = con_err perm_err = int(action_result.permission_error) self.tensor[self.aux_row][self._perm_error_idx] = perm_err undef_err = int(action_result.undefined_error) self.tensor[self.aux_row][self._undef_error_idx] = undef_err def from_state_and_action(self, state, action_result): self.from_state(state) self.from_action_result(action_result) def update_from_host(self, host_idx, host_obs_vector): self.tensor[host_idx][:] = host_obs_vector @property def success(self): """Whether the action succeded or not Returns ------- bool True if the action succeeded, otherwise False """ return bool(self.tensor[self.aux_row][self._success_idx]) @property def connection_error(self): """Whether there was a connection error or not Returns ------- bool True if there was a connection error, otherwise False """ return bool(self.tensor[self.aux_row][self._conn_error_idx]) @property def permission_error(self): """Whether there was a 
permission error or not Returns ------- bool True if there was a permission error, otherwise False """ return bool(self.tensor[self.aux_row][self._perm_error_idx]) @property def undefined_error(self): """Whether there was an undefined error or not Returns ------- bool True if there was a undefined error, otherwise False """ return bool(self.tensor[self.aux_row][self._undef_error_idx]) def shape_flat(self): """Get the flat (1D) shape of the Observation. Returns ------- (int, ) the flattened shape of observation """ return self.numpy_flat().shape def shape(self): """Get the (2D) shape of the observation Returns ------- (int, int) the 2D shape of the observation """ return self.obs_shape def numpy_flat(self): """Get the flattened observation tensor Returns ------- numpy.ndarray the flattened (1D) observation tenser """ return self.tensor.flatten() def numpy(self): """Get the observation tensor Returns ------- numpy.ndarray the (2D) observation tenser """ return self.tensor def get_readable(self): """Get a human readable version of the observation Returns ------- list[dict] list of host observations as human-readable dictionary dict[str, bool] auxiliary observation dictionary """ host_obs = [] for host_idx in range(self.obs_shape[0]-1): host_obs_vec = self.tensor[host_idx] readable_dict = HostVector.get_readable(host_obs_vec) host_obs.append(readable_dict) aux_obs = { "Success": self.success, "Connection Error": self.connection_error, "Permission Error": self.permission_error, "Undefined Error": self.undefined_error } return host_obs, aux_obs def __str__(self): return str(self.tensor) def __eq__(self, other): return np.array_equal(self.tensor, other.tensor) def __hash__(self): return hash(str(self.tensor)) ================================================ FILE: nasim/envs/render.py ================================================ """This module contains functions and classes for rendering NASim """ import math import random import tkinter as Tk import networkx as nx 
from prettytable import PrettyTable # import order important here try: import matplotlib matplotlib.use('TkAgg') import matplotlib.pyplot as plt # noqa E402 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg # noqa E402 import matplotlib.patches as mpatches # noqa E402 except Exception as ex: import warnings warnings.warn( f"Unable to import Matplotlib with TkAgg backend due to following " f"exception: \"{type(ex)} {ex}\". NASIM can still run but GUI " f"functionallity may not work as expected." ) # Agent node in graph AGENT = (0, 0) # Colors and symbols for describing state of host COLORS = ['yellow', 'orange', 'magenta', 'green', 'blue', 'red', 'black'] SYMBOLS = ['C', 'R', 'S', 'c', 'r', 'o', 'A'] class Viewer: """A class for visualizing the network state from NASimEnv""" def __init__(self, network): """ Arguments --------- network : Network network of environment """ self.network = network self.subnets = self._get_subnets(network) self.positions = self._get_host_positions(network) def render_graph(self, state, ax=None, show=False, width=5, height=6): """Render graph structure represention of network Arguments --------- state : State state of network user wants to view (Typically will be initial state) ax : Axes matplotlib axis to plot graph on, or None to plot on new axis show : bool whether to display plot, or simply construct plot width : int width of GUI window height : int height of GUI window """ G = self._construct_graph(state) colors = [] labels = {} for n in list(G.nodes): colors.append(G.nodes[n]["color"]) labels[n] = G.nodes[n]["label"] if ax is None: fig = plt.figure(figsize=(width, height)) ax = fig.add_subplot(111) else: fig = ax.get_figure() nx.draw_networkx_nodes(G, self.positions, node_size=1000, node_color=colors, ax=ax) nx.draw_networkx_labels(G, self.positions, labels, font_size=10, font_weight="bold") nx.draw_networkx_edges(G, self.positions) ax.axis('off') ax.set_xlim(left=0.0, right=100.0) # ax.set_ylim(bottom=0.0, top=100.0) 
legend_entries = EpisodeViewer.legend(compromised=False) ax.legend(handles=legend_entries, fontsize=12, loc=2) if show: fig.tight_layout() plt.show() plt.close(fig) def render_episode(self, episode, width=7, height=5): """Display an episode from Cyber Attack Simulator Environment in a seperate window. Where an episode is a sequence of (state, action, reward, done) tuples generated from interactions with environment. Arguments --------- episode : list list of (State, Action, reward, done) tuples width : int width of GUI window height : int height of GUI window """ init_ep_state = episode[0][0] G = self._construct_graph(init_ep_state) EpisodeViewer(episode, G, self.network.sensitive_hosts, width, height) def render_readable(self, obs): """Print a readable tabular version of observation to stdout Arguments --------- obs : Observation observation to view """ host_obs, aux_obs = obs.get_readable() aux_table = self._construct_table_from_dict(aux_obs) host_table = self._construct_table_from_list_of_dicts(host_obs) print("Observation:") print(aux_table) print(host_table) def render_readable_state(self, state): """Print a readable tabular version of observation to stdout Arguments --------- state : State state to view """ host_obs = state.get_readable() host_table = self._construct_table_from_list_of_dicts(host_obs) print("State:") print(host_table) def close(self): """Close renderer.""" plt.close("all") def _construct_table_from_dict(self, d): headers = list(d.keys()) table = PrettyTable(headers) row = [str(d[k]) for k in headers] table.add_row(row) return table def _construct_table_from_list_of_dicts(self, l): headers = list(l[0].keys()) table = PrettyTable(headers) for d in l: row = [str(d[k]) for k in headers] table.add_row(row) return table def _construct_graph(self, state): """Create a network graph from the current state Arguments --------- state : State current state of network Returns ------- G : Graph NetworkX Graph representing state of network """ G = nx.Graph() 
sensitive_hosts = self.network.sensitive_hosts # Create a fully connected graph for each subnet for subnet in self.subnets: for m in subnet: node_color = get_host_representation(state, sensitive_hosts, m, COLORS) node_pos = self.positions[m] G.add_node(m, color=node_color, pos=node_pos, label=str(m)) for x in subnet: for y in subnet: if x == y: continue G.add_edge(x, y) # Retrieve first host in each subnet subnet_prime_nodes = [] for subnet in self.subnets: subnet_prime_nodes.append(subnet[0]) # Connect connected subnets by creating edge between first host from # each subnet for x in subnet_prime_nodes: for y in subnet_prime_nodes: if x == y: continue if self.network.subnets_connected(x[0], y[0]): G.add_edge(x, y) return G def _get_host_positions(self, network): """Get list of positions for each host in episode Arguments --------- network : Network network object describing network configuration of environment episode was generated from """ address_space = network.address_space depths = network.get_subnet_depths() max_depth = max(depths) # list of lists where each list contains subnet_id of subnets with # same depth subnets_by_depth = [[] for i in range(max_depth + 1)] for subnet_id, subnet_depth in enumerate(depths): if subnet_id == 0: continue subnets_by_depth[subnet_depth].append(subnet_id) # max value of position in figure max_pos = 100 # for spacing between rows and columns and spread of nodes within # subnet margin = 10 row_height = max_pos / (max_depth + 1) # positions are randomly assigned within regions of display based on # subnet number positions = {} for m in address_space: m_subnet = m[0] m_depth = depths[m_subnet] # row is dependent on depth of subnet row_max = max_pos - (m_depth * row_height) row_min = max_pos - ((m_depth + 1) * row_height) # col width is dependent on number of subnets at same depth num_cols = len(subnets_by_depth[m_depth]) col_width = max_pos / num_cols # col of host dependent on subnet_id relative to other subnets of # same depth 
m_col = subnets_by_depth[m_depth].index(m_subnet) col_min = m_col * col_width col_max = (m_col + 1) * col_width # randomly sample position of host within row and column of subnet col_pos, row_pos = self._get_host_position( m, positions, address_space, row_min, row_max, col_min, col_max, margin ) positions[m] = (col_pos, row_pos) # get position of agent, which is just right of host first host in # network first_m_pos = positions[address_space[0]] agent_row = first_m_pos[1] agent_col = min(first_m_pos[0] + margin * 4, max_pos - margin) positions[AGENT] = (agent_col, agent_row) return positions def _get_host_position(self, m, positions, address_space, row_min, row_max, col_min, col_max, margin): """Get the position of m within the bounds of (row_min, row_max, col_min, col_max) while trying to make the distance between the positions of any two hosts in the same subnet greater than some threshold. """ subnet_hosts = [] for other_m in address_space: if other_m == m: continue if other_m[0] == m[0]: subnet_hosts.append(other_m) threshold = 8 col_margin = (col_max - col_min) / 4 col_mid = col_max - ((col_max - col_min) / 2) m_y = random.uniform(row_min + margin, row_max - margin) m_x = random.uniform(col_mid - col_margin, col_mid + col_margin) # only try 100 times good = False n = 0 while n < 100 and not good: good = True m_x = random.uniform(col_mid - col_margin, col_mid + col_margin) m_y = random.uniform(row_min + margin, row_max - margin) for other_m in subnet_hosts: if other_m not in positions: continue other_x, other_y = positions[other_m] dist = math.hypot(m_x - other_x, m_y - other_y) if dist < threshold: good = False break n += 1 return m_x, m_y def _get_subnets(self, network): """Get list of hosts organized into subnets Arguments --------- network : Network the environment network Returns ------- list[list[(int, int)]] addresses with each list containing hosts on same subnet """ subnets = [[] for i in range(network.get_number_of_subnets())] for m in 
def _setup_GUI(self, width, height):
    """Setup all the elements for the GUI for displaying the network graphs.

    Initializes object variables:

    Tk root : the root window for GUI
    FigureCanvasTkAgg canvas : the canvas object to draw figure onto
    Figure fig : the figure that holds axes
    Axes axes : the matplotlib figure axes to draw onto
    """
    # The GUI root window
    self.root = Tk.Tk()
    self.root.wm_title("Cyber Attack Simulator")
    self.root.wm_protocol("WM_DELETE_WINDOW", self._close)
    # matplotlib figure to house networkX graph
    self.fig = plt.figure(figsize=(width, height))
    self.axes = self.fig.add_subplot(111)
    self.fig.tight_layout()
    self.fig.subplots_adjust(top=0.8)
    # a tk.DrawingArea
    self.canvas = FigureCanvasTkAgg(self.fig, master=self.root)
    self.canvas.draw()
    self.canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
    # buttons for moving between observations
    back_btn = Tk.Button(self.root, text="back", command=self._previous_graph)
    back_btn.pack()
    # renamed from `next` so the builtin next() is not shadowed
    next_btn = Tk.Button(self.root, text="next", command=self._next_graph)
    next_btn.pack()
list(G.nodes): if m == AGENT: continue node_color = get_host_representation( state, self.sensitive_hosts, m, COLORS ) G.nodes[m]["color"] = node_color return G def _draw_graph(self, G): pos = {} colors = [] labels = {} for n in list(G.nodes): colors.append(G.nodes[n]["color"]) labels[n] = G.nodes[n]["label"] pos[n] = G.nodes[n]["pos"] # clear window and redraw graph self.axes.cla() nx.draw_networkx_nodes( G, pos, node_color=colors, node_size=1500, ax=self.axes ) nx.draw_networkx_labels( G, pos, labels, font_size=12, font_weight="bold" ) nx.draw_networkx_edges(G, pos) plt.axis('off') # generate and plot legend # legend_entries = self.legend() # plt.legend(handles=legend_entries, fontsize=16) # add title state, action, reward, done = self.episode[self.timestep] if done: title = ( f"t={self.timestep}\nGoal reached\ntotal reward={reward}" ) else: title = f"t={self.timestep}\n{action}\nreward={reward}" ax_title = self.axes.set_title(title, fontsize=16, pad=10) ax_title.set_y(1.05) xticks = self.axes.get_xticks() yticks = self.axes.get_yticks() # shift half a step to the left xmin = (3*xticks[0] - xticks[1])/2. ymin = (3*yticks[0] - yticks[1])/2. # shaft half a step to the right xmax = (3*xticks[-1] - xticks[-2])/2. ymax = (3*yticks[-1] - yticks[-2])/2. 
def get_host_representation(state, sensitive_hosts, m, representation):
    """Map a host to its display representation given the current state.

    Arguments
    ---------
    state : State
        current state
    sensitive_hosts : list
        list of addresses of sensitive hosts on network
    m : (int, int)
        host address
    representation : list
        list of different representations (e.g. color or symbol), indexed
        by host status (sensitive/compromised/reachable combinations)

    Returns
    -------
    str
        host representation (e.g. color)
    """
    # the agent is not part of the state, so it has a fixed representation
    if m == AGENT:
        return representation[6]
    compromised = state.host_compromised(m)
    reachable = state.host_reachable(m)
    if m in sensitive_hosts:
        if compromised:
            return representation[0]
        if reachable:
            return representation[1]
        return representation[2]
    if compromised:
        return representation[3]
    if reachable:
        return representation[4]
    return representation[5]
@classmethod
def from_numpy(cls, s_array, state_shape, host_num_map):
    """Construct a State from a (possibly flattened) numpy array.

    Parameters
    ----------
    s_array : np.Array
        numpy array containing the state tensor data; reshaped to
        `state_shape` if its shape differs
    state_shape : tuple
        the expected (num_hosts, host_vector_size) shape of the tensor
    host_num_map : dict
        mapping from host address to host row number in the tensor

    Returns
    -------
    State
        a new state instance wrapping the (reshaped) array
    """
    needs_reshape = s_array.shape != state_shape
    tensor = s_array.reshape(state_shape) if needs_reshape else s_array
    return State(tensor, host_num_map)
def get_initial_observation(self, fully_obs):
    """Get the initial observation of network.

    Parameters
    ----------
    fully_obs : bool
        whether the environment is fully observable; if so the full state
        is observed immediately

    Returns
    -------
    Observation
        an observation object
    """
    obs = Observation(self.shape())
    if fully_obs:
        obs.from_state(self)
        return obs
    # partially observable: agent initially sees only the address,
    # reachability and discovery status of reachable hosts
    reachable_hosts = (
        (addr, host) for addr, host in self.hosts if host.reachable
    )
    for addr, host in reachable_hosts:
        partial_obs = host.observe(
            address=True, reachable=True, discovered=True
        )
        obs.update_from_host(self.get_host_idx(addr), partial_obs)
    return obs
def update_host(self, host_addr, host_vector):
    """Overwrite the state tensor row for `host_addr` with the data from
    the given host vector.

    Parameters
    ----------
    host_addr : (int, int)
        address of the host to update
    host_vector : HostVector
        the new host state; its `vector` attribute is copied into the
        corresponding tensor row
    """
    row = self.host_num_map[host_addr]
    self.tensor[row] = host_vector.vector
def get_host_value(self, host_address):
    """Return the value of the host at the given address.

    Parameters
    ----------
    host_address : (int, int)
        address of the host

    Returns
    -------
    float
        the host's value

    Note: the previous implementation did
    `self.hosts[host_address].get_value()`, but `hosts` is a property
    returning a list of (address, HostVector) tuples, so indexing it with
    an address tuple raises TypeError. Look the host up via `get_host`
    and read its `value`, consistent with `get_total_host_value`.
    """
    return self.get_host(host_address).value
def min_subnet_depth(topology):
    """Find the minimum depth of each subnet in the network graph in terms
    of steps from an exposed subnet to each subnet.

    Parameters
    ----------
    topology : 2D matrix
        An adjacency matrix representing the network, with the first subnet
        representing the internet (i.e. exposed)

    Returns
    -------
    depths : list
        depth of each subnet ordered by subnet index in topology
        (float('inf') for subnets unreachable from an exposed subnet)
    """
    num_subnets = len(topology)
    assert len(topology[0]) == num_subnets
    depths = []
    frontier = deque()
    # exposed subnets (directly connected to the internet) have depth 0
    # and seed the search; everything else starts unreachable
    for subnet in range(num_subnets):
        if topology[subnet][INTERNET] == 1:
            depths.append(0)
            frontier.appendleft(subnet)
        else:
            depths.append(float('inf'))
    # breadth-first relaxation until no shorter depth can be found
    while frontier:
        parent = frontier.pop()
        for child in range(num_subnets):
            if topology[parent][child] != 1:
                continue
            if depths[child] > depths[parent] + 1:
                depths[child] = depths[parent] + 1
                frontier.appendleft(child)
    return depths
def get_scenario_max(scenario_name):
    """Get the max score achievable for a benchmark scenario.

    Parameters
    ----------
    scenario_name : str
        the name of the benchmark scenario

    Returns
    -------
    int or None
        the scenario's max score, or None if `scenario_name` does not
        match any known benchmark scenario
    """
    registries = (
        benchmark.AVAIL_GEN_BENCHMARKS,
        benchmark.AVAIL_STATIC_BENCHMARKS,
    )
    for registry in registries:
        if scenario_name in registry:
            return registry[scenario_name]["max_score"]
    return None
"small-linear": { "file": osp.join(BENCHMARK_DIR, "small-linear.yaml"), "name": "small-linear", "step_limit": 1000, "max_score": 187 }, "medium": { "file": osp.join(BENCHMARK_DIR, "medium.yaml"), "name": "medium", "step_limit": 2000, "max_score": 190 }, "medium-single-site": { "file": osp.join(BENCHMARK_DIR, "medium-single-site.yaml"), "name": "medium-single-site", "step_limit": 2000, "max_score": 195 }, "medium-multi-site": { "file": osp.join(BENCHMARK_DIR, "medium-multi-site.yaml"), "name": "medium-multi-site", "step_limit": 2000, "max_score": 190 }, } AVAIL_BENCHMARKS = list(AVAIL_STATIC_BENCHMARKS.keys()) \ + list(AVAIL_GEN_BENCHMARKS.keys()) ================================================ FILE: nasim/scenarios/benchmark/generated.py ================================================ """A collection of definitions for generated benchmark scenarios. Each generated scenario is defined by the a number of parameters that control the size of the problem (see scenario.generator for more info): There are also some parameters, where default values are used for all scenarios, see DEFAULTS dict. 
""" # generated environment constants DEFAULTS = dict( num_exploits=None, num_privescs=None, r_sensitive=100, r_user=100, exploit_cost=1, exploit_probs='mixed', privesc_cost=1, privesc_probs=1.0, service_scan_cost=1, os_scan_cost=1, subnet_scan_cost=1, process_scan_cost=1, uniform=False, alpha_H=2.0, alpha_V=2.0, lambda_V=1.0, random_goal=False, base_host_value=1, host_discovery_value=1, step_limit=1000, address_space_bounds=None ) # Generated Scenario definitions TINY_GEN = {**DEFAULTS, "name": "tiny-gen", "num_hosts": 3, "num_os": 1, "num_services": 1, "num_processes": 1, "restrictiveness": 1} TINY_GEN_RGOAL = {**DEFAULTS, "name": "tiny-gen-rangoal", "num_hosts": 3, "num_os": 1, "num_services": 1, "num_processes": 1, "restrictiveness": 1, "random_goal": True} SMALL_GEN = {**DEFAULTS, "name": "small-gen", "num_hosts": 8, "num_os": 2, "num_services": 3, "num_processes": 2, "restrictiveness": 2} SMALL_GEN_RGOAL = {**DEFAULTS, "name": "small-gen-rangoal", "num_hosts": 8, "num_os": 2, "num_services": 3, "num_processes": 2, "restrictiveness": 2, "random_goal": True} MEDIUM_GEN = {**DEFAULTS, "name": "medium-gen", "num_hosts": 16, "num_os": 2, "num_services": 5, "num_processes": 2, "restrictiveness": 3, "step_limit": 2000} LARGE_GEN = {**DEFAULTS, "name": "large-gen", "num_hosts": 23, "num_os": 3, "num_services": 7, "num_processes": 3, "restrictiveness": 3, "step_limit": 5000} HUGE_GEN = {**DEFAULTS, "name": "huge-gen", "num_hosts": 38, "num_os": 4, "num_services": 10, "num_processes": 4, "restrictiveness": 3, "step_limit": 10000} POCP_1_GEN = {**DEFAULTS, "name": "pocp-1-gen", "num_hosts": 35, "num_os": 2, "num_services": 50, "num_exploits": 60, "num_processes": 2, "restrictiveness": 5, "step_limit": 30000} POCP_2_GEN = {**DEFAULTS, "name": "pocp-2-gen", "num_hosts": 95, "num_os": 3, "num_services": 10, "num_exploits": 30, "num_processes": 3, "restrictiveness": 5, "step_limit": 30000} AVAIL_GEN_BENCHMARKS = { "tiny-gen": TINY_GEN, "tiny-gen-rgoal": TINY_GEN_RGOAL, 
"small-gen": SMALL_GEN, "small-gen-rgoal": SMALL_GEN_RGOAL, "medium-gen": MEDIUM_GEN, "large-gen": LARGE_GEN, "huge-gen": HUGE_GEN, "pocp-1-gen": POCP_1_GEN, "pocp-2-gen": POCP_2_GEN } ================================================ FILE: nasim/scenarios/benchmark/medium-multi-site.yaml ================================================ # A WAN which has multiple 3 remote sites (subnets) connected to the main site # sensitive hosts: # 1) a server in server subnet on the main site, # 2) a host in user subnet in main site # # main site has 3 subnets (1 server, 1 DMZ, 1 user) # subnet 1 = main site DMZ (exposed, but not vulnerable) - contains 2 webservers # subnet 2 = main site server (not exposed) - contains 2 data servers # subnet 3 = main site user (not exposed) - contains 6 user hosts # subnet 4 = remote site 1 (exposed) - contains 2 user hosts # subnet 5 = remote site 2 (exposed) - contains 2 user hosts # subnet 6 = remote site 3 (exposed) - contains 2 user hosts # each remote site is connected to main site server subnet # # 16 hosts # 6 subnets # 2 OS # 5 services # 3 processes # 5 exploits # 3 priv esc # # |A| = 16 * (5 + 3 + 4) = 192 # # Optimal path: # (e_samba, (6, 1)) -> (subnet_scan, (6, 1)) -> (e_smtp, (2, 1)) -> (pe_schtask, (2, 1)) # -> (e_http, (3, 1)) -> (e_ssh, (3, 4)) -> (pe_tomcat, (3, 4)) # Score = 200 - (2 + 3 + 2 + 3) = 190 # subnets: [2, 2, 6, 2, 2, 2] topology: [[ 1, 1, 0, 0, 1, 1, 1], # 0 - internet [ 1, 1, 1, 1, 0, 0, 0], # 1 - MS-DMZ [ 0, 1, 1, 1, 1, 1, 1], # 2 - MS-Server [ 0, 1, 1, 1, 0, 0, 0], # 3 - MS-User [ 1, 0, 1, 0, 1, 0, 0], # 4 - RS-1 [ 1, 0, 1, 0, 0, 1, 0], # 5 - RS-2 [ 1, 0, 1, 0, 0, 0, 1]] # 6 - RS-3 sensitive_hosts: (2, 1): 100 (3, 4): 100 os: - linux - windows services: - ssh - ftp - http - samba - smtp processes: - tomcat - daclsvc - schtask exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 
access: user e_samba: service: samba os: linux prob: 0.3 cost: 2 access: root e_smtp: service: smtp os: windows prob: 0.6 cost: 3 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root pe_schtask: process: schtask os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [ssh] processes: [tomcat] (1, 1): os: linux services: [ssh] processes: [tomcat] (2, 0): os: windows services: [smtp] processes: [] (2, 1): os: windows services: [smtp] processes: [schtask] (3, 0): os: linux services: [ssh] processes: [tomcat] (3, 1): os: linux services: [ssh, http] processes: [] (3, 2): os: linux services: [ssh] processes: [] (3, 3): os: linux services: [ssh] processes: [] (3, 4): os: linux services: [ssh] processes: [tomcat] (3, 5): os: linux services: [ssh] processes: [] (4, 0): os: windows services: [ftp] processes: [daclsvc] (4, 1): os: windows services: [ftp] processes: [daclsvc] (5, 0): os: windows services: [ftp] processes: [daclsvc, schtask] (5, 1): os: windows services: [ftp, http] processes: [] (6, 0): os: linux services: [ssh] processes: [tomcat] (6, 1): os: windows services: [ssh, samba] processes: [] firewall: (0, 1): [] (1, 0): [] (0, 4): [] (4, 0): [] (0, 5): [http] (5, 0): [] (0, 6): [samba] (6, 0): [] (1, 2): [] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [http] (3, 2): [smtp] (2, 4): [ftp] (4, 2): [smtp] (2, 5): [ftp] (5, 2): [smtp] (2, 6): [ftp, ssh] (6, 2): [smtp] step_limit: 2000 ================================================ FILE: nasim/scenarios/benchmark/medium-single-site.yaml ================================================ # A network with a single subnet that has one vulnerable host that must be compromised # to access other hosts behind firewall # # 1 subnet # 16 hosts # 2 OS # 5 services # 3 processes # 5 exploits # 3 
priv esc # # |A| = 16 * (5 + 3 + 4) = 192 # # Optimal path: # (e_http, (1, 7) or (1, 15)) -> (e_smtp, (1, 3)) -> (pe_schtask, (1, 3)) # -> (e_ssh, (1, 8)) -> (pe_tomcat, (1, 8)) # Score = 200 - (2 + 3 + 1 + 3 + 1) = 190 # subnets: [16] topology: [[ 1, 1], [ 1, 1]] sensitive_hosts: (1, 3): 100 (1, 8): 100 os: - linux - windows services: - ssh - ftp - http - samba - smtp processes: - tomcat - daclsvc - schtask exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 access: user e_samba: service: samba os: linux prob: 0.3 cost: 2 access: root e_smtp: service: smtp os: windows prob: 0.6 cost: 3 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root pe_schtask: process: schtask os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [ftp] processes: [tomcat] (1, 1): os: linux services: [ftp, ssh] processes: [tomcat] (1, 2): os: windows services: [ftp] processes: [schtask] (1, 3): os: windows services: [smtp] processes: [schtask] (1, 4): os: windows services: [ftp] processes: [schtask] (1, 5): os: linux services: [ftp, ssh] processes: [tomcat] (1, 6): os: windows services: [ftp] processes: [daclsvc] (1, 7): os: windows services: [http] processes: [] (1, 8): os: linux services: [ssh] processes: [tomcat] (1, 9): os: windows services: [ftp] processes: [schtask] (1, 10): os: windows services: [ssh] processes: [] (1, 11): os: windows services: [ftp] processes: [daclsvc] (1, 12): os: windows services: [ftp, ssh] processes: [] (1, 13): os: windows services: [ftp] processes: [] (1, 14): os: windows services: [ftp] processes: [schtask] (1, 15): os: linux services: [http] processes: [] firewall: (0, 1): [http] (1, 0): [] 
step_limit: 2000 ================================================ FILE: nasim/scenarios/benchmark/medium.yaml ================================================ # A medium standard (one public subnet) network configuration # # 16 hosts # 5 subnets # 2 OS # 5 services # 3 processes # 5 exploits # 3 priv esc # # |A| = 16 * (5 + 3 + 4) = 192 # # Optimal path: # (e_http, (1, 0)) -> subnet_scan -> (e_smtp, (2, 0)) -> (pe_schtask, (2, 0) -> (e_http, (3, 1)) # -> subnet_scan -> (e_ssh, (5, 0)) -> (e_samba, (5, 0)) # Score = 200 - (2+1+3+1+2+1+3+2) = 185 # subnets: [1, 1, 5, 5, 4] topology: [[ 1, 1, 0, 0, 0, 0], [ 1, 1, 1, 1, 0, 0], [ 0, 1, 1, 1, 0, 0], [ 0, 1, 1, 1, 1, 1], [ 0, 0, 0, 1, 1, 0], [ 0, 0, 0, 1, 0, 1]] sensitive_hosts: (2, 0): 100 (5, 0): 100 os: - linux - windows services: - ssh - ftp - http - samba - smtp processes: - tomcat - daclsvc - schtask exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 access: user e_samba: service: samba os: linux prob: 0.3 cost: 2 access: root e_smtp: service: smtp os: windows prob: 0.6 cost: 3 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root pe_schtask: process: schtask os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [] (2, 0): os: windows services: [smtp] processes: [schtask] (3, 0): os: windows services: [ftp] processes: [schtask] (3, 1): os: windows services: [ftp, http] processes: [daclsvc] (3, 2): os: windows services: [ftp] processes: [] (3, 3): os: windows services: [ftp] processes: [schtask] (3, 4): os: windows services: [ftp] processes: [schtask] (4, 0): os: linux services: [ssh] processes: [] (4, 1): os: linux services: [ssh] 
processes: [] (4, 2): os: linux services: [ssh] processes: [] (4, 3): os: windows services: [ssh, ftp] processes: [tomcat] (4, 4): os: windows services: [ssh, ftp] processes: [tomcat] (5, 0): os: linux services: [ssh, samba] processes: [] (5, 1): os: linux services: [ssh, http] processes: [tomcat] (5, 2): os: linux services: [ssh] processes: [] (5, 3): os: linux services: [ssh] processes: [] firewall: (0, 1): [http] (1, 0): [] (1, 2): [smtp] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [http] (3, 2): [smtp] (3, 4): [ssh, ftp] (4, 3): [ftp, ssh] (3, 5): [ssh, ftp] (5, 3): [ftp, ssh] step_limit: 2000 ================================================ FILE: nasim/scenarios/benchmark/small-honeypot.yaml ================================================ # A small standard (one public network) network configuration containing a # honeypot host (3, 2). # # 4 subnets # 8 hosts # 2 OS # 3 services # 2 processes # 3 exploits # 2 priv esc # # Optimal path: # (e_http, (1, 0)) -> subnet_scan -> (e_ssh, (2, 0)) -> (pe_tomcat, (2, 0)) # -> (e_http, (3, 1)) -> subnet_scan -> (e_ssh, (4, 0) # -> (pe_tomcat, (4, 0)) # Score = 200 - (2 + 1 + 3 + 1 + 2 + 1 + 3 + 1) = 186 # subnets: [1, 1, 5, 1] topology: [[ 1, 1, 0, 0, 0], [ 1, 1, 1, 1, 0], [ 0, 1, 1, 1, 0], [ 0, 1, 1, 1, 1], [ 0, 0, 0, 1, 1]] sensitive_hosts: (2, 0): 100 (4, 0): 100 os: - linux - windows services: - ssh - ftp - http processes: - tomcat - daclsvc exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: user e_http: service: http os: None prob: 0.9 cost: 2 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [] (2, 0): os: linux services: [ssh, ftp] processes: [tomcat] (3, 0): os: 
windows services: [ftp] processes: [] (3, 1): os: windows services: [ftp, http] processes: [daclsvc] (3, 2): os: windows services: [ftp, http] processes: [daclsvc] # This host is the honeypot so has large negative value value: -100 (3, 3): os: windows services: [ftp] processes: [] (3, 4): os: windows services: [ftp] processes: [daclsvc] (4, 0): os: linux services: [ssh, ftp] processes: [tomcat] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services to allow firewall: (0, 1): [http] (1, 0): [] (1, 2): [ssh] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [http] (3, 2): [ftp] (3, 4): [ssh, ftp] (4, 3): [ftp] step_limit: 1000 ================================================ FILE: nasim/scenarios/benchmark/small-linear.yaml ================================================ # A small network with # # 6 subnets # 8 hosts # 2 OS # 3 services # 2 processes # 3 exploits # 2 priv esc # # - subnets organized in a linear network # - sensitive documents located in two middle subnets # - end subnets are both connected to internet # - two middle subnets are not connected to each other # # Optimal path: # (e_http, (1, 0)) -> subnet_scan -> (e_ssh, (2, 0)) -> subnet_scan -> (e_ssh, (3, 1)) -> (e_ftp, (3, 0)) # (e_http, (6, 0)) -> subnet_scan -> (e_ssh, (5, 0)) -> subnet_scan -> (e_http, (4, 0)) -> (pe_daclsvc, (4, 0)) # Score = 200 - (2+1+3+1+3+1+2+1+3+1+1+1) = 179 # subnets: [1, 1, 2, 1, 2, 1] topology: [[ 1, 1, 0, 0, 0, 0, 1], # 0 connected to 1 and 6 [ 1, 1, 1, 0, 0, 0, 0], # 1 connected to 0 and 2 [ 0, 1, 1, 1, 0, 0, 0], # 2 connected to 1 and 3 [ 0, 0, 1, 1, 1, 0, 0], # 3 connected to 2 and 4 [ 0, 0, 0, 1, 1, 1, 0], # 4 connected to 3 and 5 [ 0, 0, 0, 0, 1, 1, 1], # 5 connected to 4 and 6 [ 1, 0, 0, 0, 0, 1, 1]] # 6 connected to 5 and 0 sensitive_hosts: (3, 0): 100 (4, 0): 100 os: - linux - windows services: - ssh - ftp - http processes: - tomcat - daclsvc exploits: e_ssh: service: ssh os: linux prob: 0.9 
cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [] (2, 0): os: linux services: [ssh, ftp] processes: [tomcat] (3, 0): os: windows services: [ftp] processes: [] (3, 1): os: linux services: [ssh] processes: [] (4, 0): os: windows services: [http] processes: [daclsvc] (5, 0): os: linux services: [ftp, ssh] processes: [] (5, 1): os: windows services: [ftp] processes: [daclsvc] (6, 0): os: linux services: [http] processes: [tomcat] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services to allow firewall: (0, 1): [http] (1, 0): [] (1, 2): [ssh, ftp] (2, 1): [http] (2, 3): [ssh] (3, 2): [ssh, ftp] (3, 4): [] # no traffic permitted between middle networks (4, 3): [] # no traffic permitted between middle networks (4, 5): [ftp] (5, 4): [ftp, http] (5, 6): [http] (6, 5): [ssh] (6, 0): [] (0, 6): [http] step_limit: 1000 ================================================ FILE: nasim/scenarios/benchmark/small.yaml ================================================ # A small standard (one public network) network configuration # # 4 subnets # 8 hosts # 2 OS # 3 services # 2 processes # 3 exploits # 2 priv esc # # Optimal path: # (e_http, (1, 0)) -> subnet_scan -> (e_ssh, (2, 0)) -> (pe_tomcat, (2, 0)) # -> (e_http, (3, 1)) -> subnet_scan -> (e_ssh, (4, 0) # -> (pe_tomcat, (4, 0)) # Score = 200 - (2 + 1 + 3 + 1 + 2 + 1 + 3 + 1) = 186 # subnets: [1, 1, 5, 1] topology: [[ 1, 1, 0, 0, 0], [ 1, 1, 1, 1, 0], [ 0, 1, 1, 1, 0], [ 0, 1, 1, 1, 1], [ 0, 0, 0, 1, 1]] sensitive_hosts: (2, 0): 100 (4, 0): 100 os: - linux - 
windows services: - ssh - ftp - http processes: - tomcat - daclsvc exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: user e_http: service: http os: None prob: 0.9 cost: 2 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [] (2, 0): os: linux services: [ssh, ftp] processes: [tomcat] (3, 0): os: windows services: [ftp] processes: [] (3, 1): os: windows services: [ftp, http] processes: [daclsvc] (3, 2): os: windows services: [ftp] processes: [daclsvc] (3, 3): os: windows services: [ftp] processes: [] (3, 4): os: windows services: [ftp] processes: [daclsvc] (4, 0): os: linux services: [ssh, ftp] processes: [tomcat] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services to allow firewall: (0, 1): [http] (1, 0): [] (1, 2): [ssh] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [http] (3, 2): [ftp] (3, 4): [ssh, ftp] (4, 3): [ftp] step_limit: 1000 ================================================ FILE: nasim/scenarios/benchmark/tiny-hard.yaml ================================================ # A harder version of the tiny standard (one public network) network configuration # # 3 subnets # 3 hosts # 2 OS # 3 services # 2 processes # 3 exploits # 2 priv esc actions # # Optimal path: # (e_http, (1, 0)) -> subnet scan -> (e_ssh, (2, 0)) -> (pe_tomcat, (2, 0)) -> (e_ftp, (3, 0)) # Score = 200 - (2 + 1 + 3 + 1 + 1) = 192 # subnets: [1, 1, 1] topology: [[ 1, 1, 0, 0], [ 1, 1, 1, 1], [ 0, 1, 1, 1], [ 0, 1, 1, 1]] sensitive_hosts: (2, 0): 100 (3, 0): 100 os: - linux - windows services: - ssh - ftp - http processes: - tomcat - daclsvc exploits: e_ssh: service: ssh os: 
linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [] (2, 0): os: linux services: [ssh, ftp] processes: [tomcat] (3, 0): os: windows services: [ftp] processes: [daclsvc] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services to allow firewall: (0, 1): [http] (1, 0): [] (1, 2): [ssh] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [ftp, ssh] (3, 2): [ftp, ssh] step_limit: 1000 ================================================ FILE: nasim/scenarios/benchmark/tiny-small.yaml ================================================ # A tiny-small standard (one public network) network configuration # (Not quite tiny, not quite small) # # 4 subnets # 5 hosts # 2 OS # 3 services # 2 processes # 3 exploits # 2 priv esc actions # # Optimal path: # (e_http, (1, 0)) -> subnet_scan -> (e_ssh, (2, 0)) -> (pe_tomcat, (2,0)) -> (e_http, (3, 1)) # -> subnet_scan -> (e_ftp, (4, 0)) # Score = 200 - (2 + 1 + 3 + 1 + 2 + 1 + 1) = 189 # subnets: [1, 1, 2, 1] topology: [[ 1, 1, 0, 0, 0], [ 1, 1, 1, 1, 0], [ 0, 1, 1, 1, 0], [ 0, 1, 1, 1, 1], [ 0, 0, 0, 1, 1]] sensitive_hosts: (2, 0): 100 (4, 0): 100 os: - linux - windows services: - ssh - ftp - http processes: - tomcat - daclsvc exploits: e_ssh: service: ssh os: linux prob: 0.9 cost: 3 access: user e_ftp: service: ftp os: windows prob: 0.6 cost: 1 access: root e_http: service: http os: None prob: 0.9 cost: 2 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root pe_daclsvc: process: daclsvc os: windows prob: 1.0 
cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [http] processes: [tomcat] (2, 0): os: linux services: [ssh, ftp] processes: [tomcat] (3, 0): os: windows services: [ftp] processes: [] (3, 1): os: windows services: [ftp, http] processes: [daclsvc] (4, 0): os: windows services: [ssh, ftp] processes: [] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services to allow firewall: (0, 1): [http] (1, 0): [] (1, 2): [ssh] (2, 1): [ssh] (1, 3): [] (3, 1): [ssh] (2, 3): [http] (3, 2): [ftp] (3, 4): [ssh, ftp] (4, 3): [ftp] step_limit: 1000 ================================================ FILE: nasim/scenarios/benchmark/tiny.yaml ================================================ # A tiny standard (one public network) network configuration # # 3 hosts # 3 subnets # 1 service # 1 process # 1 os # 1 exploit # 1 privilege escalation # # Optimal path: # (e_ssh, (1, 0)) -> subnet_scan -> (e_ssh, (3, 0)) -> (pe_tomcat, (3, 0)) # -> (e_ssh, (2, 0)) -> (pe_tomcat, (2, 0)) # Score = 200 - (6*1) = 195 # subnets: [1, 1, 1] topology: [[ 1, 1, 0, 0], [ 1, 1, 1, 1], [ 0, 1, 1, 1], [ 0, 1, 1, 1]] sensitive_hosts: (2, 0): 100 (3, 0): 100 os: - linux services: - ssh processes: - tomcat exploits: e_ssh: service: ssh os: linux prob: 0.8 cost: 1 access: user privilege_escalation: pe_tomcat: process: tomcat os: linux prob: 1.0 cost: 1 access: root service_scan_cost: 1 os_scan_cost: 1 subnet_scan_cost: 1 process_scan_cost: 1 host_configurations: (1, 0): os: linux services: [ssh] processes: [tomcat] # which services to deny between individual hosts firewall: (3, 0): [ssh] (2, 0): os: linux services: [ssh] processes: [tomcat] firewall: (1, 0): [ssh] (3, 0): os: linux services: [ssh] processes: [tomcat] # two row for each connection between subnets as defined by topology # one for each direction of connection # list which services 
to allow firewall: (0, 1): [ssh] (1, 0): [] (1, 2): [] (2, 1): [ssh] (1, 3): [ssh] (3, 1): [ssh] (2, 3): [ssh] (3, 2): [ssh] step_limit: 1000 ================================================ FILE: nasim/scenarios/generator.py ================================================ """This module contains functionality for generating scenarios. Specifically, it generates network configurations and action space configurations based on number of hosts and services in network using standard formula. """ import math import numpy as np import nasim.scenarios.utils as u from nasim.scenarios import Scenario from nasim.scenarios.host import Host # Constants for generating network USER_SUBNET_SIZE = 5 HOST_ASSIGNMENT_PERIOD = 40 DMZ = 1 SENSITIVE = 2 USER = 3 # Number of time to attempt to find valid vulnerable config VUL_RETRIES = 5 class ScenarioGenerator: """Generates a scenario based on standard formula For explanation of the details of how scenarios are generated see :ref:`scenario_generation_explanation`. Notes ----- **Exploit Probabilities**: Success probabilities of each exploit are determined based on the value of the ``exploit_probs`` argument, as follows: - ``exploit_probs=None`` - probabilities generated randomly from uniform distribution - ``exploit_probs="mixed"`` - probabilities are chosen from [0.3, 0.6, 0.9] with probability [0.2, 0.4, 0.4] (see :ref:`generated_exploit_probs` for explanation). - ``exploit_probs=float`` - probability of each exploit is set to value - ``exploit_probs=list[float]`` - probability of each exploit is set to corresponding value in list For deterministic exploits set ``exploit_probs=1.0``. **Privilege Escalation Probabilities**: Success probabilities of each privilege escalation are determined based on the value of the ``privesc_probs`` argument, and are determined the same as for exploits with the exclusion of the "mixed" option. **Host Configuration distribution**: 1. 
if ``uniform=True`` then host configurations are chosen uniformly at random from set of all valid possible configurations 2. if ``uniform=False`` host configurations are chosen to be correlated (see :ref:`correlated_configurations` for explanation) """ def generate(self, num_hosts, num_services, num_os=2, num_processes=2, num_exploits=None, num_privescs=None, r_sensitive=10, r_user=10, exploit_cost=1, exploit_probs=1.0, privesc_cost=1, privesc_probs=1.0, service_scan_cost=1, os_scan_cost=1, subnet_scan_cost=1, process_scan_cost=1, uniform=False, alpha_H=2.0, alpha_V=2.0, lambda_V=1.0, restrictiveness=5, random_goal=False, base_host_value=1, host_discovery_value=1, seed=None, name=None, step_limit=None, address_space_bounds=None, **kwargs): """Generate the network configuration based on standard formula. Parameters ---------- num_hosts : int number of hosts to include in network (minimum is 3) num_services : int number of services running on network (minimum is 1) num_os : int, optional number of OS running on network (minimum is 1) (default=2) num_processes : int, optional number of processes running on hosts on network (minimum is 1) (default=2) num_exploits : int, optional number of exploits to use. minimum is 1. If None will use num_services (default=None) num_privescs : int, optional number of privilege escalation actions to use. minimum is 1. 
If None will use num_processes (default=None) r_sensitive : float, optional reward for sensitive subnet documents (default=10) r_user : float, optional reward for user subnet documents (default=10) exploit_cost : int or float, optional cost for an exploit (default=1) exploit_probs : None, float, list of floats or "mixed", optional success probability of exploits (default=1.0) privesc_cost : int or float, optional cost for an privilege escalation action (default=1) privesc_probs : None, float, list of floats, optional success probability of privilege escalation actions (default=1.0) service_scan_cost : int or float, optional cost for a service scan (default=1) os_scan_cost : int or float, optional cost for an os scan (default=1) subnet_scan_cost : int or float, optional cost for a subnet scan (default=1) process_scan_cost : int or float, optional cost for a process scan (default=1) uniform : bool, optional whether to use uniform distribution or correlated host configs (default=False) alpha_H : float, optional (only used when uniform=False) Scaling/concentration parameter for controlling corelation between host configurations (must be > 0) (default=2.0) alpha_V : float, optional (only used when uniform=False) scaling/concentration parameter for controlling corelation between services across host configurations (must be > 0) (default=2.0) lambda_V : float, optional (only used when uniform=False) parameter for controlling average number of services running per host configuration (must be > 0) (default=1.0) restrictiveness : int, optional max number of services allowed to pass through firewalls between zones (default=5) random_goal : bool, optional whether to randomly assign the goal user host or not (default=False) base_host_value : int, optional, value of non sensitive hosts (default=1) host_discovery_value : int, optional value of discovering a host for the first time (default=1) seed : int, optional random number generator seed (default=None) name : str, optional 
name of the scenario, if None one will be generated (default=None) step_limit : int, optional max number of steps permitted in a single episode, if None there is no limit (default=None) address_space_bounds : (int, int), optional bounds for the (subnet#, host#) address space. If None bounds will be determined by the number of subnets in the scenario and the max number of hosts in any subnet. Returns ------- Scenario scenario description """ assert 0 < num_services assert 2 < num_hosts assert 0 < num_processes assert num_exploits is None or 0 < num_exploits assert num_privescs is None or 0 < num_privescs assert 0 < num_os assert 0 < r_sensitive and 0 < r_user assert 0 < alpha_H and 0 < alpha_V and 0 < lambda_V assert 0 < restrictiveness if seed is not None: np.random.seed(seed) if num_exploits is None: num_exploits = num_services if num_privescs is None: num_privescs = num_processes self._generate_subnets(num_hosts) self._generate_topology() self._generate_address_space_bounds(address_space_bounds) self._generate_os(num_os) self._generate_services(num_services) self._generate_processes(num_processes) self._generate_exploits(num_exploits, exploit_cost, exploit_probs) self._generate_privescs(num_privescs, privesc_cost, privesc_probs) self._generate_sensitive_hosts(r_sensitive, r_user, random_goal) self.base_host_value = base_host_value self.host_discovery_value = host_discovery_value if uniform: self._generate_uniform_hosts() else: self._generate_correlated_hosts(alpha_H, alpha_V, lambda_V) self._ensure_host_vulnerability() self._generate_firewall(restrictiveness) self.service_scan_cost = service_scan_cost self.os_scan_cost = os_scan_cost self.subnet_scan_cost = subnet_scan_cost self.process_scan_cost = process_scan_cost if name is None: name = f"gen_H{num_hosts}_E{num_exploits}_S{num_services}" self.name = name self.step_limit = step_limit return self._construct_scenario() def _construct_scenario(self): scenario_dict = dict() scenario_dict[u.SUBNETS] = self.subnets 
scenario_dict[u.ADDRESS_SPACE_BOUNDS] = self.address_space_bounds scenario_dict[u.TOPOLOGY] = self.topology scenario_dict[u.SERVICES] = self.services scenario_dict[u.PROCESSES] = self.processes scenario_dict[u.OS] = self.os scenario_dict[u.SENSITIVE_HOSTS] = self.sensitive_hosts scenario_dict[u.EXPLOITS] = self.exploits scenario_dict[u.PRIVESCS] = self.privescs scenario_dict[u.SERVICE_SCAN_COST] = self.service_scan_cost scenario_dict[u.OS_SCAN_COST] = self.os_scan_cost scenario_dict[u.SUBNET_SCAN_COST] = self.subnet_scan_cost scenario_dict[u.PROCESS_SCAN_COST] = self.process_scan_cost scenario_dict[u.FIREWALL] = self.firewall scenario_dict[u.HOSTS] = self.hosts scenario_dict[u.STEP_LIMIT] = self.step_limit scenario = Scenario( scenario_dict, name=self.name, generated=True ) return scenario def _generate_subnets(self, num_hosts): # Internet (0) and sensitive (2) subnets both start with 1 host subnets = [1] # For every HOST_ASSIGNMENT_PERIOD hosts we have: # first host assigned to DMZ (1), dmz_hosts = math.ceil(num_hosts / HOST_ASSIGNMENT_PERIOD) subnets.append(dmz_hosts) # second host assigned sensitive (2) sensitive_hosts = math.ceil(num_hosts / (HOST_ASSIGNMENT_PERIOD+1)) subnets.append(sensitive_hosts) # remainder of hosts go into user subnet tree num_user_hosts = num_hosts - dmz_hosts - sensitive_hosts num_full_user_subnets = num_user_hosts // USER_SUBNET_SIZE subnets += [USER_SUBNET_SIZE] * num_full_user_subnets if (num_user_hosts % USER_SUBNET_SIZE) != 0: subnets.append(num_user_hosts % USER_SUBNET_SIZE) self.subnets = subnets def _generate_topology(self): # including internet subnet num_subnets = len(self.subnets) topology = np.zeros((num_subnets, num_subnets)) # DMZ subnet is connected to sensitive and first user subnet and also # to internet for row in range(USER + 1): for col in range(USER + 1): if row == u.INTERNET and col > DMZ: continue if row > DMZ and col == u.INTERNET: continue topology[row][col] = 1 if num_subnets == USER + 1: self.topology = 
topology return # all other subnets are part of user binary tree for row in range(USER, num_subnets): # subnet connected to itself topology[row][row] = 1 # position in tree pos = row - USER if pos > 0: parent = ((pos - 1) // 2) + 3 topology[row][parent] = 1 child_left = ((2 * pos) + 1) + 3 child_right = ((2 * pos) + 2) + 3 if child_left < num_subnets: topology[row][child_left] = 1 if child_right < num_subnets: topology[row][child_right] = 1 self.topology = topology def _generate_address_space_bounds(self, address_space_bounds): if address_space_bounds is None: address_space_bounds = (len(self.subnets), max(self.subnets)) err_msg = ( "address_space_bounds must be None or a tuple/list of length 2" f"containing positive ints. '{address_space_bounds}' is invalid" ) assert isinstance(address_space_bounds, (tuple, list)), err_msg address_space_bounds = tuple(address_space_bounds) assert len(address_space_bounds) == 2, err_msg for val in address_space_bounds: assert isinstance(val, int) and 0 < val, err_msg assert address_space_bounds[0] >= len(self.subnets), \ ("Number of subnets in address bound must be >= number of subnets" f" in the scenario. '{address_space_bounds[0]}' is invalid") assert address_space_bounds[1] >= max(self.subnets), \ ("Number of hosts in address bound must be >= number of hosts " " in the largest subnet in the scenario. 
" f"'{address_space_bounds[1]}' is invalid") self.address_space_bounds = address_space_bounds def _generate_os(self, num_os): self.os = [f"os_{i}" for i in range(num_os)] def _generate_services(self, num_services): self.services = [f"srv_{s}" for s in range(num_services)] def _generate_processes(self, num_processes): self.processes = [f"proc_{s}" for s in range(num_processes)] def _generate_exploits(self, num_exploits, exploit_cost, exploit_probs): exploits = {} exploit_probs = self._get_action_probs(num_exploits, exploit_probs) # add None since some exploits might work for all OS possible_os = self.os + [None] # we create one exploit per service exploits_added = 0 while exploits_added < num_exploits: srv = np.random.choice(self.services) os = np.random.choice(possible_os) al = np.random.randint(u.USER_ACCESS, u.ROOT_ACCESS+1) e_name = f"e_{srv}" if os is not None: e_name += f"_{os}" if e_name not in exploits: exploits[e_name] = { u.EXPLOIT_SERVICE: srv, u.EXPLOIT_OS: os, u.EXPLOIT_PROB: exploit_probs[exploits_added], u.EXPLOIT_COST: exploit_cost, u.EXPLOIT_ACCESS: al } exploits_added += 1 self.exploits = exploits def _generate_privescs(self, num_privesc, privesc_cost, privesc_probs): privescs = {} privesc_probs = self._get_action_probs(num_privesc, privesc_probs) # add None since some privesc might work for all OS possible_os = self.os + [None] # need to ensure there is a privesc for each OS, # or >= 1 OS agnostic privesc # This ensures we can make it possible to get ROOT access on a # host, independendent of the exploit the host is vulnerable too if num_privesc < len(self.os): os_choices = [None] os_choices.extend( list(np.random.choice(possible_os, num_privesc-1)) ) else: while True: os_choices = list( np.random.choice(possible_os, num_privesc) ) if None in os_choices \ or all([os in os_choices for os in self.os]): break # we create one exploit per service privescs_added = 0 while privescs_added < num_privesc: proc = np.random.choice(self.processes) os = 
os_choices[privescs_added] pe_name = f"pe_{proc}" if os is not None: pe_name += f"_{os}" if pe_name not in privescs: privescs[pe_name] = { u.PRIVESC_PROCESS: proc, u.PRIVESC_OS: os, u.PRIVESC_PROB: privesc_probs[privescs_added], u.PRIVESC_COST: privesc_cost, u.PRIVESC_ACCESS: u.ROOT_ACCESS } privescs_added += 1 self.privescs = privescs def _get_action_probs(self, num_actions, action_probs): if action_probs is None: action_probs = np.random.random_sample(num_actions) elif action_probs == 'mixed': # success probability of low, med, high attack complexity if num_actions == 1: # for case where only 1 service ignore low probability actions # since could lead to unnecessarily long attack paths levels = [0.6, 0.9] probs = [0.5, 0.5] else: levels = [0.3, 0.6, 0.9] probs = [0.2, 0.4, 0.4] action_probs = np.random.choice(levels, num_actions, p=probs) elif type(action_probs) is list: assert len(action_probs) == num_actions, \ ("Length of action probability list must equal number of" " exploits") for a in action_probs: assert 0.0 < a <= 1.0, \ "Action probabilities in list must be in (0.0, 1.0]" else: assert isinstance(action_probs, float), \ ("Action probabilities must be float, list of floats or " "'mixed' (exploit only)") assert 0.0 < action_probs <= 1.0, \ "Action probability float must be in (0.0, 1.0]" action_probs = [action_probs] * num_actions return action_probs def _generate_sensitive_hosts(self, r_sensitive, r_user, random_goal): sensitive_hosts = {} # first sensitive host is first host in SENSITIVE network sensitive_hosts[(SENSITIVE, 0)] = r_sensitive # second sensitive host in USER network if random_goal and len(self.subnets) > SENSITIVE: # randomly choose user host to be goal subnet_id = np.random.randint(USER, len(self.subnets)) host_id = np.random.randint(0, self.subnets[subnet_id]) sensitive_hosts[(subnet_id, host_id)] = r_user else: # second last host in USER network is goal sensitive_hosts[(len(self.subnets)-1, self.subnets[-1]-1)] = r_user 
self.sensitive_hosts = sensitive_hosts def _generate_uniform_hosts(self): hosts = dict() srv_config_set, proc_config_set = self._possible_host_configs() num_srv_configs = len(srv_config_set) num_proc_configs = len(proc_config_set) for subnet, size in enumerate(self.subnets): if subnet == u.INTERNET: continue for h in range(size): srv_cfg = srv_config_set[np.random.choice(num_srv_configs)] srv_cfg = self._convert_to_service_map(srv_cfg) proc_cfg = proc_config_set[np.random.choice(num_proc_configs)] proc_cfg = self._convert_to_process_map(proc_cfg) os = np.random.choice(self.os) os_cfg = self._convert_to_os_map(os) address = (subnet, h) value = self._get_host_value(address) host = Host( address=address, os=os_cfg.copy(), services=srv_cfg.copy(), processes=proc_cfg.copy(), firewall={}, value=value, discovery_value=self.host_discovery_value ) hosts[address] = host self.hosts = hosts def _possible_host_configs(self): """Generate set of all possible host service and process configurations based on number of services and processes in environment. Note: Each host is vulnerable to at least one exploit and one privesc, so there is no configuration where all services and processes are absent. Returns ------- list[list] all possible service configurations, where each configuration is a list of bools corresponding to the presence or absence of a service list[list] all possible process configurations, same as above except for processes """ # remove last permutation which is all False srv_configs = self._permutations(len(self.services))[:-1] proc_configs = self._permutations(len(self.processes))[:-1] return srv_configs, proc_configs def _permutations(self, n): """Generate list of all possible permutations of n bools N.B First permutation in list is always the all True permutation and final permutation in list is always the all False permutationself. 
perms[1] = [True, ..., True] perms[-1] = [False, ..., False] Parameters ---------- n : int bool list length Returns ------- perms : list[list] all possible permutations of n bools """ # base cases if n <= 0: return [] if n == 1: return [[True], [False]] perms = [] for p in self._permutations(n - 1): perms.append([True] + p) perms.append([False] + p) return perms def _generate_correlated_hosts(self, alpha_H, alpha_V, lambda_V): hosts = dict() prev_configs = [] prev_os = [] prev_srvs = [] prev_procs = [] host_num = 0 for subnet, size in enumerate(self.subnets): if subnet == u.INTERNET: continue for m in range(size): os, services, processes = self._get_host_config( host_num, alpha_H, prev_configs, alpha_V, lambda_V, prev_os, prev_srvs, prev_procs ) os_cfg = self._convert_to_os_map(os) service_cfg = self._convert_to_service_map(services) process_cfg = self._convert_to_process_map(processes) host_num += 1 address = (subnet, m) value = self._get_host_value(address) host = Host( address=address, os=os_cfg.copy(), services=service_cfg.copy(), processes=process_cfg.copy(), firewall={}, value=value, discovery_value=self.host_discovery_value ) hosts[address] = host self.hosts = hosts def _get_host_config(self, host_num, alpha_H, prev_configs, alpha_V, lambda_V, prev_os, prev_srvs, prev_procs): """Select a host configuration from all possible configurations based using a Nested Dirichlet Process """ if host_num == 0 \ or np.random.rand() < (alpha_H / (alpha_H + host_num - 1)): # if first host or with prob proportional to alpha_H # choose new config new_config = self._sample_config( alpha_V, prev_srvs, lambda_V, prev_os, prev_procs ) else: # sample uniformly from previous sampled configs new_config = prev_configs[np.random.choice(len(prev_configs))] prev_configs.append(new_config) return new_config def _sample_config(self, alpha_V, prev_srvs, lambda_V, prev_os, prev_procs): """Sample a host configuration from all possible configurations based using a Dirichlet Process """ os = 
self._dirichlet_sample( alpha_V, self.os, prev_os ) new_services_cfg = self._dirichlet_process( alpha_V, lambda_V, len(self.services), prev_srvs ) new_process_cfg = self._dirichlet_process( alpha_V, lambda_V, len(self.processes), prev_procs ) return os, new_services_cfg, new_process_cfg def _dirichlet_process(self, alpha_V, lambda_V, num_options, prev_vals): """Sample from all possible configurations using Dirichlet Process """ # no options present by default new_cfg = [False for i in range(num_options)] # randomly get number of times to sample using poission dist with # minimum 1 option choice n = max(np.random.poisson(lambda_V), 1) # draw n samples from Dirichlet Process # (alpha_V, uniform dist of services) for i in range(n): if i == 0 or np.random.rand() < (alpha_V / (alpha_V + i - 1)): # draw randomly from uniform dist over services x = np.random.randint(0, num_options) else: # draw uniformly at random from previous choices x = np.random.choice(prev_vals) new_cfg[x] = True prev_vals.append(x) return new_cfg def _dirichlet_sample(self, alpha_V, choices, prev_vals): """Sample single choice using dirichlet process """ # sample an os from Dirichlet Process (alpha_V, uniform dist of OSs) if len(prev_vals) == 0 \ or np.random.rand() < (alpha_V / (alpha_V - 1)): # draw randomly from uniform dist over services choice = np.random.choice(choices) else: # draw uniformly at random from previous choices choice = np.random.choice(prev_vals) prev_vals.append(choice) return choice def _is_sensitive_host(self, addr): return addr in self.sensitive_hosts def _convert_to_service_map(self, config): """Converts list of bools to a map from service name -> bool """ service_map = {} for srv, val in zip(self.services, config): service_map[srv] = val return service_map def _convert_to_process_map(self, config): """Converts list of bools to a map from process name -> bool """ process_map = {} for proc, val in zip(self.processes, config): process_map[proc] = val return process_map def 
_convert_to_os_map(self, os): """Converts an OS string to a map from os name -> bool N.B. also adds an entry for None os, which makes it easier for vectorizing and checking if an exploit will work (since exploits can have os=None) """ os_map = {} for os_name in self.os: os_map[os_name] = os_name == os return os_map def _ensure_host_vulnerability(self): """Ensures each subnet has at least one vulnerable host and all sensitive hosts are vulnerable """ vulnerable_subnets = set() for host_addr, host in self.hosts.items(): if not self._is_sensitive_host(host_addr) \ and host_addr[0] in vulnerable_subnets: continue if self._is_sensitive_host(host_addr): if not self._host_is_vulnerable(host, u.ROOT_ACCESS): self._update_host_to_vulnerable(host, u.ROOT_ACCESS) vulnerable_subnets.add(host_addr[0]) elif self._host_is_vulnerable(host): vulnerable_subnets.add(host_addr[0]) for subnet, size in enumerate(self.subnets): if subnet in vulnerable_subnets or subnet == u.INTERNET: continue host_num = np.random.randint(size) host = self.hosts[(subnet, host_num)] self._update_host_to_vulnerable(host) vulnerable_subnets.add(subnet) def _host_is_vulnerable(self, host, access_level=u.USER_ACCESS): for e_def in self.exploits.values(): if self._host_is_vulnerable_to_exploit(host, e_def): if e_def[u.EXPLOIT_ACCESS] >= access_level: return True for pe_def in self.privescs.values(): if self._host_is_vulnerable_to_privesc(host, pe_def): return True return False def _host_is_vulnerable_to_exploit(self, host, exploit_def): e_srv = exploit_def[u.EXPLOIT_SERVICE] e_os = exploit_def[u.EXPLOIT_OS] if not host.services[e_srv]: return False return e_os is None or host.os[e_os] def _host_is_vulnerable_to_privesc(self, host, privesc_def): pe_proc = privesc_def[u.PRIVESC_PROCESS] pe_os = privesc_def[u.PRIVESC_OS] if not host.processes[pe_proc]: return False return pe_os is None or host.os[pe_os] def _update_host_to_vulnerable(self, host, access_level=u.USER_ACCESS): """Update host config so it's vulnerable 
to at least one exploit """ # choose an exploit randomly and make host vulnerable to it # will retry X times before giving up # If vulnerable config is not found in X tries then the scenario # probably needs more options (processes, privesc actions) for i in range(VUL_RETRIES): success, e_def = self._update_host_exploit_vulnerability( host, False ) # don't need to check success since should always succeed # in finding exploit, when there is no contraint on OS if e_def[u.EXPLOIT_ACCESS] >= access_level: return # Need to ensure host is now vulnerable to >= 1 privesc action success, pe_def = self._update_host_privesc_vulnerability( host, True ) if success: return raise AssertionError( "After {VUL_RETRIES}, unable to find privilege escalation action" " for target OS, when looking for vulnerable host configuration," " try again using more privilege escalation actions or processes" ) def _update_host_exploit_vulnerability(self, host, os_constraint): # choose an exploit randomly and make host vulnerable to it if not os_constraint: # can change host OS, so all exploits valid valid_e = list(self.exploits.values()) else: # exploits must match OS of host, or be OS agnostic # since cannot change host OS valid_e = [] for e_def in self.exploits.values(): e_os = e_def[u.EXPLOIT_OS] if e_os is None or host.os[e_os]: valid_e.append(e_def) if len(valid_e) == 0: return False, None e_def = np.random.choice(valid_e) host.services[e_def[u.EXPLOIT_SERVICE]] = True if e_def[u.EXPLOIT_OS] is not None and not os_constraint: self._update_host_os(host, e_def[u.EXPLOIT_OS]) return True, e_def def _update_host_privesc_vulnerability(self, host, os_constraint): # choose an exploit randomly and make host vulnerable to it if not os_constraint: # no OS constraint valid_pe = list(self.privescs.values()) else: valid_pe = [] for pe_def in self.privescs.values(): pe_os = pe_def[u.PRIVESC_OS] if pe_os is None or host.os[pe_os]: valid_pe.append(pe_def) if len(valid_pe) == 0: return False, None pe_def = 
np.random.choice(valid_pe) host.processes[pe_def[u.PRIVESC_PROCESS]] = True if pe_def[u.PRIVESC_OS] is not None and not os_constraint: self._update_host_os(host, pe_def[u.PRIVESC_OS]) return True, pe_def def _update_host_os(self, host, os): # must set all to false first, so only one host OS is true for os_name in host.os.keys(): host.os[os_name] = False host.os[os] = True def _get_host_value(self, address): return float(self.sensitive_hosts.get(address, self.base_host_value)) def _generate_firewall(self, restrictiveness): """Generate the firewall rules. Parameters ---------- restrictiveness : int parameter that controls how many services are blocked by firewall between zones (i.e. between internet, DMZ, sensitive and user zones). Returns ------- dict firewall rules that are a mapping from (src, dest) connection to set of allowed services, which defines for each service whether traffic using that service is allowed between pairs of subnets. Notes ----- Traffic from at least one service running on each subnet will be allowed between each zone. This may mean more services will be allowed than restrictiveness parameter. 
""" num_subnets = len(self.subnets) firewall = {} # find services running on each subnet that are vulnerable subnet_services = {} subnet_services[u.INTERNET] = set() for host_addr, host in self.hosts.items(): subnet = host_addr[0] if subnet not in subnet_services: subnet_services[subnet] = set() for e_def in self.exploits.values(): if self._host_is_vulnerable_to_exploit(host, e_def): subnet_services[subnet].add(e_def[u.EXPLOIT_SERVICE]) for src in range(num_subnets): for dest in range(num_subnets): if src == dest or not self.topology[src][dest]: # no inter subnet connection so no firewall continue elif src > SENSITIVE and dest > SENSITIVE: # all services allowed between user subnets allowed = set(self.services) firewall[(src, dest)] = allowed continue # else src and dest in different zones => block services based # on restrictiveness dest_avail = subnet_services[dest].copy() if len(dest_avail) < restrictiveness: # restrictiveness not limiting allowed traffic, all # services allowed firewall[(src, dest)] = dest_avail.copy() continue # add at least one service to allowed service dest_allowed = np.random.choice(list(dest_avail)) # for dest subnet choose available services upto # restrictiveness limit or all services dest_avail.remove(dest_allowed) allowed = set() allowed.add(dest_allowed) while len(allowed) < restrictiveness: dest_allowed = np.random.choice(list(dest_avail)) if dest_allowed not in allowed: allowed.add(dest_allowed) dest_avail.remove(dest_allowed) firewall[(src, dest)] = allowed self.firewall = firewall ================================================ FILE: nasim/scenarios/host.py ================================================ class Host: """A single host in the network. Note this class is mainly used to store initial scenario data for a host. The HostVector class is used to store and track the current state of a host (for efficiency and ease of use reasons). 
""" def __init__(self, address, os, services, processes, firewall, value=0.0, discovery_value=0.0, compromised=False, reachable=False, discovered=False, access=0): """ Arguments --------- address : (int, int) address of host as (subnet, id) os : dict A os_name: bool dictionary indicating which OS the host is runinng services : dict a (service_name, bool) dictionary indicating which services are present/absent processes : dict a (process_name, bool) dictionary indicating which processes are running on host or not firewall : dict a (addr, denied services) dictionary defining which services are blocked from other hosts in the network. If other host not in firewall assumes all services allowed value : float, optional value of the host (default=0.0) discovery_value : float, optional the reward gained for discovering the host (default=0.0) compromised : bool, optional whether host has been compromised or not (default=False) reachable : bool, optional whether host is reachable by attacker or not (default=False) discovered : bool, optional whether host has been reachable discovered by attacker or not (default=False) access : int, optional access level of attacker on host (default=0) """ self.address = address self.os = os self.services = services self.processes = processes self.firewall = firewall self.value = value self.discovery_value = discovery_value self.compromised = compromised self.reachable = reachable self.discovered = discovered self.access = access def is_running_service(self, service): return self.services[service] def is_running_os(self, os): return self.os[os] def is_running_process(self, process): return self.processes[process] def traffic_permitted(self, addr, service): return service not in self.firewall.get(addr, []) def __str__(self): output = ["Host: {"] output.append(f"\taddress: {self.address}") output.append(f"\tcompromised: {self.compromised}") output.append(f"\treachable: {self.reachable}") output.append(f"\tvalue: {self.value}") 
output.append(f"\taccess: {self.access}") output.append("\tOS: {") for os_name, val in self.os.items(): output.append(f"\t\t{os_name}: {val}") output.append("\t}") output.append("\tservices: {") for name, val in self.services.items(): output.append(f"\t\t{name}: {val}") output.append("\t}") output.append("\tprocesses: {") for name, val in self.processes.items(): output.append(f"\t\t{name}: {val}") output.append("\t}") output.append("\tfirewall: {") for addr, val in self.firewall.items(): output.append(f"\t\t{addr}: {val}") output.append("\t}") return "\n".join(output) def __repr__(self): return f"Host: {self.address}" ================================================ FILE: nasim/scenarios/loader.py ================================================ """This module contains functionality for loading network scenarios from yaml files. """ import math import nasim.scenarios.utils as u from nasim.scenarios import Scenario from nasim.scenarios.host import Host # dictionary of valid key names and value types for config file VALID_CONFIG_KEYS = { u.SUBNETS: list, u.TOPOLOGY: list, u.SENSITIVE_HOSTS: dict, u.OS: list, u.SERVICES: list, u.PROCESSES: list, u.EXPLOITS: dict, u.PRIVESCS: dict, u.SERVICE_SCAN_COST: (int, float), u.SUBNET_SCAN_COST: (int, float), u.OS_SCAN_COST: (int, float), u.PROCESS_SCAN_COST: (int, float), u.HOST_CONFIGS: dict, u.FIREWALL: dict } OPTIONAL_CONFIG_KEYS = {u.STEP_LIMIT: int} VALID_ACCESS_VALUES = ["user", "root", u.USER_ACCESS, u.ROOT_ACCESS] ACCESS_LEVEL_MAP = { "user": u.USER_ACCESS, "root": u.ROOT_ACCESS } # required keys for exploits EXPLOIT_KEYS = { u.EXPLOIT_SERVICE: str, u.EXPLOIT_OS: str, u.EXPLOIT_PROB: (int, float), u.EXPLOIT_COST: (int, float), u.EXPLOIT_ACCESS: (str, int) } # required keys for privesc actions PRIVESC_KEYS = { u.PRIVESC_OS: str, u.PRIVESC_PROCESS: str, u.PRIVESC_PROB: (int, float), u.PRIVESC_COST: (int, float), u.PRIVESC_ACCESS: (str, int) } # required keys for host configs HOST_CONFIG_KEYS = { u.HOST_OS: (str, None), 
u.HOST_SERVICES: list, u.HOST_PROCESSES: list } class ScenarioLoader: def load(self, file_path, name=None): """Load the scenario from file Arguments --------- file_path : str path to scenario file name : str, optional the scenarios name, if None name will be generated from file path (default=None) Returns ------- scenario_dict : dict dictionary with scenario definition Raises ------ Exception If file unable to load or scenario file is invalid. """ self.yaml_dict = u.load_yaml(file_path) if name is None: name = u.get_file_name(file_path) self.name = name self._check_scenario_sections_valid() self._parse_subnets() self._parse_topology() self._parse_os() self._parse_services() self._parse_processes() self._parse_sensitive_hosts() self._parse_exploits() self._parse_privescs() self._parse_scan_costs() self._parse_host_configs() self._parse_firewall() self._parse_hosts() self._parse_step_limit() return self._construct_scenario() def _construct_scenario(self): scenario_dict = dict() scenario_dict[u.SUBNETS] = self.subnets scenario_dict[u.TOPOLOGY] = self.topology scenario_dict[u.OS] = self.os scenario_dict[u.SERVICES] = self.services scenario_dict[u.PROCESSES] = self.processes scenario_dict[u.SENSITIVE_HOSTS] = self.sensitive_hosts scenario_dict[u.EXPLOITS] = self.exploits scenario_dict[u.PRIVESCS] = self.privescs scenario_dict[u.OS_SCAN_COST] = self.os_scan_cost scenario_dict[u.SERVICE_SCAN_COST] = self.service_scan_cost scenario_dict[u.SUBNET_SCAN_COST] = self.subnet_scan_cost scenario_dict[u.PROCESS_SCAN_COST] = self.process_scan_cost scenario_dict[u.FIREWALL] = self.firewall scenario_dict[u.HOSTS] = self.hosts scenario_dict[u.STEP_LIMIT] = self.step_limit return Scenario( scenario_dict, name=self.name, generated=False ) def _check_scenario_sections_valid(self): """Checks if scenario dictionary contains all required sections and they are valid type. """ # 0. 
check correct number of keys assert len(self.yaml_dict) >= len(VALID_CONFIG_KEYS), \ (f"Too few config file keys: {len(self.yaml_dict)} " f"< {len(VALID_CONFIG_KEYS)}") # 1. check keys are valid and values are correct type for k, v in self.yaml_dict.items(): assert k in VALID_CONFIG_KEYS or k in OPTIONAL_CONFIG_KEYS, \ f"{k} not a valid config file key" if k in VALID_CONFIG_KEYS: expected_type = VALID_CONFIG_KEYS[k] else: expected_type = OPTIONAL_CONFIG_KEYS[k] assert isinstance(v, expected_type), \ (f"{v} invalid type for config file key '{k}': {type(v)}" f" != {expected_type}") def _parse_subnets(self): subnets = self.yaml_dict[u.SUBNETS] self._validate_subnets(subnets) # insert internet subnet subnets.insert(0, 1) self.subnets = subnets self.num_hosts = sum(subnets)-1 def _validate_subnets(self, subnets): # check subnets is valid list of positive ints assert len(subnets) > 0, "Subnets cannot be empty list" for subnet_size in subnets: assert type(subnet_size) is int and subnet_size > 0, \ f"{subnet_size} invalid subnet size, must be positive int" def _parse_topology(self): topology = self.yaml_dict[u.TOPOLOGY] self._validate_topology(topology) self.topology = topology def _validate_topology(self, topology): # check topology is valid adjacency matrix assert len(topology) == len(self.subnets), \ ("Number of rows in topology adjacency matrix must equal " f"number of subnets: {len(topology)} != {len(self.subnets)}") for row in topology: assert isinstance(row, list), \ "topology must be 2D adjacency matrix (i.e. 
list of lists)" assert len(row) == len(self.subnets), \ ("Number of columns in topology matrix must equal number of" f" subnets: {len(topology)} != {len(self.subnets)}") for col in row: assert isinstance(col, int) and (col == 1 or col == 0), \ ("Subnet_connections adjaceny matrix must contain only" f" 1 (connected) or 0 (not connected): {col} invalid") def _parse_os(self): os = self.yaml_dict[u.OS] self._validate_os(os) self.os = os def _validate_os(self, os): assert len(os) > 0, \ f"{len(os)}. Invalid number of OSs, must be >= 1" assert len(os) == len(set(os)), \ f"{os}. OSs must not contain duplicates" def _parse_services(self): services = self.yaml_dict[u.SERVICES] self._validate_services(services) self.services = services def _validate_services(self, services): assert len(services) > 0, \ f"{len(services)}. Invalid number of services, must be > 0" assert len(services) == len(set(services)), \ f"{services}. Services must not contain duplicates" def _parse_processes(self): processes = self.yaml_dict[u.PROCESSES] self._validate_processes(processes) self.processes = processes def _validate_processes(self, processes): assert len(processes) >= 1, \ f"{len(processes)}. Invalid number of services, must be > 0" assert len(processes) == len(set(processes)), \ f"{processes}. 
Processes must not contain duplicates" def _parse_sensitive_hosts(self): sensitive_hosts = self.yaml_dict[u.SENSITIVE_HOSTS] self._validate_sensitive_hosts(sensitive_hosts) self.sensitive_hosts = dict() for address, value in sensitive_hosts.items(): self.sensitive_hosts[eval(address)] = value def _validate_sensitive_hosts(self, sensitive_hosts): # check sensitive_hosts is valid dict of (subnet, id) : value assert len(sensitive_hosts) > 0, \ ("Number of sensitive hosts must be >= 1: " f"{len(sensitive_hosts)} not >= 1") assert len(sensitive_hosts) <= self.num_hosts, \ ("Number of sensitive hosts must be <= total number of " f"hosts: {len(sensitive_hosts)} not <= {self.num_hosts}") # sensitive hosts must be valid address for address, value in sensitive_hosts.items(): subnet_id, host_id = eval(address) assert self._is_valid_subnet_ID(subnet_id), \ ("Invalid sensitive host tuple: subnet_id must be a valid" f" subnet: {subnet_id} != non-negative int less than " f"{len(self.subnets) + 1}") assert self._is_valid_host_address(subnet_id, host_id), \ ("Invalid sensitive host tuple: host_id must be a valid" f" int: {host_id} != non-negative int less than" f" {self.subnets[subnet_id]}") assert isinstance(value, (float, int)) and value > 0, \ (f"Invalid sensitive host tuple: invalid value: {value}" f" != a positive int or float") # 5.c sensitive hosts must not contain duplicate addresses for i, m in enumerate(sensitive_hosts.keys()): h1_addr = eval(m) for j, n in enumerate(sensitive_hosts.keys()): if i == j: continue h2_addr = eval(n) assert h1_addr != h2_addr, \ ("Sensitive hosts list must not contain duplicate host " f"addresses: {m} == {n}") def _is_valid_subnet_ID(self, subnet_ID): if type(subnet_ID) is not int \ or subnet_ID < 1 \ or subnet_ID > len(self.subnets): return False return True def _is_valid_host_address(self, subnet_ID, host_ID): if not self._is_valid_subnet_ID(subnet_ID): return False if type(host_ID) is not int \ or host_ID < 0 \ or host_ID >= 
self.subnets[subnet_ID]: return False return True def _parse_exploits(self): exploits = self.yaml_dict[u.EXPLOITS] self._validate_exploits(exploits) self.exploits = exploits def _validate_exploits(self, exploits): for e_name, e in exploits.items(): self._validate_single_exploit(e_name, e) def _validate_single_exploit(self, e_name, e): assert isinstance(e, dict), \ f"{e_name}. Exploit must be a dict." for k, t in EXPLOIT_KEYS.items(): assert k in e, f"{e_name}. Exploit missing key: '{k}'" assert isinstance(e[k], t), \ f"{e_name}. Exploit '{k}' incorrect type. Expected {t}" assert e[u.EXPLOIT_SERVICE] in self.services, \ (f"{e_name}. Exploit target service invalid: " f"'{e[u.EXPLOIT_SERVICE]}'") if str(e[u.EXPLOIT_OS]).lower() == "none": e[u.EXPLOIT_OS] = None assert e[u.EXPLOIT_OS] is None or e[u.EXPLOIT_OS] in self.os, \ (f"{e_name}. Exploit target OS is invalid. '{e[u.EXPLOIT_OS]}'." " Should be None or one of the OS in the os list.") assert 0 <= e[u.EXPLOIT_PROB] < 1, \ (f"{e_name}. Exploit probability, '{e[u.EXPLOIT_PROB]}' not " "a valid probability") assert e[u.EXPLOIT_COST] > 0, f"{e_name}. Exploit cost must be > 0." assert e[u.EXPLOIT_ACCESS] in VALID_ACCESS_VALUES, \ (f"{e_name}. Exploit access value '{e[u.EXPLOIT_ACCESS]}' " f"invalid. Must be one of {VALID_ACCESS_VALUES}") if isinstance(e[u.EXPLOIT_ACCESS], str): e[u.EXPLOIT_ACCESS] = ACCESS_LEVEL_MAP[e[u.EXPLOIT_ACCESS]] def _parse_privescs(self): self.privescs = self.yaml_dict[u.PRIVESCS] self._validate_privescs(self.privescs) def _validate_privescs(self, privescs): for pe_name, pe in privescs.items(): self._validate_single_privesc(pe_name, pe) def _validate_single_privesc(self, pe_name, pe): s_name = "Priviledge Escalation" assert isinstance(pe, dict), f"{pe_name}. {s_name} must be a dict." for k, t in PRIVESC_KEYS.items(): assert k in pe, f"{pe_name}. {s_name} missing key: '{k}'" assert isinstance(pe[k], t), \ (f"{pe_name}. {s_name} '{k}' incorrect type. 
Expected {t}") assert pe[u.PRIVESC_PROCESS] in self.processes, \ (f"{pe_name}. {s_name} target process invalid: " f"'{pe[u.PRIVESC_PROCESS]}'") if str(pe[u.PRIVESC_OS]).lower() == "none": pe[u.PRIVESC_OS] = None assert pe[u.PRIVESC_OS] is None or pe[u.PRIVESC_OS] in self.os, \ (f"{pe_name}. {s_name} target OS is invalid. '{pe[u.PRIVESC_OS]}'." f" Should be None or one of the OS in the os list.") assert 0 <= pe[u.PRIVESC_PROB] <= 1.0, \ (f"{pe_name}. {s_name} probability, '{pe[u.PRIVESC_PROB]}' not " "a valid probability") assert pe[u.PRIVESC_COST] > 0, \ f"{pe_name}. {s_name} cost must be > 0." assert pe[u.PRIVESC_ACCESS] in VALID_ACCESS_VALUES, \ (f"{pe_name}. {s_name} access value '{pe[u.PRIVESC_ACCESS]}' " f"invalid. Must be one of {VALID_ACCESS_VALUES}") if isinstance(pe[u.PRIVESC_ACCESS], str): pe[u.PRIVESC_ACCESS] = ACCESS_LEVEL_MAP[pe[u.PRIVESC_ACCESS]] def _parse_scan_costs(self): self.os_scan_cost = self.yaml_dict[u.OS_SCAN_COST] self.service_scan_cost = self.yaml_dict[u.SERVICE_SCAN_COST] self.subnet_scan_cost = self.yaml_dict[u.SUBNET_SCAN_COST] self.process_scan_cost = self.yaml_dict[u.PROCESS_SCAN_COST] for (n, c) in [ ("OS", self.os_scan_cost), ("Service", self.service_scan_cost), ("Subnet", self.subnet_scan_cost), ("Process", self.process_scan_cost) ]: self._validate_scan_cost(n, c) def _validate_scan_cost(self, scan_name, scan_cost): assert scan_cost >= 0, f"{scan_name} Scan Cost must be >= 0." 
def _parse_host_configs(self): self.host_configs = self.yaml_dict[u.HOST_CONFIGS] self._validate_host_configs(self.host_configs) def _validate_host_configs(self, host_configs): assert len(host_configs) == self.num_hosts, \ ("Number of host configurations must match the number of hosts " f"in network: {len(host_configs)} != {self.num_hosts}") assert self._has_all_host_addresses(host_configs.keys()), \ ("Host configurations must have no duplicates and have an" " address for each host on network.") for addr, cfg in host_configs.items(): self._validate_host_config(addr, cfg) def _has_all_host_addresses(self, addresses): """Check that list of (subnet_ID, host_ID) tuples contains all addresses on network based on subnets list """ for s_id, s_size in enumerate(self.subnets[1:]): for m in range(s_size): # +1 to s_id since first subnet is 1 if str((s_id + 1, m)) not in addresses: return False return True def _validate_host_config(self, addr, cfg): """Check if a host config is valid or not given the list of exploits available N.B. each host config must contain at least one service """ err_prefix = f"Host {addr}" assert isinstance(cfg, dict) and len(cfg) >= len(HOST_CONFIG_KEYS), \ (f"{err_prefix} configurations must be a dict of length >= " f"{len(HOST_CONFIG_KEYS)}. 
{cfg} is invalid") for k in HOST_CONFIG_KEYS: assert k in cfg, f"{err_prefix} configuration missing key: {k}" host_services = cfg[u.HOST_SERVICES] for service in host_services: assert service in self.services, \ (f"{err_prefix} Invalid service in configuration services " f"list: {service}") assert len(host_services) == len(set(host_services)), \ (f"{err_prefix} configuration services list cannot contain " "duplicates") host_processes = cfg[u.HOST_PROCESSES] for process in host_processes: assert process in self.processes, \ (f"{err_prefix} invalid process in configuration processes" f" list: {process}") assert len(host_processes) == len(set(host_processes)), \ (f"{err_prefix} configuation processes list cannot contain " "duplicates") host_os = cfg[u.HOST_OS] assert host_os in self.os, \ f"{err_prefix} invalid os in configuration: {host_os}" fw_err_prefix = f"{err_prefix} {u.HOST_FIREWALL}" if u.HOST_FIREWALL in cfg: firewall = cfg[u.HOST_FIREWALL] assert isinstance(firewall, dict), \ (f"{fw_err_prefix} must be a dictionary, with host " "addresses as keys and a list of denied services as values. " f"{firewall} is invalid.") for addr, srv_list in firewall.items(): addr = self._validate_host_address(addr, err_prefix) assert self._is_valid_firewall_setting(srv_list), \ (f"{fw_err_prefix} setting must be a list, contain only " f"valid services and contain no duplicates: {srv_list}" " is not valid") else: cfg[u.HOST_FIREWALL] = dict() v_err_prefix = f"{err_prefix} {u.HOST_VALUE}" if u.HOST_VALUE in cfg: host_value = cfg[u.HOST_VALUE] assert isinstance(host_value, (int, float)), \ (f"{v_err_prefix} must be an integer or float value. " f"{host_value} is invalid") if addr in self.sensitive_hosts: sh_value = self.sensitive_hosts[addr] assert math.isclose(host_value, sh_value), \ (f"{v_err_prefix} for a sensitive host must either match " f"the value specified in the {u.SENSITIVE_HOSTS} section " f"or be excluded the host config. 
The value {host_value} " f"is invalid as it does not match value {sh_value}.") def _validate_host_address(self, addr, err_prefix=""): try: addr = eval(addr) except Exception: raise AssertionError( f"{err_prefix} address invalid. Must be (subnet, host) tuple" f" of integers. {addr} is invalid." ) assert isinstance(addr, tuple) \ and len(addr) == 2 \ and all([isinstance(a, int) for a in addr]), \ (f"{err_prefix} address invalid. Must be (subnet, host) tuple" f" of integers. {addr} is invalid.") assert 0 < addr[0] < len(self.subnets), \ (f"{err_prefix} address invalid. Subnet address must be in range" f" 0 < subnet addr < {len(self.subnets)}. {addr[0]} is invalid.") assert 0 <= addr[1] < self.subnets[addr[0]], \ (f"{err_prefix} address invalid. Host address must be in range " f"0 < host addr < {self.subnets[addr[0]]}. {addr[1]} is invalid.") return True def _parse_firewall(self): firewall = self.yaml_dict[u.FIREWALL] self._validate_firewall(firewall) # convert (subnet_id, subnet_id) string to tuple self.firewall = {} for connect, v in firewall.items(): self.firewall[eval(connect)] = v def _validate_firewall(self, firewall): assert self._contains_all_required_firewalls(firewall), \ ("Firewall dictionary must contain two entries for each subnet " "connection in network (including from outside) as defined by " "network topology matrix") for f in firewall.values(): assert self._is_valid_firewall_setting(f), \ ("Firewall setting must be a list, contain only valid " f"services and contain no duplicates: {f} is not valid") def _contains_all_required_firewalls(self, firewall): for src, row in enumerate(self.topology): for dest, col in enumerate(row): if src == dest: continue if col == 1 and (str((src, dest)) not in firewall or str((dest, src)) not in firewall): return False return True def _is_valid_firewall_setting(self, f): if type(f) != list: return False for service in f: if service not in self.services: return False for i, x in enumerate(f): for j, y in enumerate(f): if 
i != j and x == y: return False return True def _parse_hosts(self): """Returns ordered dictionary of hosts in network, with address as keys and host objects as values """ hosts = dict() for address, h_cfg in self.host_configs.items(): formatted_address = eval(address) os_cfg, srv_cfg, proc_cfg = self._construct_host_config(h_cfg) value = self._get_host_value(formatted_address, h_cfg) hosts[formatted_address] = Host( address=formatted_address, os=os_cfg, services=srv_cfg, processes=proc_cfg, firewall=h_cfg[u.HOST_FIREWALL], value=value ) self.hosts = hosts def _construct_host_config(self, host_cfg): os_cfg = {} for os_name in self.os: os_cfg[os_name] = os_name == host_cfg[u.HOST_OS] services_cfg = {} for service in self.services: services_cfg[service] = service in host_cfg[u.HOST_SERVICES] processes_cfg = {} for process in self.processes: processes_cfg[process] = process in host_cfg[u.HOST_PROCESSES] return os_cfg, services_cfg, processes_cfg def _get_host_value(self, address, host_cfg): if address in self.sensitive_hosts: return float(self.sensitive_hosts[address]) return float(host_cfg.get(u.HOST_VALUE, u.DEFAULT_HOST_VALUE)) def _parse_step_limit(self): if u.STEP_LIMIT not in self.yaml_dict: step_limit = None else: step_limit = self.yaml_dict[u.STEP_LIMIT] assert step_limit > 0, \ f"Step limit must be positive int: {step_limit} is invalid" self.step_limit = step_limit ================================================ FILE: nasim/scenarios/scenario.py ================================================ import math from pprint import pprint import nasim.scenarios.utils as u class Scenario: def __init__(self, scenario_dict, name=None, generated=False): self.scenario_dict = scenario_dict self.name = name self.generated = generated self._e_map = None self._pe_map = None # this is used for consistent positioning of # host state and obs in state and obs matrices self.host_num_map = {} for host_num, host_addr in enumerate(self.hosts): self.host_num_map[host_addr] = host_num 
@property def step_limit(self): return self.scenario_dict.get(u.STEP_LIMIT, None) @property def services(self): return self.scenario_dict[u.SERVICES] @property def num_services(self): return len(self.services) @property def os(self): return self.scenario_dict[u.OS] @property def num_os(self): return len(self.os) @property def processes(self): return self.scenario_dict[u.PROCESSES] @property def num_processes(self): return len(self.processes) @property def access_levels(self): return u.ROOT_ACCESS @property def exploits(self): return self.scenario_dict[u.EXPLOITS] @property def privescs(self): return self.scenario_dict[u.PRIVESCS] @property def exploit_map(self): """A nested dictionary for all exploits in scenario. I.e. {service_name: { os_name: { name: e_name, cost: e_cost, prob: e_prob, access: e_access } } """ if self._e_map is None: e_map = {} for e_name, e_def in self.exploits.items(): srv_name = e_def[u.EXPLOIT_SERVICE] if srv_name not in e_map: e_map[srv_name] = {} srv_map = e_map[srv_name] os = e_def[u.EXPLOIT_OS] if os not in srv_map: srv_map[os] = { "name": e_name, u.EXPLOIT_SERVICE: srv_name, u.EXPLOIT_OS: os, u.EXPLOIT_COST: e_def[u.EXPLOIT_COST], u.EXPLOIT_PROB: e_def[u.EXPLOIT_PROB], u.EXPLOIT_ACCESS: e_def[u.EXPLOIT_ACCESS] } self._e_map = e_map return self._e_map @property def privesc_map(self): """A nested dictionary for all privilege escalation actions in scenario. I.e. 
{process_name: { os_name: { name: pe_name, cost: pe_cost, prob: pe_prob, access: pe_access } } """ if self._pe_map is None: pe_map = {} for pe_name, pe_def in self.privescs.items(): proc_name = pe_def[u.PRIVESC_PROCESS] if proc_name not in pe_map: pe_map[proc_name] = {} proc_map = pe_map[proc_name] os = pe_def[u.PRIVESC_OS] if os not in proc_map: proc_map[os] = { "name": pe_name, u.PRIVESC_PROCESS: proc_name, u.PRIVESC_OS: os, u.PRIVESC_COST: pe_def[u.PRIVESC_COST], u.PRIVESC_PROB: pe_def[u.PRIVESC_PROB], u.PRIVESC_ACCESS: pe_def[u.PRIVESC_ACCESS] } self._pe_map = pe_map return self._pe_map @property def subnets(self): return self.scenario_dict[u.SUBNETS] @property def topology(self): return self.scenario_dict[u.TOPOLOGY] @property def sensitive_hosts(self): return self.scenario_dict[u.SENSITIVE_HOSTS] @property def sensitive_addresses(self): return list(self.sensitive_hosts.keys()) @property def firewall(self): return self.scenario_dict[u.FIREWALL] @property def hosts(self): return self.scenario_dict[u.HOSTS] @property def address_space(self): return list(self.hosts.keys()) @property def service_scan_cost(self): return self.scenario_dict[u.SERVICE_SCAN_COST] @property def os_scan_cost(self): return self.scenario_dict[u.OS_SCAN_COST] @property def subnet_scan_cost(self): return self.scenario_dict[u.SUBNET_SCAN_COST] @property def process_scan_cost(self): return self.scenario_dict[u.PROCESS_SCAN_COST] @property def address_space_bounds(self): return self.scenario_dict.get( u.ADDRESS_SPACE_BOUNDS, (len(self.subnets), max(self.subnets)) ) @property def host_value_bounds(self): """The min and max values of host in scenario Returns ------- (float, float) (min, max) tuple of host values """ min_value = math.inf max_value = -math.inf for host in self.hosts.values(): min_value = min(min_value, host.value) max_value = max(max_value, host.value) return (min_value, max_value) @property def host_discovery_value_bounds(self): """The min and max discovery values of hosts in 
scenario Returns ------- (float, float) (min, max) tuple of host values """ min_value = math.inf max_value = -math.inf for host in self.hosts.values(): min_value = min(min_value, host.discovery_value) max_value = max(max_value, host.discovery_value) return (min_value, max_value) def display(self): pprint(self.scenario_dict) def get_action_space_size(self): num_exploits = len(self.exploits) num_privescs = len(self.privescs) # OSScan, ServiceScan, SubnetScan, ProcessScan num_scans = 4 actions_per_host = num_exploits + num_privescs + num_scans return len(self.hosts) * actions_per_host def get_state_space_size(self): # compromised, reachable, discovered host_aux_bin_features = 3 num_bin_features = ( host_aux_bin_features + self.num_os + self.num_services + self.num_processes ) # access num_tri_features = 1 host_states = 2**num_bin_features * 3**num_tri_features return len(self.hosts) * host_states def get_state_dims(self): # compromised, reachable, discovered, value, discovery_value, access host_aux_features = 6 host_state_size = ( self.address_space_bounds[0] + self.address_space_bounds[1] + host_aux_features + self.num_os + self.num_services + self.num_processes ) return len(self.hosts), host_state_size def get_observation_dims(self): state_dims = self.get_state_dims() return state_dims[0]+1, state_dims[1] def get_description(self): description = { "Name": self.name, "Type": "generated" if self.generated else "static", "Subnets": len(self.subnets), "Hosts": len(self.hosts), "OS": self.num_os, "Services": self.num_services, "Processes": self.num_processes, "Exploits": len(self.exploits), "PrivEscs": len(self.privescs), "Actions": self.get_action_space_size(), "Observation Dims": self.get_observation_dims(), "States": self.get_state_space_size(), "Step Limit": self.step_limit } return description ================================================ FILE: nasim/scenarios/utils.py ================================================ import os import yaml import os.path as osp 
SCENARIO_DIR = osp.dirname(osp.abspath(__file__))

# default subnet address for internet
INTERNET = 0

# Constants
NUM_ACCESS_LEVELS = 2
NO_ACCESS = 0
USER_ACCESS = 1
ROOT_ACCESS = 2
DEFAULT_HOST_VALUE = 0

# scenario property keys
SUBNETS = "subnets"
TOPOLOGY = "topology"
SENSITIVE_HOSTS = "sensitive_hosts"
SERVICES = "services"
OS = "os"
PROCESSES = "processes"
EXPLOITS = "exploits"
PRIVESCS = "privilege_escalation"
SERVICE_SCAN_COST = "service_scan_cost"
OS_SCAN_COST = "os_scan_cost"
SUBNET_SCAN_COST = "subnet_scan_cost"
PROCESS_SCAN_COST = "process_scan_cost"
HOST_CONFIGS = "host_configurations"
FIREWALL = "firewall"
HOSTS = "host"
STEP_LIMIT = "step_limit"
ACCESS_LEVELS = "access_levels"
ADDRESS_SPACE_BOUNDS = "address_space_bounds"

# scenario exploit keys
EXPLOIT_SERVICE = "service"
EXPLOIT_OS = "os"
EXPLOIT_PROB = "prob"
EXPLOIT_COST = "cost"
EXPLOIT_ACCESS = "access"

# scenario privilege escalation keys
PRIVESC_PROCESS = "process"
PRIVESC_OS = "os"
PRIVESC_PROB = "prob"
PRIVESC_COST = "cost"
PRIVESC_ACCESS = "access"

# host configuration keys
HOST_SERVICES = "services"
HOST_PROCESSES = "processes"
HOST_OS = "os"
HOST_FIREWALL = "firewall"
HOST_VALUE = "value"


def load_yaml(file_path):
    """Load yaml file located at file path.

    Parameters
    ----------
    file_path : str
        path to yaml file

    Returns
    -------
    dict
        contents of yaml file

    Raises
    ------
    Exception
        if there's an issue loading file.
    """
    # NOTE(review): FullLoader supports the full YAML spec without arbitrary
    # object construction; scenario files are assumed to be trusted local
    # files — do not point this at untrusted input.
    with open(file_path) as fin:
        content = yaml.load(fin, Loader=yaml.FullLoader)
    return content


def get_file_name(file_path):
    """Extracts the file or dir name from file path

    Parameters
    ----------
    file_path : str
        file path

    Returns
    -------
    str
        file name with any path and extensions removed
    """
    full_file_name = file_path.split(os.sep)[-1]
    # drop everything after the first '.' (e.g. ".yaml" extension)
    file_name = full_file_name.split(".")[0]
    return file_name


# ================================================ FILE: nasim/scripts/describe_scenarios.py ================================================
"""This script will output description statistics of all benchmark scenarios.

It will output a table to stdout (and optionally to a .csv file) which
contains the following headers:

- Name : the scenarios name
- Type : static or generated
- Subnets : the number of subnets
- Hosts : the number of hosts
- OS : the number of OS
- Services : the number of services
- Processes : the number of processes
- Exploits : the number of exploits
- PrivEsc : the number of privilege escalation actions
- Actions : the total number of actions available to agent
- States : the total number of states
- Step limit : the step limit for the scenario

Usage
-----

$ python describe_scenarios.py [-o --output filename.csv]
"""
import prettytable

from nasim.scenarios import make_benchmark_scenario
from nasim.scenarios.benchmark import AVAIL_BENCHMARKS


def describe_scenarios(output=None):
    """Print a description table of all benchmark scenarios.

    Parameters
    ----------
    output : str, optional
        if given, also write the table as CSV to this file path
    """
    rows = []
    headers = None
    for name in AVAIL_BENCHMARKS:
        scenario = make_benchmark_scenario(name, seed=0)
        des = scenario.get_description()
        if headers is None:
            headers = list(des.keys())
        # very large state counts are unreadable in full — use sci notation
        if des["States"] > 1e8:
            des["States"] = f"{des['States']:.2E}"
        rows.append([str(des[h]) for h in headers])

    table = prettytable.PrettyTable(headers)
    for row in rows:
        table.add_row(row)
    print(table)

    if output is not None:
        print(f"\nSaving to {output}")
        with open(output, "w") as fout:
            fout.write(",".join(headers) + "\n")
            for row in rows:
                fout.write(",".join(row) + "\n")


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="File name to output as CSV to")
    args = parser.parse_args()
    describe_scenarios(args.output)


# ================================================ FILE: nasim/scripts/run_dqn_policy.py ================================================
"""A script for running a pre-trained DQN agent

Note, user must ensure the DQN policy matches the NASim Environment used to
train it in terms of size.

E.g. A policy trained on the 'tiny-gen' env can be tested against the 'tiny'
env since they both have the same Action and Observation spaces. But a policy
trained on 'tiny-gen' could not be used on the 'small' environment (or any
non-'tiny' environment for that matter)
"""
import nasim
from nasim.agents.dqn_agent import DQNAgent

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str,
                        help="benchmark scenario name")
    parser.add_argument("policy_path", type=str,
                        help="path to policy")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("--eval_eps", type=int, default=1,
                        help="Number of episodes to run (default=1)")
    parser.add_argument("--seed", type=int, default=0,
                        help="Random seed (default=0)")
    parser.add_argument("--epsilon", type=float, default=0.05,
                        help=("Epsilon (i.e. random action probability) to use"
                              "(default=0.05)"))
    parser.add_argument("--render", action="store_true",
                        help="Render the episode/s")
    args = parser.parse_args()

    env = nasim.make_benchmark(args.env_name,
                               args.seed,
                               fully_obs=not args.partially_obs,
                               flat_actions=True,
                               flat_obs=True)
    # NOTE(review): **vars(args) forwards all CLI args; assumes DQNAgent
    # accepts and ignores unknown keyword arguments — confirm against
    # DQNAgent.__init__
    dqn_agent = DQNAgent(env, verbose=False, **vars(args))
    dqn_agent.load(args.policy_path)

    total_ret = 0
    total_steps = 0
    goals = 0
    print(f"\n{'-'*60}\nRunning DQN Policy:\n\t{args.policy_path}\n{'-'*60}")
    for i in range(args.eval_eps):
        ret, steps, goal = dqn_agent.run_eval_episode(
            env, args.render, args.epsilon
        )
        print(f"Episode {i} return={ret}, steps={steps}, goal reached={goal}")
        total_ret += ret
        total_steps += steps
        goals += int(goal)

    print(f"\n{'-'*60}\nDone\n{'-'*60}")
    print(f"Average Return = {total_ret / args.eval_eps:.2f}")
    print(f"Average Steps = {total_steps / args.eval_eps:.2f}")
    print(f"Goals = {goals} / {args.eval_eps}")


# ================================================ FILE: nasim/scripts/run_random_benchmarks.py ================================================
"""This script runs the random agent for all benchmarks scenarios

The mean (+/- stdev) steps and reward are reported in table to stdout (and to
optional CSV file)

Usage
-----

$ python run_random_benchmarks.py [-n --num_cpus NUM_CPUS]
    [-o --output OUTPUT_FILENAME]
    [-s --num_seeds NUM_SEEDS]
"""
import os
import numpy as np
import multiprocessing as mp
from prettytable import PrettyTable

import nasim
from nasim.agents.random_agent import run_random_agent
from nasim.scenarios.benchmark import AVAIL_BENCHMARKS


def print_msg(msg):
    """Print msg prefixed with the current process ID (useful under mp)."""
    print(f"[PID={os.getpid()}] {msg}")


class Result:
    """Accumulates per-seed results for a single scenario."""

    def __init__(self, name):
        self.name = name
        self.run_steps = []
        self.run_rewards = []

    def add(self, steps, reward):
        """Record the steps and reward of a single run."""
        self.run_steps.append(steps)
        self.run_rewards.append(reward)

    def summarize(self):
        """Return (steps mean, steps std, reward mean, reward std)."""
        steps_mean = np.mean(self.run_steps)
        steps_std = np.std(self.run_steps)
        reward_mean = np.mean(self.run_rewards)
        reward_std = np.std(self.run_rewards)
        return steps_mean, steps_std, reward_mean, reward_std

    def get_formatted_summary(self):
        """Return ("mean +/- std" steps, "mean +/- std" reward) strings."""
        steps_mean, steps_std, reward_mean, reward_std = self.summarize()
        return (
            f"{steps_mean:.2f} +/- {steps_std:.2f}",
            f"{reward_mean:.2f} +/- {reward_std:.2f}"
        )


def run_scenario(args):
    """Run random agent on a single (scenario, seed) pair.

    Takes a single tuple argument so it can be used with mp.Pool.map.
    """
    scenario_name, seed = args
    print_msg(f"Running '{scenario_name}' scenario with seed={seed}")
    # keyword args for clarity (same values as before: partially observable
    # disabled off, flat actions and observations on)
    env = nasim.make_benchmark(scenario_name,
                               seed,
                               fully_obs=False,
                               flat_actions=True,
                               flat_obs=True)
    steps, total_reward, done = run_random_agent(env, verbose=False)
    return {
        "Name": scenario_name,
        "Seed": seed,
        "Steps": steps,
        "Total reward": total_reward
    }


def collate_results(results):
    """Group per-run result dicts into a {scenario name: Result} map."""
    scenario_results = {}
    for res in results:
        name = res["Name"]
        if name not in scenario_results:
            scenario_results[name] = Result(name)
        scenario_results[name].add(res["Steps"], res["Total reward"])
    return scenario_results


def output_results(results, output=None):
    """Print results table to stdout and optionally write it as CSV.

    Parameters
    ----------
    results : dict
        map from scenario name to Result
    output : str, optional
        if given, also write results as CSV to this file path
    """
    headers = ["Scenario Name", "Steps", "Total Reward"]
    rows = []
    for name in AVAIL_BENCHMARKS:
        rows.append([
            name,
            *results[name].get_formatted_summary()
        ])

    table = PrettyTable(headers)
    for row in rows:
        table.add_row(row)
    # BUG FIX: the table was built but never displayed, contradicting the
    # module docstring ("reported in table to stdout")
    print(table)

    if output is not None:
        with open(output, "w") as fout:
            fout.write(",".join(headers) + "\n")
            for row in rows:
                fout.write(",".join(row) + "\n")


def run_random_benchmark(num_cpus=1, num_seeds=10, output=None):
    """Run random agent over all benchmarks, num_seeds runs each.

    Runs are distributed over num_cpus processes.
    """
    run_args_list = []
    for name in AVAIL_BENCHMARKS:
        for seed in range(num_seeds):
            run_args_list.append((name, seed))

    with mp.Pool(num_cpus) as p:
        results = p.map(run_scenario, run_args_list)

    results = collate_results(results)
    output_results(results, output)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--num_cpus", type=int, default=1,
                        help="Number of CPUS to use in parallel (default=1)")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="File name to output as CSV to")
    parser.add_argument("-s", "--num_seeds", type=int, default=10,
                        help=("Number of seeds to run for each scenario"
                              " (default=10)"))
    args = parser.parse_args()
    run_random_benchmark(**vars(args))


# ================================================ FILE: nasim/scripts/train_dqn.py ================================================
"""A script for training a DQN agent and storing best policy """
import nasim
from nasim.agents.dqn_agent import DQNAgent


class BestDQN(DQNAgent):
    """A DQN Agent which saves best policy found during training """

    def __init__(self, env, save_path, eval_epsilon=0.01, **kwargs):
        super().__init__(env, **kwargs)
        self.save_path = save_path
        self.eval_epsilon = eval_epsilon
        self.best_score = -float("inf")

    def run_train_episode(self, step_limit):
        ep_ret, steps, goal_reached = super().run_train_episode(step_limit)
        # only evaluate once exploration phase is over
        if self.steps_done > self.exploration_steps:
            eval_ret, _, _ = self.run_eval_episode(
                eval_epsilon=self.eval_epsilon
            )
            if eval_ret > self.best_score:
                # BUG FIX: report the evaluation return that is actually
                # stored as the new best score (was printing ep_ret)
                print(f"Saving New Best Score = {eval_ret}")
                self.best_score = eval_ret
                self.save(self.save_path)
        return ep_ret, steps, goal_reached


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("env_name", type=str,
                        help="benchmark scenario name")
    parser.add_argument("save_path", type=str,
                        help="save path for agent")
    parser.add_argument("-o", "--partially_obs", action="store_true",
                        help="Partially Observable Mode")
    parser.add_argument("--eval_epsilon", type=float, default=0.01,
                        help="Epsilon to use for evaluation (default=0.01)")
    parser.add_argument("--hidden_sizes", type=int, nargs="*",
                        default=[64, 64],
                        help="(default=[64, 64])")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="Learning rate (default=0.001)")
    parser.add_argument("--training_steps", type=int, default=10000,
                        help="training steps (default=10000)")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="(default=32)")
    parser.add_argument("--target_update_freq", type=int, default=1000,
                        help="(default=1000)")
    parser.add_argument("--seed", type=int, default=0,
                        help="(default=0)")
    parser.add_argument("--replay_size", type=int, default=100000,
                        help="(default=100000)")
    parser.add_argument("--final_epsilon", type=float, default=0.05,
                        help="(default=0.05)")
    parser.add_argument("--exploration_steps", type=int, default=5000,
                        help="(default=5000)")
    parser.add_argument("--gamma", type=float, default=0.99,
                        help="(default=0.99)")
    args = parser.parse_args()

    # need some post-exploration steps for evaluation-based checkpointing
    assert args.training_steps > args.exploration_steps

    env = nasim.make_benchmark(args.env_name,
                               args.seed,
                               fully_obs=not args.partially_obs,
                               flat_actions=True,
                               flat_obs=True)
    dqn_agent = BestDQN(env, **vars(args))
    dqn_agent.train()

    print(f"\n{'-'*60}\nDone\n{'-'*60}")
    print(f"Best Policy score = {dqn_agent.best_score}")
    print(f"Policy saved to: {dqn_agent.save_path}")


# ================================================ FILE: nasim/scripts/visualize_graph.py ================================================
"""Environment network graph visualizer

This script allows the user to visualize the network graph for a chosen
benchmark scenario.
"""
import nasim

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("scenario_name", type=str,
                        help="benchmark scenario name")
    parser.add_argument("-s", "--seed", type=int, default=0,
                        help="random seed (default=0)")
    args = parser.parse_args()

    env = nasim.make_benchmark(args.scenario_name, args.seed)
    env.render_network_graph(show=True)


# ================================================ FILE: setup.py ================================================
import pathlib
from setuptools import setup, find_packages

extras = {
    'dqn': [
        'torch>=1.5',
        'tensorboard>=2.2'
    ],
    'docs': [
        'sphinx>=3.0',
        'sphinx-rtd-theme>=0.4'
    ],
    'test': [
        'pytest>=5.4'
    ]
}
extras['all'] = [item for group in extras.values() for item in group]


def get_version():
    """Gets the nasim version."""
    path = pathlib.Path(__file__).absolute().parent / "nasim" / "__init__.py"
    content = path.read_text()
    for line in content.splitlines():
        if line.startswith("__version__"):
            return line.strip().split()[-1].strip().strip('"')
    raise RuntimeError("bad version data in __init__.py")


setup(
    name='nasim',
    version=get_version(),
    url="https://networkattacksimulator.readthedocs.io",
    description="A simple and fast simulator for remote network pen-testing",
    # read_text closes the file (the bare open().read() leaked the handle)
    long_description=pathlib.Path('README.rst').read_text(),
    long_description_content_type='text/x-rst',
    author="Jonathon Schwartz",
    author_email="Jonathon.Schwartz@anu.edu.au",
    license="MIT",
    packages=[
        package for package in find_packages()
        if package.startswith('nasim')
    ],
    install_requires=[
        'gymnasium>=0.26',
        'numpy>=1.18',
        'networkx>=2.4',
        'matplotlib>=3.1',
        'pyyaml>=5.3',
        'prettytable>=0.7'
    ],
    extras_require=extras,
    python_requires='>=3.8',
    package_data={
        'nasim': ['scenarios/benchmark/*.yaml']
    },
    project_urls={
        'Documentation': "https://networkattacksimulator.readthedocs.io",
        'Source': "https://github.com/Jjschwartz/NetworkAttackSimulator/",
    },
    classifiers=[
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
    ],
    zip_safe=False
)


# ================================================ FILE: test/__init__.py ================================================


# ================================================ FILE: test/test_bruteforce.py ================================================
"""Runs bruteforce agent on environment for different scenarios and using
different parameters to check no exceptions occur
"""
import pytest

import nasim
from nasim.scenarios.benchmark import \
    AVAIL_GEN_BENCHMARKS, AVAIL_STATIC_BENCHMARKS
from nasim.agents.bruteforce_agent import run_bruteforce_agent


@pytest.mark.parametrize("scenario", AVAIL_STATIC_BENCHMARKS)
@pytest.mark.parametrize("seed", [0, 666])
@pytest.mark.parametrize("fully_obs", [True, False])
@pytest.mark.parametrize("flat_actions", [True, False])
@pytest.mark.parametrize("flat_obs", [True, False])
def test_bruteforce_static(scenario, seed, fully_obs, flat_actions, flat_obs):
    """Tests all static benchmark scenarios using every possible environment
    setting, using bruteforce agent, checking for any errors
    """
    env = nasim.make_benchmark(scenario,
                               seed=seed,
                               fully_obs=fully_obs,
                               flat_actions=flat_actions,
                               flat_obs=flat_obs,
                               render_mode=None)
    run_bruteforce_agent(env, verbose=False)


@pytest.mark.parametrize("scenario", AVAIL_GEN_BENCHMARKS)
@pytest.mark.parametrize("seed", [0, 30, 666])
@pytest.mark.parametrize("fully_obs", [True, False])
@pytest.mark.parametrize("flat_actions", [True, False])
@pytest.mark.parametrize("flat_obs", [True, False])
def test_bruteforce_gen(scenario, seed, fully_obs, flat_actions, flat_obs):
    """Tests all generated benchmark scenarios using every possible
    environment setting, using bruteforce agent, checking for any errors
    """
    env = nasim.make_benchmark(scenario,
                               seed=seed,
                               fully_obs=fully_obs,
                               flat_actions=flat_actions,
                               flat_obs=flat_obs,
                               render_mode=None)
    run_bruteforce_agent(env, verbose=False)


# ================================================ FILE: test/test_env.py ================================================
"""Runs some general tests on environment"""
import pytest

import nasim
from nasim.scenarios.benchmark import \
    AVAIL_GEN_BENCHMARKS, AVAIL_STATIC_BENCHMARKS


def test_render_error():
    env = nasim.make_benchmark("tiny", render_mode="a bad mode str")
    env.reset()
    with pytest.raises(NotImplementedError):
        env.render()


def test_render_readable():
    env = nasim.make_benchmark("tiny", render_mode="human")
    env.reset()
    env.render()


def test_render_state_error():
    env = nasim.make_benchmark("tiny")
    env.reset()
    with pytest.raises(NotImplementedError):
        env.render_state(mode="a bad mode str")


def test_render_state_readable():
    env = nasim.make_benchmark("tiny")
    env.reset()
    env.render_state(mode="human")


@pytest.mark.parametrize("flat_actions", [True, False])
def test_render_action(flat_actions):
    env = nasim.make_benchmark("tiny", flat_actions=flat_actions)
    env.reset()
    env.render_action(env.action_space.sample())


@pytest.mark.parametrize(
    ("scenario", "expected_value"),
    [("tiny", 0.0), ("small", 0.0)]
)
def test_get_total_discovery_value(scenario, expected_value):
    env = nasim.make_benchmark(scenario)
    env.reset()
    actual_value = env.network.get_total_discovery_value()
    assert actual_value == expected_value


@pytest.mark.parametrize(
    ("scenario", "expected_value"),
    [("tiny", 200.0), ("small", 200.0)]
)
def test_get_total_sensitive_host_value(scenario, expected_value):
    env = nasim.make_benchmark(scenario)
    env.reset()
    actual_value = env.network.get_total_sensitive_host_value()
    assert actual_value == expected_value


@pytest.mark.parametrize(
    ("scenario", "expected_value"),
    [("tiny", 3), ("small", 4)]
)
def test_get_minimum_hops(scenario, expected_value):
    # (renamed from test_get_minumum_hops — typo; pytest discovers by prefix)
    env = nasim.make_benchmark(scenario)
    env.reset()
    actual_value = env.get_minimum_hops()
    assert actual_value == expected_value


# ================================================ FILE: test/test_generator.py ================================================
"""Runs bruteforce agent on environment for different scenarios and using
different parameters to check no exceptions occur
"""
import pytest

import nasim
from nasim.scenarios.benchmark import \
    AVAIL_GEN_BENCHMARKS


@pytest.mark.parametrize("scenario", AVAIL_GEN_BENCHMARKS)
@pytest.mark.parametrize("seed", list(range(100)))
def test_generator(scenario, seed):
    """Tests generating all generated benchmark scenarios using a range of
    seeds, checking for any errors
    """
    nasim.make_benchmark(scenario, seed=seed)


# ================================================ FILE: test/test_gym_bruteforce.py ================================================
"""Runs bruteforce agent on environment for different scenarios and using
different parameters to check no exceptions occur.

Tests loading environments using gym.make()
"""
from importlib import reload

import gymnasium as gym
import pytest

import nasim
from nasim.scenarios.benchmark import AVAIL_BENCHMARKS
from nasim.agents.bruteforce_agent import run_bruteforce_agent


def test_gym_reload():
    """Tests there is no issue when reloading gym """
    reload(gym)
    reload(nasim)


@pytest.mark.parametrize("scenario", AVAIL_BENCHMARKS)
@pytest.mark.parametrize("po", ['', 'PO'])
@pytest.mark.parametrize("obs", ['', '2D'])
@pytest.mark.parametrize("actions", ['', 'VA'])
@pytest.mark.parametrize("v", ['v0'])
def test_bruteforce(scenario, po, obs, actions, v):
    """Tests all benchmark scenarios using every possible environment
    setting, using bruteforce agent, checking for any errors
    """
    # build registered env id, e.g. "nasim:TinyPO2DVA-v0" from "tiny"
    name = ''.join([g.capitalize() for g in scenario.split("-")])
    name = f"nasim:{name}{po}{obs}{actions}-{v}"
    env = gym.make(name, render_mode=None)
    run_bruteforce_agent(env, verbose=False)