Showing preview only (5,447K chars total). Download the full file or copy to clipboard to get everything.
Repository: Samsung/CredSweeper
Branch: main
Commit: 4fef4bedba2e
Files: 387
Total size: 16.1 MB
Directory structure:
gitextract_f9me649i/
├── LICENSE
├── README.md
├── SECURITY.md
├── action.yml
├── credsweeper/
│ ├── __init__.py
│ ├── __main__.py
│ ├── app.py
│ ├── common/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── keyword_checklist.py
│ │ ├── keyword_checklist.txt
│ │ ├── keyword_pattern.py
│ │ └── morpheme_checklist.txt
│ ├── config/
│ │ ├── __init__.py
│ │ └── config.py
│ ├── credentials/
│ │ ├── __init__.py
│ │ ├── augment_candidates.py
│ │ ├── candidate.py
│ │ ├── candidate_group_generator.py
│ │ ├── candidate_key.py
│ │ ├── credential_manager.py
│ │ └── line_data.py
│ ├── deep_scanner/
│ │ ├── __init__.py
│ │ ├── abstract_scanner.py
│ │ ├── byte_scanner.py
│ │ ├── bzip2_scanner.py
│ │ ├── crx_scanner.py
│ │ ├── csv_scanner.py
│ │ ├── deb_scanner.py
│ │ ├── deep_scanner.py
│ │ ├── docx_scanner.py
│ │ ├── eml_scanner.py
│ │ ├── encoder_scanner.py
│ │ ├── gzip_scanner.py
│ │ ├── html_scanner.py
│ │ ├── jclass_scanner.py
│ │ ├── jks_scanner.py
│ │ ├── lang_scanner.py
│ │ ├── lzma_scanner.py
│ │ ├── mxfile_scanner.py
│ │ ├── patch_scanner.py
│ │ ├── pdf_scanner.py
│ │ ├── pkcs_scanner.py
│ │ ├── png_scanner.py
│ │ ├── pptx_scanner.py
│ │ ├── rpm_scanner.py
│ │ ├── rtf_scanner.py
│ │ ├── sqlite3_scanner.py
│ │ ├── strings_scanner.py
│ │ ├── tar_scanner.py
│ │ ├── tmx_scanner.py
│ │ ├── xlsx_scanner.py
│ │ ├── xml_scanner.py
│ │ ├── zip_scanner.py
│ │ └── zlib_scanner.py
│ ├── file_handler/
│ │ ├── __init__.py
│ │ ├── abstract_provider.py
│ │ ├── analysis_target.py
│ │ ├── byte_content_provider.py
│ │ ├── content_provider.py
│ │ ├── data_content_provider.py
│ │ ├── descriptor.py
│ │ ├── diff_content_provider.py
│ │ ├── file_path_extractor.py
│ │ ├── files_provider.py
│ │ ├── patches_provider.py
│ │ ├── string_content_provider.py
│ │ ├── struct_content_provider.py
│ │ └── text_content_provider.py
│ ├── filters/
│ │ ├── __init__.py
│ │ ├── filter.py
│ │ ├── group/
│ │ │ ├── __init__.py
│ │ │ ├── general_keyword.py
│ │ │ ├── general_pattern.py
│ │ │ ├── group.py
│ │ │ ├── password_keyword.py
│ │ │ ├── token_pattern.py
│ │ │ ├── url_credentials_group.py
│ │ │ ├── weird_base36_token.py
│ │ │ └── weird_base64_token.py
│ │ ├── line_git_binary_check.py
│ │ ├── line_specific_key_check.py
│ │ ├── line_uue_part_check.py
│ │ ├── value_allowlist_check.py
│ │ ├── value_array_dictionary_check.py
│ │ ├── value_atlassian_token_check.py
│ │ ├── value_azure_token_check.py
│ │ ├── value_base32_data_check.py
│ │ ├── value_base64_data_check.py
│ │ ├── value_base64_encoded_pem_check.py
│ │ ├── value_base64_key_check.py
│ │ ├── value_base64_part_check.py
│ │ ├── value_basic_auth_check.py
│ │ ├── value_blocklist_check.py
│ │ ├── value_camel_case_check.py
│ │ ├── value_dictionary_keyword_check.py
│ │ ├── value_discord_bot_check.py
│ │ ├── value_entropy_base32_check.py
│ │ ├── value_entropy_base36_check.py
│ │ ├── value_entropy_base64_check.py
│ │ ├── value_entropy_base_check.py
│ │ ├── value_file_path_check.py
│ │ ├── value_github_check.py
│ │ ├── value_grafana_check.py
│ │ ├── value_grafana_service_check.py
│ │ ├── value_hex_number_check.py
│ │ ├── value_jfrog_token_check.py
│ │ ├── value_json_web_key_check.py
│ │ ├── value_json_web_token_check.py
│ │ ├── value_last_word_check.py
│ │ ├── value_length_check.py
│ │ ├── value_method_check.py
│ │ ├── value_morphemes_check.py
│ │ ├── value_not_allowed_pattern_check.py
│ │ ├── value_not_part_encoded_check.py
│ │ ├── value_number_check.py
│ │ ├── value_pattern_check.py
│ │ ├── value_sealed_secret_check.py
│ │ ├── value_search_check.py
│ │ ├── value_similarity_check.py
│ │ ├── value_split_keyword_check.py
│ │ ├── value_string_type_check.py
│ │ ├── value_token_base32_check.py
│ │ ├── value_token_base36_check.py
│ │ ├── value_token_base64_check.py
│ │ ├── value_token_base_check.py
│ │ └── value_token_check.py
│ ├── logger/
│ │ ├── __init__.py
│ │ └── logger.py
│ ├── main.py
│ ├── ml_model/
│ │ ├── __init__.py
│ │ ├── features/
│ │ │ ├── __init__.py
│ │ │ ├── entropy_evaluation.py
│ │ │ ├── feature.py
│ │ │ ├── file_extension.py
│ │ │ ├── has_html_tag.py
│ │ │ ├── is_secret_numeric.py
│ │ │ ├── length_of_attribute.py
│ │ │ ├── morpheme_dense.py
│ │ │ ├── rule_name.py
│ │ │ ├── rule_severity.py
│ │ │ ├── search_in_attribute.py
│ │ │ ├── word_in.py
│ │ │ ├── word_in_path.py
│ │ │ ├── word_in_postamble.py
│ │ │ ├── word_in_preamble.py
│ │ │ ├── word_in_transition.py
│ │ │ ├── word_in_value.py
│ │ │ └── word_in_variable.py
│ │ ├── ml_config.json
│ │ ├── ml_model.onnx
│ │ └── ml_validator.py
│ ├── py.typed
│ ├── rules/
│ │ ├── __init__.py
│ │ ├── config.yaml
│ │ └── rule.py
│ ├── scanner/
│ │ ├── __init__.py
│ │ ├── scan_type/
│ │ │ ├── __init__.py
│ │ │ ├── multi_pattern.py
│ │ │ ├── pem_key_pattern.py
│ │ │ ├── scan_type.py
│ │ │ └── single_pattern.py
│ │ └── scanner.py
│ ├── secret/
│ │ ├── config.json
│ │ └── log.yaml
│ └── utils/
│ ├── __init__.py
│ ├── hop_stat.py
│ ├── pem_key_detector.py
│ └── util.py
├── docs/
│ ├── Makefile
│ ├── README.md
│ ├── howto/
│ │ └── how-to-contribute.md
│ ├── make.bat
│ ├── requirements.txt
│ └── source/
│ ├── api.rst
│ ├── apps_config.rst
│ ├── conf.py
│ ├── credsweeper.common.rst
│ ├── credsweeper.config.rst
│ ├── credsweeper.credentials.rst
│ ├── credsweeper.deep_scanner.rst
│ ├── credsweeper.file_handler.rst
│ ├── credsweeper.filters.group.rst
│ ├── credsweeper.filters.rst
│ ├── credsweeper.logger.rst
│ ├── credsweeper.ml_model.features.rst
│ ├── credsweeper.ml_model.rst
│ ├── credsweeper.rst
│ ├── credsweeper.rules.rst
│ ├── credsweeper.scanner.rst
│ ├── credsweeper.scanner.scan_type.rst
│ ├── credsweeper.utils.rst
│ ├── develop.rst
│ ├── guide.rst
│ ├── how_to_contribute.rst
│ ├── index.rst
│ ├── install.rst
│ ├── overall_architecture.rst
│ └── rules_config.rst
├── experiment/
│ ├── README.md
│ ├── __init__.py
│ ├── data_loader.py
│ ├── evaluate_model.py
│ ├── features.py
│ ├── hyperparameters.py
│ ├── log_callback.py
│ ├── main.py
│ ├── main.sh
│ ├── ml_model.py
│ ├── model_config_preprocess.py
│ ├── plot.py
│ ├── prepare_data.py
│ ├── requirements.txt
│ ├── tf2onnx/
│ │ └── tf2onnx.sh
│ ├── tools/
│ │ ├── base64_test.py
│ │ ├── entropy_test.py
│ │ ├── morpheme_test.py
│ │ └── strength_test.py
│ └── train.py
├── fuzz/
│ ├── README.md
│ ├── __main__.py
│ ├── auxilary.py
│ ├── coveraging.sh
│ ├── fuzzing.sh
│ ├── minimizing.sh
│ ├── re-fuzzing.sh
│ ├── reducing.sh
│ └── requirements.txt
├── pyproject.toml
├── pytest.ini
├── requirements.txt
└── tests/
├── README.md
├── __init__.py
├── common/
│ ├── __init__.py
│ ├── test_confidence.py
│ ├── test_keyword_checklist.py
│ ├── test_keyword_pattern.py
│ ├── test_regex.py
│ └── test_severity.py
├── config/
│ ├── __init__.py
│ └── test_config.py
├── conftest.py
├── credentials/
│ ├── __init__.py
│ ├── test_augment_candidates.py
│ ├── test_credential_manager.py
│ └── test_line_data.py
├── data/
│ ├── __init__.py
│ ├── depth_3_pedantic.json
│ ├── doc.json
│ ├── no_filters_no_ml.json
│ ├── no_ml.json
│ └── output.json
├── deep_scanner/
│ ├── __init__.py
│ ├── test_abstract_scanner.py
│ ├── test_bzip2_scanner.py
│ ├── test_crx_scanner.py
│ ├── test_csv_scanner.py
│ ├── test_deb_scanner.py
│ ├── test_deep_scanner.py
│ ├── test_eml_scanner.py
│ ├── test_encoder_scanner.py
│ ├── test_gzip_scanner.py
│ ├── test_html_scanner.py
│ ├── test_jclass_scanner.py
│ ├── test_jks_scanner.py
│ ├── test_lzma_scanner.py
│ ├── test_mxfile_scanner.py
│ ├── test_pdf_scanner.py
│ ├── test_png_scanner.py
│ ├── test_rtf_scanner.py
│ ├── test_sqlite3_scanner.py
│ ├── test_strings_scanner.py
│ ├── test_struct_scanner.py
│ ├── test_tar_scanner.py
│ ├── test_tmx_scanner.py
│ ├── test_xml_scanner.py
│ ├── test_zip_scanner.py
│ └── test_zlib_scanner.py
├── file_handler/
│ ├── __init__.py
│ ├── test_byte_content_provider.py
│ ├── test_data_content_provider.py
│ ├── test_diff_content_provider.py
│ ├── test_file_path_extractor.py
│ ├── test_files_provider.py
│ ├── test_patches_provider.py
│ ├── test_string_content_provider.py
│ ├── test_struct_content_provider.py
│ ├── test_text_content_provider.py
│ ├── zip_bomb_1.py
│ └── zip_bomb_2.py
├── filters/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_line_git_binary_check.py
│ ├── test_line_specific_key_check.py
│ ├── test_line_uue_part_check.py
│ ├── test_value_allowlist_check.py
│ ├── test_value_array_dictionary_check.py
│ ├── test_value_atlassian_token_check.py
│ ├── test_value_azure_token_check.py
│ ├── test_value_base32_data_check.py
│ ├── test_value_base64_data_check.py
│ ├── test_value_base64_key_check.py
│ ├── test_value_base64_part_check.py
│ ├── test_value_basic_auth_check.py
│ ├── test_value_blocklist_check.py
│ ├── test_value_camel_case_check.py
│ ├── test_value_dictionary_keyword_check.py
│ ├── test_value_entropy_base32_check.py
│ ├── test_value_entropy_base36_check.py
│ ├── test_value_entropy_base64_check.py
│ ├── test_value_file_path_check.py
│ ├── test_value_github_check.py
│ ├── test_value_grafana_check.py
│ ├── test_value_grafana_service_check.py
│ ├── test_value_hex_number_check.py
│ ├── test_value_json_web_key_check.py
│ ├── test_value_json_web_token_check.py
│ ├── test_value_last_word_check.py
│ ├── test_value_length_check.py
│ ├── test_value_method_check.py
│ ├── test_value_morphemes_check.py
│ ├── test_value_not_allowed_pattern.py
│ ├── test_value_not_part_encoded.py
│ ├── test_value_number_check.py
│ ├── test_value_pattern_check.py
│ ├── test_value_sealed_secret_check.py
│ ├── test_value_search_check.py
│ ├── test_value_similarity_check.py
│ ├── test_value_split_keyword_check.py
│ ├── test_value_string_type_check.py
│ ├── test_value_token_base32_check.py
│ ├── test_value_token_base36_check.py
│ ├── test_value_token_base64_check.py
│ └── test_value_token_check.py
├── ml_model/
│ ├── __init__.py
│ ├── test_features.py
│ └── test_ml_validator.py
├── rules/
│ ├── __init__.py
│ ├── common.py
│ ├── test_api.py
│ ├── test_auth.py
│ ├── test_aws_key.py
│ ├── test_aws_multi.py
│ ├── test_aws_mws_key.py
│ ├── test_credential.py
│ ├── test_dynatrace_api_token.py
│ ├── test_facebook_key.py
│ ├── test_firebase_domain.py
│ ├── test_github_classic_token.py
│ ├── test_github_fine_granted_token.py
│ ├── test_google_api_key.py
│ ├── test_google_multi.py
│ ├── test_google_oauth_key.py
│ ├── test_instagram_access_token.py
│ ├── test_jwt.py
│ ├── test_key.py
│ ├── test_mailchimp_key.py
│ ├── test_nonce.py
│ ├── test_password.py
│ ├── test_paypal_key.py
│ ├── test_pem_key.py
│ ├── test_picatic_key.py
│ ├── test_pypi_api_token.py
│ ├── test_rule.py
│ ├── test_salt.py
│ ├── test_secret.py
│ ├── test_sendgrid_api_key_token.py
│ ├── test_shopify_token.py
│ ├── test_slack_token.py
│ ├── test_slack_webhook.py
│ ├── test_square_access_token.py
│ ├── test_telegram_bot_api_token.py
│ ├── test_token.py
│ └── test_url_credentials.py
├── scanner/
│ ├── __init__.py
│ └── scan_type/
│ ├── __init__.py
│ ├── test_multipattern.py
│ └── test_pem_key_pattern.py
├── test_app.py
├── test_doc.py
├── test_git.py
├── test_main.py
├── test_utils/
│ ├── __init__.py
│ └── dummy_line_data.py
└── utils/
├── __init__.py
├── test_hop_stat.py
└── test_util.py
================================================
FILE CONTENTS
================================================
================================================
FILE: LICENSE
================================================
Copyright (c) 2021 SAMSUNG
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# CredSweeper
[Releases](https://github.com/Samsung/CredSweeper/releases)
[Documentation](https://credsweeper.readthedocs.io/en/latest/?badge=latest)
[License](LICENSE)
[PyPI](https://pypi.org/project/credsweeper/)
[PyPI version](https://badge.fury.io/py/credsweeper)
[Tests](https://github.com/Samsung/CredSweeper/actions/workflows/test.yml)
[Code coverage](https://codecov.io/gh/Samsung/CredSweeper)
[OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/6055)
[OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/Samsung/CredSweeper)
<img src="https://raw.githubusercontent.com/Samsung/CredSweeper/main/docs/images/Logo.png" width="500"/>
- [CredSweeper](#credsweeper)
- [Introduction](#introduction)
- [How To Use](#how-to-use)
- [Main Requirements](#main-requirements)
- [Installation](#installation)
- [Run](#run)
- [Config](#config)
- [Develop](#develop)
- [Tests](#tests)
- [Benchmark](#benchmark)
- [Overall Architecture](#overall-architecture)
- [Retrain Model](#retrain-model)
- [License](#license)
- [How to Get Involved](#how-to-get-involved)
- [Project Roles](#project-roles)
- [Contributor](#contributor)
- [Maintainer](#maintainer)
- [How to Contact](#how-to-contact)
## Introduction
CredSweeper is an advanced credential detection tool designed to identify exposed
credentials such as passwords, API keys, tokens, and other sensitive information
across source code, configuration files, documents, and binary assets.
CredSweeper scans regular files, embedded data in containers, and files added in Git commits.
The tool combines pattern-based detection, machine learning–based validation, and
deep file inspection to deliver comprehensive and accurate security scanning for
modern codebases and repositories.
**Key Capabilities:**
- Credential detection in source code, configuration files, documents, and archives
- False positive reduction using algorithmic filters and machine learning
- Scanning of compressed files, documents, and binary formats
- Git repository analysis and diff scanning
Full documentation can be found here: <https://credsweeper.readthedocs.io/>
## How To Use
### Main Requirements
- Python 3.10, 3.11, 3.12, 3.13, 3.14
### Installation
Details [here](https://credsweeper.readthedocs.io/en/latest/install.html).
```bash
pip install credsweeper
```
### Run
[How to use](https://credsweeper.readthedocs.io/en/latest/guide.html).
Run CredSweeper:
```bash
python -m credsweeper --path tests/samples/password.gradle --save-json output.json
```
### JSON Output
```json
[
{
"rule": "Password",
"severity": "high",
"confidence": "moderate",
"ml_probability": 0.993,
"line_data_list": [
{
"line": "password = \"cackle!\"",
"line_num": 1,
"path": "./tests/samples/password.gradle",
"info": "",
"variable": "password",
"variable_start": 0,
"variable_end": 8,
"value": "cackle!",
"value_start": 12,
"value_end": 19,
"entropy": 2.52164
}
]
}
]
```
### Config
[credsweeper/secret/config.json](credsweeper/secret/config.json) - Configuration file for pre-processing of CredSweeper. For more details please check [here](https://credsweeper.readthedocs.io/en/latest/overall_architecture.html#pre-processing).
You can set the `pattern`, `extension` and `path` you want to exclude from scanning as below.
```json
{
"exclude": {
"pattern": [
"AKIA[0-9A-Z]{9}EXAMPLE",
...
],
"extension": [
"gif",
"jpg",
...
],
"path": [
"/.git/",
"/openssl/",
...
]
},
...
}
```
And you can also set `source_ext`, `source_quote_ext`, `find_by_ext_list`, `check_for_literals`, `line_data_output`, and `candidate_output` as below.
- `source_ext`: List of extensions for scanning categorized as source files.
- `source_quote_ext`: List of extensions for scanning categorized as source files that use quotes.
- `find_by_ext_list`: List of extensions for which a file is reported based on its extension alone.
- `check_for_literals`: Boolean flag that controls whether to check that a line contains a string literal declaration.
- `line_data_output`: List of attributes of [line_data](credsweeper/credentials/line_data.py) for output.
- `candidate_output`: List of attributes of [candidate](credsweeper/credentials/candidate.py) for output.
```json
{
...
"source_ext": [
".py",
".cpp",
...
],
"source_quote_ext": [
".py",
".cpp",
...
],
"find_by_ext_list": [
".pem",
".cer",
...
],
"check_for_literals": true,
"line_data_output": [
"line",
"line_num",
...
],
"candidate_output": [
"rule",
"severity",
...
]
}
```
[credsweeper/rules/config.yaml](credsweeper/rules/config.yaml) - Configuration file for setting Rule. For more details please check [here](https://credsweeper.readthedocs.io/en/latest/overall_architecture.html#rule).
```yaml
- name: Credential
severity: medium
confidence: moderate
type: keyword
values:
- credential
filter_type: GeneralKeyword
use_ml: true
min_line_len: 18
required_substrings:
- credential
target:
- code
```
## Develop
### Tests
Run all tests with random order:
```bash
python -m pytest --cov=credsweeper --cov-report=term-missing --random-order --random-order-bucket=global -s tests/
```
### Benchmark
We have a dataset for testing credential scanners called [CredData](https://github.com/Samsung/CredData).
If you want to test CredSweeper with this dataset please check [here](https://github.com/Samsung/CredData/blob/main/README.md#benchmark).
## Overall Architecture
For the overall architecture of CredSweeper, please see [here](https://credsweeper.readthedocs.io/en/latest/overall_architecture.html).
## Retrain Model
If you want to check how the model was trained, or to retrain it on your own data, please refer to the [experiment](experiment/README.md) folder.
## License
CredSweeper is an Open Source project released under the terms of the [MIT License](https://opensource.org/licenses/mit-license.php).
## How to Get Involved
In addition to developing under an Open Source license, the project follows an Open Source Development approach,
welcoming everyone to participate, contribute, and engage with each other through the project.
### Project Roles
The project recognizes the following formal roles: Contributor and Maintainer.
Informally, the community may organize itself and grant additional rights and responsibilities to the necessary people to achieve its goals.
#### Contributor
A Contributor is anyone who wishes to contribute to the project, at any level. Contributors are granted the rights to:
- Contribute code, documentation, translations, artwork, samples, etc.
- Report defects (bugs) and suggestions for enhancement.
- Participate in the process of reviewing contributions by others.
If you want to participate in the project development, check out the [how to contribute guideline](./docs/howto/how-to-contribute.md) in advance.
Contributors who show dedication and skill are rewarded with additional rights and responsibilities.
Their opinions weigh more when decisions are made, in a fully meritocratic fashion.
#### Maintainer
A Maintainer is a Contributor who is also responsible for knowing, directing and anticipating the needs of a given Module.
As such, Maintainers have the right to set the overall organization of the source code in the Module,
and the right to participate in the decision-making. Maintainers are required to review contributors' requests and decide whether to accept them.
| Name | E-Mail |
|------------------------------------------------|------------------------|
| [Jaeku Yun](https://github.com/silentearth) | jk0113.yun@samsung.com |
| [Shinhyung Choi](https://github.com/csh519) | sh519.choi@samsung.com |
| [Roman Babenko](https://github.com/babenek) | r.babenko@samsung.com |
| [Yuliia Tatarinova](https://github.com/Yullia) | yuliia.t@samsung.com |
## How to Contact
Please post questions, [issues, or suggestions in issues](https://github.com/Samsung/CredSweeper/issues). This is the best way to communicate with the developers.
================================================
FILE: SECURITY.md
================================================
# Security Policy
## Supported Versions
| Version | Supported |
|---------|--------------------|
| 1.15.x | :white_check_mark: |
| <1.15.x | :x: |
## Reporting a Vulnerability
Please use [issues](https://github.com/Samsung/CredSweeper/issues) to report any security issue.
================================================
FILE: action.yml
================================================
name: "CredSweeper action"
description: "CredSweeper checks files"
author: "r.babenko@samsung.com"
branding:
icon: "terminal"
color: "gray-dark"
inputs:
python_version:
description: "Python Version. 3.10 - default"
default: "3.10"
required: false
path:
description: "Path to scan"
required: true
report:
description: "CredSweeper report in JSON format"
default: "output.json"
required: false
hashed:
description: "Report output is hashed by default"
default: "--hashed"
required: false
error:
description: "Exit with an error code if credentials are detected"
default: "--error"
required: false
runs:
using: "composite"
steps:
- name: DEBUG
shell: bash
env:
path: ${{ inputs.path }}
report: ${{ inputs.report }}
error: ${{ inputs.error }}
hashed: ${{ inputs.hashed }}
run: echo "print ('@@@ $error @@@ $report @@@ $path @@@ $PATH @@@')"
- name: Setup Python
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 - 2025.01.28
with:
python-version: ${{ inputs.python_version }}
- name: Install CredSweeper
shell: bash
run: python -m pip install credsweeper
- name: Run CredSweeper
shell: bash
env:
path: ${{ inputs.path }}
report: ${{ inputs.report }}
error: ${{ inputs.error }}
hashed: ${{ inputs.hashed }}
run: python -m credsweeper --banner --log INFO --no-color --no-stdout "$error" "$hashed" --save-json "$report" --path "$path"
================================================
FILE: credsweeper/__init__.py
================================================
from credsweeper.app import CredSweeper
from credsweeper.common.constants import ThresholdPreset, Severity, Confidence
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
from credsweeper.file_handler.content_provider import ContentProvider
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.ml_model.ml_validator import MlValidator
# Names exported by ``from credsweeper import *``; every entry is imported above
# except "__version__", which is assigned right after the list.
__all__ = [
    "ByteContentProvider", #
    "Confidence", #
    "ContentProvider", #
    "CredSweeper", #
    "DataContentProvider", #
    "DiffContentProvider", #
    "MlValidator", #
    "Severity", #
    "StringContentProvider", #
    "TextContentProvider", #
    "ThresholdPreset", #
    "__version__"
]
# Version string of the package
__version__ = "1.15.7"
================================================
FILE: credsweeper/__main__.py
================================================
import sys
from credsweeper.main import main
# Executed for ``python -m credsweeper``: the value returned by main() is passed to sys.exit()
if __name__ == "__main__":
    sys.exit(main())
================================================
FILE: credsweeper/app.py
================================================
import json
import logging
import multiprocessing
import signal
from pathlib import Path
from typing import Any, List, Optional, Union, Dict, Sequence, Tuple
import pandas as pd
from colorama import Style
# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
APP_PATH = Path(__file__).resolve().parent
from credsweeper.scanner.scanner import Scanner
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
from credsweeper.config.config import Config
from credsweeper.credentials.candidate import Candidate
from credsweeper.credentials.candidate_key import CandidateKey
from credsweeper.credentials.credential_manager import CredentialManager
from credsweeper.deep_scanner.deep_scanner import DeepScanner
from credsweeper.file_handler.content_provider import ContentProvider
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
from credsweeper.file_handler.abstract_provider import AbstractProvider
from credsweeper.ml_model.ml_validator import MlValidator
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class CredSweeper:
"""Advanced credential analyzer base class.
Parameters:
credential_manager: CredSweeper credential manager object
scanner: CredSweeper scanner object
pool_count: number of pools used to run multiprocessing scanning
config: dictionary variable, stores analyzer features
json_filename: string variable, credential candidates export filename
"""
def __init__(self,
             rule_path: Union[None, str, Path] = None,
             config_path: Optional[str] = None,
             json_filename: Union[None, str, Path] = None,
             xlsx_filename: Union[None, str, Path] = None,
             stdout: bool = False,
             color: bool = False,
             hashed: bool = False,
             subtext: bool = False,
             sort_output: bool = False,
             use_filters: bool = True,
             pool_count: int = 1,
             ml_batch_size: Optional[int] = None,
             ml_threshold: Union[int, float, ThresholdPreset] = ThresholdPreset.medium,
             ml_config: Union[None, str, Path] = None,
             ml_model: Union[None, str, Path] = None,
             ml_providers: Optional[str] = None,
             find_by_ext: bool = False,
             pedantic: bool = False,
             depth: int = 0,
             doc: bool = False,
             severity: Union[Severity, str] = Severity.INFO,
             size_limit: Optional[str] = None,
             exclude_lines: Optional[List[str]] = None,
             exclude_values: Optional[List[str]] = None,
             thrifty: bool = False,
             log_level: Optional[str] = None) -> None:
    """Initialize Advanced credential scanner.

    Args:
        rule_path: optional str or Path, path of rule config file; handed over to Scanner as is
        config_path: optional str variable, path of CredSweeper config file
            default built-in config is used if None
        json_filename: optional string variable, path to save result to json
        xlsx_filename: optional string variable, path to save result to xlsx
        stdout: print results to stdout
        color: print concise results to stdout with colorization
        hashed: use hash of line, value and variable instead plain text
        subtext: use subtext of line near variable-value like it performed in ML
        sort_output: boolean, stored as ``self.sort_output``
            (presumably requests sorted results in the reports - TODO confirm)
        use_filters: boolean variable, specifying the need of rule filters
        pool_count: int value, number of parallel processes to use; values below 1 are raised to 1
        ml_batch_size: int value, size of the batch for model inference; 16 is used if None or not positive
        ml_threshold: float or string value to specify threshold for the ml model
        ml_config: str or Path to set custom config of ml model
        ml_model: str or Path to set custom ml model
        ml_providers: str - comma separated list with providers
        find_by_ext: boolean - files will be reported by extension
        pedantic: boolean - scan all files
        depth: int - how deep container files will be scanned
        doc: boolean - document-specific scanning
        severity: Severity - minimum severity level of rule
        size_limit: optional string integer or human-readable format to skip oversize files
        exclude_lines: lines to omit in scan. Will be added to the lines already in config
        exclude_values: values to omit in scan. Will be added to the values already in config
        thrifty: free provider resources after scan to reduce memory consumption
        log_level: str - level for pool initializer according logging levels (UPPERCASE)

    Raises:
        RuntimeError: when ``Severity.get`` does not recognize the given severity

    """
    # at least one process is always used
    self.pool_count: int = max(1, int(pool_count))
    if not (_severity := Severity.get(severity)):
        raise RuntimeError(f"Severity level provided: {severity}"
                           f" -- must be one of: {' | '.join([i.value for i in Severity])}")
    # the config must be ready first: scanner and deep scanner are built from it
    config_dict = self._get_config_dict(config_path=config_path,
                                        use_filters=use_filters,
                                        find_by_ext=find_by_ext,
                                        pedantic=pedantic,
                                        depth=depth,
                                        doc=doc,
                                        severity=_severity,
                                        size_limit=size_limit,
                                        exclude_lines=exclude_lines,
                                        exclude_values=exclude_values)
    self.config = Config(config_dict)
    self.scanner = Scanner(self.config, rule_path)
    self.deep_scanner = DeepScanner(self.config, self.scanner)
    self.credential_manager = CredentialManager()
    self.json_filename: Union[None, str, Path] = json_filename
    self.xlsx_filename: Union[None, str, Path] = xlsx_filename
    self.stdout = stdout
    self.color = color
    self.hashed = hashed
    self.subtext = subtext
    self.sort_output = sort_output
    # None, zero and negative batch sizes fall back to 16
    self.ml_batch_size = ml_batch_size if ml_batch_size and 0 < ml_batch_size else 16
    self.ml_threshold = ml_threshold
    self.ml_config = ml_config
    self.ml_model = ml_model
    self.ml_providers = ml_providers
    self.__thrifty = thrifty
    self.__log_level = log_level
    # the validator is created on first access of the ``ml_validator`` property
    self.__ml_validator: Optional[MlValidator] = None
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@staticmethod
def _get_config_path(config_path: Optional[str]) -> Path:
    """Resolve the path of the CredSweeper config file.

    Args:
        config_path: custom config location; an empty value selects the built-in config

    Return:
        Path of the custom config or of "secret/config.json" inside the application directory

    """
    return Path(config_path) if config_path else APP_PATH / "secret" / "config.json"
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def _get_config_dict(self,
                     config_path: Optional[str],
                     use_filters: bool,
                     find_by_ext: bool,
                     pedantic: bool,
                     depth: int,
                     doc: bool,
                     severity: Severity,
                     size_limit: Optional[str],
                     exclude_lines: Optional[List[str]],
                     exclude_values: Optional[List[str]]) -> Dict[str, Any]:
    """Load the config file and overlay it with the options given to the constructor.

    Args:
        config_path: custom config location or None for the built-in one
        use_filters: value for the "use_filters" key
        find_by_ext: value for the "find_by_ext" key
        pedantic: value for the "pedantic" key
        depth: value for the "depth" key
        doc: value for the "doc" key
        severity: its ``value`` is stored under the "severity" key
        size_limit: value for the "size_limit" key
        exclude_lines: appended to config["exclude"]["lines"] unless None
        exclude_values: appended to config["exclude"]["values"] unless None

    Return:
        the loaded config dictionary with the overrides applied

    """
    settings = Util.json_load(self._get_config_path(config_path))
    settings.update({
        "use_filters": use_filters,
        "find_by_ext": find_by_ext,
        "size_limit": size_limit,
        "pedantic": pedantic,
        "depth": depth,
        "doc": doc,
        "severity": severity.value,
    })
    # the additional exclusions extend what the config already has
    for section, additions in (("lines", exclude_lines), ("values", exclude_values)):
        if additions is not None:
            settings["exclude"][section] = settings["exclude"].get(section, []) + additions
    return settings  # type: ignore
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def _use_ml_validation(self) -> bool:
    """Decide whether the ML validation has to be performed for the found candidates.

    Return:
        False when the threshold is the integer 0, when there are no candidates
        or when none of them has ``use_ml`` set; True otherwise

    """
    threshold = self.ml_threshold
    if isinstance(threshold, int) and threshold == 0:
        logger.info("ML validation is disabled")
        return False
    if not self.credential_manager.candidates:
        logger.info("Skip ML validation because no candidates were found")
        return False
    # explicit loop instead of any() - stop at the first candidate which needs ML
    for candidate in self.credential_manager.candidates:
        if candidate.use_ml:
            return True
    logger.info("Skip ML validation because no candidates support it")
    return False
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@property
def ml_validator(self) -> MlValidator:
    """MlValidator of the instance, created from the ml_* settings on first access.

    Raises:
        RuntimeError: when the validator is still not available after the creation

    """
    if self.__ml_validator:
        return self.__ml_validator
    self.__ml_validator = MlValidator(threshold=self.ml_threshold,
                                      ml_config=self.ml_config,
                                      ml_model=self.ml_model,
                                      ml_providers=self.ml_providers)
    if self.__ml_validator:
        return self.__ml_validator
    raise RuntimeError("MlValidator was not initialized!")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@staticmethod
def pool_initializer(log_kwargs: Dict[str, Any]) -> None:
    """Configure logging and ignore SIGINT in child processes.

    Args:
        log_kwargs: keyword arguments which are forwarded to logging.basicConfig

    """
    logging.basicConfig(**log_kwargs)
    # SIG_IGN: the process which runs this initializer does not react on SIGINT
    signal.signal(signal.SIGINT, signal.SIG_IGN)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def run(self, content_provider: AbstractProvider) -> int:
    """Run an analysis of 'content_provider' object.

    Args:
        content_provider: provider of the targets to scan;
            a falsy value (e.g. None) is treated as "nothing to scan"

    Return:
        number of credentials reported by the credential manager,
        0 when there is nothing to scan

    """
    _empty_list: Sequence[ContentProvider] = []
    file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
    if not file_extractors:
        # a missing provider has no ``paths`` to count - do not dereference it
        paths_count = len(content_provider.paths) if content_provider else 0
        logger.info("No scannable targets for %s paths", paths_count)
        return 0
    self.scan(file_extractors)
    self.post_processing()
    # PatchesProvider has the attribute. Circular import error appears with using the isinstance
    change_type = getattr(content_provider, "change_type", None)
    self.export_results(change_type)
    return self.credential_manager.len_credentials()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def scan(self, content_providers: Sequence[ContentProvider]) -> None:
    """Scan all items of "content_providers" with one job or with a process pool.

    The pool is used only when more than one job is configured
    and more than one provider has to be scanned.

    Args:
        content_providers: file objects to scan
    """
    use_pool = 1 < self.pool_count and 1 < len(content_providers)
    if not use_pool:
        self.__single_job_scan(content_providers)
        return
    self.__multi_jobs_scan(content_providers)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def __single_job_scan(self, content_providers: Sequence[ContentProvider]) -> None:
    """Scan the providers one by one without a process pool and store the result"""
    providers_total = len(content_providers)
    logger.info("Scan for %s providers", providers_total)
    # the found candidates replace the current content of the credential manager
    self.credential_manager.set_credentials(self.files_scan(content_providers))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def __multi_jobs_scan(self, content_providers: Sequence[ContentProvider]) -> None:
    """Performs scan with multiple jobs

    The providers are split into interleaved slices (one slice per process) and each slice
    is handled by 'files_scan' in a process of a "spawn" multiprocessing pool.
    Found candidates are added to the credential manager as soon as a slice is completed.

    Args:
        content_providers: file objects to scan

    Raises:
        KeyboardInterrupt: re-raised after the pool was terminated and joined
    """
    # use this separation to satisfy YAPF formatter
    yapfix = "%(asctime)s | %(levelname)s | %(processName)s:%(threadName)s | %(filename)s:%(lineno)s | %(message)s"
    # the arguments are passed to logging.basicConfig in each pool process (see pool_initializer)
    log_kwargs = {"format": yapfix}
    if isinstance(self.__log_level, str):
        # is not None
        if "SILENCE" == self.__log_level:
            # register the custom level name for the numeric level 60
            logging.addLevelName(60, "SILENCE")
        log_kwargs["level"] = self.__log_level
    # do not start more processes than providers to scan
    pool_count = min(self.pool_count, len(content_providers))
    logger.info("Scan in %s processes for %s providers", pool_count, len(content_providers))
    with multiprocessing.get_context("spawn").Pool(processes=pool_count,
                                                   initializer=CredSweeper.pool_initializer,
                                                   initargs=(log_kwargs,)) as pool:  # yapf: disable
        try:
            # slice x takes providers x, x + pool_count, x + 2 * pool_count, ...
            # results arrive in completion order, not in submission order
            for scan_results in pool.imap_unordered(self.files_scan,
                                                    (content_providers[x::pool_count] for x in range(pool_count))):
                for cred in scan_results:
                    self.credential_manager.add_credential(cred)
        except KeyboardInterrupt:
            # stop the workers immediately and let the caller handle the interruption
            pool.terminate()
            pool.join()
            raise
        # normal completion: no more tasks, wait for the workers to exit
        pool.close()
        pool.join()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
    """Scan a sequence of providers and collect the candidates of all of them.

    Args:
        content_providers: file objects to scan

    Return:
        candidates of all providers in the order of scanning
    """
    collected: List[Candidate] = []
    for content_provider in content_providers:
        found = self.file_scan(content_provider)
        if self.__thrifty:
            # the provider is asked to free its data right after the scan
            content_provider.free()
        collected += found
    logger.info("Completed: processed %s providers with %s candidates", len(content_providers), len(collected))
    return collected
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
    """Scan a single content provider.

    Args:
        content_provider: content provider object to scan

    Return:
        list of credential candidates from the scanned provider;
        the list is empty when the file type is in 'exclude_containers' and deep scan is off
    """
    logger.debug("Start scan file: %s %s", content_provider.file_path, content_provider.info)
    if FilePathExtractor.is_find_by_ext_file(self.config, content_provider.file_type):
        # The extension is suspicious: the content is not scanned, a fake candidate is reported instead
        return [
            Candidate.get_dummy_candidate(self.config, content_provider.file_path, content_provider.file_type,
                                          content_provider.info, FilePathExtractor.FIND_BY_EXT_RULE)
        ]
    if self.config.depth or self.config.doc:
        # deep scan with possible data representation
        return self.deep_scanner.scan(content_provider, self.config.depth, self.config.size_limit)
    if content_provider.file_type in self.config.exclude_containers:
        # excluded containers are skipped by the regular scan
        return []
    # Regular file scanning
    return self.scanner.scan(content_provider)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def post_processing(self) -> None:
    """Purge duplicates and apply ML validation to the collected candidates.

    Groups without any candidate which requires ML are kept as is.
    In a validated group the candidates which require ML are kept only when the group
    is recognized as a credential; other candidates of the group are always kept.
    """
    purged = self.credential_manager.purge_duplicates()
    if purged:
        logger.info("Purged %s duplicates", purged)
    if not self._use_ml_validation():
        return
    logger.info("Grouping %s candidates", len(self.credential_manager.candidates))
    accepted: List[Candidate] = []
    ml_cred_groups: List[Tuple[CandidateKey, List[Candidate]]] = []
    for group_key, members in self.credential_manager.group_credentials().items():
        if any(member.use_ml for member in members):
            # the whole group is analyzed with ML if any candidate requires it
            ml_cred_groups.append((group_key, members))
        else:
            accepted.extend(members)
    if not ml_cred_groups:
        # the validator is not even created in this case
        logger.info("Skipping ML validation due not applicable")
    else:
        logger.info("Run ML Validation for %s groups", len(ml_cred_groups))
        is_cred, probability = self.ml_validator.validate_groups(ml_cred_groups, self.ml_batch_size)
        for index, (_, members) in enumerate(ml_cred_groups):
            for member in members:
                if not member.use_ml:
                    accepted.append(member)
                elif is_cred[index]:
                    member.ml_probability = probability[index]
                    accepted.append(member)
    self.credential_manager.set_credentials(accepted)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def export_results(self, change_type: Optional[DiffRowType] = None) -> None:
    """Export the credentials to every configured output.

    The outputs are independent: json file, xlsx file, colored console output and plain console output.

    Args:
        change_type: flag to know which file (json) or sheet (xlsx) should be created for a patch
    """
    credentials = self.credential_manager.get_credentials()
    logger.info("Exporting %s credentials", len(credentials))
    for_patch = isinstance(change_type, DiffRowType)

    if self.sort_output:

        def _sort_key(credential):
            first_line = credential.line_data_list[0]
            return (
                first_line.path,  #
                first_line.line_num,  #
                credential.severity,  #
                credential.rule_name,  #
                first_line.value_start,  #
                first_line.value_end  #
            )

        credentials.sort(key=_sort_key)

    if self.json_filename:
        json_path = Path(self.json_filename)
        if for_patch:
            # add suffix for appropriated reports to create two files for the patch scan
            json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
        with open(json_path, 'w', encoding=DEFAULT_ENCODING) as report:
            # items are dumped one by one to reduce total memory usage in case of huge data
            report.write('[\n')
            for index, credential in enumerate(credentials):
                if 0 < index:
                    report.write(",\n")
                report.write(json.dumps(credential.to_json(hashed=self.hashed, subtext=self.subtext), indent=4))
            report.write("\n]")

    if self.xlsx_filename:
        rows = [
            row  #
            for credential in credentials  #
            for row in credential.to_dict_list(hashed=self.hashed, subtext=self.subtext)
        ]
        df = pd.DataFrame(data=rows)
        if not for_patch:
            df.to_excel(self.xlsx_filename, sheet_name="report", index=False)
        elif Path(self.xlsx_filename).exists():
            # the file exists already: only the sheet of the change type is replaced
            with pd.ExcelWriter(self.xlsx_filename, mode='a', engine="openpyxl",
                                if_sheet_exists="replace") as writer:
                df.to_excel(writer, sheet_name=change_type.value, index=False)
        else:
            df.to_excel(self.xlsx_filename, sheet_name=change_type.value, index=False)

    if self.color:
        for credential in credentials:
            probability = credential.ml_probability
            ml_probability_info = f" {probability:.6f}" if isinstance(probability, float) else ""
            for line_data in credential.line_data_list:
                # bright rule name and path or info
                print(Style.BRIGHT + credential.rule_name +
                      f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
                      Style.RESET_ALL)
                print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

    if self.stdout:
        for credential in credentials:
            print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
================================================
FILE: credsweeper/common/__init__.py
================================================
from credsweeper.common.keyword_checklist import KeywordChecklist
# Shared module-level instance, created once when the package is imported.
# A plain variable is used instead of a singleton class to make testing easier.
static_keyword_checklist = KeywordChecklist()
================================================
FILE: credsweeper/common/constants.py
================================================
import string
import typing
from enum import Enum
from typing import Optional, Union
class Severity(Enum):
    """Severity of candidate

    Members are ordered: INFO < LOW < MEDIUM < HIGH < CRITICAL.
    """
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"

    def _rank(self) -> int:
        """Position of the member on the severity scale: 0 for INFO, 4 for CRITICAL"""
        return ("info", "low", "medium", "high", "critical").index(self.value)

    def __lt__(self, other) -> bool:
        """Compare two severities by their rank.

        NotImplemented is returned for an operand of another type,
        so Python raises TypeError instead of producing a meaningless boolean.
        """
        if not isinstance(other, Severity):
            return NotImplemented
        return self._rank() < other._rank()

    @staticmethod
    def get(severity: Union[str, "Severity"]) -> Optional["Severity"]:
        """returns Severity value from string or None

        Args:
            severity: a member (returned as is) or a member name, case-insensitive, surrounding spaces are ignored

        Return:
            the matching member or None when the name is unknown or the argument type is not supported
        """
        if isinstance(severity, Severity):
            return severity
        if isinstance(severity, str):
            return Severity.__members__.get(severity.strip().upper())
        return None
class Confidence(Enum):
    """Confidence of candidate

    Members are ordered: WEAK < MODERATE < STRONG.
    """
    STRONG = "strong"
    MODERATE = "moderate"
    WEAK = "weak"

    def _rank(self) -> int:
        """Position of the member on the confidence scale: 0 for WEAK, 2 for STRONG"""
        return ("weak", "moderate", "strong").index(self.value)

    def __lt__(self, other) -> bool:
        """Compare two confidences by their rank.

        NotImplemented is returned for an operand of another type,
        so Python raises TypeError instead of producing a meaningless boolean.
        """
        if not isinstance(other, Confidence):
            return NotImplemented
        return self._rank() < other._rank()

    @staticmethod
    def get(confidence: Union[str, "Confidence"]) -> Optional["Confidence"]:
        """returns Confidence value from string or None

        Args:
            confidence: a member (returned as is) or a member name, case-insensitive, surrounding spaces are ignored

        Return:
            the matching member or None when the name is unknown or the argument type is not supported
        """
        if isinstance(confidence, Confidence):
            return confidence
        if isinstance(confidence, str):
            return Confidence.__members__.get(confidence.strip().upper())
        return None
# the 62 alphanumeric characters shared by all Base64 sets of Chars below: upper case, lower case, digits
BASE64COMMON = string.ascii_uppercase + string.ascii_lowercase + string.digits
class Chars(Enum):
    """Character sets of encoding alphabets. The value of each member is a string with the allowed characters."""
    # hexadecimal numeral system (Base16): digits with upper- and lowercase letters
    HEX_CHARS = string.digits + "ABCDEFabcdef"
    # UUID charset in uppercase: hexadecimal digits and the dash
    UUID_UPPER_CHARS = string.digits + "ABCDEF-"
    # UUID charset in lowercase: hexadecimal digits and the dash
    UUID_LOWER_CHARS = string.digits + "abcdef-"
    # hexadecimal numeral system (Base16), uppercase only
    BASE16UPPER = string.digits + "ABCDEF"
    # hexadecimal numeral system (Base16), lowercase only
    BASE16LOWER = string.digits + "abcdef"
    # set of 32 characters, used in Base32 encoding
    BASE32_CHARS = string.ascii_uppercase + "234567"
    # set of 36 characters, used in Base36 encoding
    BASE36_CHARS = string.digits + string.ascii_lowercase
    # base62 set https://en.wikipedia.org/wiki/Base62
    BASE62_CHARS = string.digits + string.ascii_uppercase + string.ascii_lowercase
    # URL- and filename-safe Base64 alphabet
    BASE64URL_CHARS = BASE64COMMON + "-_"
    # URL- and filename-safe Base64 alphabet plus the padding sign
    BASE64URLPAD_CHARS = BASE64COMMON + "-_="
    # standard Base64 alphabet
    BASE64STD_CHARS = BASE64COMMON + "+/"
    # standard Base64 alphabet plus the padding sign
    BASE64STDPAD_CHARS = BASE64COMMON + "+/="
    # digits, letters and punctuation - everything printable except whitespaces
    ASCII_VISIBLE = string.digits + string.ascii_letters + string.punctuation
    # all printable symbols including whitespaces
    ASCII_PRINTABLE = string.printable
class GroupType(Enum):
    """Group type - used in Group constructor to load a predefined set of filters"""
    KEYWORD = "keyword"
    PATTERN = "pattern"
    # for empty filter set
    DEFAULT = "default"
class RuleType(Enum):
    """Rule type - the way how the patterns of a rule are applied"""
    # combine pattern with predefined structure
    KEYWORD = "keyword"
    # use patterns as-is. All patterns must be found in target (line)
    PATTERN = "pattern"
    # single value to detect PEM format with a specific scanner
    PEM_KEY = "pem_key"
    # when the first pattern is found - the second will be searched in adjoining lines
    MULTI = "multi"
class ThresholdPreset(Enum):
    """Preset threshold to simplify precision/recall selection for the user."""
    # NOTE: member names are lowercase and equal to their values
    lowest = "lowest"
    low = "low"
    medium = "medium"
    high = "high"
    highest = "highest"
class DiffRowType(Enum):
    """Type of a row in a diff: the row was either added or deleted"""
    ADDED = "added"
    DELETED = "deleted"
# pair of integer positions: start and end
StartEnd = typing.NamedTuple("StartEnd", [("start", int), ("end", int)])
# minimal lengths of variable, separator and value
MIN_VARIABLE_LENGTH = 1
MIN_SEPARATOR_LENGTH = 1
MIN_VALUE_LENGTH = 4
# if a line is longer than this size - it will be scanned by chunks with overlapping
MAX_LINE_LENGTH = 8000
# the size of overlapping chunks must be less than MAX_LINE_LENGTH
CHUNK_SIZE = 4000
OVERLAP_SIZE = 1000
# distance between the starts of two neighbour chunks
CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
# ML hunk size: limits the size of a variable or a value and of the substring taken near the value
ML_HUNK = 64
# codec names according to https://docs.python.org/3/library/codecs.html
UTF_8 = "utf_8"
LATIN_1 = "latin_1"
ASCII = "ascii"
# 16-bit codecs may be detected during decoding
UTF_16_LE = "utf_16_le"
UTF_16_BE = "utf_16_be"
DEFAULT_ENCODING = UTF_8
# LATIN_1 has to be placed at the end to apply binary file detection
AVAILABLE_ENCODINGS = [UTF_8, LATIN_1]
# to limit memory usage in case of recursive scan (1 << 30 == 1073741824)
RECURSIVE_SCAN_LIMITATION = 1 << 30
# default value for config and ValuePatternCheck
DEFAULT_PATTERN_LEN = 4
# PEM x509 patterns
PEM_BEGIN_PATTERN = "-----BEGIN"
PEM_END_PATTERN = "-----END"
# similar to min_line_len in rule_template - there is no real credential in data shorter than 8 bytes
MIN_DATA_LEN = 8
================================================
FILE: credsweeper/common/keyword_checklist.py
================================================
from functools import cached_property
from typing import Set, List
from credsweeper.app import APP_PATH
class KeywordChecklist:
    """Word lists loaded from the text files which are placed near the module.

    KEYWORD_PATH holds keywords (words of 3 or more letters length),
    MORPHEME_PATH holds morphemes - the parts which can be combined to form words.
    Both files are read as whitespace separated items.
    """
    __keyword_list: List[str]
    __keyword_set: Set[str]
    __morpheme_set: Set[str]
    KEYWORD_PATH = APP_PATH / "common" / "keyword_checklist.txt"
    MORPHEME_PATH = APP_PATH / "common" / "morpheme_checklist.txt"

    def __init__(self) -> None:
        # used suggested text read style. split() is preferred because it strips 0x0A on end the file
        keywords = self.KEYWORD_PATH.read_text().split()
        # the file is read only once - the set is built from the same parsed items
        self.__keyword_set = set(keywords)
        keywords.sort(key=str.__len__, reverse=True)
        self.__keyword_list = keywords
        # The list of morphemes can be combined to form words.
        # The value is considered a variable if at least two exist.
        self.__morpheme_set = set(self.MORPHEME_PATH.read_text().split())

    @cached_property
    def keyword_set(self) -> Set[str]:
        """Get set with keywords"""
        return self.__keyword_set

    @cached_property
    def keyword_list(self) -> List[str]:
        """Get list with keywords in descended order of length"""
        return self.__keyword_list

    @cached_property
    def keyword_len(self) -> int:
        """Length of keyword_set"""
        return len(self.__keyword_set)

    @cached_property
    def morpheme_set(self) -> Set[str]:
        """Get set with morphemes"""
        return self.__morpheme_set

    @cached_property
    def morpheme_len(self) -> int:
        """Length of morpheme_set"""
        return len(self.__morpheme_set)

    def check_morphemes(self, line_lower: str, threshold: int) -> bool:
        """Checks whether the line contains more distinct morphemes than the threshold.

        Args:
            line_lower: input line - MUST be in lower
            threshold: number of found morphemes which has to be exceeded

        Return:
            True - if number of morphemes exceeds the threshold

        """
        matches = 0
        for morpheme in self.morpheme_set:
            if morpheme in line_lower:
                matches += 1
                if threshold < matches:
                    # enough morphemes were found - the rest of the set is not checked
                    return True
        return False
================================================
FILE: credsweeper/common/keyword_checklist.txt
================================================
1234
abort
about
above
absolute
abstract
accent
accept
access
account
action
active
activity
actor
actual
added
adding
additional
address
adjust
advise
after
again
agent
alert
alias
algori
allow
alpha
already
always
amount
analyses
analyze
anchor
android
animated
animation
another
anony
apache
api
appearance
apple
application
apply
are
argc
args
argv
argument
array
arrow
article
ascii
aside
assembly
asset
assert
assign
associated
association
atomic
attachment
attribute
audio
author
authen
automatically
available
avatar
avoid
await
awesome
aws
backdrop
background
backward
badge
banner
based
basic
beans
because
before
begin
behind
being
below
between
beware
binary
binding
binds
blah
black
blank
bless
block
boost
bool
border
bottle
bottom
bound
brain
branch
brand
break
breeze
brief
broker
browse
buffer
build
bundle
button
byte
cache
calendar
callback
called
caller
calls
camel
cancel
cannot
canvas
capacity
capab
carat
carousel
cascade
cases
catalog
catch
categories
category
cause
center
certificate
chain
change
channel
chapter
character
chart
check
chevron
child
choices
chomp
choose
chosen
chrome
chunk
circle
clang
class
clean
clear
click
client
clock
clone
close
closure
cloud
cocoa
coding
collapse
collect
color
column
command
comment
commit
common
compact
compare
compilation
complete
completion
component
components
compute
condensed
condition
config
confirm
connect
consists
console
constant
constraints
consumer
contact
contain
content
context
continue
control
convenience
convert
copy
cookie
coordinator
corner
correct
could
count
course
cover
create
creature
credential
cron
criteria
croak
cross
cubic
curl
current
custom
danger
darken
dashboard
dashed
data
declaration
declared
decod
decoration
default
deferred
define
definition
delay
delegate
delete
delivery
delta
demo
dependency
dependent
depth
describe
description
designer
desktop
destination
destroy
detail
development
device
devise
diagnostic
dictionary
different
digest
direct
disable
dismiss
dispatch
display
disposable
dispose
disposing
distance
distribute
distribution
doctrine
document
domain
dotted
double
download
draft
driver
dumps
duration
during
dword
dynamic
easing
eclipse
editing
editor
effect
either
elastic
element
email
empty
enable
encod
encrypt
engine
enrollment
ensure
entity
entries
entry
environment
equal
equals
erase
error
event
example
except
exclude
execute
exist
expand
expect
explode
expir
export
exposed
expression
extend
extension
external
extra
faces
factory
failed
failure
false
family
feature
federate
feedback
fetch
field
figure
file
files
filename
filter
finagle
final
finish
first
fixed
fixture
flags
flash
float
floor
fluid
flush
focus
folder
follow
footer
force
format
forms
formula
forum
forward
found
fragment
frame
freeze
friend
fulfill
function
furnished
future
gallery
gateway
generate
generator
generic
geometry
getter
get(
given
github
gitlab
global
graphics
green
group
grunt
guard
handle
header
heading
height
hello
helper
hidden
highlight
history
holders
hooks
horizontal
hours
hover
http
html
icons
ignore
image
immediately
immutable
implemented
import
include
index
indicator
inference
infinite
info
inherit
inherited
initial
inject
inner
input
insert
inside
inspect
install
instance
instead
intent
interaction
intercept
interface
internal
interrupt
intro
invalid
inverted
invoke
isolate
issue
item
iterat
itself
java
justified
justify
key
label
labels
lambda
language
large
launch
layer
layout
leader
least
legend
length
letter
level
library
light
limit
linear
lines
links
linux
listener
little
loaded
loading
loads
local
location
logger
login
logon
loose
lower
machine
makes
manage
mapping
marathon
margin
mark
master
match
material
matrix
maximum
means
measure
media
medium
member
memory
message
meteor
method
methods
metro
middle
might
minus
minutes
missing
mixed
mobile
model
modified
module
moment
month
mount
mouse
multiple
mutating
name
native
navigation
needed
needs
network
neutral
neutron
never
nexus
nodes
none
normal
notes
nothing
notice
notification
null
number
oauth
object
oblique
observe
observer
occurs
offline
offset
often
openssl
operation
operator
option
oracle
orange
orbit
order
orientation
origin
organis
other
outer
outline
overflow
override
overview
owner
package
packet
padding
pager
pages
palette
panel
paper
param
parent
parse
partial
parts
passed
passing
passcode
passphrase
password
patch
paths
pattern
pause
peer
payload
payment
pending
people
percent
perform
performance
persistence
person
perspective
phone
picker
pills
pipeline
pixels
place
placement
plain
platform
player
point
pool
policy
portal
portfolio
position
possible
posts
power
precedence
preference
prefix
preparation
prepare
presence
present
pressed
preview
previous
price
primary
print
priority
private
problem
process
produce
product
profile
program
progress
project
promise
properties
property
props
protected
protocol
prototype
provide
proxy
public
publish
purchase
purple
queri
query
question
queue
radio
radius
rails
raise
raises
random
range
react
reader
readonly
readme
ready
really
realm
reason
reboot
receive
recommended
record
recreated
redirect
reference
reflect
refresh
regenerated
region
regist
reject
related
relation
relative
release
reload
remarks
remote
remove
render
repeat
replace
replica
reply
report
repository
representing
request
requests
require
rescue
reserved
reset
resolution
resolve
resource
response
responsible
responsive
restart
restriction
result
resume
retain
return
reveal
reverse
right
ripple
roles
rotate
round
route
rudder
rules
runner
running
sample
scale
scanner
scene
scenario
scope
score
screen
script
scroll
sealed
search
second
secret
section
secure
security
segue
select
sender
sending
sequel
sequence
series
serial
server
service
session
setting
setter
setup
sha256
sha1
sha2
sha224
sha512
shadow
shallow
shape
share
shift
short
should
showing
shown
shutdown
sidebar
signature
sign
similar
simple
since
single
sites
size
sizing
sleep
slice
slick
slide
small
smart
snapshot
social
socket
solid
sorted
source
space
spaces
spacing
spark
speak
special
specific
specified
specify
specs
speed
spell
spinner
split
spray
square
stack
start
stash
state
static
stats
status
steps
sticky
storage
store
strategy
stream
stretch
strict
string
strip
stroke
strong
struct
stubs
student
stuff
style
subject
submit
subscriptions
subtitle
success
suite
summary
super
support
swift
swing
switch
symbol
synchronized
synthesize
system
table
tablet
target
tasks
teacher
team
temp
terms
test
texture
their
theme
there
these
thick
those
thread
three
thrift
through
throw
thrown
throws
thumb
thumbs
ticket
timeline
timer
times
timing
title
today
token
tools
topic
total
touch
trace
track
trait
trans
tagword
triangle
trigger
true
trust
trying
tween
type
typically
uint
unavailable
under
uniform
union
unique
universe
unknown
unless
unlock
unsigned
unstable
until
update
upload
used
username
using
usually
valid
value
variable
variant
vector
verbose
verify
version
vertical
video
views
virtual
visibility
visible
visit
volatile
void
volume
wallet
warning
watch
waves
weight
whatever
where
whether
which
while
white
width
window
with
within
without
world
would
wrapper
write
written
xxxxx
yellow
yield
your
zeros
.json
.xml
================================================
FILE: credsweeper/common/keyword_pattern.py
================================================
import re
class KeywordPattern:
    """Pattern set of keyword types

    The class attributes are fragments of one regular expression. They are not valid patterns alone:
    some groups are opened in one fragment and closed in another, so the fragments
    have to be joined in the order which is used in get_keyword_pattern.
    """
    # optional prefix: "#define", "%define", "define(", "%global" or "set"
    directive = r"(?P<directive>(?:" \
                r"(?:[#%]define|define(?=(\s|\\{1,8}[tnr])*\()|%global)" \
                r"(?:\s?\(|\s|\\{1,8}[tnr]){1,8}|\bset(?=\b|\w*(\s|\\{1,8}[tnr])*\()" \
                r"))?"
    # leading escaped or real whitespaces, then the begin of <variable>: optional quotes and symbols before keyword
    key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
               r"(?P<variable>(([\"'`]{1,8}[^:=\"'`}<>\\/&?]*|[^:=\"'`}<>\s()\\/&?;,%]*)"
    # keyword will be inserted here
    # up to 80 symbols after keyword, then optional closing quotes (plain, HTML entity or escaped byte)
    key_right = r"[^%:=\"'`<>({?!&;\n]{0,80}" \
                r")" \
                r"(&(quot|apos|#3[49]);|(\\\\*u00|%)[0-9a-f]{2}|[\"'`])*" \
                r")"  # <variable>
    # assignment or comparison sign between variable and value.
    # The arrow "=>" is accepted as is, with HTML escaped "&gt;" or with URL/unicode escaped "&" (26gt;)
    separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
                r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
                r"|(?(directive)(,|\\t|\s|\((?!\))){1,80}|%3d))" \
                r"(\s|\\{1,8}[tnr])*"
    # might be curly, square or parenthesis with words before.
    # A member access arrow "->" is accepted as is or HTML escaped ("-&gt;")
    wrap = r"(?P<wrap>(" \
           r"((\s|\\{1,8}[tnr]|new|byte|char|string|\[\]){1,8})?" \
           r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
           r"([0-9a-z_.]|::|-(>|&gt;))*" \
           r"\s*" \
           r"(\[(?!\])|\((?!\))|\{(?!\}))" \
           r"(\s|\\{1,8}[tnr])*" \
           r"(?(get)('[^']{1,31}'|\"[^\"]{1,31}\")\s*(,|\)\s*or)\s*|)" \
           r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
           r"){1,8})?"
    # optional string literal prefix right before a quote; the opened group is closed in left_quote
    string_prefix = r"(((b|r|br|rb|u|t|f|rf|fr|l|@)(?=(\\*[\"'`])))?"
    # 1-4 opening quotes, each may be escaped with backslashes (<esq>) or written as HTML entity
    left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([\"'`]|&(quot|apos|#3[49]);)){1,4}))?"
    # Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential
    auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey|ssws|ntlm|token)\s)?"
    # the value itself: quoted text up to the same quote, unquoted text without delimiters,
    # or text in angle brackets, parentheses or curly brackets
    value = r"(?P<value>" \
            r"(?(value_leftquote)" \
            r"(" \
            r"(?!(?P=value_leftquote))" \
            r"(?(esq)((?!(?P=esq)([\"'`]|&(quot|apos|#3[49]);)).)|((?!(?P=value_leftquote)).)))" \
            r"|" \
            r"(?!&(quot|apos|#3[49]);)" \
            r"(\\{1,8}([ tnr]|[^\s\"'`])" \
            r"|" \
            r"(?P<url_esc>%[0-9a-f]{2})" \
            r"|" \
            r"(?(url_esc)[^\s\"'`,;\\&]|[^\s\"'`,;\\])" \
            r")" \
            r"){4,8000}" \
            r"|" \
            r"(<[^>]{4,8000}>)" \
            r"|" \
            r"(\$?\({1,3}[^)]{4,8000}\){1,3})" \
            r"|" \
            r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
            r"|" \
            r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
            r")"  # <value>
    # closing part: the same quote as the opening one, or a closing bracket when the value was wrapped
    right_quote = r"(?(value_leftquote)" \
                  r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
                  r"|" \
                  r"(?(wrap)(\]|\)|\}|;|\\|$))" \
                  r")"

    @classmethod
    def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
        """Returns compiled regex pattern

        Args:
            keyword: regular expression of the keyword; it is inserted as is (not escaped)
                into the named group "keyword"

        Return:
            pattern compiled with flags IGNORECASE and DOTALL
        """
        expression = ''.join([  #
            cls.directive,  #
            cls.key_left,  #
            fr"(?P<keyword>{keyword})",  # named group required
            cls.key_right,  #
            cls.separator,  #
            cls.wrap,  #
            cls.string_prefix,  #
            cls.left_quote,  #
            cls.auth_keywords,  #
            cls.value,  #
            cls.right_quote,  #
        ])
        return re.compile(expression, flags=re.IGNORECASE | re.DOTALL)
================================================
FILE: credsweeper/common/morpheme_checklist.txt
================================================
../
.com
.org
/bin
/dev
/etc
/lib
/mnt
/opt
/sbin
/srv
/tmp
/usr
/var
000
111
14159265
18284590
222
333
444
555
65358979
666
71828182
777
80211
888
999
_ack_
_arg
_cbc
_cfg
_clk
_con
_cpu
_dbg_
_dev
_dir
_div
_dma
_drv_
_env
_err
_eth
_ext
_fig
_fmt_
_ghz
_i2c_
_id_
_if
_in
_io_
_irq
_is
_it
_jpg
_khz
_lan
_led_
_mem
_mhz
_mux
_num
_on
_op_
_or_
_pcm_
_pin
_pre
_pro
_pwr
_ram
_reg
_req
_ret
_rev
_rgb
_rsa_
_rw_
_rx_
_sdr_
_src
_to
_tx_
_un
_up
_val
_vol
_wap
_wep
_wpa
_x64
abel
abilit
able
ably
abort
above
absolut
abstra
academ
acce
acon
activ
actor
actual
actur
adapt
add
ader
adjust
admi
adver
advise
advisor
aes256
affect
after
aggre
agno
aight
aign
akeup
alert
algo
alias
alice
align
aling
all
alpha
alter
altit
amazon
ample
anali
analy
ance
anchor
anci
ancy
and
anguage
angular
anima
anomaly
antenna
anth
anti
any
apache
api
app
aram
arch
are
arg_
argc
args
argv
arian
arker
arpa
array
arro
art
ascii
ash
asia
asic
ask
assembl
assert
assoc
asure
asyn
ately
athon
atic
atil
ating
atlas
atomic
ator
attach
attack
attend
attr
atus
audio
audit
auri
auten
auth
auto
aux
avail
avatar
aver
awesom
axis
azure
back
badge
balanc
bank
bann
bar
bas
batch
batt
beac
beans
beat
beef
begin
behav
behind
being
belo
benutz
best
bias
big
bill
bin/
binar
bind
bio
bipol
bit
bixby
black
blan
bless
blic
blish
blob
blood
blue
board
bob
body
book
bool
boost
boot
boss
bot
boun
box
branch
break
breeze
bridge
brief
brit
bro
bssid
buck
buf
bugs
build
builtin
bular
bulk
bull
bund
burst
bus
butor
button
byte
cache
calen
camel
camp
can
capab
capac
cape
captu
carat
card
carri
carry
cascade
case
cast
catch
categor
cative
cbc_
ccele
ccept
ccess
ceed
celebr
cell
cenar
cense
cent
cert
cessor
cfg_
chacha
chain
change
channel
chant
chapter
char
check
chevron
child
chin
chip
choices
chomp
choose
chosen
chrom
chron
chunk
ciat
cilla
cinema
circle
cirrus
city
cket
claims
clan
class
clean
clear
click
clien
clip
clk_
close
closure
cloud
clud
clus
cmd
cocoa
code
codi
cogn
collaps
collect
color
column
comb
comi
comm
compa
compet
compil
compl
compo
compr
conc
conden
conf
connect
consist
console
const
contact
contai
conten
continu
contra
contri
contro
conven
conver
cookie
coord
copy
core
corn
correct
correl
corres
corru
cost
could
count
course
court
cove
cpu_
crac
creat
cred
cript
crit
croak
cron
cross
crypt
crystal
ctive
ctrl
cubic
cue
cultur
cumulat
curr
curs
custom
cut
cyan
cycle
daily
danger
darken
darwin
das
data
date
davinci
day
dead
debug
decimal
decod
def
delay
dele
deliv
delta
demo
denc
dens
dent
depen
deploy
depo
depth
derive
desc
desired
desktop
dest
detach
detai
detect
dev/
dev_
develop
device
devise
diag
dial
dicat
dict
did
dif
dig
dimen
ding
diod
dir_
direct
disab
disc
disk
dismi
dispos
dissoc
dist
ditor
dity
div_
divid
dma_
dock
docs
doct
does
dog
dot
double
doubt
draft
dragon
drift
drive
droid
drop
dul
dummy
dump
dup
durin
dust
dvb
dynamic
dynamo
eadbee
easin
easy
ecdhe
ecdsa
ecret
ected
ector
ectron
eded
edge
edit
edium
eeprom
effect
egory
elect
eless
emai
emi
empty
enabl
ence
enclave
encod
encryp
ency
ende
eness
engine
ength
enhanc
ensure
ente
entit
entr
enum
env_
equal
erase
erial
ericsson
err_
error
erse
ersi
ertise
esam
esses
estima
esult
etc/
eth_
etic
eting
eutron
eval
evan
event
exam
excee
except
exclu
exist
exit
expan
expe
expir
expl
expo
expr
ext_
exten
exter
extra
exynos
face
fact
fail
false
famil
far
fast
fault
favor
featu
fee
ferr
fetch
fied
field
fifo
fig_
figur
file
fill
filter
finagle
final
find
fine
fire
firm
first
fix
flas
flat
fleet
flick
flix
float
flood
floor
fluent
fluid
flush
focus
foo
for
fossil
foun
fpga
frac
frame
free
freq
friend
from
front
frozen
fujitsu
fulf
full
func
furn
futu
gain
game
gang
gate
gative
gauss
gen
geo
gest
get
ghbor
ghz_
gian
ging
git
given
global
gobble
good
google
grab
grace
gram
grant
graph
grave
gray
greater
green
gregat
gregor
gress
grid
gro
grpc
guard
guest
guid
guish
ha1-
ha1_
ha2-
ha256
ha2_
ha394
ha512
hack
half
hard
has
have
having
havior
hdmi
head
health
hear
height
hello
help
herm
heroku
hetero
hex
hiber
hidden
hierar
high
histo
hola
home
hook
horizon
host
houn
hours
html
http
hub
human
humid
hybrid
iabl
ical
icon
id_rsa
iden
idle
ieee
ient
if_
ificat
ignore
illega
ilor
image
imated
imer
impact
imple
improve
in_
inclu
incom
indemni
index
indic
indiv
iness
info
infra
ingle
ings
ingular
inherit
ini
injec
inn
insert
insig
instead
int
inval
invent
inver
invoke
ion
ipv4
ipv6
iron
irq_
is_
ished
iso_
isolat
issue
it_
item
iter
ities
iting
itiv
ivate
ixed
ixtu
ixup
ized
izer
jabber
java
ject
jira
jitsi
job
join
journal
jpeg
jpg_
json
jump
justif
kafka
kerberos
kernel
key
khz_
kill
kind
kinesis
kirk
know
knox
kris
lab
lag
lambda
lan_
lang
large
larval
last
late
latit
lative
launch
layer
lazy
lead
leaf
least
leek
left
legacy
legal
lend
leng
lens
let
level
lexeme
lexic
lianc
liant
lib/
library
licens
lies
life
lift
light
lim
line
lingu
link
linux
list
lite
little
lity
live
lled
llup
lness
load
local
lock
log
long
look
loop
loose
lost
low
luate
lysis
mac
magic
mail
main
maker
makes
manage
manual
manuf
map
marat
margin
mark
mary
master
match
mater
matrix
max
mber
mbin
mbler
mean
measur
medi
medus
meet
mem_
memb
memo
ment
menu
merc
merge
messag
meta
meteor
method
metr
mhz_
micha
micro
middle
might
migrat
millis
min
mirror
misc
miss
mit
mix
mmon
mmun
mnt/
mobile
mock
mode
modi
modu
monitor
month
morp
mory
mote
motor
mount
move
mpeg
multi
mutat
mute
mux_
nalyz
name
nary
nates
nativ
nced
ncept
ncies
ndom
ndow
ned
need
neigh
neo4j
ner
net
neutr
never
new
next
nexus
nielsen
ning
nipp
nish
nism
node
non
nope
norm
not
nsive
ntal
nter
nting
null
num_
numb
numer
nuous
nvram
obj
oblique
occur
ocean
ocess
oder
off
often
oken
oker
old
olygon
on_
oncat
one
onfig
only
ookup
open
opt/
opted
opti
oracle
orbi
order
ordinar
ores
organ
ories
origin
orithm
ormat
orph
otorola
ottle
ound
ously
out
over
own
pack
page
pair
pale
panel
par
pass
patch
path
patte
paw
pci
pcmcia
peer
penalt
pend
people
per
pets
phore
photon
phrase
phys
pick
pills
ping
pipe
pixel
pkcs1
pkcs8
place
plain
plan
play
plex
plic
plod
plor
plug
plus
poin
polar
polic
poll
poly
pond
pons
pool
poon
pop
port
pose
posit
possib
post
poun
power
pre_
pred
prefi
prese
press
prev
price
prim
princip
prior
priv
pro_
probe
problem
proc
prod
prof
prog
proj
promise
prompt
prop
prote
proto
provi
prox
pseudo
pster
psycho
pub
pull
purcha
purple
push
put
pwr_
python
qos
quantum
queri
query
queue
quick
quota
quote
rabbi
rack
radar
radeon
radio
radius
rage
rails
rain
raise
ram_
rammar
range
rank
ransit
rate
rati
raw
rcept
rchite
rchive
reached
react
read
real
reason
receive
recipe
recog
recom
record
redact
redir
redisson
refer
reflect
refresh
reg_
regexp
regio
regist
regs
regul
rejec
relat
release
reli
remar
remo
rend
rent
repeat
repl
repo
repre
req_
request
require
resiz
resolv
resp
resul
ret_
retai
retriev
return
rev_
revea
revel
reven
rever
revisio
revoke
rgb_
rick
ride
right
rimar
rime
rine
ring
ripple
rish
risk
ritte
rity
river
rize
road
role
roll
room
root
ropo
rose
rotat
rotocol
rottl
rough
roun
roup
row
rroga
rrupt
rticle
rudder
rule
run
rxtx
sabl
sage
salt
same
sampl
sams
saves
savi
scala
scale
scali
scen
sched
schem
scipl
scont
scope
scram
screen
scret
scri
scro
seal
searc
seccomp
second
secre
sect
secur
seed
seek
seen
segue
sein
self
sema
send
sens
sent
seque
seria
series
serv
sessio
set
sever
sex
sha1
sha2
sha3
sha5
shadow
shape
shift
ship
shoot
short
shot
should
show
shut
sian
sible
side
sight
sign
similar
simpl
simul
since
sine
sing
sip
sites
size
sizi
skip
slack
slas
slave
sleep
slice
slick
slide
slot
smar
smooth
snap
sness
sniff
snip
social
sock
soft
solid
solve
some
sony
sort
soun
source
space
spacing
speak
spec
speed
spell
spent
spin
split
spot
spray
sql
src_
srv/
ssh
ssl
stack
stan
star
stas
stat
stdin
steer
stem
sten
step
stic
sting
ston
stop
stor
strai
stream
stren
stretch
strob
stroke
strong
struct
stubs
stude
studio
stuff
style
sub
succee
succes
such
suffi
suite
sum
sun
supe
supp
surro
suspe
swap
swift
swing
switch
swizz
symbol
sync
synth
sys
tabl
tag
tail
tain
tape
tate
tative
teacher
teams
tech
tele
tell
temp
tent
tera
term
ternal
tery
test
text
than
that
the
thick
thing
this
thor
those
threat
three
thrift
thro
thumb
tial
tick
tics
tifier
time
timi
tio
tish
title
titud
tizen
tmp/
to_
tod
toke
tolera
tomcat
too
topic
tory
torial
total
touch
tour
trace
tract
traffic
trait
tral
trans
treat
trial
triang
tribut
tric
tries
trigger
trip
trol
trouble
troy
true
trust
try
tter
tune
tuni
tunnel
ture
tween
twenty
twitt
txrx
txt
type
typo
ultima
under
unfo
unic
unio
unique
unit
univ
unless
unpre
until
unzip
up_
updat
upgrade
url
usa
usb
use
usin
usr/
uster
util
val_
valid
valu
var/
vari
vault
vect
veeva
vendor
verbose
verify
vers
vert
very
video
view
viol
virtual
visibl
visit
visual
vita
vocab
voice
void
vol_
volat
volume
vuln
wait
wake
wan
wap_
ward
warm
warn
watch
wave
way
weak
web
week
weight
well
wep_
when
where
which
while
white
wide
widge
width
will
wind
wire
with
wlan
wood
word
work
world
wort
would
wow
wpa_
wrap
writ
wrong
x64_
xpect
xxx
year
yello
yield
you
zeppelin
zero
zigbee
zing
zona
zorro
================================================
FILE: credsweeper/config/__init__.py
================================================
================================================
FILE: credsweeper/config/config.py
================================================
import re
from typing import Dict, List, Optional, Set, Any
from humanfriendly import parse_size
from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN
from credsweeper.utils.util import Util
class Config:
    """Typed view over the raw configuration dictionary supplied by the user."""

    NOT_ALLOWED_PATH = [
        ".*\\.min\\.js", ".*message.*\\.properties", ".*locale.*\\.properties", ".*makefile.*", ".*package-lock\\.json",
        ".*package\\.json", ".*\\.css", ".*\\.scss"
    ]

    def __init__(self, config: Dict[str, Any]) -> None:
        """Copy every supported option from ``config`` into an attribute.

        Args:
            config: parsed configuration; mandatory options are read with ``[]`` and raise ``KeyError``
                when absent, optional ones ("lines", "values", "severity", "pattern_len") fall back to defaults

        """
        # exclusion rules
        self.exclude_patterns: List[re.Pattern] = list(map(re.compile, config["exclude"]["pattern"]))
        self.exclude_paths: List[str] = config["exclude"]["path"]
        self.exclude_containers: List[str] = config["exclude"]["containers"]
        self.exclude_documents: List[str] = config["exclude"]["documents"]
        self.exclude_extensions: List[str] = config["exclude"]["extension"]
        self.exclude_lines: Set[str] = set(config["exclude"].get("lines", []))
        self.exclude_values: Set[str] = set(config["exclude"].get("values", []))

        # file classification
        self.source_extensions: List[str] = config["source_ext"]
        self.source_quote_ext: List[str] = config["source_quote_ext"]
        self.find_by_ext_list: List[str] = config["find_by_ext_list"]
        self.bruteforce_list: List[str] = config["bruteforce_list"]
        self.check_for_literals: bool = config["check_for_literals"]

        # one case-insensitive regex built from all NOT_ALLOWED_PATH alternatives
        combined_paths = f"{Util.get_regex_combine_or(self.NOT_ALLOWED_PATH)}"
        self.not_allowed_path_pattern = re.compile(combined_paths, flags=re.IGNORECASE)

        # scanning and reporting options
        self.use_filters: bool = config["use_filters"]
        self.line_data_output: List[str] = config["line_data_output"]
        self.candidate_output: List[str] = config["candidate_output"]
        self.find_by_ext: bool = config["find_by_ext"]

        # a human readable size is converted with parse_size; None is kept as is
        raw_size_limit = config["size_limit"]
        self.size_limit: Optional[int] = None if raw_size_limit is None else parse_size(raw_size_limit)

        self.pedantic: bool = bool(config["pedantic"])
        self.depth: int = int(config["depth"])
        self.doc: bool = config["doc"]
        self.severity: Severity = Severity.get(config.get("severity"))
        self.max_url_cred_value_length: int = int(config["max_url_cred_value_length"])
        self.max_password_value_length: int = int(config["max_password_value_length"])

        # surrounding whitespace is removed from the excluded lines and values
        self.exclude_lines = {item.strip() for item in self.exclude_lines}
        self.exclude_values = {item.strip() for item in self.exclude_values}

        self.pattern_len = config.get("pattern_len", DEFAULT_PATTERN_LEN)
================================================
FILE: credsweeper/credentials/__init__.py
================================================
================================================
FILE: credsweeper/credentials/augment_candidates.py
================================================
from typing import List
from credsweeper.credentials.candidate import Candidate
def augment_candidates(candidates: List[Candidate], new_candidates: List[Candidate]):
    """Append to ``candidates`` every new candidate that carries at least one unseen value.

    Args:
        candidates: [IN/OUT] list of candidates, extended in place
        new_candidates: [IN] list with candidates to be checked against the known values

    """
    if not new_candidates:
        return
    # values are collected once, before anything is appended, and are not refreshed afterwards
    known_values = {
        line_data.value  #
        for candidate in candidates  #
        for line_data in candidate.line_data_list
    }
    additions = [
        new_candidate for new_candidate in new_candidates
        if any(line_data.value not in known_values for line_data in new_candidate.line_data_list)
    ]
    candidates.extend(additions)
================================================
FILE: credsweeper/credentials/candidate.py
================================================
import copy
import re
from json.encoder import py_encode_basestring_ascii
from typing import Any, Dict, List, Optional
from credsweeper.common.constants import Severity, Confidence
from credsweeper.config.config import Config
from credsweeper.credentials.line_data import LineData
class Candidate:
    """A possible credential found by a rule.

    Holds the matched LineData objects together with attributes copied from the Rule and the user config.

    Parameters:
        line_data_list: List of LineData
        patterns: Regular expressions that can be used for detection
        rule_name: Name of Rule
        severity: critical/high/medium/low
        confidence: strong/moderate/weak
        config: user configs
        use_ml: Whether the candidate should be validated with ML. If not - ml_probability is set None

    """

    DUMMY_PATTERN = re.compile(r"^")

    def __init__(self,
                 line_data_list: List[LineData],
                 patterns: List[re.Pattern],
                 rule_name: str,
                 severity: Severity,
                 config: Optional[Config] = None,
                 use_ml: bool = False,
                 confidence: Confidence = Confidence.MODERATE) -> None:
        self.rule_name = rule_name
        self.severity = severity
        self.confidence = confidence
        self.patterns = patterns
        self.line_data_list = line_data_list
        self.config = config
        self.use_ml = use_ml
        # None - ML is not applicable or not processed yet; float - the ml decision above ml_threshold
        # Note: -1.0 is possible too for some activation functions in ml model, so let avoid negative values
        self.ml_probability: Optional[float] = None

    def compare(self, other: 'Candidate') -> bool:
        """Tell whether both candidates describe the same final result.

        Rule attributes, ML state and the number of line data must match, then every pair of
        line data is compared with LineData.compare.
        """
        same_header = (self.rule_name == other.rule_name  #
                       and self.severity == other.severity  #
                       and self.confidence == other.confidence  #
                       and self.use_ml == other.use_ml  #
                       and self.ml_probability == other.ml_probability  #
                       and len(self.line_data_list) == len(other.line_data_list))
        if not same_header:
            return False
        return all(mine.compare(theirs) for mine, theirs in zip(self.line_data_list, other.line_data_list))

    @staticmethod
    def _encode(value: Any) -> Any:
        """Return strings as an ASCII-only JSON string literal, any other value unchanged

        Args:
            value: Any type of value to be encoded

        """
        return py_encode_basestring_ascii(value) if isinstance(value, str) else value

    def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
        """Build a one line description; line data honour the subtext and hashed switches"""
        rendered_lines = ', '.join([x.to_str(subtext, hashed) for x in self.line_data_list])
        return (f"rule: {self.rule_name}"
                f" | severity: {self.severity.value}"
                f" | confidence: {self.confidence.value}"
                f" | ml_probability: {self.ml_probability}"
                f" | line_data_list: [{rendered_lines}]")

    def __str__(self):
        return self.to_str()

    def __repr__(self):
        return self.to_str(subtext=True)

    def to_json(self, hashed: bool, subtext: bool) -> Dict:
        """Convert credential candidate object to dictionary.

        When a config is attached only the keys listed in config.candidate_output are reported.

        Return:
            Dictionary object generated from current credential candidate

        """
        full_output = {
            "patterns": [pattern.pattern for pattern in self.patterns],
            "rule": self.rule_name,
            "severity": self.severity.value,
            "confidence": self.confidence.value,
            "use_ml": self.use_ml,
            "ml_probability": self.ml_probability,
            # put the array to end to make json more readable
            "line_data_list": [line_data.to_json(hashed, subtext) for line_data in self.line_data_list],
        }
        if self.config is None:
            return full_output
        return {name: item for name, item in full_output.items() if name in self.config.candidate_output}

    def to_dict_list(self, hashed: bool, subtext: bool) -> List[dict]:
        """Convert credential candidate object to List[dict].

        Every line data dictionary is merged with the candidate level fields and string values are encoded.

        Return:
            List[dict] object generated from current credential candidate

        """
        json_output = self.to_json(hashed, subtext)
        # candidate level fields which are repeated in each row
        shared_fields = copy.deepcopy(json_output)
        del shared_fields["line_data_list"]
        rows = []
        for row in json_output["line_data_list"]:
            row.update(shared_fields)
            for name in row:
                row[name] = self._encode(row[name])
            rows.append(row)
        return rows

    @classmethod
    def get_dummy_candidate(cls, config: Config, file_path: str, file_type: str, info: str, rule_name: str):
        """Create dummy instance to use in searching file by extension"""
        placeholder = LineData(config, '', -1, 0, file_path, file_type, info, cls.DUMMY_PATTERN)
        return cls(  #
            line_data_list=[placeholder],  #
            patterns=[cls.DUMMY_PATTERN],  #
            rule_name=rule_name,  #
            severity=Severity.INFO,  #
            config=config,  #
            confidence=Confidence.WEAK)
================================================
FILE: credsweeper/credentials/candidate_group_generator.py
================================================
from typing import Dict, List, Tuple
from credsweeper.credentials.candidate import Candidate
from credsweeper.credentials.candidate_key import CandidateKey
class CandidateGroupGenerator:
    """Mapping-like container of candidate lists addressed by CandidateKey"""

    def __init__(self) -> None:
        # goes through the property setter and creates the backing dictionary
        self.grouped_candidates: Dict[CandidateKey, List[Candidate]] = {}

    @property
    def grouped_candidates(self) -> Dict[CandidateKey, List[Candidate]]:
        """the backing dictionary"""
        return self._grouped_candidates

    @grouped_candidates.setter
    def grouped_candidates(self, grouped_candidates: Dict[CandidateKey, List[Candidate]]) -> None:
        """replace the backing dictionary"""
        self._grouped_candidates = grouped_candidates

    def __len__(self) -> int:
        return len(self.grouped_candidates)

    def __contains__(self, key: CandidateKey) -> bool:
        return key in self.grouped_candidates

    def __getitem__(self, key) -> List[Candidate]:
        return self.grouped_candidates[key]

    def __setitem__(self, key: CandidateKey, value: List[Candidate]) -> None:
        self.grouped_candidates[key] = value

    def items(self) -> List[Tuple[CandidateKey, List[Candidate]]]:
        """snapshot list of (key, candidates) pairs"""
        return [*self.grouped_candidates.items()]
================================================
FILE: credsweeper/credentials/candidate_key.py
================================================
from typing import Tuple
from credsweeper.credentials.line_data import LineData
class CandidateKey:
    """Class used to identify credential candidates.

    Candidates that detected same value on same string in a same file would have identical CandidateKey.
    Identity is the tuple (path, line_num, value_start, value_end); the line text is kept only for repr.
    """

    def __init__(self, line_data: LineData):
        """Copy the identifying fields from ``line_data``

        Args:
            line_data: object with path, line_num, value_start, value_end and line attributes

        """
        self.path: str = line_data.path
        self.line_num: int = line_data.line_num
        self.value_start: int = line_data.value_start
        self.value_end: int = line_data.value_end
        self.key: Tuple[str, int, int, int] = (self.path, self.line_num, self.value_start, self.value_end)
        self.__line = line_data.line

    def __hash__(self):
        return hash(self.key)

    def __eq__(self, other):
        # comparing with a foreign type must not fail with AttributeError - let Python try the other operand
        if not isinstance(other, CandidateKey):
            return NotImplemented
        return self.key == other.key

    def __ne__(self, other):
        return not bool(self == other)

    def __repr__(self) -> str:
        return f"{self.key}:{self.__line}"
================================================
FILE: credsweeper/credentials/credential_manager.py
================================================
import logging
from multiprocessing import Manager
from typing import List, Dict, Tuple
from credsweeper.credentials.candidate import Candidate
from credsweeper.credentials.candidate_group_generator import CandidateGroupGenerator, CandidateKey
logger = logging.getLogger(__name__)
class CredentialManager:
    """The manager allows you to store, add and delete separate credential candidates."""

    def __init__(self) -> None:
        # A plain list is enough: the previous `list(Manager().list())` started a manager server process
        # only to copy an empty proxy into an ordinary list, so nothing was ever shared between processes.
        self.candidates: List[Candidate] = []

    def clear_credentials(self) -> None:
        """Clear credential candidates stored in the manager."""
        self.candidates.clear()

    def len_credentials(self) -> int:
        """Get number of credential candidates stored in the manager.

        Return:
            Non-negative integer

        """
        return len(self.candidates)

    def get_credentials(self) -> List[Candidate]:
        """Get all credential candidates stored in the manager.

        Return:
            List with all Candidate objects stored in manager (the internal list itself, not a copy)

        """
        return self.candidates

    def set_credentials(self, candidates: List[Candidate]) -> None:
        """Remove all current credentials candidates from the manager and add new credentials.

        Args:
            candidates: List with candidates to replace current candidates in the manager

        """
        self.candidates = candidates

    def add_credential(self, candidate: Candidate) -> None:
        """Add credential candidate to the manager.

        Args:
            candidate: credential candidate to be added

        """
        self.candidates.append(candidate)

    def remove_credential(self, candidate: Candidate) -> None:
        """Remove credential candidate from the manager.

        Args:
            candidate: credential candidate to be removed

        Raises:
            ValueError: the candidate is not stored in the manager

        """
        self.candidates.remove(candidate)

    def purge_duplicates(self) -> int:
        """Purge duplicates candidates which may appear in overlaps during long line scan.

        Candidates are keyed by rule name and the positions of the first line data. The first candidate
        for a key is kept; a later one with the same key is dropped, and a warning is logged when its
        content differs from the kept one.

        Returns: number of removed duplicates

        """
        candidates_dict: Dict[Tuple[str, str, str, int, int, int, int, int, int, int], Candidate] = {}
        before = len(self.candidates)
        for candidate in self.candidates:
            ld = candidate.line_data_list[0]
            candidate_key = (
                candidate.rule_name,  #
                ld.path,  #
                ld.info,  #
                ld.line_pos,  #
                ld.variable_start,  #
                ld.variable_end,  #
                ld.separator_start,  #
                ld.separator_end,  #
                ld.value_start,  #
                ld.value_end)
            kept = candidates_dict.get(candidate_key)
            if kept is None:
                candidates_dict[candidate_key] = candidate
            elif not kept.compare(candidate):
                # check precisely - compare with the values
                ld_ = kept.line_data_list[0]
                logger.warning("Check %s and %s", (ld_.variable, ld_.value), (ld.variable, ld.value))
        self.candidates = list(candidates_dict.values())
        return before - len(self.candidates)

    def group_credentials(self) -> CandidateGroupGenerator:
        """Join candidates that reference same secret value in the same line.

        Only the first LineData of every candidate is used to build the group key.

        Return:
            Contain dictionary of [path, line_num, value position] -> credential candidates list

        """
        groups = CandidateGroupGenerator()
        for credential_candidate in self.get_credentials():
            for line_data in credential_candidate.line_data_list[:1]:
                # Match by file path+line num+value position. The position is required so two different
                # credentials in one line are still processed independently
                candidate_key = CandidateKey(line_data)
                if candidate_key in groups:
                    groups[candidate_key].append(credential_candidate)
                else:
                    groups[candidate_key] = [credential_candidate]
        return groups
================================================
FILE: credsweeper/credentials/line_data.py
================================================
import contextlib
import hashlib
import re
import string
from functools import cached_property
from typing import Any, Dict, Optional, Tuple
from colorama import Fore, Style
from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
from credsweeper.config.config import Config
from credsweeper.utils.util import Util
class LineData:
    """Object to treat and store scanned line related data.

    Constructor parameters:
        config: user configs
        line: string variable, line
        line_pos: int variable, position of the line (used as a part of the key when duplicates are purged)
        line_num: int variable, number of line in file
        path: string variable, path to file
        file_type: string variable, extension of file '.txt'
        info: additional info about how the data was detected
        pattern: regex pattern, detected pattern in line
        match_obj: optional match of the pattern; the pattern is searched in the line when it is not given

    Attributes filled from the named groups of the match:
        key: optional string variable, text of the "keyword" group
        separator: optional string variable, separators between variable and value
        separator_start: separator position start
        value: optional string variable, detected value in line
        variable: optional string variable, detected variable in line

    """

    quotation_marks = ('"', "'", '`')
    # "<!--" is the real HTML comment opener; the look-alike written with two en dashes is kept for compatibility
    comment_starts = ("//", "* ", "# ", "/*", "<!--", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
    bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
    line_endings = re.compile(r"\\{1,8}[nr]")
    # https://en.wikipedia.org/wiki/Percent-encoding
    url_percent_split = re.compile(r"%(21|23|24|26|27|28|29|2a|2b|2c|2f|3a|3b|3d|3f|40|5b|5d)", flags=re.IGNORECASE)
    url_unicode_split = re.compile(r"\\u00(0000)?(21|23|24|26|27|28|29|2a|2b|2c|2f|3a|3b|3d|3f|40|5b|5d)",
                                   flags=re.IGNORECASE)
    # some symbols e.g. double quotes cannot be in URL string https://www.ietf.org/rfc/rfc1738.txt
    # \ - was added for case of url in escaped string \u0026amp; - means escaped & in HTML
    url_scheme_part_regex = re.compile(r"[0-9A-Za-z.-]{3}")
    url_chars_not_allowed_pattern = re.compile(r'[\s"<>\[\]^~`{|}]')
    url_value_pattern = re.compile(r'[^\s&;"<>\[\]^~`{|}]+[&;][^\s=;"<>\[\]^~`{|}]{3,80}=[^\s;&="<>\[\]^~`{|}]{1,80}')
    # characters removed from both ends of a variable name
    variable_strip_pattern = string.whitespace + """,'"-;"""
    # position of a group before any match was applied
    INITIAL_WRONG_POSITION = -3
    # position reported when the span of a group cannot be obtained from the match
    EXCEPTION_POSITION = -2
def __init__(
        self,  #
        config: Config,  #
        line: str,  #
        line_pos: int,  #
        line_num: int,  #
        path: str,  #
        file_type: str,  #
        info: str,  #
        pattern: re.Pattern,  #
        match_obj: Optional[re.Match] = None) -> None:
    """Store the scan context, reset all match related fields and apply the match to the line

    The match object itself is not stored because it cannot be pickled with multiprocessing.
    """
    # scan context as given by the caller
    self.config = config
    self.line: str = line
    self.line_pos: int = line_pos
    self.line_num: int = line_num
    self.path: str = path
    self.file_type: str = file_type
    self.info: str = info
    self.pattern: re.Pattern = pattern

    # texts of the named groups - unknown until the match is applied
    self.key: Optional[str] = None
    self.variable: Optional[str] = None
    self.separator: Optional[str] = None
    self.value: Optional[str] = None
    self.value_leftquote: Optional[str] = None
    self.value_rightquote: Optional[str] = None
    self.wrap = None

    # start - end positions of the named groups
    unset = LineData.INITIAL_WRONG_POSITION
    self.variable_start = self.variable_end = unset
    self.separator_start: int = unset
    self.separator_end: int = unset
    self.value_start = self.value_end = unset

    # is set when variable & value are in URL for any source type
    self.url_part = False
    self._3d_escaped_separator = False

    self.initialize(match_obj)
    # the line is very useful for debug breakpoint
    pass  # pylint: disable=W0107
def compare(self, other: 'LineData') -> bool:
    """Tell whether both objects point to the same finding; the text of the whole line is not compared"""
    compared_fields = ("path", "info", "line_num", "value_start", "variable", "value")
    return all(getattr(self, name) == getattr(other, name) for name in compared_fields)
def initialize(self, match_obj: Optional[re.Match] = None) -> None:
    """Fill the fields from the named groups of the match and sanitize value and variable.

    When no match object is given the pattern is searched in the line (limited by MAX_LINE_LENGTH).
    Nothing is changed when there is no match.
    """
    if not isinstance(match_obj, re.Match) and isinstance(self.pattern, re.Pattern):
        match_obj = self.pattern.search(self.line, endpos=MAX_LINE_LENGTH)
    if match_obj is None:
        return

    def named_group(name: str) -> Any:
        # a group which is not declared in the pattern yields None
        try:
            return match_obj.group(name)
        except Exception:  # pylint: disable=broad-except
            return None

    def named_span(name: str) -> Tuple[int, int]:
        # a group which is not declared in the pattern yields the exception marker for both positions
        try:
            begin, end = match_obj.span(name)
        except Exception:  # pylint: disable=broad-except
            return LineData.EXCEPTION_POSITION, LineData.EXCEPTION_POSITION
        return begin, end

    self.key = named_group("keyword")
    self.wrap = named_group("wrap")
    self.value_leftquote = named_group("value_leftquote")
    self.value_rightquote = named_group("value_rightquote")

    self.variable = named_group("variable")
    self.variable_start, self.variable_end = named_span("variable")
    self.separator = named_group("separator")
    self.separator_start, self.separator_end = named_span("separator")
    self.value = named_group("value")
    self.value_start, self.value_end = named_span("value")

    # percent encoded '=' in url
    self._3d_escaped_separator = bool(self.separator) and "%3D" == self.separator.upper()

    self.sanitize_value()
    self.sanitize_variable()
def sanitize_value(self):
    """Clean found value from extra artifacts. Correct positions if changed.

    Step 1: when the pattern captured no quotes, pairs of typographic quotes around the value
    (U+2018..U+201B as single, U+201C..U+201F as double) are removed and remembered as plain quotes.
    Step 2: when a variable is present and the value is not well quoted, URL, bash, TOML and tag
    artifacts are cut off and value_start/value_end are moved to the remaining text.
    """
    # process the quotation workaround before cached properties invocation
    if not self.value_leftquote and not self.value_rightquote:
        # a pair needs two characters - a lone typographic quote must not be taken as both the opening
        # and the closing one, otherwise the value is emptied and value_start passes value_end
        while self.value and 2 <= len(self.value):
            first_symbol_code = ord(self.value[0])
            last_symbol_code = ord(self.value[-1])
            if 0x2018 <= first_symbol_code <= 0x201B and 0x2018 <= last_symbol_code <= 0x201B:
                plain_quote = "'"
            elif 0x201C <= first_symbol_code <= 0x201F and 0x201C <= last_symbol_code <= 0x201F:
                plain_quote = '"'
            else:
                break
            self.value_leftquote = self.value_rightquote = plain_quote
            self.value = self.value[1:-1]
            self.value_start += 1
            self.value_end -= 1

    if self.variable and self.value and not self.is_well_quoted_value:
        # sanitize is actual step for keyword pattern only
        _value = self.value
        self.clean_url_parameters()
        self.clean_bash_parameters()
        self.clean_toml_parameters()
        self.clean_tag_parameters()
        if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
            # the cleaned value is a part of the original one - shift the positions to it
            start = _value.find(self.value)
            self.value_start += start
            self.value_end = self.value_start + len(self.value)
def check_url_part(self) -> bool:
    """Decide whether the value looks like a part of a URL, store the answer in url_part and return it

    Any of the following is enough:
    * the rightmost "://" before the value follows three scheme characters and no character
      which is not allowed in a URL stands between it and the value
    * the variable goes right after '?' or '&'
    * the value itself contains a following "&name=value" or ";name=value" parameter
    * the separator is a percent encoded '='
    """
    line_before_value = self.line[:self.value_start]
    # rightmost scheme delimiter; it is searched only when the value does not start the line
    url_pos = line_before_value.rfind("://") if 0 < self.value_start else -1
    scheme_found = bool(self.url_scheme_part_regex.match(line_before_value, pos=url_pos - 3, endpos=url_pos))
    clean_before_value = not self.url_chars_not_allowed_pattern.search(line_before_value, pos=url_pos + 3)
    after_query_delimiter = self.line[self.variable_start - 1] in "?&" if 0 < self.variable_start else False
    value_with_parameter = bool(self.url_value_pattern.match(self.value))
    self.url_part = (
        3 <= url_pos and scheme_found and clean_before_value  # the line has url start pattern
        or after_query_delimiter  #
        or value_with_parameter  #
        or self._3d_escaped_separator)
    return self.url_part
def clean_url_parameters(self) -> None:
    """Reduce variable and value to a single URL query parameter.

    Applied only when the line looks like a URL. The variable keeps the text after the last
    '&', '?' and ';'. The value keeps the text before the first '&', ';' and '#' and before the
    first \\u00XX escaped reserved character; a percent encoded reserved character ends the value too
    when the separator itself is a percent encoded '='.
    """
    # skip sanitize in case of URL credential rule - the regex is mature enough
    if not self.check_url_part() or self.variable.endswith("://"):
        return
    # all checks have passed - line before the value may be a URL
    name = self.variable
    for delimiter in ('&', '?', ';'):
        name = name.rpartition(delimiter)[2]
    self.variable = name

    content = self.value
    for delimiter in ('&', ';', '#'):
        content = content.partition(delimiter)[0]
    self.value = self.url_unicode_split.split(content)[0]
    if self._3d_escaped_separator:
        self.value = self.url_percent_split.split(self.value)[0]
def clean_bash_parameters(self) -> None:
"""Cut the value at shell syntax or at an escaped line ending, if line assumed to be CLI command.

Only ``self.value`` may be changed; the kept part is always the text before the first split point.
"""
if self.variable.startswith("-"):
# bash_param_split matches whitespace followed by '-', '|', '>', 'word>' or '&'
value_spl = self.bash_param_split.split(self.value)
# If variable name starts with `-` (usual case for args in CLI)
# and value can be split by bash special characters
if len(value_spl) > 1:
self.value = value_spl[0]
# a value without spaces which holds a literal backslash-n or backslash-r sequence
if ' ' not in self.value and ("\\n" in self.value or "\\r" in self.value):
# line_endings matches 1..8 backslashes followed by 'n' or 'r'
value_whsp = self.line_endings.split(self.value)
if len(value_whsp) > 1:
self.value = value_whsp[0]
def clean_toml_parameters(self) -> None:
    """Drop closing brackets caught at the end of the value (TOML format and bash).

    A trailing '}', ']' or ')' is removed when its opening counterpart is absent in the value and
    the line before the value has more opening than closing brackets of that kind.
    """
    if not (self.value and self.value[-1] in ['}', ']', ')']):
        return
    line_before_value = self.line[:self.value_start] if self.value_start and 0 <= self.value_start else ""
    # the text before the value does not change - decide once which bracket kinds are still open there
    # full match does not reasonable to implement due open character may be in other line
    open_kinds = [(left, right)  #
                  for left, right in (('{', '}'), ('[', ']'), ('(', ')'))  #
                  if line_before_value.count(left) > line_before_value.count(right)]
    trimmed = True
    while trimmed:
        trimmed = False
        for left, right in open_kinds:
            if left not in self.value and self.value.endswith(right):
                self.value = self.value[:-1]
                trimmed = True
def clean_tag_parameters(self) -> None:
    """Cut closing tags off the end of the value while the matching tag is opened before the value"""
    while self.value and self.value.endswith('>'):
        closing_tag_pos = self.value.rfind("</")
        if closing_tag_pos < 0:
            # the value ends with '>' but holds no closing tag
            return
        # use `<a` to avoid tag parameters
        opening_tag_prefix = "<" + self.value[closing_tag_pos + 2:-1]
        if opening_tag_prefix in self.value:
            # the tag is opened inside the value - keep it whole
            return
        if self.line.find(opening_tag_prefix, 0, self.value_start) < 0:
            # the tag is not opened in the line before the value
            return
        self.value = self.value[:closing_tag_pos]
def sanitize_variable(self) -> None:
"""Remove trailing spaces, dashes and quotations around the variable. Correct position.

The characters listed in ``variable_strip_pattern`` are stripped from both ends, a trailing backslash
and a leading '{' (TOML case) are dropped. The clean-up is repeated until the length of the
variable stops changing. ``variable_start`` and ``variable_end`` then follow the remaining text.
"""
# length seen at the previous pass - the loop ends when a pass removes nothing
sanitized_var_len = 0
# the variable as captured by the pattern - required to find the offset of the cleaned text
variable = self.variable
while self.variable and sanitized_var_len != len(self.variable):
sanitized_var_len = len(self.variable)
self.variable = self.variable.strip(self.variable_strip_pattern)
# one trailing backslash is removed
if self.variable.endswith('\\'):
self.variable = self.variable[:-1]
# the leading brace is dropped only when a closing one exists in the line from variable_end on
if self.variable.startswith('{') and '}' in self.line[self.variable_end:]:
# TOML case
self.variable = self.variable[1:]
# positions are corrected only when something was removed and both positions are valid (not negative)
if variable and len(self.variable) < len(variable) and 0 <= self.variable_start and 0 <= self.variable_end:
start = variable.find(self.variable)
self.variable_start += start
self.variable_end = self.variable_start + len(self.variable)
def is_comment(self) -> bool:
    """Check if line with credential is a comment.

    The line is stripped and tested against every marker from ``comment_starts``.

    Return:
        True if line is a comment, False otherwise

    """
    stripped_line = self.line.strip()
    return any(stripped_line.startswith(marker) for marker in self.comment_starts)
@cached_property
def is_well_quoted_value(self) -> bool:
"""Well quoted value - means the value has been quoted or has line wrap

The result is computed once per object (cached_property) from the quote groups captured by the pattern.

Return:
True if the captured quotes match each other or the quoted value continues on the next line

"""
result = False
if self.value_leftquote and self.value_rightquote:
# both quote groups were captured
if self.value_leftquote == self.value_rightquote:
# regex caught well
return True
# reduce the left group to a single quotation mark
if 1 == len(self.value_leftquote):
leftquote = self.value_leftquote
else:
# right side symbol should be a quote
leftquote = self.value_leftquote[-1]
if leftquote not in self.quotation_marks:
leftquote = ""
# reduce the right group to a single quotation mark
if 1 == len(self.value_rightquote):
rightquote = self.value_rightquote
else:
# clean \ sign in escaping text
for q in self.value_rightquote:
if q in self.quotation_marks:
rightquote = q
break
else:
rightquote = ""
result = bool(leftquote) and ( #
bool(rightquote) and (leftquote == rightquote) # normal case
or '\\' == self.value_rightquote and '\\' == self.line[-1] # line wrap
)
elif self.value_leftquote:
# only the opening quote was captured - the value may continue on the following line
result = ( #
('\\' == self.value_rightquote or '\\' == self.value[-1]) and '\\' == self.line[-1] # line wrap
or '.php' == self.file_type # php may use multiline string
or 3 == self.value_leftquote.count('"') or 3 == self.value_leftquote.count("'") # python multiline
)
return result
@cached_property
def is_quoted(self) -> bool:
    """Check if variable and value in a quoted string.

    The first quotation mark of the line before the variable is compared with the first quotation mark
    after the value.

    Return:
        True if candidate in a quoted string, False otherwise

    """
    marks = ('"', "'", '`')
    opening = None
    if 0 < self.variable_start:
        opening = next((symbol for symbol in self.line[:self.variable_start] if symbol in marks), None)
    closing = None
    if len(self.line) > self.value_end:
        closing = next((symbol for symbol in self.line[self.value_end:] if symbol in marks), None)
    return bool(opening) and bool(closing) and opening == closing
def is_source_file(self) -> bool:
    """Check if file with credential is a source code file or not (data, log, plain text).

    Return:
        True if the path is set and its extension is listed in config.source_extensions, False otherwise

    """
    return bool(self.path) and Util.get_extension(self.path) in self.config.source_extensions
def is_source_file_with_quotes(self) -> bool:
    """Check if file with credential require quotation for string literals.

    The stored file type is used; the extension of the path is taken when the file type is empty.

    Return:
        True if file require quotation, False otherwise

    """
    extension = self.file_type
    if not extension:
        extension = Util.get_extension(self.path)
    if not extension:
        return False
    return extension in self.config.source_quote_ext
@staticmethod
def get_hash_or_subtext(
        text: Optional[str],  #
        hashed: bool,  #
        cut_pos: Optional[StartEnd] = None,  #
) -> Optional[str]:
    """Represent not empty text with hash or a "beauty" subtext if required

    Args:
        text: str - input string
        hashed: bool - whether the text will be hashed and returned
        cut_pos: Optional[StartEnd] - start, end positions which text must be kept in output

    Return:
        empty text or None as is
        or
        sha256 hash in hex representation of input text with UTF-8 encodings
        or
        subtext from start to end, or original text as is

    """
    if not text:
        return text
    if hashed:
        return hashlib.sha256(text.encode(UTF_8, errors="strict")).hexdigest()
    if cut_pos is None:
        return text
    hunk_limit = 2 * ML_HUNK
    if hunk_limit < cut_pos.end - cut_pos.start:
        # subtext positions exceed the limit
        return text[cut_pos.start:cut_pos.end]
    strip_text = text.strip()
    if hunk_limit >= len(strip_text):
        # stripped text length meets the limit
        return strip_text
    # middle of the span, shifted by the number of the removed leading whitespaces
    offset = len(text) - len(text.lstrip())
    center = (cut_pos.end + cut_pos.start - offset) >> 1
    return Util.subtext(strip_text, center, ML_HUNK)
def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
    """Represent line_data with subtext or|and hashed values

    Args:
        subtext: the line is reduced to a part around the variable and the value
        hashed: the value and the line are replaced with their hashes

    Return:
        text with path, line number, value and line

    """
    if subtext:
        # variable_start is negative when the pattern has no variable - start from the value then,
        # the same way as to_json does
        span_start = self.variable_start if 0 <= self.variable_start else self.value_start
        cut_pos = StartEnd(span_start, self.value_end)
    else:
        cut_pos = None
    return f"path: {self.path}" \
           f" | line_num: {self.line_num}" \
           f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
           f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"


def __str__(self):
    """Full text representation"""
    return self.to_str()


def __repr__(self):
    """Representation with the line reduced to a subtext"""
    return self.to_str(subtext=True)
def to_json(self, hashed: bool, subtext: bool) -> Dict:
    """Convert line data object to dictionary.

    Only the keys listed in config.line_data_output are reported.

    Return:
        Dictionary object generated from current line data

    """
    if subtext:
        # the kept part of the line starts at the variable or, when it is absent, at the value
        span_start = self.variable_start if 0 <= self.variable_start else self.value_start
        cut_pos = StartEnd(span_start, self.value_end)
    else:
        cut_pos = None
    entropy = round(Util.get_shannon_entropy(self.value), 5) if isinstance(self.value, str) else None
    represent = self.get_hash_or_subtext
    full_output = {
        "key": self.key,
        "line": represent(self.line, hashed, cut_pos),
        "line_num": self.line_num,
        "path": self.path,
        # info may contain variable name - so let it be hashed if requested
        "info": represent(self.info, hashed),
        "pattern": self.pattern.pattern,
        "variable": represent(self.variable, hashed),
        "variable_start": self.variable_start,
        "variable_end": self.variable_end,
        "separator": self.separator,
        "separator_start": self.separator_start,
        "separator_end": self.separator_end,
        "value": represent(self.value, hashed),
        "value_start": self.value_start,
        "value_end": self.value_end,
        "entropy": entropy,
        "value_leftquote": self.value_leftquote,
        "value_rightquote": self.value_rightquote,
    }
    return {name: item for name, item in full_output.items() if name in self.config.line_data_output}
def get_colored_line(self, hashed: bool, subtext: bool = False) -> str:
    """Represents the LineData line with the value, separator and variable highlighted by colors

    Args:
        hashed: when True the whole line is passed to get_hash_or_subtext and colored as one piece
        subtext: when True only a hunk of the line positioned by the value is returned

    Return:
        The line with color codes inserted

    """
    if hashed:
        # return colored hash
        cut_pos = StartEnd(self.value_start, self.value_end) if subtext else None
        hashed_line = self.get_hash_or_subtext(self.line, hashed, cut_pos)
        return Fore.LIGHTGREEN_EX + hashed_line + Style.RESET_ALL

    def paint(text: str, start: int, end: int, color: str) -> str:
        """Wraps text[start:end] with the color code and the style reset"""
        return text[:start] + color + text[start:end] + Style.RESET_ALL + text[end:]

    # Painting goes from right to left, so the original positions of the left parts stay valid.
    # at least, value must present
    line = paint(self.line, self.value_start, self.value_end, Fore.LIGHTYELLOW_EX)
    # separator may be missing
    if 0 <= self.separator_start < self.separator_end <= self.value_start:
        line = paint(line, self.separator_start, self.separator_end, Fore.LIGHTGREEN_EX)
    # variable may be missing; the check with the separator in the original condition
    # is fully covered by this one, so a single comparison chain gives the same result
    if 0 <= self.variable_start < self.variable_end <= self.value_start:
        line = paint(line, self.variable_start, self.variable_end, Fore.LIGHTBLUE_EX)
    if subtext:
        # display part of the text, centered around the start of the value, style reset at the end as a fallback
        inserted = len(line) - len(self.line)
        line = f"{Util.subtext(line, self.value_start + inserted, ML_HUNK)}{Style.RESET_ALL}"
    return line
================================================
FILE: credsweeper/deep_scanner/__init__.py
================================================
================================================
FILE: credsweeper/deep_scanner/abstract_scanner.py
================================================
import contextlib
import datetime
import logging
from abc import abstractmethod, ABC
from typing import List, Optional, Tuple, Any, Generator
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
MIN_VALUE_LENGTH
from credsweeper.config.config import Config
from credsweeper.credentials.augment_candidates import augment_candidates
from credsweeper.credentials.candidate import Candidate
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
from credsweeper.file_handler.content_provider import ContentProvider
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.descriptor import Descriptor
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
from credsweeper.file_handler.string_content_provider import StringContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.scanner.scanner import Scanner
logger = logging.getLogger(__name__)
class AbstractScanner(ABC):
"""Base abstract class for all recursive scanners"""
@property
@abstractmethod
def config(self) -> Config:
    """Configuration of the scan - the property has to be overridden (DeepScanner does it)"""
    raise NotImplementedError(__name__)
@property
@abstractmethod
def scanner(self) -> Scanner:
    """Scanner for text lines - the property has to be overridden (DeepScanner does it)"""
    raise NotImplementedError(__name__)
@abstractmethod
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Abstract scan of raw data - each format scanner supplies its own implementation

    Args:
        data_provider: DataContentProvider with the data to be scanned
        depth: level of recursion passed from the caller
        recursive_limit_size: bytes limit passed from the caller

    Return:
        List of candidates, or None - deep_scan_with_fallback treats None as "content not recognised"

    """
    raise NotImplementedError(__name__)
@staticmethod
@abstractmethod
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
raise NotImplementedError(__name__)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def recursive_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int = 0,  #
        recursive_limit_size: int = 0) -> List[Candidate]:
    """Recursive function to scan files which might be containers like ZIP archives

    Args:
        data_provider: DataContentProvider object may be a container
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Return:
        List of found candidates; empty when the depth is exhausted or the data are too small

    """
    found: List[Candidate] = []
    if depth < 0:
        # break recursion if maximal depth is reached
        logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
        return found
    depth -= 1
    if len(data_provider.data) < MIN_DATA_LEN:
        # break recursion for minimal data size
        logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
                     depth, recursive_limit_size, data_provider.file_path, data_provider.info)
        return found
    logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
                 recursive_limit_size, data_provider.file_path, data_provider.info)

    if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
        # Skip scanning file and makes fake candidate due the extension is suspicious
        found.append(
            Candidate.get_dummy_candidate(self.config, data_provider.file_path, data_provider.file_type,
                                          data_provider.info, FilePathExtractor.FIND_BY_EXT_RULE))
        return found

    scanned = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
    augment_candidates(found, scanned)
    return found
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@staticmethod
def key_value_combination(structure: dict) -> Generator[Tuple[Any, Any], None, None]:
"""Combine items by `key` and `value` from a dictionary for augmentation
{..., "key": "api_key", "value": "XXXXXXX", ...} -> ("api_key", "XXXXXXX")
"""
for key_id in ("key", "KEY", "Key"):
if key_id in structure:
struct_key = structure.get(key_id)
break
else:
struct_key = None
if isinstance(struct_key, bytes):
# sqlite table may produce bytes for `key`
with contextlib.suppress(UnicodeError):
struct_key = struct_key.decode(UTF_8)
# only str type is common used for the augmentation
if struct_key and isinstance(struct_key, str):
for value_id in ("value", "VALUE", "Value"):
if value_id in structure:
struct_value = structure.get(value_id)
if struct_value and isinstance(struct_value, (str, bytes)):
yield struct_key, struct_value
# break in successful case
break
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@staticmethod
def structure_processing(structure: Any) -> Generator[Tuple[Any, Any], None, None]:
"""Yields pair `key, value` from given structure if applicable"""
if isinstance(structure, dict):
# transform dictionary to list
for key, value in structure.items():
if not value:
# skip empty values
continue
if isinstance(value, (list, tuple)):
if 1 == len(value):
# simplify some structures like YAML when single item in new line is a value
yield key, value[0]
continue
# all other data will be precessed in next code
yield key, value
yield from AbstractScanner.key_value_combination(structure)
elif isinstance(structure, (list, tuple)):
# enumerate the items to fit for return structure
for key, value in enumerate(structure):
yield key, value
else:
logger.warning("Not supported type:%s val:%s", str(type(structure)), repr(structure))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def structure_scan(
        self,  #
        struct_provider: StructContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> List[Candidate]:
    """Recursive function to scan structured data

    Args:
        struct_provider: StructContentProvider object with a structure (dict, list, tuple) to be explored
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Return:
        List of candidates found in nested structures, in bytes and text items and in augmented keyword lines

    """
    candidates: List[Candidate] = []
    logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
                 struct_provider.file_path, struct_provider.info)
    if depth < 0:
        # break recursion if maximal depth is reached
        logger.debug("Bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
        return candidates
    depth -= 1

    # lines like `key = 'value'` are collected here to be scanned together after the loop
    keyword_lines = []
    for key, value in AbstractScanner.structure_processing(struct_provider.struct):
        # a keyword rule may be applicable for `key` (str only) and `value` (str, bytes)
        keyword_match = bool(isinstance(key, str) and self.scanner.keywords_required_substrings_check(key.lower()))

        if isinstance(value, (dict, list, tuple)) and value:
            # recursive scan for not empty structured `value`
            nested_provider = StructContentProvider(struct=value,
                                                    file_path=struct_provider.file_path,
                                                    file_type=struct_provider.file_type,
                                                    info=f"{struct_provider.info}|STRUCT:{key}")
            candidates.extend(self.structure_scan(nested_provider, depth, recursive_limit_size))

        elif isinstance(value, bytes):
            # recursive data scan
            if len(value) >= MIN_DATA_LEN:
                bytes_provider = DataContentProvider(data=value,
                                                     file_path=struct_provider.file_path,
                                                     file_type=struct_provider.file_type,
                                                     info=f"{struct_provider.info}|BYTES:{key}")
                candidates.extend(self.recursive_scan(bytes_provider, depth, recursive_limit_size - len(value)))
            if keyword_match and len(value) >= MIN_VALUE_LENGTH:
                keyword_lines.append(f"{key} = {value!r}")

        elif isinstance(value, str):
            # recursive text scan with transformation into bytes
            text = value.strip()
            if len(text) >= MIN_DATA_LEN:
                # recursive scan only for data which may be encoded at least;
                # the whole scan stays under the suppression as it was
                with contextlib.suppress(UnicodeError):
                    encoded = text.encode(encoding=DEFAULT_ENCODING, errors='strict')
                    text_provider = DataContentProvider(data=encoded,
                                                        file_path=struct_provider.file_path,
                                                        file_type=struct_provider.file_type,
                                                        info=f"{struct_provider.info}|STRING:{key}")
                    text_limit = recursive_limit_size - len(text_provider.data)
                    candidates.extend(self.recursive_scan(text_provider, depth, text_limit))
            if keyword_match and len(text) >= MIN_VALUE_LENGTH:
                keyword_lines.append(f"{key} = {text!r}")

        elif not value or isinstance(value, (int, float, datetime.date, datetime.datetime)):
            # skip useless types
            continue

        else:
            logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))

    if keyword_lines:
        keyword_provider = StringContentProvider(keyword_lines,
                                                 file_path=struct_provider.file_path,
                                                 file_type=struct_provider.file_type,
                                                 info=f"{struct_provider.info}|KEYWORD")
        augment_candidates(candidates, self.scanner.scan(keyword_provider))
    return candidates
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
                            recursive_limit_size: int) -> List[Candidate]:
    """Scans with deep scanners and fallback scanners if possible

    Args:
        data_provider: DataContentProvider with raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Returns: list with candidates

    """
    candidates: List[Candidate] = []
    deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.descriptor, depth)

    recognised = False
    for scanner_cls in deep_scanners:
        found = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        # None means the scanner did not recognise the content type
        if found is not None:
            augment_candidates(candidates, found)
            recognised = True
    if recognised:
        # at least one deep scan is successful, so fallback is not necessary
        return candidates

    for scanner_cls in fallback_scanners:
        found = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        if found is not None:
            augment_candidates(candidates, found)
            # use only first successful fallback scanner
            break
    return candidates
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def scan(self,
         content_provider: ContentProvider,
         depth: int,
         recursive_limit_size: Optional[int] = None) -> List[Candidate]:
    """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan

    Args:
        content_provider: ContentProvider that might contain raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack;
            RECURSIVE_SCAN_LIMITATION is used when the argument is not an integer

    Return:
        List of candidates

    """
    if not isinstance(recursive_limit_size, int):
        recursive_limit_size = RECURSIVE_SCAN_LIMITATION
    candidates: List[Candidate] = []
    data: Optional[bytes] = None

    if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
        # Feature to scan files which might be containers
        data = content_provider.data
        info = f"FILE:{content_provider.file_path}"
    elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
        candidates = self.scanner.scan(content_provider)
        # Feature to scan binary diffs
        first_line = content_provider.diff[0].get("line")
        # the check for legal fix mypy issue
        if isinstance(first_line, bytes):
            data = first_line
        info = f"DIFF:{content_provider.file_path}"
    else:
        logger.warning("Content provider %s does not support deep scan", type(content_provider))
        info = "NA"

    if not data:
        return candidates

    data_provider = DataContentProvider(data=data,
                                        file_path=content_provider.file_path,
                                        file_type=content_provider.file_type,
                                        info=content_provider.info or info)
    deep_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
    augment_candidates(candidates, deep_candidates)
    return candidates
================================================
FILE: credsweeper/deep_scanner/byte_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
from credsweeper.file_handler.data_content_provider import DataContentProvider
logger = logging.getLogger(__name__)
class ByteScanner(AbstractScanner, ABC):
"""Implements plain data scanning"""
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Tries to represent data as plain text with splitting by lines and scan as text lines

    Args:
        data_provider: DataContentProvider with the raw data
        depth: not used by the plain scan
        recursive_limit_size: not used by the plain scan

    Return:
        Result of the text scanner for the data

    """
    raw_provider = ByteContentProvider(content=data_provider.data,
                                       file_path=data_provider.file_path,
                                       file_type=data_provider.file_type,
                                       info=f"{data_provider.info}|RAW")
    candidates = self.scanner.scan(raw_provider)
    return candidates
================================================
FILE: credsweeper/deep_scanner/bzip2_scanner.py
================================================
import bz2
import logging
from abc import ABC
from pathlib import Path
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class Bzip2Scanner(AbstractScanner, ABC):
"""Implements bzip2 scanning"""
@staticmethod
def match(data: bytes | bytearray) -> bool:
"""According https://en.wikipedia.org/wiki/Bzip2"""
if data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
and 0x31 <= data[3] <= 0x39 \
and 4 == data.find(b"\x31\x41\x59\x26\x53\x59", 4, 10):
return True
return False
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Extracts data from bzip2 archive and launches data_scan

    Args:
        data_provider: DataContentProvider with bzip2 compressed data
        depth: level of recursion for the scan of decompressed data
        recursive_limit_size: bytes limit; the size of decompressed data is subtracted for the nested scan

    Return:
        Candidates of the nested scan or None when any exception is raised

    """
    try:
        origin = Path(data_provider.file_path)
        inner_path = origin.as_posix()
        if origin.suffix == ".bz2":
            # the decompressed data are reported with the name without the archive extension
            inner_path = inner_path[:-len(".bz2")]
        unpacked_provider = DataContentProvider(data=bz2.decompress(data_provider.data),
                                                file_path=inner_path,
                                                file_type=Util.get_extension(inner_path),
                                                info=f"{data_provider.info}|BZIP2:{origin}")
        nested_limit = recursive_limit_size - len(unpacked_provider.data)
        return self.recursive_scan(unpacked_provider, depth, nested_limit)
    except Exception as bzip2_exc:
        logger.warning("%s:%s", data_provider.file_path, bzip2_exc)
    return None
================================================
FILE: credsweeper/deep_scanner/crx_scanner.py
================================================
import logging
import struct
from abc import ABC
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
logger = logging.getLogger(__name__)
class CrxScanner(AbstractScanner, ABC):
"""Implements CRX files scanning with cut-off prefix"""
@staticmethod
def match(data: bytes | bytearray) -> bool:
"""Returns True if prefix match"""
if data.startswith((b"Cr24\x02\x00\x00\x00", b"Cr24\x03\x00\x00\x00")) and 32 < len(data):
return True
return False
@staticmethod
def zip_extract(data: bytes) -> bytes:
"""Extracts zip payload after signature block"""
pubkey_length = struct.unpack("<I", data[8:12])
signature_length = struct.unpack("<I", data[12:16])
zip_offset = 16 + pubkey_length[0] + signature_length[0]
return data[zip_offset:]
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Tries cut-off header and use ZIP payload

    Args:
        data_provider: DataContentProvider with CRX data
        depth: level of recursion for the scan of the payload
        recursive_limit_size: bytes limit; the payload size is subtracted for the nested scan

    Return:
        Candidates of the nested scan or None when any exception is raised

    """
    try:
        payload = CrxScanner.zip_extract(data_provider.data)
        payload_provider = DataContentProvider(data=payload,
                                               file_path=data_provider.file_path,
                                               file_type=data_provider.file_type,
                                               info=f"{data_provider.info}|CRX")
        return self.recursive_scan(payload_provider, depth, recursive_limit_size - len(payload))
    except Exception as exc:
        logger.warning(exc)
    return None
================================================
FILE: credsweeper/deep_scanner/csv_scanner.py
================================================
import csv
import io
import logging
import re
from abc import ABC
from typing import List, Optional, Dict, Any
from credsweeper.common.constants import MAX_LINE_LENGTH
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider
logger = logging.getLogger(__name__)
class CsvScanner(AbstractScanner, ABC):
"""Implements CSV scanning"""
sniffer = csv.Sniffer()
# do not use space as separator to avoid hallucinations
DELIMITERS = ",;\t|\x1F"
CSV_PATTERN = re.compile(b"[^\r\n]{1,8000}[,;\t|\x1F][^\r\n]{1,8000}")
@staticmethod
def match(data: bytes | bytearray) -> bool:
    """Check if data MAY be in CSV format

    Only the first line is tested with CSV_PATTERN; the end of the line has to be found
    within MAX_LINE_LENGTH bytes.
    """
    end_pos = data.find(b'\n', 0, MAX_LINE_LENGTH)
    if end_pos < 0:
        # classic Mac OS format
        end_pos = data.find(b'\r', 0, MAX_LINE_LENGTH)
    if end_pos < 0:
        return False
    return bool(CsvScanner.CSV_PATTERN.match(data, pos=0, endpos=end_pos))
@classmethod
def get_structure(cls, text: str) -> List[Dict[str, Any]]:
    """Reads a text as CSV standard with guessed dialect

    Args:
        text: the whole CSV text; the first line is used to guess the delimiter

    Return:
        List of rows, each row is a dictionary with column names from the first line

    Raises:
        ValueError: no line end within MAX_LINE_LENGTH symbols or a row with another number of columns
        csv.Error: raised by the csv module, e.g. when the delimiter cannot be determined

    """
    # windows style \r\n is probed first, then unix style \n
    for eol_symbol, line_terminator in (('\r', "\r\n"), ('\n', "\n")):
        first_line_end = text.find(eol_symbol, 0, MAX_LINE_LENGTH)
        if 0 <= first_line_end:
            break
    else:
        raise ValueError(f"No suitable line end found in {MAX_LINE_LENGTH} symbols")

    dialect = cls.sniffer.sniff(text[:first_line_end], delimiters=cls.DELIMITERS)
    reader = csv.DictReader(io.StringIO(text),
                            delimiter=dialect.delimiter,
                            lineterminator=line_terminator,
                            strict=True)
    # check the constant columns number for all rows
    fields_number = len([name for name in reader.fieldnames if name is not None])
    rows = []
    for row in reader:
        if not isinstance(row, dict):
            raise ValueError(f"ERROR: wrong row '{row}'")
        # None means no separator used
        if len(row) != fields_number or None in row.values():
            raise ValueError(f"Different columns number in row '{row}' - mismatch {fields_number}")
        rows.append(row)
    return rows
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Tries to scan each row as structure with column name in key

    Args:
        data_provider: DataContentProvider with the text to be parsed as CSV
        depth: level of recursion for the structure scan
        recursive_limit_size: bytes limit for the nested scan

    Return:
        Candidates of the structure scan; None when no rows were read or any exception is raised

    """
    try:
        rows = self.get_structure(data_provider.text)
        if rows:
            rows_provider = StructContentProvider(struct=rows,
                                                  file_path=data_provider.file_path,
                                                  file_type=data_provider.file_type,
                                                  info=f"{data_provider.info}|CSV")
            # NOTE(review): len() of a row is the number of its columns, not bytes - confirm the limit metric
            cells_number = sum(len(row) for row in rows)
            return self.structure_scan(rows_provider, depth, recursive_limit_size - cells_number)
    except Exception as csv_exc:
        logger.debug("%s:%s", data_provider.file_path, csv_exc)
    return None
================================================
FILE: credsweeper/deep_scanner/deb_scanner.py
================================================
import logging
import struct
from abc import ABC
from typing import List, Optional, Generator, Tuple
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class DebScanner(AbstractScanner, ABC):
"""Implements deb (ar) scanning"""
__header_size = 60
@staticmethod
def match(data: bytes | bytearray) -> bool:
"""According https://en.wikipedia.org/wiki/Deb_(file_format)"""
if data.startswith(b"!<arch>\n"):
return True
return False
@staticmethod
def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
"""Processes sequence of DEB archive and yields offset, name and data"""
offset = 8 # b"!<arch>\n"
data_limit = len(data) - DebScanner.__header_size
while offset <= data_limit:
_data = data[offset:offset + DebScanner.__header_size]
offset += DebScanner.__header_size
# basic header structure
_name, _, _size, __ = struct.unpack('16s32s10s2s', _data)
file_size = int(_size)
if MIN_DATA_LEN < file_size <= len(data) - offset:
_data = data[offset:offset + file_size]
yield offset, _name.decode(encoding=UTF_8).strip().rstrip('/'), _data
offset += file_size if 0 == 1 & file_size else file_size + 1
def data_scan(
        self,  #
        data_provider: DataContentProvider,  #
        depth: int,  #
        recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Extracts data file from .ar (debian) archive and launches data_scan

    Args:
        data_provider: DataContentProvider with the archive
        depth: level of recursion for the scan of the members
        recursive_limit_size: bytes limit; the size of a member is subtracted for its scan

    Return:
        Candidates from all members or None when any exception is raised

    """
    try:
        found: List[Candidate] = []
        for offset, name, member in DebScanner.walk_deb(data_provider.data):
            member_provider = DataContentProvider(data=member,
                                                  file_path=f"{data_provider.file_path}",
                                                  file_type=Util.get_extension(name),
                                                  info=f"{data_provider.info}|DEB:0x{offset:x}:{name}")
            member_limit = recursive_limit_size - len(member)
            found.extend(self.recursive_scan(member_provider, depth, member_limit))
        return found
    except Exception as exc:
        logger.warning(exc)
    return None
================================================
FILE: credsweeper/deep_scanner/deep_scanner.py
================================================
import logging
import re
from typing import List, Any, Tuple, Union, Dict
from credsweeper.common.constants import MIN_DATA_LEN
from credsweeper.config.config import Config
from credsweeper.deep_scanner.byte_scanner import ByteScanner
from credsweeper.deep_scanner.bzip2_scanner import Bzip2Scanner
from credsweeper.deep_scanner.crx_scanner import CrxScanner
from credsweeper.deep_scanner.csv_scanner import CsvScanner
from credsweeper.deep_scanner.deb_scanner import DebScanner
from credsweeper.deep_scanner.docx_scanner import DocxScanner
from credsweeper.deep_scanner.eml_scanner import EmlScanner
from credsweeper.deep_scanner.encoder_scanner import EncoderScanner
from credsweeper.deep_scanner.gzip_scanner import GzipScanner
from credsweeper.deep_scanner.html_scanner import HtmlScanner
from credsweeper.deep_scanner.jclass_scanner import JclassScanner
from credsweeper.deep_scanner.jks_scanner import JksScanner
from credsweeper.deep_scanner.lang_scanner import LangScanner
from credsweeper.deep_scanner.lzma_scanner import LzmaScanner
from credsweeper.deep_scanner.mxfile_scanner import MxfileScanner
from credsweeper.deep_scanner.patch_scanner import PatchScanner
from credsweeper.deep_scanner.pdf_scanner import PdfScanner
from credsweeper.deep_scanner.pkcs_scanner import PkcsScanner
from credsweeper.deep_scanner.png_scanner import PngScanner
from credsweeper.deep_scanner.pptx_scanner import PptxScanner
from credsweeper.deep_scanner.rpm_scanner import RpmScanner
from credsweeper.deep_scanner.rtf_scanner import RtfScanner
from credsweeper.deep_scanner.sqlite3_scanner import Sqlite3Scanner
from credsweeper.deep_scanner.strings_scanner import StringsScanner
from credsweeper.deep_scanner.tar_scanner import TarScanner
from credsweeper.deep_scanner.tmx_scanner import TmxScanner
from credsweeper.deep_scanner.xlsx_scanner import XlsxScanner
from credsweeper.deep_scanner.xml_scanner import XmlScanner
from credsweeper.deep_scanner.zip_scanner import ZipScanner
from credsweeper.deep_scanner.zlib_scanner import ZlibScanner
from credsweeper.file_handler.descriptor import Descriptor
from credsweeper.scanner.scanner import Scanner
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class DeepScanner(
ByteScanner, #
Bzip2Scanner, #
CrxScanner, #
CsvScanner, #
DocxScanner, #
EncoderScanner, #
GzipScanner, #
HtmlScanner, #
JclassScanner, #
JksScanner, #
LangScanner, #
LzmaScanner, #
MxfileScanner, #
EmlScanner, #
PatchScanner, #
PdfScanner, #
PkcsScanner, #
PngScanner, #
PptxScanner, #
RtfScanner, #
RpmScanner, #
Sqlite3Scanner, #
StringsScanner, #
TarScanner, #
DebScanner, #
XmlScanner, #
XlsxScanner, #
ZipScanner, #
ZlibScanner, #
): # yapf: disable
"""Advanced scanner with recursive exploring of data"""
def __init__(self, config: Config, scanner: Scanner) -> None:
    """Initialize Advanced credential scanner.

    Args:
        config: Config object, returned by the `config` property
        scanner: CredSweeper scanner object, returned by the `scanner` property

    """
    self.__scanner = scanner
    self.__config = config
@property
def config(self) -> Config:
    """Config object passed to the constructor"""
    return self.__config
@property
def scanner(self) -> Scanner:
    """Scanner object passed to the constructor"""
    return self.__scanner
# manually crafted dict to detect a media format with first byte, prefix and optionally pattern
# key: the first byte of the data; value: list of pairs (prefix, pattern) where the pattern may be None,
# then only the prefix is checked (see is_media)
# NOTE(review): the byte class used in many patterns lists \x0E and \x1F as two single bytes;
# if the range \x0E-\x1F was intended, the dash is missing - confirm
MEDIA_PATTERNS: Dict[int, List[Tuple[bytes, Union[re.Pattern, None]]]] = {
0x00: [
# JPEG2000
(b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A", None),
# ICO
(b"\x00\x00\x01\x00", None),
# TTF
(b"\x00\x01\x00\x00\x00", None),
# 3gp
(b"\x00\x00\x00", re.compile(b"\x00\x00\x00.ftyp3g")),
# GITCRYPT is not a media but added to use pedantic scan for strings and reduce extra warnings
(b"\x00GITCRYPT\x00", None),
],
0x1A: [
# Matroska
(b"\x1A\x45\xDF\xA3", None),
],
0x7F: [
# ELF signature - to quick pass for strings scanner
(b"\x7FELF", re.compile(b"\x7FELF[\x01\x02][\x01\x02]\x01[\x00-\x12]"))
],
0x89: [
# PNG - can store text chunks inside
(b"\x89PNG\x0D\x0A\x1A\x0A", None),
],
0xFF: [
# JPEG or MPEG-1 Layer 3
(b"\xFF", re.compile(b"\xFF(\xD8\xFF[\xDB\xEE\xE1\xE0\x51]|[\xFB\xF3\xF2])")),
],
ord('8'): [
# PSD
(b"8BPS\x00\x01\x00\x00\x00\x00\x00\x00", None),
# PSB
(b"8BPS\x00\x02\x00\x00\x00\x00\x00\x00", None),
],
ord('B'): [
# BMP
(b"BM", re.compile(b"BM.{2}\x00{4}")),
],
ord('G'): [
# GIF
(b"GIF8", re.compile(b"GIF8[79]a[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")
),
],
ord('I'): [
# TIFF little endian
(b"II", re.compile(b"II[+*]\x00[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
# ID3v2.3 tag for various media (e.g. MP3)
(b"ID3\x03\x00\x00\x00", None),
],
ord('M'): [
# TIFF big endian
(b"MM", re.compile(b"MM\x00[+*][^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('O'): [
# OGG
(b"OggS", re.compile(b"OggS[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
# OpenType font file
(b"OTTO\x00",
re.compile(b"OTTO\x00[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('R'): [
# RIFF and RIFX containers: 4 bytes after the signature and 4 symbols of the form type
(b"RIF",
re.compile(b"RIF[FX].{4}[ 0-9A-Za-z]{4}"
b"[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('X'): [
# Macromedia
(b"XFIR",
re.compile(b"XFIR.{4}[ 0-9A-Za-z]{4}"
b"[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('f'): [
# mp4
(b"ftyp",
re.compile(b"ftyp(isom|MSNV)[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('g'): [
# gimp
(b"gimp xcf",
re.compile(b"gimp xcf (file|v001|v002)\x00"
b"[^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
ord('w'): [
# WOFF 1.0, 2.0
(b"wOF", re.compile(b"wOF[2F][^\x00-\x08\x0C\x0E\x1F\x80-\xFF]{0,4096}[\x00-\x08\x0C\x0E\x1F\x80-\xFF]")),
],
}
@staticmethod
def is_media(data: Union[bytes, bytearray]) -> bool:
"""Returns True if well-known media format found"""
if patterns := DeepScanner.MEDIA_PATTERNS.get(data[0]):
for prefix, pattern in patterns:
# use prefix for speed-up total search
if prefix and data.startswith(prefix) and (pattern is None or pattern.match(data)):
return True
return False
@staticmethod
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
"""Returns possibly scan methods for the data depends on content and fallback scanners

The formats are probed one by one with the match methods of the scanners, so the order
of the checks defines which scanners are selected.

Args:
data: raw data to be analysed; empty, non-bytes or shorter than MIN_DATA_LEN data give no scanners
descriptor: file descriptor - its extension and info are used to refine the choice
depth: remaining level of recursion; some scanners are selected only when it is positive

Return:
Tuple of two lists: deep scanners and fallback scanners

"""
deep_scanners: List[Any] = []
fallback_scanners: List[Any] = []
if not data or not isinstance(data, (bytes, bytearray)) or len(data) < MIN_DATA_LEN:
# Guard clause: reject empty or invalid input data early
pass
elif ZipScanner.match(data):
if 0 < depth:
deep_scanners.append(ZipScanner)
# probably, there might be a docx, xlsx and so on.
# It might be scanned with text representation in third-party libraries.
if descriptor.extension in (".xlsx", ".ods"):
deep_scanners.append(XlsxScanner)
else:
fallback_scanners.append(XlsxScanner)
if ".docx" == descriptor.extension:
deep_scanners.append(DocxScanner)
else:
fallback_scanners.append(DocxScanner)
if ".pptx" == descriptor.extension:
deep_scanners.append(PptxScanner)
else:
fallback_scanners.append(PptxScanner)
# data which are not ZIP but are recognised by XlsxScanner.match
elif XlsxScanner.match(data):
if ".xls" == descriptor.extension:
deep_scanners.append(XlsxScanner)
else:
fallback_scanners.append(XlsxScanner)
elif Bzip2Scanner.match(data):
if 0 < depth:
deep_scanners.append(Bzip2Scanner)
elif LzmaScanner.match(data):
if 0 < depth:
deep_scanners.append(LzmaScanner)
elif TarScanner.match(data):
if 0 < depth:
deep_scanners.append(TarScanner)
elif DebScanner.match(data):
if 0 < depth:
deep_scanners.append(DebScanner)
elif GzipScanner.match(data):
if 0 < depth:
deep_scanners.append(GzipScanner)
elif PdfScanner.match(data):
deep_scanners.append(PdfScanner)
elif PngScanner.match(data):
deep_scanners.append(PngScanner)
elif RpmScanner.match(data):
if 0 < depth:
deep_scanners.append(RpmScanner)
elif JclassScanner.match(data):
deep_scanners.append(JclassScanner)
elif JksScanner.match(data):
deep_scanners.append(JksScanner)
elif Sqlite3Scanner.match(data):
if 0 < depth:
deep_scanners.append(Sqlite3Scanner)
elif PkcsScanner.match(data):
deep_scanners.append(PkcsScanner)
elif CrxScanner.match(data):
if 0 < depth:
deep_scanners.append(CrxScanner)
elif RtfScanner.match(data):
deep_scanners.append(RtfScanner)
fallback_scanners.append(ByteScanner)
# XML based formats: a specific scanner is selected by its own match, XmlScanner is the generic one
elif XmlScanner.match(data):
if HtmlScanner.match(data):
deep_scanners.append(HtmlScanner)
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif MxfileScanner.match(data):
deep_scanners.append(MxfileScanner)
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif TmxScanner.match(data):
deep_scanners.append(TmxScanner)
fallback_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
else:
deep_scanners.append(XmlScanner)
fallback_scanners.append(ByteScanner)
elif EmlScanner.match(data):
if descriptor.extension in (".eml", ".mht"):
deep_scanners.append(EmlScanner)
else:
if 0 < depth:
# a formal patch looks like an eml
deep_scanners.append(PatchScanner)
fallback_scanners.append(EmlScanner)
fallback_scanners.append(ByteScanner)
elif DeepScanner.is_media(data):
# only StringsScanner may be applied for the formats effective
if 0 < depth:
fallback_scanners.append(StringsScanner)
# data which Util.is_binary does not report as binary
elif not Util.is_binary(data):
# keep ByteScanner first to apply real value position if possible
deep_scanners.append(ByteScanner)
if 0 < depth:
deep_scanners.append(PatchScanner)
deep_scanners.append(LangScanner)
if CsvScanner.match(data):
deep_scanners.append(CsvScanner)
if EncoderScanner.match(data):
deep_scanners.append(EncoderScanner)
if ZlibScanner.match(data):
deep_scanners.append(ZlibScanner)
# binary data which did not match any format above
else:
if 0 < depth:
if ZlibScanner.match(data):
deep_scanners.append(ZlibScanner)
fallback_scanners.append(StringsScanner)
else:
deep_scanners.append(StringsScanner)
if not descriptor.info.endswith("|BASE64"):
logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
len(data))
return deep_scanners, fallback_scanners
================================================
FILE: credsweeper/deep_scanner/docx_scanner.py
================================================
import io
import logging
from abc import ABC
from typing import List, Optional
import docx
from docx.document import Document
from docx.oxml import CT_P, CT_Tbl, CT_SectPr, CT_TcPr
from docx.section import Section, _Header, _Footer
from docx.table import _Cell, Table
from docx.text.paragraph import Paragraph
from lxml.etree import _Element
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class DocxScanner(AbstractScanner, ABC):
    """Scanner which extracts the text of a DOCX document and scans it line by line"""

    @staticmethod
    def _iter_table_items(table):
        """Yields the items of every cell of the table, row by row"""
        for table_row in table.rows:
            for table_cell in table_row.cells:
                yield from DocxScanner._iter_block_items(table_cell)

    @staticmethod
    def _iter_block_items(block):
        """Yields paragraphs (and plain xml elements) which are located in the block.

        Supported blocks: Paragraph, header, footer, Section, Document and table cell.
        ValueError is raised for any other type of the block.
        """
        if isinstance(block, Paragraph):
            yield block
            return
        if isinstance(block, (_Header, _Footer)):
            # tables go first, then the paragraphs of the header or the footer
            for nested_table in block.tables:
                yield from DocxScanner._iter_table_items(nested_table)
            yield from block.paragraphs
            return
        if isinstance(block, Section):
            yield from DocxScanner._iter_block_items(block.header)
            yield from DocxScanner._iter_block_items(block.footer)
            return
        if isinstance(block, Document):
            container = block.element.body
        elif isinstance(block, _Cell):
            container = block._tc  # pylint: disable=W0212
        else:
            raise ValueError(f"unrecognised:{type(block)}")
        for item in container.iterchildren():
            if isinstance(item, CT_P):
                yield Paragraph(item, block)
                continue
            if isinstance(item, CT_Tbl):
                yield from DocxScanner._iter_table_items(Table(item, block))
                continue
            if isinstance(item, (CT_TcPr, CT_SectPr)):
                # properties only - there is no text to scan
                continue
            if isinstance(item, _Element):
                yield item
                continue
            logger.warning("Unknown:%s", type(item))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Collects text of the document body, headers and footers and scans the collected lines.

        Returns found candidates or None when the data cannot be processed.
        """
        try:
            document = docx.Document(io.BytesIO(data_provider.data))
            docx_lines: List[str] = [item.text for item in self._iter_block_items(document) if item.text]
            # headers and footers are usually repeated in sections, so the sets are used to skip duplicates
            header_texts = set()
            footer_texts = set()
            for section in document.sections:
                section_headers = [section.first_page_header, section.even_page_header, section.header]
                for header in section_headers:
                    header_texts.update(item.text for item in self._iter_block_items(header) if item.text)
                section_footers = [section.first_page_footer, section.even_page_footer, section.footer]
                for footer in section_footers:
                    footer_texts.update(item.text for item in self._iter_block_items(footer) if item.text)
            # sorting keeps the order of lines reproducible
            docx_lines.extend(sorted(header_texts))
            docx_lines.extend(sorted(footer_texts))
            string_data_provider = StringContentProvider(lines=docx_lines,
                                                         file_path=data_provider.file_path,
                                                         file_type=data_provider.file_type,
                                                         info=f"{data_provider.info}|DOCX")
            return self.scanner.scan(string_data_provider)
        except Exception as docx_exc:
            logger.warning("%s:%s", data_provider.file_path, docx_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/eml_scanner.py
================================================
import email
import logging
from abc import ABC
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class EmlScanner(AbstractScanner, ABC):
    """Scanner for e-mail messages (eml)"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Checks that all fields Date, From, To and Subject are present at a line start.

        See https://datatracker.ietf.org/doc/html/rfc822
        """
        for field in (b"Date:", b"From:", b"To:", b"Subject:"):
            if b"\n" + field in data or data.startswith(field):
                continue
            return False
        return True

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Walks through all parts of the message and scans decoded payload of each part.

        Returns found candidates or None when the data cannot be processed.
        """
        try:
            found: List[Candidate] = []
            message = email.message_from_bytes(data_provider.data)
            for part in message.walk():
                content_type = part.get_content_type()
                body = part.get_payload(decode=True)
                if not isinstance(body, (bytes, str)):
                    # nothing to scan in the part itself
                    continue
                raw_body = body if isinstance(body, bytes) else body.encode()
                if "text/plain" == content_type:
                    text_provider = ByteContentProvider(content=raw_body,
                                                        file_path=data_provider.file_path,
                                                        file_type=data_provider.file_type,
                                                        info=f"{data_provider.info}|EML-TEXT")
                    found.extend(self.scanner.scan(text_provider))
                    continue
                part_provider = DataContentProvider(data=raw_body,
                                                    file_path=data_provider.file_path,
                                                    file_type=data_provider.file_type,
                                                    info=f"{data_provider.info}|EML-DATA")
                part_limit = recursive_limit_size - len(body)
                if "text/html" == content_type and part_provider.represent_as_html(
                        depth, part_limit, self.scanner.keywords_required_substrings_check):
                    html_provider = StringContentProvider(lines=part_provider.lines,
                                                          line_numbers=part_provider.line_numbers,
                                                          file_path=data_provider.file_path,
                                                          file_type=data_provider.file_type,
                                                          info=f"{data_provider.info}|EML-HTML")
                    found.extend(self.scanner.scan(html_provider))
                elif content_type.startswith("application"):
                    found.extend(self.recursive_scan(part_provider, depth, part_limit))
                else:
                    logger.warning("%s:%s:%s cannot be supported", data_provider.file_path, content_type,
                                   type(body))
            return found
        except Exception as eml_exc:
            logger.warning("%s:%s", data_provider.file_path, eml_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/encoder_scanner.py
================================================
import contextlib
import logging
import re
from abc import ABC
from typing import List, Optional
from credsweeper.common.constants import MAX_LINE_LENGTH
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
# the shortest encoded data to be processed: 8 bytes give 12 symbols, e.g. 12345678 -> MTIzNDU2Nzg=
MIN_ENCODED_DATA_LEN = 12


class EncoderScanner(AbstractScanner, ABC):
    """Scanner which decodes base64 data and scans the decoded bytes recursively"""

    BASE64_PATTERN = re.compile(
        rb"(\xFF\xFE|\xFE\xFF)?("
        rb"(?:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/+])|[\s\x00\\])+(?(a)(?(b)(?(c)(=+|$)|(?!x)x)|(?!x)x)|(?!x)x)|"
        rb"(?:(?P<e>[A-Z])|(?P<f>[a-z])|(?P<g>[0-9_-])|[\s\x00\\])+(?(e)(?(f)(?(g)(=+|$)|(?!x)x)|(?!x)x)|(?!x)x))")

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Returns True when the data are long enough and the beginning looks like base64 (whitespaces allowed)"""
        if len(data) < MIN_ENCODED_DATA_LEN:
            return False
        return EncoderScanner.BASE64_PATTERN.match(data, pos=0, endpos=MAX_LINE_LENGTH) is not None

    @staticmethod
    def decode(text: str) -> Optional[bytes]:
        """Removes whitespaces and backslashes from the text and decodes it from base64.

        Any decoding error is suppressed and None is returned in the case.
        """
        with contextlib.suppress(Exception):
            cleaned_text = Util.PEM_CLEANING_PATTERN.sub(r'', text).replace('\\', '')
            return Util.decode_base64(text=cleaned_text, padding_safe=True, urlsafe_detect=True)
        return None

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Decodes the text of the provider from base64 and launches recursive scan for the decoded bytes.

        Returns None when nothing was decoded.
        """
        decoded = EncoderScanner.decode(data_provider.text)
        if not decoded:
            return None
        decoded_data_provider = DataContentProvider(data=decoded,
                                                    file_path=data_provider.file_path,
                                                    file_type=data_provider.file_type,
                                                    info=f"{data_provider.info}|BASE64")
        remaining_limit = recursive_limit_size - len(decoded_data_provider.data)
        return self.recursive_scan(decoded_data_provider, depth, remaining_limit)
================================================
FILE: credsweeper/deep_scanner/gzip_scanner.py
================================================
import gzip
import io
import logging
from abc import ABC
from pathlib import Path
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class GzipScanner(AbstractScanner, ABC):
    """Scanner for gzip compressed data"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Checks the gzip signature with deflate method, see https://www.rfc-editor.org/rfc/rfc1952"""
        return data.startswith(b"\x1F\x8B\x08")

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Decompresses the data and launches recursive scan for the content.

        The extension .gz is removed from the path for the extracted content.
        Returns found candidates or None when the data cannot be processed.
        """
        try:
            with gzip.open(io.BytesIO(data_provider.data)) as archive:
                source_path = Path(data_provider.file_path)
                inner_path = source_path.as_posix()
                if ".gz" == source_path.suffix:
                    inner_path = inner_path[:-3]
                content = archive.read()
            gzip_content_provider = DataContentProvider(data=content,
                                                        file_path=inner_path,
                                                        file_type=Util.get_extension(inner_path),
                                                        info=f"{data_provider.info}|GZIP:{inner_path}")
            remaining_limit = recursive_limit_size - len(gzip_content_provider.data)
            return self.recursive_scan(gzip_content_provider, depth, remaining_limit)
        except Exception as gzip_exc:
            logger.warning("%s:%s", data_provider.file_path, gzip_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/html_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
from credsweeper.common.constants import MAX_LINE_LENGTH
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class HtmlScanner(AbstractScanner, ABC):
    """Scanner for data which look like html"""

    # pairs of opening and closing tags which are used to detect html
    _TAG_PAIRS = (
        (b"<html", b"</html>"),
        (b"<body", b"</body>"),
        (b"<table", b"</table>"),
        (b"<p>", b"</p>"),
        (b"<span>", b"</span>"),
        (b"<div>", b"</div>"),
        (b"<li>", b"</li>"),
        (b"<ol>", b"</ol>"),
        (b"<ul>", b"</ul>"),
        (b"<th>", b"</th>"),
        (b"<tr>", b"</tr>"),
        (b"<td>", b"</td>"),
    )

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Detects html: an opening tag within first MAX_LINE_LENGTH bytes is followed by the closing tag.

        It is supposed the data were recognized as xml before.
        """
        for opening_tag, closing_tag in HtmlScanner._TAG_PAIRS:
            start = data.find(opening_tag, 0, MAX_LINE_LENGTH)
            if start < 0:
                continue
            if start < data.find(closing_tag, start):
                # both tags were found in right order - suppose it is an HTML
                return True
        return False

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Represents the data as html text and scans obtained lines.

        Returns None when the representation result is None and empty list for other negative result.
        """
        result = data_provider.represent_as_html(depth, recursive_limit_size,
                                                 self.scanner.keywords_required_substrings_check)
        if result is None:
            return None
        if not result:
            return []
        string_data_provider = StringContentProvider(lines=data_provider.lines,
                                                     line_numbers=data_provider.line_numbers,
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|HTML")
        return self.scanner.scan(string_data_provider)
================================================
FILE: credsweeper/deep_scanner/jclass_scanner.py
================================================
import io
import logging
import struct
from abc import ABC
from typing import List, Optional
from credsweeper.common.constants import UTF_8
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider
logger = logging.getLogger(__name__)
class JclassScanner(AbstractScanner, ABC):
    """Implements java .class scanning"""

    # Payload size in bytes (the 1-byte tag is excluded) of each fixed-size constant pool entry.
    # See JVMS 4.4 "The Constant Pool". CONSTANT_Utf8 (tag 1) has a variable size and is processed separately.
    _FIXED_ENTRY_SIZES = {
        3: 4,  # CONSTANT_Integer
        4: 4,  # CONSTANT_Float
        5: 8,  # CONSTANT_Long
        6: 8,  # CONSTANT_Double
        7: 2,  # CONSTANT_Class
        8: 2,  # CONSTANT_String
        9: 4,  # CONSTANT_Fieldref
        10: 4,  # CONSTANT_Methodref
        11: 4,  # CONSTANT_InterfaceMethodref
        12: 4,  # CONSTANT_NameAndType
        15: 3,  # CONSTANT_MethodHandle
        16: 2,  # CONSTANT_MethodType
        17: 4,  # CONSTANT_Dynamic
        18: 4,  # CONSTANT_InvokeDynamic
        19: 2,  # CONSTANT_Module
        20: 2,  # CONSTANT_Package
    }

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """According https://en.wikipedia.org/wiki/List_of_file_signatures - java class"""
        return data.startswith(b"\xCA\xFE\xBA\xBE")

    @staticmethod
    def u2(stream: io.BytesIO) -> int:
        """Extracts unsigned 16 bit big-endian.

        struct.error is raised when less than 2 bytes are left in the stream.
        """
        return int(struct.unpack(">H", stream.read(2))[0])

    @staticmethod
    def get_utf8_constants(stream: io.BytesIO) -> List[str]:
        """Extracts only Utf8 constants from the constant pool of java ClassFile.

        The stream must be positioned at constant_pool_count. Entries of other types are skipped.
        Parsing stops with a warning on an unknown tag, constants collected so far are returned.
        IndexError or struct.error is raised when the stream is truncated.
        """
        result = []
        # actual number of items is one less!
        items_counter = JclassScanner.u2(stream) - 1
        while 0 < items_counter:
            items_counter -= 1
            # uint8
            tag = int(stream.read(1)[0])
            if 1 == tag:
                # UTF-8 string in bytes may be bigger than in characters
                length = JclassScanner.u2(stream)
                raw = stream.read(length)
                result.append(raw.decode(encoding=UTF_8, errors="replace"))
                continue
            entry_size = JclassScanner._FIXED_ENTRY_SIZES.get(tag)
            if entry_size is None:
                logger.warning("Unknown tag %s", tag)
                break
            stream.read(entry_size)
            if tag in (5, 6):
                # long and double types use two indexes
                items_counter -= 1
        return result

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts Utf8 constants from the class file and scans them as a structure.

        Returns found candidates or None when the data cannot be parsed.
        """
        try:
            stream = io.BytesIO(data_provider.data)
            stream.read(4)  # magic
            minor = JclassScanner.u2(stream)
            major = JclassScanner.u2(stream)
            constants = JclassScanner.get_utf8_constants(stream)
            struct_content_provider = StructContentProvider(struct=constants,
                                                            file_path=data_provider.file_path,
                                                            file_type=data_provider.file_type,
                                                            info=f"{data_provider.info}|Java.{major}.{minor}")
            # the limit is reduced with total length of the constants in characters
            new_limit = recursive_limit_size - sum(len(x) for x in constants)
            candidates = self.structure_scan(struct_content_provider, depth, new_limit)
            return candidates
        except Exception as jclass_exc:
            logger.warning("%s:%s", data_provider.file_path, jclass_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/jks_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
import jks
from credsweeper.common.constants import Severity, Confidence
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
logger = logging.getLogger(__name__)
class JksScanner(AbstractScanner, ABC):
    """Scanner for java keystore (jks)"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Checks jks signature according https://en.wikipedia.org/wiki/List_of_file_signatures"""
        return data.startswith(b"\xFE\xED\xFE\xED")

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Tries to open the keystore with each password of the bruteforce list.

        A single candidate with the fitted password as a value is returned for the first successful probe.
        None is returned when no password fits.
        """
        for pw_probe in self.config.bruteforce_list:
            value = repr(pw_probe)
            try:
                keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
                # the keystore was opened - the probe is the value of the credential
                has_keys = bool(keystore.private_keys or keystore.secret_keys)
                if has_keys:
                    info = f"{data_provider.info}|JKS:default password"
                    rule_name = f"JKS private key with password {value}"
                else:
                    info = f"{data_provider.info}|JKS:sensitive data"
                    rule_name = f"JKS sensitive data with password {value}"
                candidate = Candidate.get_dummy_candidate(
                    self.config,  #
                    data_provider.file_path,  #
                    data_provider.file_type,  #
                    info,  #
                    rule_name)
                candidate.severity = Severity.HIGH if has_keys else Severity.LOW
                candidate.confidence = Confidence.STRONG if has_keys else Confidence.WEAK
                line_data = candidate.line_data_list[0]
                line_data.line = value
                line_data.value = value
                line_data.value_start = 0
                line_data.value_end = len(value)
                return [candidate]
            except Exception as jks_exc:
                logger.debug("%s:%s:%s", data_provider.file_path, pw_probe, jks_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/lang_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider
logger = logging.getLogger(__name__)
class LangScanner(AbstractScanner, ABC):
    """Scanner for data which may be represented as a structure (a script or a markup language)"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Represents the data as a structure and scans the structure.

        Returns None when the representation result is None and empty list for other negative result.
        """
        result = data_provider.represent_as_structure()
        if result is None:
            return None
        if not result:
            return []
        struct_data_provider = StructContentProvider(struct=data_provider.structure,
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|STRUCT")
        return self.structure_scan(struct_data_provider, depth, recursive_limit_size)
================================================
FILE: credsweeper/deep_scanner/lzma_scanner.py
================================================
import logging
import lzma
from abc import ABC
from pathlib import Path
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class LzmaScanner(AbstractScanner, ABC):
    """Scanner for lzma and xz compressed data"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Checks xz or lzma signature according https://en.wikipedia.org/wiki/List_of_file_signatures"""
        return data.startswith((b"\xFD7zXZ\x00", b"\x5D\x00\x00"))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Decompresses the data and launches recursive scan for the content.

        The extension .xz or .lzma is removed from the path for the extracted content.
        Returns found candidates or None when the data cannot be processed.
        """
        try:
            source_path = Path(data_provider.file_path)
            inner_path = source_path.as_posix()
            for extension in (".xz", ".lzma"):
                if extension == source_path.suffix:
                    inner_path = inner_path[:-len(extension)]
                    break
            content = lzma.decompress(data_provider.data)
            lzma_content_provider = DataContentProvider(data=content,
                                                        file_path=inner_path,
                                                        file_type=Util.get_extension(inner_path),
                                                        info=f"{data_provider.info}|LZMA:{source_path}")
            remaining_limit = recursive_limit_size - len(lzma_content_provider.data)
            return self.recursive_scan(lzma_content_provider, depth, remaining_limit)
        except Exception as lzma_exc:
            logger.warning("%s:%s", data_provider.file_path, lzma_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/mxfile_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
from bs4 import BeautifulSoup
from lxml import etree
from credsweeper.common.constants import MAX_LINE_LENGTH
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class MxfileScanner(AbstractScanner, ABC):
    """Scanner for drawio diagrams (mxfile)"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Detects mxfile: the opening tag within first MAX_LINE_LENGTH bytes is followed by the closing tag.

        It is supposed the data were recognized as xml before.
        """
        start = data.find(b"<mxfile", 0, MAX_LINE_LENGTH)
        if start < 0:
            return False
        return start < data.find(b"</mxfile>", start)

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Collects text lines from attribute "value" of each mxCell element and scans the lines.

        Each line gets the source line number of the element it was obtained from.
        Returns found candidates or None when the data cannot be processed.
        """
        try:
            lines = []
            line_numbers = []
            root = etree.fromstring(data_provider.text)
            for element in root.iter():
                if "mxCell" != getattr(element, "tag"):
                    continue
                line_number = element.sourceline
                attributes = getattr(element, "attrib")
                if attributes is None:
                    continue
                value = attributes.get("value")
                if not value:
                    continue
                # the value may contain html markup
                html = BeautifulSoup(value, features="html.parser")
                if not html:
                    continue
                _, value_lines, __ = data_provider.simple_html_representation(html)
                for value_line in value_lines:
                    lines.append(value_line)
                    line_numbers.append(line_number)
            mxfile_data_provider = StringContentProvider(lines=lines,
                                                         line_numbers=line_numbers,
                                                         file_path=data_provider.file_path,
                                                         file_type=data_provider.file_type,
                                                         info=f"{data_provider.info}|MXFILE")
            return self.scanner.scan(mxfile_data_provider)
        except Exception as exc:
            logger.warning(exc)
        return None
================================================
FILE: credsweeper/deep_scanner/patch_scanner.py
================================================
import io
import logging
from abc import ABC
from typing import List, Optional
from credsweeper.common.constants import DiffRowType
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.patches_provider import PatchesProvider
logger = logging.getLogger(__name__)
class PatchScanner(AbstractScanner, ABC):
    """Scanner for data in patch (diff) format"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans added and then deleted rows of the patch.

        Path and info of found line data are updated to refer to the scanned data provider.
        Returns found candidates or None when the data cannot be processed.
        """
        try:
            found: List[Candidate] = []
            # the same limit is applied for both kinds of rows
            remaining_limit = recursive_limit_size - len(data_provider.data)
            for change_type in (DiffRowType.ADDED, DiffRowType.DELETED):
                patch_paths = [(data_provider.file_path, io.BytesIO(data_provider.data))]
                patches_provider = PatchesProvider(patch_paths, change_type=change_type)
                for scannable_file in patches_provider.get_scannable_files(self.config):
                    found.extend(self.scan(scannable_file, depth, remaining_limit))
            # update the line data for deep scan only
            for candidate in found:
                for line_data in candidate.line_data_list:
                    line_data.path = data_provider.file_path
                    line_data.info = f"{data_provider.info}|PATCH:{line_data.info}"
            return found
        except Exception as patch_exc:
            logger.warning("%s:%s", data_provider.file_path, patch_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/pdf_scanner.py
================================================
import io
import logging
from abc import ABC
from typing import List, Optional
from pdfminer.high_level import extract_pages
from pdfminer.layout import LAParams, LTText, LTItem
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider, MIN_DATA_LEN
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class PdfScanner(AbstractScanner, ABC):
"""Implements pdf scanning"""
@staticmethod
def match(data: bytes | bytearray) -> bool:
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf

Returns True when the data start with the signature "%PDF-"
"""
if data.startswith(b"%PDF-"):
return True
return False
def data_scan(
self, #
data_provider: DataContentProvider, #
depth: int, #
recursive_limit_size: int) -> Optional[List[Candidate]]:
"""Tries to scan PDF elements recursively and the whole text on page as strings

Returns the list of found candidates or None when an exception was raised during the processing
"""
# PyPDF2 - https://github.com/py-pdf/pypdf/issues/1328 text in table is merged without spaces
# pdfminer.six - splits text in table to many lines. Allows to walk through elements
try:
candidates = []
# pages are extracted from an in-memory stream with default layout analysis parameters
for page in extract_pages(io.BytesIO(data_provider.data), laparams=LAParams()):
for element in page:
if isinstance(element, LTText):
# surrounding whitespaces of the element text are dropped before the scan
element_text = element.get_text().strip()
if 0 < depth and element_text:
if MIN_DATA_LEN < len(element_text):
# the text longer than MIN_DATA_LEN is encoded to bytes and scanned recursively
pdf_content_provider = DataContentProvider(
data=element_text.encode(),
file_path=data_provider.file_path,
file_type=data_provider.file_type,
info=f"{data_provider.info}|PDF:{page.pageid}")
# the recursive limit is reduced with the size of the encoded text
new_limit = recursive_limit_size - len(pdf_content_provider.data)
element_candidates = self.recursive_scan(pdf_content_provider, depth, new_limit)
candidates.extend(element_candidates)
else:
# otherwise the text is scanned as a single line
string_data_provider = StringContentProvider(lines=[element_text],
file_path=data_provider.file_path,
file_type=data_provider.file_type,
info=f"{data_provider.info}|PDF:{page.pageid}")
pdf_candidates = self.scanner.scan(string_data_provider)
candidates.extend(pdf_candidates)
elif isinstance(element, LTItem):
# other layout items are skipped silently
pass
else:
logger.warning("Unsupported %s", element)
return candidates
except Exception as pdf_exc:
# any failure is logged with the file path
logger.warning("%s:%s", data_provider.file_path, pdf_exc)
return None
================================================
FILE: credsweeper/deep_scanner/pkcs_scanner.py
================================================
import base64
import logging
from abc import ABC
from typing import List, Optional, Union
from credsweeper.common.constants import Severity, Confidence
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class PkcsScanner(AbstractScanner, ABC):
    """Implements pkcs12 scanning"""

    @staticmethod
    def match(data: Union[bytes, bytearray]) -> bool:
        """Matched ASN1 structure - returns True when Util.get_asn1_size gives a truthy value for the data"""
        return bool(Util.get_asn1_size(data))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Tries to scan PKCS12 to open with standard password

        Each item of the bruteforce list is probed as a password, an empty item means no password.
        Returns a list with single candidate for the first loaded private key,
        an empty list when the loaded key does not pass the check,
        or None when the key was not loaded with any probe.
        """
        for pw_probe in self.config.bruteforce_list:
            try:
                # the password must be bytes, None is used to load the key without a password
                password = pw_probe.encode() if pw_probe else None
                if pkey := Util.load_pk(data_provider.data, password):
                    if not Util.check_pk(pkey):
                        logger.debug("False alarm %s", data_provider.info)
                        return []
                    candidate = Candidate.get_dummy_candidate(
                        self.config,  #
                        data_provider.file_path,  #
                        data_provider.file_type,  #
                        info=f"{data_provider.info}|PKCS_PASSWORD:{repr(password)}",  #
                        rule_name=f"PKCS with password {repr(pw_probe)}" if pw_probe else "PKCS without password")
                    # binary data are kept in the line as base64 text
                    candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
                    candidate.line_data_list[0].value = repr(password)
                    # high severity is assigned to private key rules
                    candidate.severity = Severity.HIGH
                    candidate.confidence = Confidence.STRONG
                    return [candidate]
            except Exception as pkcs_exc:
                # the probe has failed - try the next one
                logger.debug("%s:%s:%s", data_provider.file_path, pw_probe, pkcs_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/png_scanner.py
================================================
import logging
import struct
from abc import ABC
from typing import List, Optional, Generator, Tuple
from credsweeper.common.constants import LATIN_1, UTF_8
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
logger = logging.getLogger(__name__)
class PngScanner(AbstractScanner, ABC):
    """Scanner for text chunks of PNG images"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """Checks PNG signature at the beginning of the data"""
        return data.startswith(b"\x89PNG\r\n\x1a\n")

    @staticmethod
    def yield_png_chunks(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
        """Walks through PNG chunks and yields offset, description and data of text chunks (tEXt, zTXt, iTXt).

        The walk is finished at IEND chunk or when the data are over. CRC of chunks is not verified.
        ValueError is raised for a chunk which exceeds the data, for unsupported compression
        and for a text chunk without expected zero separator.
        """
        # the signature b"\x89PNG\r\n\x1a\n" is skipped
        offset = 8
        # the smallest chunk has 12 bytes: size, type and crc
        last_chunk_offset = len(data) - 12
        while offset <= last_chunk_offset:
            chunk_size = struct.unpack_from(">I", data, offset)[0]
            chunk_type = data[offset + 4:offset + 8]
            offset += 8
            if len(data) < offset + chunk_size:
                raise ValueError(f"PNG chunk size {chunk_size} exceeds data limit 0x{offset:x}")
            if b"IEND" == chunk_type:
                # https://www.w3.org/TR/png/#11IEND
                return
            if b"tEXt" == chunk_type:
                # https://www.w3.org/TR/png/#11tEXt
                keyword, text_data = data[offset:offset + chunk_size].split(b'\0', 1)
                text_keyword = keyword.decode(encoding=LATIN_1, errors='strict')
                yield offset, f"PNG_TEXT:{text_keyword}", text_data
            elif b"zTXt" == chunk_type:
                # https://www.w3.org/TR/png/#11zTXt
                keyword, ztxt_data = data[offset:offset + chunk_size].split(b'\0', 1)
                if not ztxt_data.startswith(b'\0'):
                    raise ValueError(f"Unsupported compression method {ztxt_data[0]}")
                ztxt_keyword = keyword.decode(encoding=LATIN_1, errors='strict')
                # the byte of compression method is dropped
                yield offset, f"PNG_ZTXT:{ztxt_keyword}", ztxt_data[1:]
            elif b"iTXt" == chunk_type:
                # https://www.w3.org/TR/png/#11iTXt
                keyword, itxt_data = data[offset:offset + chunk_size].split(b'\0', 1)
                # compression flag and compression method
                if b"\x00\x00" == itxt_data[:2]:
                    compression_mark = '0'
                elif b"\x01\x00" == itxt_data[:2]:
                    compression_mark = '1'
                else:
                    raise ValueError(f"Unsupported compression {repr(itxt_data[:2])}")
                lang_tag, itxt_data = itxt_data[2:].split(b'\0', 1)
                trans_key, itxt_data = itxt_data.split(b'\0', 1)
                itxt_keyword = keyword.decode(encoding=UTF_8)
                itxt_lang = lang_tag.decode(encoding=UTF_8)
                itxt_trans = trans_key.decode(encoding=UTF_8)
                yield (offset, f"PNG_ITXT_{compression_mark}"
                       f":{itxt_keyword}"
                       f":{itxt_lang}"
                       f":{itxt_trans}", itxt_data)
            # go to the next chunk, crc verification is skipped
            offset += chunk_size + 4

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Launches recursive scan for data of each text chunk of the image.

        Returns found candidates or None when the data cannot be processed.
        """
        try:
            found: List[Candidate] = []
            for offset, chunk_type, chunk_data in PngScanner.yield_png_chunks(data_provider.data):
                png_content_provider = DataContentProvider(data=chunk_data,
                                                           file_path=data_provider.file_path,
                                                           file_type=data_provider.file_type,
                                                           info=f"{data_provider.info}|{chunk_type}:0x{offset:x}")
                remaining_limit = recursive_limit_size - len(chunk_data)
                found.extend(self.recursive_scan(png_content_provider, depth, remaining_limit))
            return found
        except Exception as exc:
            logger.warning(exc)
        return None
================================================
FILE: credsweeper/deep_scanner/pptx_scanner.py
================================================
import io
import logging
from abc import ABC
from typing import List, Optional
from pptx import Presentation
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class PptxScanner(AbstractScanner, ABC):
"""Implements pptx scanning"""
def data_scan(
self, #
data_provider: DataContentProvider, #
depth: int, #
recursive_limit_size: int) -> Optional[List[Candidate]]:
"""Tries to scan pptx text elements for all slides

Returns the list of found candidates or None when an exception was raised during the processing
"""
try:
candidates = []
pptx_lines = []
# the presentation is loaded from an in-memory stream
presentation = Presentation(io.BytesIO(data_provider.data))
for n, slide in enumerate(presentation.slides):
for shape in slide.shapes:
# only shapes with a text frame give text; each paragraph becomes a line
if shape.has_text_frame:
for paragraph in shape.text_frame.paragraphs:
pptx_lines.append(paragraph.text)
# the info is suffixed with 1-based slide number
string_data_provider = StringContentProvider(lines=pptx_lines,
file_path=data_provider.file_path,
file_type=data_provider.file_type,
info=f"{data_provider.info}|PPTX:{n+1}")
pptx_candidates = self.scanner.scan(string_data_provider)
candidates.extend(pptx_candidates)
return candidates
except Exception as pptx_exc:
# any failure is logged with the file path
logger.warning("%s:%s", data_provider.file_path, pptx_exc)
return None
================================================
FILE: credsweeper/deep_scanner/rpm_scanner.py
================================================
import io
import logging
from abc import ABC
from typing import List, Optional
import rpmfile
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class RpmScanner(AbstractScanner, ABC):
    """Implements rpm scanning"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
        return data.startswith(b"\xED\xAB\xEE\xDB")

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts files one by one from the package and launches recursive scan.

        Directories, members excluded by ``FilePathExtractor.check_exclude_file``
        and members bigger than ``recursive_limit_size`` are skipped.

        Args:
            data_provider: provider with the raw package bytes
            depth: recursion depth forwarded unchanged to ``recursive_scan``
            recursive_limit_size: size budget; the extracted member length is
                subtracted before the value is forwarded to ``recursive_scan``

        Return:
            List of found candidates (may be empty) or None when the package
            cannot be processed

        """
        try:
            candidates: List[Candidate] = []
            with rpmfile.open(fileobj=io.BytesIO(data_provider.data)) as rpm_file:
                for member in rpm_file.getmembers():
                    # skip directory
                    if 0 != member.isdir:
                        continue
                    if FilePathExtractor.check_exclude_file(self.config, member.name):
                        continue
                    if 0 > recursive_limit_size - member.size:
                        # use the same ``name`` attribute as the rest of the method:
                        # a failing attribute lookup here would abort the whole package scan
                        logger.warning("%s: size %s is over limit %s depth:%s", member.name, member.size,
                                       recursive_limit_size, depth)
                        continue
                    rpm_content_provider = DataContentProvider(data=rpm_file.extractfile(member).read(),
                                                               file_path=data_provider.file_path,
                                                               file_type=Util.get_extension(member.name),
                                                               info=f"{data_provider.info}|RPM:{member.name}")
                    new_limit = recursive_limit_size - len(rpm_content_provider.data)
                    rpm_candidates = self.recursive_scan(rpm_content_provider, depth, new_limit)
                    candidates.extend(rpm_candidates)
            return candidates
        except Exception as rpm_exc:
            logger.warning("%s:%s", data_provider.file_path, rpm_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/rtf_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional
from striprtf import striprtf
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class RtfScanner(AbstractScanner, ABC):
    """Implements RTF (Rich Text Format) scanning"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """According https://en.wikipedia.org/wiki/List_of_file_signatures - Rich Text Format

        The data must start with the RTF version 1 header and end with a closing brace.
        """
        has_header = data.startswith(b"{\\rtf1")
        return has_header and data.endswith(b"}")

    @staticmethod
    def get_lines(text: str) -> List[str]:
        """Converts RTF markup to plain text and splits the result into lines"""
        return Util.split_text(striprtf.rtf_to_text(text))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans data as RTF

        Return:
            Candidates found in the extracted text or None when the extraction
            or the scan raised an exception

        """
        try:
            plain_lines = RtfScanner.get_lines(data_provider.text)
            rtf_provider = StringContentProvider(lines=plain_lines,
                                                 file_path=data_provider.file_path,
                                                 file_type=data_provider.file_type,
                                                 info=f"{data_provider.info}|RTF")
            return self.scanner.scan(rtf_provider)
        except Exception as rtf_exc:
            logger.warning("%s:%s", data_provider.file_path, rtf_exc)
        return None
================================================
FILE: credsweeper/deep_scanner/sqlite3_scanner.py
================================================
import logging
import os.path
import sqlite3
import sys
import tempfile
from abc import ABC
from typing import List, Optional, Tuple, Any, Generator
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.struct_content_provider import StructContentProvider
logger = logging.getLogger(__name__)
class Sqlite3Scanner(AbstractScanner, ABC):
    """Implements SQLite3 database scanning"""

    @staticmethod
    def match(data: bytes | bytearray) -> bool:
        """According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
        return data.startswith(b"SQLite format 3\0")

    @staticmethod
    def __walk(sqlite3db) -> Generator[Tuple[str, Any], None, None]:
        """Yields (table name, row as dict) for every row of every user table.

        A table that cannot be read is reported to the log and skipped.
        """
        sqlite3db.row_factory = sqlite3.Row
        cursor = sqlite3db.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
        # the table list is fetched completely because the cursor is reused below
        for table in cursor.fetchall():
            table_name = table[0]
            # quote the identifier: the name comes from the scanned file and may contain
            # spaces, quotes or be an SQL keyword
            quoted_name = '"' + str(table_name).replace('"', '""') + '"'
            try:
                cursor.execute(f"SELECT * FROM {quoted_name}")
                for row in cursor:
                    yield table_name, dict(row)
            except sqlite3.DatabaseError as exc:
                logger.warning("Error reading table %s: %s", table_name, exc)

    @staticmethod
    def walk_sqlite(data: bytes) -> Generator[Tuple[str, Any], None, None]:
        """Yields data from sqlite3 database

        Args:
            data: raw bytes of a SQLite3 database file

        Return:
            Generator of (table name, row as dict) pairs

        """
        if 10 < sys.version_info.minor:
            # Added in version 3.11
            sqlite3db = sqlite3.connect(":memory:")
            try:
                sqlite3db.deserialize(data)  # type: ignore
                yield from Sqlite3Scanner.__walk(sqlite3db)
            finally:
                # connection used as a context manager is not closed on exit - close it explicitly
                sqlite3db.close()
        elif "nt" != os.name:
            # a tmpfile has to be used. TODO: remove when 3.10 will deprecate
            with tempfile.NamedTemporaryFile(suffix=".sqlite") as t:
                t.write(data)
                t.flush()
                sqlite3db = sqlite3.connect(t.name)
                try:
                    yield from Sqlite3Scanner.__walk(sqlite3db)
                finally:
                    sqlite3db.close()
        else:
            # windows trick. TODO: remove when 3.10 will deprecate
            with tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite") as t:
                t.write(data)
                t.flush()
            # the temporary file is closed here, so it can be opened by sqlite and removed afterwards
            try:
                sqlite3db = sqlite3.connect(t.name)
                try:
                    yield from Sqlite3Scanner.__walk(sqlite3db)
                finally:
                    sqlite3db.close()
            finally:
                # remove the file even when the iteration failed or was stopped early
                if os.path.exists(t.name):
                    os.remove(t.name)

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans every row of every table of a SQLite3 database as a structure.

        Args:
            data_provider: provider with the raw database bytes
            depth: recursion depth forwarded unchanged to ``structure_scan``
            recursive_limit_size: size budget; the database size is subtracted
                before the value is forwarded to ``structure_scan``

        Return:
            List of found candidates (may be empty) or None when the database
            cannot be processed

        """
        try:
            candidates: List[Candidate] = []
            new_limit = recursive_limit_size - len(data_provider.data)
            for table, row in self.walk_sqlite(data_provider.data):
                struct_content_provider = StructContentProvider(struct=row,
                                                                file_path=data_provider.file_path,
                                                                file_type=data_provider.file_type,
                                                                info=f"{data_provider.info}|SQLite3.{table}")
                if new_candidates := self.structure_scan(struct_content_provider, depth, new_limit):
                    candidates.extend(new_candidates)
            return candidates
        except Exception as exc:
            logger.warning("%s:%s", data_provider.file_path, exc)
        return None
================================================
FILE: credsweeper/deep_scanner/strings_scanner.py
================================================
import logging
from abc import ABC
from typing import List, Optional, Tuple
from credsweeper.common.constants import MIN_DATA_LEN
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
logger = logging.getLogger(__name__)
class StringsScanner(AbstractScanner, ABC):
    """Implements known binary file scanning with ASCII strings representations"""

    @staticmethod
    def get_enumerated_lines(data: bytes) -> List[Tuple[int, str]]:
        """Collects runs of printable ASCII from binary data.

        A run consists of TAB, SPACE and visible ASCII symbols. Only runs of at
        least ``MIN_DATA_LEN`` symbols are kept. The offset of the first symbol
        of a run is used instead of a line number.
        """
        found: List[Tuple[int, str]] = []
        run_start = -1
        run_chars: List[str] = []
        for position, byte in enumerate(data):
            is_text = 0x09 == byte or 0x20 <= byte <= 0x7E
            if not is_text:
                # the run is over: keep it when it is long enough, then start from scratch
                if MIN_DATA_LEN <= len(run_chars):
                    found.append((run_start, ''.join(run_chars)))
                run_start = -1
                run_chars.clear()
                continue
            if 0 > run_start:
                # the first symbol of a run defines its offset
                run_start = position
            run_chars.append(chr(byte))
        # a run may last till the end of the data
        if MIN_DATA_LEN <= len(run_chars):
            found.append((run_start, ''.join(run_chars)))
        return found

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scan binary files for ASCII strings

        Return:
            Candidates found in the extracted strings or an empty list when
            no strings were extracted

        """
        strings = StringsScanner.get_enumerated_lines(data_provider.data)
        if not strings:
            return None if strings is None else []
        offsets = [item[0] for item in strings]
        texts = [item[1] for item in strings]
        string_data_provider = StringContentProvider(lines=texts,
                                                     line_numbers=offsets,
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|STRINGS")
        return self.scanner.scan(string_data_provider)
================================================
FILE: credsweeper/deep_scanner/tar_scanner.py
================================================
import contextlib
import io
import logging
import tarfile
from abc import ABC
from typing import List, Optional
from credsweeper.credentials.candidate import Candidate
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
from credsweeper.file_handler.data_content_provider import DataContentProvider
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
from credsweeper.utils.util import Util
logger = logging.getLogger(__name__)
class TarScanner(AbstractScanner, ABC):
"""Implements tar scanning"""
@staticmethod
def match(data: bytes | bytearray) -> bool:
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
if 512 <= len(data) and 257 == data.find(b"\x75\x73\x74\x61\x72", 257, 262) \
and (262 == data.find(b"\x00\x30\x30", 262, 265)
or 262 == data.find(b"\x20\x20\x00", 262, 265)):
with contextlib.suppress(Exception):
chksum = tarfile.nti(data[148:156]) # type: ignore
unsigned_chksum, signed_chksum = tarfile.calc_chksums(data) # type: ignore
if chksum == unsigned_chksum or chksum == signed_chksum:
return True
return False
def data_scan(
self, #
data_provider: DataContentProvider, #
depth: int, #
recursive_limit_size: int) -> Optional[List[Candidate]]:
"""Extracts files one by
gitextract_f9me649i/
├── LICENSE
├── README.md
├── SECURITY.md
├── action.yml
├── credsweeper/
│ ├── __init__.py
│ ├── __main__.py
│ ├── app.py
│ ├── common/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── keyword_checklist.py
│ │ ├── keyword_checklist.txt
│ │ ├── keyword_pattern.py
│ │ └── morpheme_checklist.txt
│ ├── config/
│ │ ├── __init__.py
│ │ └── config.py
│ ├── credentials/
│ │ ├── __init__.py
│ │ ├── augment_candidates.py
│ │ ├── candidate.py
│ │ ├── candidate_group_generator.py
│ │ ├── candidate_key.py
│ │ ├── credential_manager.py
│ │ └── line_data.py
│ ├── deep_scanner/
│ │ ├── __init__.py
│ │ ├── abstract_scanner.py
│ │ ├── byte_scanner.py
│ │ ├── bzip2_scanner.py
│ │ ├── crx_scanner.py
│ │ ├── csv_scanner.py
│ │ ├── deb_scanner.py
│ │ ├── deep_scanner.py
│ │ ├── docx_scanner.py
│ │ ├── eml_scanner.py
│ │ ├── encoder_scanner.py
│ │ ├── gzip_scanner.py
│ │ ├── html_scanner.py
│ │ ├── jclass_scanner.py
│ │ ├── jks_scanner.py
│ │ ├── lang_scanner.py
│ │ ├── lzma_scanner.py
│ │ ├── mxfile_scanner.py
│ │ ├── patch_scanner.py
│ │ ├── pdf_scanner.py
│ │ ├── pkcs_scanner.py
│ │ ├── png_scanner.py
│ │ ├── pptx_scanner.py
│ │ ├── rpm_scanner.py
│ │ ├── rtf_scanner.py
│ │ ├── sqlite3_scanner.py
│ │ ├── strings_scanner.py
│ │ ├── tar_scanner.py
│ │ ├── tmx_scanner.py
│ │ ├── xlsx_scanner.py
│ │ ├── xml_scanner.py
│ │ ├── zip_scanner.py
│ │ └── zlib_scanner.py
│ ├── file_handler/
│ │ ├── __init__.py
│ │ ├── abstract_provider.py
│ │ ├── analysis_target.py
│ │ ├── byte_content_provider.py
│ │ ├── content_provider.py
│ │ ├── data_content_provider.py
│ │ ├── descriptor.py
│ │ ├── diff_content_provider.py
│ │ ├── file_path_extractor.py
│ │ ├── files_provider.py
│ │ ├── patches_provider.py
│ │ ├── string_content_provider.py
│ │ ├── struct_content_provider.py
│ │ └── text_content_provider.py
│ ├── filters/
│ │ ├── __init__.py
│ │ ├── filter.py
│ │ ├── group/
│ │ │ ├── __init__.py
│ │ │ ├── general_keyword.py
│ │ │ ├── general_pattern.py
│ │ │ ├── group.py
│ │ │ ├── password_keyword.py
│ │ │ ├── token_pattern.py
│ │ │ ├── url_credentials_group.py
│ │ │ ├── weird_base36_token.py
│ │ │ └── weird_base64_token.py
│ │ ├── line_git_binary_check.py
│ │ ├── line_specific_key_check.py
│ │ ├── line_uue_part_check.py
│ │ ├── value_allowlist_check.py
│ │ ├── value_array_dictionary_check.py
│ │ ├── value_atlassian_token_check.py
│ │ ├── value_azure_token_check.py
│ │ ├── value_base32_data_check.py
│ │ ├── value_base64_data_check.py
│ │ ├── value_base64_encoded_pem_check.py
│ │ ├── value_base64_key_check.py
│ │ ├── value_base64_part_check.py
│ │ ├── value_basic_auth_check.py
│ │ ├── value_blocklist_check.py
│ │ ├── value_camel_case_check.py
│ │ ├── value_dictionary_keyword_check.py
│ │ ├── value_discord_bot_check.py
│ │ ├── value_entropy_base32_check.py
│ │ ├── value_entropy_base36_check.py
│ │ ├── value_entropy_base64_check.py
│ │ ├── value_entropy_base_check.py
│ │ ├── value_file_path_check.py
│ │ ├── value_github_check.py
│ │ ├── value_grafana_check.py
│ │ ├── value_grafana_service_check.py
│ │ ├── value_hex_number_check.py
│ │ ├── value_jfrog_token_check.py
│ │ ├── value_json_web_key_check.py
│ │ ├── value_json_web_token_check.py
│ │ ├── value_last_word_check.py
│ │ ├── value_length_check.py
│ │ ├── value_method_check.py
│ │ ├── value_morphemes_check.py
│ │ ├── value_not_allowed_pattern_check.py
│ │ ├── value_not_part_encoded_check.py
│ │ ├── value_number_check.py
│ │ ├── value_pattern_check.py
│ │ ├── value_sealed_secret_check.py
│ │ ├── value_search_check.py
│ │ ├── value_similarity_check.py
│ │ ├── value_split_keyword_check.py
│ │ ├── value_string_type_check.py
│ │ ├── value_token_base32_check.py
│ │ ├── value_token_base36_check.py
│ │ ├── value_token_base64_check.py
│ │ ├── value_token_base_check.py
│ │ └── value_token_check.py
│ ├── logger/
│ │ ├── __init__.py
│ │ └── logger.py
│ ├── main.py
│ ├── ml_model/
│ │ ├── __init__.py
│ │ ├── features/
│ │ │ ├── __init__.py
│ │ │ ├── entropy_evaluation.py
│ │ │ ├── feature.py
│ │ │ ├── file_extension.py
│ │ │ ├── has_html_tag.py
│ │ │ ├── is_secret_numeric.py
│ │ │ ├── length_of_attribute.py
│ │ │ ├── morpheme_dense.py
│ │ │ ├── rule_name.py
│ │ │ ├── rule_severity.py
│ │ │ ├── search_in_attribute.py
│ │ │ ├── word_in.py
│ │ │ ├── word_in_path.py
│ │ │ ├── word_in_postamble.py
│ │ │ ├── word_in_preamble.py
│ │ │ ├── word_in_transition.py
│ │ │ ├── word_in_value.py
│ │ │ └── word_in_variable.py
│ │ ├── ml_config.json
│ │ ├── ml_model.onnx
│ │ └── ml_validator.py
│ ├── py.typed
│ ├── rules/
│ │ ├── __init__.py
│ │ ├── config.yaml
│ │ └── rule.py
│ ├── scanner/
│ │ ├── __init__.py
│ │ ├── scan_type/
│ │ │ ├── __init__.py
│ │ │ ├── multi_pattern.py
│ │ │ ├── pem_key_pattern.py
│ │ │ ├── scan_type.py
│ │ │ └── single_pattern.py
│ │ └── scanner.py
│ ├── secret/
│ │ ├── config.json
│ │ └── log.yaml
│ └── utils/
│ ├── __init__.py
│ ├── hop_stat.py
│ ├── pem_key_detector.py
│ └── util.py
├── docs/
│ ├── Makefile
│ ├── README.md
│ ├── howto/
│ │ └── how-to-contribute.md
│ ├── make.bat
│ ├── requirements.txt
│ └── source/
│ ├── api.rst
│ ├── apps_config.rst
│ ├── conf.py
│ ├── credsweeper.common.rst
│ ├── credsweeper.config.rst
│ ├── credsweeper.credentials.rst
│ ├── credsweeper.deep_scanner.rst
│ ├── credsweeper.file_handler.rst
│ ├── credsweeper.filters.group.rst
│ ├── credsweeper.filters.rst
│ ├── credsweeper.logger.rst
│ ├── credsweeper.ml_model.features.rst
│ ├── credsweeper.ml_model.rst
│ ├── credsweeper.rst
│ ├── credsweeper.rules.rst
│ ├── credsweeper.scanner.rst
│ ├── credsweeper.scanner.scan_type.rst
│ ├── credsweeper.utils.rst
│ ├── develop.rst
│ ├── guide.rst
│ ├── how_to_contribute.rst
│ ├── index.rst
│ ├── install.rst
│ ├── overall_architecture.rst
│ └── rules_config.rst
├── experiment/
│ ├── README.md
│ ├── __init__.py
│ ├── data_loader.py
│ ├── evaluate_model.py
│ ├── features.py
│ ├── hyperparameters.py
│ ├── log_callback.py
│ ├── main.py
│ ├── main.sh
│ ├── ml_model.py
│ ├── model_config_preprocess.py
│ ├── plot.py
│ ├── prepare_data.py
│ ├── requirements.txt
│ ├── tf2onnx/
│ │ └── tf2onnx.sh
│ ├── tools/
│ │ ├── base64_test.py
│ │ ├── entropy_test.py
│ │ ├── morpheme_test.py
│ │ └── strength_test.py
│ └── train.py
├── fuzz/
│ ├── README.md
│ ├── __main__.py
│ ├── auxilary.py
│ ├── coveraging.sh
│ ├── fuzzing.sh
│ ├── minimizing.sh
│ ├── re-fuzzing.sh
│ ├── reducing.sh
│ └── requirements.txt
├── pyproject.toml
├── pytest.ini
├── requirements.txt
└── tests/
├── README.md
├── __init__.py
├── common/
│ ├── __init__.py
│ ├── test_confidence.py
│ ├── test_keyword_checklist.py
│ ├── test_keyword_pattern.py
│ ├── test_regex.py
│ └── test_severity.py
├── config/
│ ├── __init__.py
│ └── test_config.py
├── conftest.py
├── credentials/
│ ├── __init__.py
│ ├── test_augment_candidates.py
│ ├── test_credential_manager.py
│ └── test_line_data.py
├── data/
│ ├── __init__.py
│ ├── depth_3_pedantic.json
│ ├── doc.json
│ ├── no_filters_no_ml.json
│ ├── no_ml.json
│ └── output.json
├── deep_scanner/
│ ├── __init__.py
│ ├── test_abstract_scanner.py
│ ├── test_bzip2_scanner.py
│ ├── test_crx_scanner.py
│ ├── test_csv_scanner.py
│ ├── test_deb_scanner.py
│ ├── test_deep_scanner.py
│ ├── test_eml_scanner.py
│ ├── test_encoder_scanner.py
│ ├── test_gzip_scanner.py
│ ├── test_html_scanner.py
│ ├── test_jclass_scanner.py
│ ├── test_jks_scanner.py
│ ├── test_lzma_scanner.py
│ ├── test_mxfile_scanner.py
│ ├── test_pdf_scanner.py
│ ├── test_png_scanner.py
│ ├── test_rtf_scanner.py
│ ├── test_sqlite3_scanner.py
│ ├── test_strings_scanner.py
│ ├── test_struct_scanner.py
│ ├── test_tar_scanner.py
│ ├── test_tmx_scanner.py
│ ├── test_xml_scanner.py
│ ├── test_zip_scanner.py
│ └── test_zlib_scanner.py
├── file_handler/
│ ├── __init__.py
│ ├── test_byte_content_provider.py
│ ├── test_data_content_provider.py
│ ├── test_diff_content_provider.py
│ ├── test_file_path_extractor.py
│ ├── test_files_provider.py
│ ├── test_patches_provider.py
│ ├── test_string_content_provider.py
│ ├── test_struct_content_provider.py
│ ├── test_text_content_provider.py
│ ├── zip_bomb_1.py
│ └── zip_bomb_2.py
├── filters/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_line_git_binary_check.py
│ ├── test_line_specific_key_check.py
│ ├── test_line_uue_part_check.py
│ ├── test_value_allowlist_check.py
│ ├── test_value_array_dictionary_check.py
│ ├── test_value_atlassian_token_check.py
│ ├── test_value_azure_token_check.py
│ ├── test_value_base32_data_check.py
│ ├── test_value_base64_data_check.py
│ ├── test_value_base64_key_check.py
│ ├── test_value_base64_part_check.py
│ ├── test_value_basic_auth_check.py
│ ├── test_value_blocklist_check.py
│ ├── test_value_camel_case_check.py
│ ├── test_value_dictionary_keyword_check.py
│ ├── test_value_entropy_base32_check.py
│ ├── test_value_entropy_base36_check.py
│ ├── test_value_entropy_base64_check.py
│ ├── test_value_file_path_check.py
│ ├── test_value_github_check.py
│ ├── test_value_grafana_check.py
│ ├── test_value_grafana_service_check.py
│ ├── test_value_hex_number_check.py
│ ├── test_value_json_web_key_check.py
│ ├── test_value_json_web_token_check.py
│ ├── test_value_last_word_check.py
│ ├── test_value_length_check.py
│ ├── test_value_method_check.py
│ ├── test_value_morphemes_check.py
│ ├── test_value_not_allowed_pattern.py
│ ├── test_value_not_part_encoded.py
│ ├── test_value_number_check.py
│ ├── test_value_pattern_check.py
│ ├── test_value_sealed_secret_check.py
│ ├── test_value_search_check.py
│ ├── test_value_similarity_check.py
│ ├── test_value_split_keyword_check.py
│ ├── test_value_string_type_check.py
│ ├── test_value_token_base32_check.py
│ ├── test_value_token_base36_check.py
│ ├── test_value_token_base64_check.py
│ └── test_value_token_check.py
├── ml_model/
│ ├── __init__.py
│ ├── test_features.py
│ └── test_ml_validator.py
├── rules/
│ ├── __init__.py
│ ├── common.py
│ ├── test_api.py
│ ├── test_auth.py
│ ├── test_aws_key.py
│ ├── test_aws_multi.py
│ ├── test_aws_mws_key.py
│ ├── test_credential.py
│ ├── test_dynatrace_api_token.py
│ ├── test_facebook_key.py
│ ├── test_firebase_domain.py
│ ├── test_github_classic_token.py
│ ├── test_github_fine_granted_token.py
│ ├── test_google_api_key.py
│ ├── test_google_multi.py
│ ├── test_google_oauth_key.py
│ ├── test_instagram_access_token.py
│ ├── test_jwt.py
│ ├── test_key.py
│ ├── test_mailchimp_key.py
│ ├── test_nonce.py
│ ├── test_password.py
│ ├── test_paypal_key.py
│ ├── test_pem_key.py
│ ├── test_picatic_key.py
│ ├── test_pypi_api_token.py
│ ├── test_rule.py
│ ├── test_salt.py
│ ├── test_secret.py
│ ├── test_sendgrid_api_key_token.py
│ ├── test_shopify_token.py
│ ├── test_slack_token.py
│ ├── test_slack_webhook.py
│ ├── test_square_access_token.py
│ ├── test_telegram_bot_api_token.py
│ ├── test_token.py
│ └── test_url_credentials.py
├── scanner/
│ ├── __init__.py
│ └── scan_type/
│ ├── __init__.py
│ ├── test_multipattern.py
│ └── test_pem_key_pattern.py
├── test_app.py
├── test_doc.py
├── test_git.py
├── test_main.py
├── test_utils/
│ ├── __init__.py
│ └── dummy_line_data.py
└── utils/
├── __init__.py
├── test_hop_stat.py
└── test_util.py
SYMBOL INDEX (1456 symbols across 291 files)
FILE: credsweeper/app.py
class CredSweeper (line 30) | class CredSweeper:
method __init__ (line 42) | def __init__(self,
method _get_config_path (line 138) | def _get_config_path(config_path: Optional[str]) -> Path:
method _get_config_dict (line 145) | def _get_config_dict(
method _use_ml_validation (line 175) | def _use_ml_validation(self) -> bool:
method ml_validator (line 192) | def ml_validator(self) -> MlValidator:
method pool_initializer (line 208) | def pool_initializer(log_kwargs) -> None:
method run (line 215) | def run(self, content_provider: AbstractProvider) -> int:
method scan (line 236) | def scan(self, content_providers: Sequence[ContentProvider]) -> None:
method __single_job_scan (line 250) | def __single_job_scan(self, content_providers: Sequence[ContentProvide...
method __multi_jobs_scan (line 258) | def __multi_jobs_scan(self, content_providers: Sequence[ContentProvide...
method files_scan (line 287) | def files_scan(self, content_providers: Sequence[ContentProvider]) -> ...
method file_scan (line 300) | def file_scan(self, content_provider: ContentProvider) -> List[Candida...
method post_processing (line 334) | def post_processing(self) -> None:
method export_results (line 372) | def export_results(self, change_type: Optional[DiffRowType] = None) ->...
FILE: credsweeper/common/constants.py
class Severity (line 7) | class Severity(Enum):
method __lt__ (line 15) | def __lt__(self, other) -> bool:
method get (line 27) | def get(severity: Union[str, "Severity"]) -> Optional["Severity"]:
class Confidence (line 38) | class Confidence(Enum):
method __lt__ (line 44) | def __lt__(self, other) -> bool:
method get (line 52) | def get(confidence: Union[str, "Confidence"]) -> Optional["Confidence"]:
class Chars (line 66) | class Chars(Enum):
class GroupType (line 99) | class GroupType(Enum):
class RuleType (line 107) | class RuleType(Enum):
class ThresholdPreset (line 119) | class ThresholdPreset(Enum):
class DiffRowType (line 128) | class DiffRowType(Enum):
FILE: credsweeper/common/keyword_checklist.py
class KeywordChecklist (line 7) | class KeywordChecklist:
method __init__ (line 14) | def __init__(self) -> None:
method keyword_set (line 24) | def keyword_set(self) -> Set[str]:
method keyword_list (line 29) | def keyword_list(self) -> List[str]:
method keyword_len (line 34) | def keyword_len(self) -> int:
method morpheme_set (line 39) | def morpheme_set(self) -> Set[str]:
method morpheme_len (line 49) | def morpheme_len(self) -> int:
method check_morphemes (line 53) | def check_morphemes(self, line_lower: str, threshold: int) -> bool:
FILE: credsweeper/common/keyword_pattern.py
class KeywordPattern (line 4) | class KeywordPattern:
method get_keyword_pattern (line 66) | def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
FILE: credsweeper/config/config.py
class Config (line 10) | class Config:
method __init__ (line 18) | def __init__(self, config: Dict[str, Any]) -> None:
FILE: credsweeper/credentials/augment_candidates.py
function augment_candidates (line 6) | def augment_candidates(candidates: List[Candidate], new_candidates: List...
FILE: credsweeper/credentials/candidate.py
class Candidate (line 11) | class Candidate:
method __init__ (line 28) | def __init__(self,
method compare (line 47) | def compare(self, other: 'Candidate') -> bool:
method _encode (line 65) | def _encode(value: Any) -> Any:
method to_str (line 75) | def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
method __str__ (line 83) | def __str__(self):
method __repr__ (line 86) | def __repr__(self):
method to_json (line 89) | def to_json(self, hashed: bool, subtext: bool) -> Dict:
method to_dict_list (line 112) | def to_dict_list(self, hashed: bool, subtext: bool) -> List[dict]:
method get_dummy_candidate (line 131) | def get_dummy_candidate(cls, config: Config, file_path: str, file_type...
FILE: credsweeper/credentials/candidate_group_generator.py
class CandidateGroupGenerator (line 7) | class CandidateGroupGenerator:
method __init__ (line 10) | def __init__(self) -> None:
method grouped_candidates (line 14) | def grouped_candidates(self) -> Dict[CandidateKey, List[Candidate]]:
method grouped_candidates (line 19) | def grouped_candidates(self, grouped_candidates: Dict[CandidateKey, Li...
method __contains__ (line 23) | def __contains__(self, key: CandidateKey) -> bool:
method __getitem__ (line 26) | def __getitem__(self, key) -> List[Candidate]:
method __setitem__ (line 29) | def __setitem__(self, key: CandidateKey, value: List[Candidate]) -> None:
method __len__ (line 32) | def __len__(self) -> int:
method items (line 35) | def items(self) -> List[Tuple[CandidateKey, List[Candidate]]]:
FILE: credsweeper/credentials/candidate_key.py
class CandidateKey (line 6) | class CandidateKey:
method __init__ (line 12) | def __init__(self, line_data: LineData):
method __hash__ (line 20) | def __hash__(self):
method __eq__ (line 23) | def __eq__(self, other):
method __ne__ (line 26) | def __ne__(self, other):
method __repr__ (line 29) | def __repr__(self) -> str:
FILE: credsweeper/credentials/credential_manager.py
class CredentialManager (line 11) | class CredentialManager:
method __init__ (line 14) | def __init__(self) -> None:
method clear_credentials (line 17) | def clear_credentials(self) -> None:
method len_credentials (line 21) | def len_credentials(self) -> int:
method get_credentials (line 30) | def get_credentials(self) -> List[Candidate]:
method set_credentials (line 39) | def set_credentials(self, candidates: List[Candidate]) -> None:
method add_credential (line 48) | def add_credential(self, candidate: Candidate) -> None:
method remove_credential (line 57) | def remove_credential(self, candidate: Candidate) -> None:
method purge_duplicates (line 66) | def purge_duplicates(self) -> int:
method group_credentials (line 98) | def group_credentials(self) -> CandidateGroupGenerator:
FILE: credsweeper/credentials/line_data.py
class LineData (line 15) | class LineData:
method __init__ (line 51) | def __init__(
method compare (line 93) | def compare(self, other: 'LineData') -> bool:
method initialize (line 104) | def initialize(self, match_obj: Optional[re.Match] = None) -> None:
method sanitize_value (line 137) | def sanitize_value(self):
method check_url_part (line 171) | def check_url_part(self) -> bool:
method clean_url_parameters (line 192) | def clean_url_parameters(self) -> None:
method clean_bash_parameters (line 207) | def clean_bash_parameters(self) -> None:
method clean_toml_parameters (line 220) | def clean_toml_parameters(self) -> None:
method clean_tag_parameters (line 233) | def clean_tag_parameters(self) -> None:
method sanitize_variable (line 248) | def sanitize_variable(self) -> None:
method is_comment (line 265) | def is_comment(self) -> bool:
method is_well_quoted_value (line 279) | def is_well_quoted_value(self) -> bool:
method is_quoted (line 321) | def is_quoted(self) -> bool:
method is_source_file (line 343) | def is_source_file(self) -> bool:
method is_source_file_with_quotes (line 356) | def is_source_file_with_quotes(self) -> bool:
method get_hash_or_subtext (line 367) | def get_hash_or_subtext(
method to_str (line 403) | def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
method __str__ (line 411) | def __str__(self):
method __repr__ (line 414) | def __repr__(self):
method to_json (line 417) | def to_json(self, hashed: bool, subtext: bool) -> Dict:
method get_colored_line (line 454) | def get_colored_line(self, hashed: bool, subtext: bool = False) -> str:
FILE: credsweeper/deep_scanner/abstract_scanner.py
class AbstractScanner (line 26) | class AbstractScanner(ABC):
method config (line 31) | def config(self) -> Config:
method scanner (line 37) | def scanner(self) -> Scanner:
method data_scan (line 42) | def data_scan(
method get_deep_scanners (line 52) | def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int)...
method recursive_scan (line 58) | def recursive_scan(
method key_value_combination (line 99) | def key_value_combination(structure: dict) -> Generator[Tuple[Any, Any...
method structure_processing (line 127) | def structure_processing(structure: Any) -> Generator[Tuple[Any, Any],...
method structure_scan (line 152) | def structure_scan(
method deep_scan_with_fallback (line 234) | def deep_scan_with_fallback(self, data_provider: DataContentProvider, ...
method scan (line 269) | def scan(self,
FILE: credsweeper/deep_scanner/byte_scanner.py
class ByteScanner (line 13) | class ByteScanner(AbstractScanner, ABC):
method data_scan (line 16) | def data_scan(
FILE: credsweeper/deep_scanner/bzip2_scanner.py
class Bzip2Scanner (line 15) | class Bzip2Scanner(AbstractScanner, ABC):
method match (line 19) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 27) | def data_scan(
FILE: credsweeper/deep_scanner/crx_scanner.py
class CrxScanner (line 13) | class CrxScanner(AbstractScanner, ABC):
method match (line 17) | def match(data: bytes | bytearray) -> bool:
method zip_extract (line 24) | def zip_extract(data: bytes) -> bytes:
method data_scan (line 31) | def data_scan(
FILE: credsweeper/deep_scanner/csv_scanner.py
class CsvScanner (line 17) | class CsvScanner(AbstractScanner, ABC):
method match (line 27) | def match(data: bytes | bytearray) -> bool:
method get_structure (line 39) | def get_structure(cls, text: str) -> List[Dict[str, Any]]:
method data_scan (line 69) | def data_scan(
FILE: credsweeper/deep_scanner/deb_scanner.py
class DebScanner (line 15) | class DebScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method walk_deb (line 28) | def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, N...
method data_scan (line 43) | def data_scan(
FILE: credsweeper/deep_scanner/deep_scanner.py
class DeepScanner (line 44) | class DeepScanner(
method __init__ (line 77) | def __init__(self, config: Config, scanner: Scanner) -> None:
method config (line 88) | def config(self) -> Config:
method scanner (line 92) | def scanner(self) -> Scanner:
method is_media (line 187) | def is_media(data: Union[bytes, bytearray]) -> bool:
method get_deep_scanners (line 197) | def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int)...
FILE: credsweeper/deep_scanner/docx_scanner.py
class DocxScanner (line 22) | class DocxScanner(AbstractScanner, ABC):
method _iter_block_items (line 26) | def _iter_block_items(block):
method data_scan (line 65) | def data_scan(
FILE: credsweeper/deep_scanner/eml_scanner.py
class EmlScanner (line 15) | class EmlScanner(AbstractScanner, ABC):
method match (line 19) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 28) | def data_scan(
FILE: credsweeper/deep_scanner/encoder_scanner.py
class EncoderScanner (line 19) | class EncoderScanner(AbstractScanner, ABC):
method match (line 28) | def match(data: bytes | bytearray) -> bool:
method decode (line 36) | def decode(text: str) -> Optional[bytes]:
method data_scan (line 44) | def data_scan(
FILE: credsweeper/deep_scanner/gzip_scanner.py
class GzipScanner (line 16) | class GzipScanner(AbstractScanner, ABC):
method match (line 20) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 26) | def data_scan(
FILE: credsweeper/deep_scanner/html_scanner.py
class HtmlScanner (line 14) | class HtmlScanner(AbstractScanner, ABC):
method match (line 18) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 30) | def data_scan(
FILE: credsweeper/deep_scanner/jclass_scanner.py
class JclassScanner (line 16) | class JclassScanner(AbstractScanner, ABC):
method match (line 20) | def match(data: bytes | bytearray) -> bool:
method u2 (line 27) | def u2(stream: io.BytesIO) -> int:
method get_utf8_constants (line 32) | def get_utf8_constants(stream: io.BytesIO) -> List[str]:
method data_scan (line 62) | def data_scan(
FILE: credsweeper/deep_scanner/jks_scanner.py
class JksScanner (line 15) | class JksScanner(AbstractScanner, ABC):
method match (line 19) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 25) | def data_scan(
FILE: credsweeper/deep_scanner/lang_scanner.py
class LangScanner (line 13) | class LangScanner(AbstractScanner, ABC):
method data_scan (line 16) | def data_scan(
FILE: credsweeper/deep_scanner/lzma_scanner.py
class LzmaScanner (line 15) | class LzmaScanner(AbstractScanner, ABC):
method match (line 19) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 25) | def data_scan(
FILE: credsweeper/deep_scanner/mxfile_scanner.py
class MxfileScanner (line 17) | class MxfileScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 28) | def data_scan(
FILE: credsweeper/deep_scanner/patch_scanner.py
class PatchScanner (line 15) | class PatchScanner(AbstractScanner, ABC):
method data_scan (line 18) | def data_scan(
FILE: credsweeper/deep_scanner/pdf_scanner.py
class PdfScanner (line 17) | class PdfScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 27) | def data_scan(
FILE: credsweeper/deep_scanner/pkcs_scanner.py
class PkcsScanner (line 15) | class PkcsScanner(AbstractScanner, ABC):
method match (line 19) | def match(data: Union[bytes, bytearray]) -> int:
method data_scan (line 23) | def data_scan(
FILE: credsweeper/deep_scanner/png_scanner.py
class PngScanner (line 14) | class PngScanner(AbstractScanner, ABC):
method match (line 18) | def match(data: bytes | bytearray) -> bool:
method yield_png_chunks (line 25) | def yield_png_chunks(data: bytes) -> Generator[Tuple[int, str, bytes],...
method data_scan (line 70) | def data_scan(
FILE: credsweeper/deep_scanner/pptx_scanner.py
class PptxScanner (line 16) | class PptxScanner(AbstractScanner, ABC):
method data_scan (line 19) | def data_scan(
FILE: credsweeper/deep_scanner/rpm_scanner.py
class RpmScanner (line 17) | class RpmScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 27) | def data_scan(
FILE: credsweeper/deep_scanner/rtf_scanner.py
class RtfScanner (line 16) | class RtfScanner(AbstractScanner, ABC):
method match (line 20) | def match(data: bytes | bytearray) -> bool:
method get_lines (line 27) | def get_lines(text: str) -> List[str]:
method data_scan (line 33) | def data_scan(
FILE: credsweeper/deep_scanner/sqlite3_scanner.py
class Sqlite3Scanner (line 17) | class Sqlite3Scanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method __walk (line 28) | def __walk(sqlite3db) -> Generator[Tuple[str, Any], None, None]:
method walk_sqlite (line 42) | def walk_sqlite(data: bytes) -> Generator[Tuple[str, Any], None, None]:
method data_scan (line 67) | def data_scan(
FILE: credsweeper/deep_scanner/strings_scanner.py
class StringsScanner (line 14) | class StringsScanner(AbstractScanner, ABC):
method get_enumerated_lines (line 18) | def get_enumerated_lines(data: bytes) -> List[Tuple[int, str]]:
method data_scan (line 40) | def data_scan(
FILE: credsweeper/deep_scanner/tar_scanner.py
class TarScanner (line 17) | class TarScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 33) | def data_scan(
FILE: credsweeper/deep_scanner/tmx_scanner.py
class TmxScanner (line 17) | class TmxScanner(AbstractScanner, ABC):
method match (line 21) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 31) | def data_scan(
FILE: credsweeper/deep_scanner/xlsx_scanner.py
class XlsxScanner (line 18) | class XlsxScanner(AbstractScanner, ABC):
method match (line 22) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 29) | def data_scan(
FILE: credsweeper/deep_scanner/xml_scanner.py
class XmlScanner (line 15) | class XmlScanner(AbstractScanner, ABC):
method match (line 23) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 33) | def data_scan(
FILE: credsweeper/deep_scanner/zip_scanner.py
class ZipScanner (line 16) | class ZipScanner(AbstractScanner, ABC):
method match (line 20) | def match(data: bytes | bytearray) -> bool:
method data_scan (line 34) | def data_scan(
FILE: credsweeper/deep_scanner/zlib_scanner.py
class ZlibScanner (line 13) | class ZlibScanner(AbstractScanner, ABC):
method match (line 17) | def match(data: bytes | bytearray) -> bool:
method decompress (line 29) | def decompress(limit: int, data: bytes) -> bytes:
method data_scan (line 41) | def data_scan(
FILE: credsweeper/file_handler/abstract_provider.py
class AbstractProvider (line 10) | class AbstractProvider(ABC):
method __init__ (line 13) | def __init__(self, paths: Sequence[Union[str, Path, io.BytesIO, Tuple[...
method paths (line 23) | def paths(self) -> Sequence[Union[str, Path, io.BytesIO, Tuple[Union[s...
method paths (line 28) | def paths(self, paths: Sequence[Union[str, Path, io.BytesIO, Tuple[Uni...
method get_scannable_files (line 33) | def get_scannable_files(self, config: Config) -> Sequence[ContentProvi...
FILE: credsweeper/file_handler/analysis_target.py
class AnalysisTarget (line 7) | class AnalysisTarget:
method __init__ (line 10) | def __init__(
method offset (line 27) | def offset(self) -> Optional[int]:
method line (line 33) | def line(self) -> str:
method line_len (line 42) | def line_len(self) -> int:
method line_strip (line 47) | def line_strip(self) -> str:
method line_strip_len (line 52) | def line_strip_len(self) -> int:
method line_lower (line 57) | def line_lower(self) -> str:
method line_lower_strip (line 62) | def line_lower_strip(self) -> str:
method lines (line 67) | def lines(self) -> List[str]:
method lines_len (line 72) | def lines_len(self) -> int:
method line_pos (line 77) | def line_pos(self) -> int:
method line_num (line 82) | def line_num(self) -> int:
method line_nums (line 87) | def line_nums(self) -> List[int]:
method file_path (line 92) | def file_path(self) -> Optional[str]:
method file_type (line 97) | def file_type(self) -> Optional[str]:
method info (line 102) | def info(self) -> Optional[str]:
method descriptor (line 107) | def descriptor(self) -> Descriptor:
FILE: credsweeper/file_handler/byte_content_provider.py
class ByteContentProvider (line 12) | class ByteContentProvider(ContentProvider):
method __init__ (line 15) | def __init__(
method data (line 31) | def data(self) -> Optional[bytes]:
method free (line 35) | def free(self) -> None:
method lines (line 45) | def lines(self) -> List[str]:
method yield_analysis_target (line 59) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/file_handler/content_provider.py
class ContentProvider (line 14) | class ContentProvider(ABC):
method __init__ (line 17) | def __init__(
method yield_analysis_target (line 35) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
method descriptor (line 48) | def descriptor(self) -> Descriptor:
method file_path (line 53) | def file_path(self) -> str:
method file_type (line 58) | def file_type(self) -> str:
method info (line 63) | def info(self) -> str:
method data (line 69) | def data(self) -> Optional[bytes]:
method free (line 74) | def free(self) -> None:
method lines_to_targets (line 78) | def lines_to_targets(
FILE: credsweeper/file_handler/data_content_provider.py
class DataContentProvider (line 22) | class DataContentProvider(ContentProvider):
method __init__ (line 25) | def __init__(
method data (line 46) | def data(self) -> Optional[bytes]:
method free (line 50) | def free(self) -> None:
method text (line 64) | def text(self) -> str:
method __is_structure (line 70) | def __is_structure(self) -> bool:
method represent_as_structure (line 75) | def represent_as_structure(self) -> Optional[bool]:
method represent_as_xml (line 140) | def represent_as_xml(self) -> Optional[bool]:
method _check_multiline_cell (line 162) | def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
method simple_html_representation (line 187) | def simple_html_representation(html: BeautifulSoup) -> Tuple[List[int]...
method _table_depth_reached (line 208) | def _table_depth_reached(table: Tag, depth: int) -> bool:
method _table_representation (line 219) | def _table_representation(
method _html_tables_representation (line 319) | def _html_tables_representation(
method represent_as_html (line 335) | def represent_as_html(
method yield_analysis_target (line 368) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/file_handler/descriptor.py
class Descriptor (line 5) | class Descriptor:
FILE: credsweeper/file_handler/diff_content_provider.py
class DiffRowData (line 25) | class DiffRowData:
class DiffContentProvider (line 33) | class DiffContentProvider(ContentProvider):
method __init__ (line 50) | def __init__(
method data (line 60) | def data(self) -> bytes:
method diff (line 65) | def diff(self) -> List[DiffDict]:
method free (line 69) | def free(self) -> None:
method parse_lines_data (line 76) | def parse_lines_data(change_type: DiffRowType, lines_data: List[DiffRo...
method patch2files_diff (line 100) | def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -...
method preprocess_diff_rows (line 147) | def preprocess_diff_rows(
method wrong_change (line 172) | def wrong_change(change: DiffDict) -> bool:
method preprocess_file_diff (line 181) | def preprocess_file_diff(changes: List[DiffDict]) -> List[DiffRowData]:
method yield_analysis_target (line 212) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/file_handler/file_path_extractor.py
class FilePathExtractor (line 16) | class FilePathExtractor:
method apply_gitignore (line 23) | def apply_gitignore(detected_files: List[str]) -> List[str]:
method get_file_paths (line 38) | def get_file_paths(config: Config, path: Union[str, Path]) -> List[str]:
method is_valid_path (line 70) | def is_valid_path(cls, path: str) -> bool:
method is_find_by_ext_file (line 105) | def is_find_by_ext_file(config: Config, extension: str) -> bool:
method check_exclude_file (line 119) | def check_exclude_file(config: Config, path: str) -> bool:
method check_file_size (line 154) | def check_file_size(config: Config, reference: Union[str, Path, io.Byt...
FILE: credsweeper/file_handler/files_provider.py
class FilesProvider (line 15) | class FilesProvider(AbstractProvider):
method __init__ (line 18) | def __init__(self,
method get_scannable_files (line 33) | def get_scannable_files(self, config: Config) -> Sequence[ContentProvi...
FILE: credsweeper/file_handler/patches_provider.py
class PatchesProvider (line 17) | class PatchesProvider(AbstractProvider):
method __init__ (line 21) | def __init__(self, paths: Sequence[Union[str, Path, io.BytesIO, Tuple[...
method load_patch_data (line 34) | def load_patch_data(self, config: Config) -> List[List[str]]:
method get_files_sequence (line 53) | def get_files_sequence(self, raw_patches: List[List[str]]) -> Sequence...
method get_scannable_files (line 62) | def get_scannable_files(self, config: Config) -> Sequence[ContentProvi...
FILE: credsweeper/file_handler/string_content_provider.py
class StringContentProvider (line 8) | class StringContentProvider(ContentProvider):
method __init__ (line 11) | def __init__(
method data (line 34) | def data(self) -> bytes:
method free (line 38) | def free(self) -> None:
method lines (line 48) | def lines(self) -> List[str]:
method line_numbers (line 53) | def line_numbers(self) -> List[int]:
method yield_analysis_target (line 59) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/file_handler/struct_content_provider.py
class StructContentProvider (line 11) | class StructContentProvider(ContentProvider):
method __init__ (line 14) | def __init__(
method data (line 29) | def data(self) -> bytes:
method struct (line 34) | def struct(self) -> Any:
method free (line 38) | def free(self) -> None:
method yield_analysis_target (line 44) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/file_handler/text_content_provider.py
class TextContentProvider (line 14) | class TextContentProvider(ContentProvider):
method __init__ (line 22) | def __init__(self,
method data (line 33) | def data(self) -> Optional[bytes]:
method free (line 42) | def free(self) -> None:
method lines (line 54) | def lines(self) -> Optional[List[str]]:
method yield_analysis_target (line 66) | def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTar...
FILE: credsweeper/filters/filter.py
class Filter (line 9) | class Filter(ABC):
method __init__ (line 13) | def __init__(self, config: Optional[Config], *args):
method run (line 18) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/group/general_keyword.py
class GeneralKeyword (line 7) | class GeneralKeyword(Group):
method __init__ (line 10) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/general_pattern.py
class GeneralPattern (line 6) | class GeneralPattern(Group):
method __init__ (line 9) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/group.py
class Group (line 23) | class Group(ABC):
method __init__ (line 26) | def __init__(self, config: Config, rule_type: GroupType = GroupType.DE...
method filters (line 54) | def filters(self) -> List[Filter]:
method filters (line 59) | def filters(self, filters: List[Filter]) -> None:
FILE: credsweeper/filters/group/password_keyword.py
class PasswordKeyword (line 9) | class PasswordKeyword(Group):
method __init__ (line 12) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/token_pattern.py
class TokenPattern (line 7) | class TokenPattern(Group):
method __init__ (line 10) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/url_credentials_group.py
class UrlCredentialsGroup (line 10) | class UrlCredentialsGroup(Group):
method __init__ (line 13) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/weird_base36_token.py
class WeirdBase36Token (line 8) | class WeirdBase36Token(Group):
method __init__ (line 11) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/group/weird_base64_token.py
class WeirdBase64Token (line 9) | class WeirdBase64Token(Group):
method __init__ (line 12) | def __init__(self, config: Config) -> None:
FILE: credsweeper/filters/line_git_binary_check.py
class LineGitBinaryCheck (line 12) | class LineGitBinaryCheck(Filter):
method __init__ (line 16) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 19) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/line_specific_key_check.py
class LineSpecificKeyCheck (line 12) | class LineSpecificKeyCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 21) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/line_uue_part_check.py
class LineUUEPartCheck (line 10) | class LineUUEPartCheck(Filter):
method __init__ (line 14) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 17) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_allowlist_check.py
class ValueAllowlistCheck (line 11) | class ValueAllowlistCheck(Filter):
method __init__ (line 44) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 47) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_array_dictionary_check.py
class ValueArrayDictionaryCheck (line 10) | class ValueArrayDictionaryCheck(Filter):
method __init__ (line 20) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 23) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_atlassian_token_check.py
class ValueAtlassianTokenCheck (line 13) | class ValueAtlassianTokenCheck(Filter):
method __init__ (line 16) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 19) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
method check_crc32_struct (line 49) | def check_crc32_struct(value: str) -> bool:
method check_atlassian_struct (line 59) | def check_atlassian_struct(value: str) -> bool:
FILE: credsweeper/filters/value_azure_token_check.py
class ValueAzureTokenCheck (line 13) | class ValueAzureTokenCheck(Filter):
method __init__ (line 19) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 22) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_base32_data_check.py
class ValueBase32DataCheck (line 13) | class ValueBase32DataCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 21) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_base64_data_check.py
class ValueBase64DataCheck (line 12) | class ValueBase64DataCheck(Filter):
method __init__ (line 17) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 20) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_base64_encoded_pem_check.py
class ValueBase64EncodedPem (line 15) | class ValueBase64EncodedPem(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 21) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_base64_key_check.py
class ValueBase64KeyCheck (line 11) | class ValueBase64KeyCheck(Filter):
method __init__ (line 16) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 19) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_base64_part_check.py
class ValueBase64PartCheck (line 16) | class ValueBase64PartCheck(Filter):
method __init__ (line 24) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 27) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_basic_auth_check.py
class ValueBasicAuthCheck (line 12) | class ValueBasicAuthCheck(Filter):
method __init__ (line 15) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 18) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_blocklist_check.py
class ValueBlocklistCheck (line 9) | class ValueBlocklistCheck(Filter):
method __init__ (line 24) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 27) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_camel_case_check.py
class ValueCamelCaseCheck (line 12) | class ValueCamelCaseCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 21) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_dictionary_keyword_check.py
class ValueDictionaryKeywordCheck (line 10) | class ValueDictionaryKeywordCheck(Filter):
method __init__ (line 13) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 16) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_discord_bot_check.py
class ValueDiscordBotCheck (line 12) | class ValueDiscordBotCheck(Filter):
method __init__ (line 15) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 18) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_entropy_base32_check.py
class ValueEntropyBase32Check (line 7) | class ValueEntropyBase32Check(ValueEntropyBaseCheck):
method get_min_data_entropy (line 12) | def get_min_data_entropy(x: int) -> float:
FILE: credsweeper/filters/value_entropy_base36_check.py
class ValueEntropyBase36Check (line 7) | class ValueEntropyBase36Check(ValueEntropyBaseCheck):
method get_min_data_entropy (line 12) | def get_min_data_entropy(x: int) -> float:
FILE: credsweeper/filters/value_entropy_base64_check.py
class ValueEntropyBase64Check (line 7) | class ValueEntropyBase64Check(ValueEntropyBaseCheck):
method get_min_data_entropy (line 12) | def get_min_data_entropy(x: int) -> float:
FILE: credsweeper/filters/value_entropy_base_check.py
class ValueEntropyBaseCheck (line 11) | class ValueEntropyBaseCheck(Filter):
method __init__ (line 14) | def __init__(self, config: Optional[Config] = None) -> None:
method get_min_data_entropy (line 19) | def get_min_data_entropy(x: int) -> float:
method run (line 23) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_file_path_check.py
class ValueFilePathCheck (line 13) | class ValueFilePathCheck(Filter):
method __init__ (line 23) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 26) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_github_check.py
class ValueGitHubCheck (line 14) | class ValueGitHubCheck(Filter):
method __init__ (line 17) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 20) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_grafana_check.py
class ValueGrafanaCheck (line 12) | class ValueGrafanaCheck(Filter):
method __init__ (line 15) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 18) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_grafana_service_check.py
class ValueGrafanaServiceCheck (line 13) | class ValueGrafanaServiceCheck(Filter):
method __init__ (line 16) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 19) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_hex_number_check.py
class ValueHexNumberCheck (line 10) | class ValueHexNumberCheck(Filter):
method __init__ (line 15) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 18) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_jfrog_token_check.py
class ValueJfrogTokenCheck (line 15) | class ValueJfrogTokenCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 22) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_json_web_key_check.py
class ValueJsonWebKeyCheck (line 11) | class ValueJsonWebKeyCheck(Filter):
method __init__ (line 19) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 22) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_json_web_token_check.py
class ValueJsonWebTokenCheck (line 12) | class ValueJsonWebTokenCheck(Filter):
method __init__ (line 28) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 31) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_last_word_check.py
class ValueLastWordCheck (line 9) | class ValueLastWordCheck(Filter):
method __init__ (line 12) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 15) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_length_check.py
class ValueLengthCheck (line 10) | class ValueLengthCheck(Filter):
method __init__ (line 13) | def __init__(self,
method run (line 20) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_method_check.py
class ValueMethodCheck (line 10) | class ValueMethodCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 21) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_morphemes_check.py
class ValueMorphemesCheck (line 11) | class ValueMorphemesCheck(Filter):
method __init__ (line 18) | def __init__(self, config: Optional[Config] = None, threshold: Optiona...
method run (line 29) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_not_allowed_pattern_check.py
class ValueNotAllowedPatternCheck (line 11) | class ValueNotAllowedPatternCheck(Filter):
method __init__ (line 19) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 22) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_not_part_encoded_check.py
class ValueNotPartEncodedCheck (line 11) | class ValueNotPartEncodedCheck(Filter):
method __init__ (line 19) | def __init__(self, config: Optional[Config] = None) -> None:
method check_line_target_fit (line 23) | def check_line_target_fit(line_data: LineData, target: AnalysisTarget)...
method check_val (line 32) | def check_val(line: str, pattern: re.Pattern) -> Optional[bool]:
method run (line 44) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_number_check.py
class ValueNumberCheck (line 10) | class ValueNumberCheck(Filter):
method __init__ (line 16) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 19) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_pattern_check.py
class ValuePatternCheck (line 11) | class ValuePatternCheck(Filter):
method __init__ (line 28) | def __init__(self, config: Optional[Config] = None, pattern_len: Optio...
method get_pattern (line 51) | def get_pattern(pattern_len: int) -> re.Pattern:
method equal_pattern_check (line 62) | def equal_pattern_check(self, value: str, bit_length: int) -> bool:
method ascending_pattern_check (line 77) | def ascending_pattern_check(self, value: str, bit_length: int) -> bool:
method descending_pattern_check (line 99) | def descending_pattern_check(self, value: str, bit_length: int) -> bool:
method check_val (line 121) | def check_val(self, value: str, bit_length: int) -> bool:
method duple_pattern_check (line 140) | def duple_pattern_check(self, value: str, bit_length: int) -> bool:
method run (line 158) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_sealed_secret_check.py
class ValueSealedSecretCheck (line 10) | class ValueSealedSecretCheck(Filter):
method __init__ (line 17) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 20) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_search_check.py
class ValueSearchCheck (line 9) | class ValueSearchCheck(Filter):
method __init__ (line 12) | def __init__(self, config: Optional[Config] = None, pattern: Optional[...
method run (line 15) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_similarity_check.py
class ValueSimilarityCheck (line 11) | class ValueSimilarityCheck(Filter):
method __init__ (line 14) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 17) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_split_keyword_check.py
class ValueSplitKeywordCheck (line 11) | class ValueSplitKeywordCheck(Filter):
method __init__ (line 14) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 17) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_string_type_check.py
class ValueStringTypeCheck (line 10) | class ValueStringTypeCheck(Filter):
method __init__ (line 30) | def __init__(self, config: Optional[Config] = None, check_for_literals...
method run (line 33) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_token_base32_check.py
class ValueTokenBase32Check (line 6) | class ValueTokenBase32Check(ValueTokenBaseCheck):
method get_stat_range (line 24) | def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[floa...
FILE: credsweeper/filters/value_token_base36_check.py
class ValueTokenBase36Check (line 6) | class ValueTokenBase36Check(ValueTokenBaseCheck):
method get_stat_range (line 24) | def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[floa...
FILE: credsweeper/filters/value_token_base64_check.py
class ValueTokenBase64Check (line 6) | class ValueTokenBase64Check(ValueTokenBaseCheck):
method get_stat_range (line 24) | def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[floa...
FILE: credsweeper/filters/value_token_base_check.py
class ValueTokenBaseCheck (line 13) | class ValueTokenBaseCheck(Filter):
method __init__ (line 30) | def __init__(self, config: Optional[Config] = None) -> None:
method get_stat_range (line 35) | def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[floa...
method get_ppf (line 40) | def get_ppf(n: int) -> float:
method run (line 46) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/filters/value_token_check.py
class ValueTokenCheck (line 10) | class ValueTokenCheck(Filter):
method __init__ (line 23) | def __init__(self, config: Optional[Config] = None) -> None:
method run (line 26) | def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
FILE: credsweeper/logger/logger.py
class Logger (line 10) | class Logger:
method init_logging (line 27) | def init_logging(log_level: str, file_path: Optional[str] = None) -> N...
FILE: credsweeper/main.py
function positive_int (line 29) | def positive_int(value: Any) -> int:
function threshold_or_float_or_zero (line 38) | def threshold_or_float_or_zero(arg: str) -> Union[int, float, ThresholdP...
function logger_levels (line 61) | def logger_levels(log_level: str) -> str:
function severity_levels (line 75) | def severity_levels(severity_level: str) -> Severity:
function check_integrity (line 90) | def check_integrity() -> int:
function get_arguments (line 105) | def get_arguments() -> Namespace:
function get_credsweeper (line 283) | def get_credsweeper(args: Namespace) -> CredSweeper:
function scan (line 317) | def scan(args: Namespace, content_provider: AbstractProvider) -> int:
function get_commit_providers (line 340) | def get_commit_providers(commit: Commit, repo: Repo) -> Sequence[ByteCon...
function drill (line 359) | def drill(args: Namespace) -> Tuple[int, int]:
function main (line 436) | def main() -> int:
FILE: credsweeper/ml_model/features/entropy_evaluation.py
class EntropyEvaluation (line 12) | class EntropyEvaluation(Feature):
method extract (line 29) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/feature.py
class Feature (line 9) | class Feature(ABC):
method __init__ (line 12) | def __init__(self):
method __call__ (line 15) | def __call__(self, candidates: List[Candidate]) -> np.ndarray:
method extract (line 25) | def extract(self, candidate: Candidate) -> Any:
FILE: credsweeper/ml_model/features/file_extension.py
class FileExtension (line 9) | class FileExtension(WordIn):
method __init__ (line 17) | def __init__(self, extensions: List[str]) -> None:
method __call__ (line 20) | def __call__(self, candidates: List[Candidate]) -> np.ndarray:
method extract (line 24) | def extract(self, candidate: Candidate) -> Any:
FILE: credsweeper/ml_model/features/has_html_tag.py
class HasHtmlTag (line 7) | class HasHtmlTag(WordIn):
method __init__ (line 14) | def __init__(self) -> None:
method extract (line 17) | def extract(self, candidate: Candidate) -> float:
FILE: credsweeper/ml_model/features/is_secret_numeric.py
class IsSecretNumeric (line 7) | class IsSecretNumeric(Feature):
method extract (line 10) | def extract(self, candidate: Candidate) -> float:
FILE: credsweeper/ml_model/features/length_of_attribute.py
class LengthOfAttribute (line 8) | class LengthOfAttribute(Feature):
method __init__ (line 11) | def __init__(self, attribute: str):
method extract (line 21) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/morpheme_dense.py
class MorphemeDense (line 6) | class MorphemeDense(Feature):
method extract (line 9) | def extract(self, candidate: Candidate) -> float:
FILE: credsweeper/ml_model/features/rule_name.py
class RuleName (line 9) | class RuleName(WordIn):
method __init__ (line 17) | def __init__(self, rule_names: List[str]) -> None:
method __call__ (line 20) | def __call__(self, candidates: List[Candidate]) -> np.ndarray:
method extract (line 24) | def extract(self, candidate: Candidate) -> Any:
FILE: credsweeper/ml_model/features/rule_severity.py
class RuleSeverity (line 6) | class RuleSeverity(Feature):
method extract (line 9) | def extract(self, candidate: Candidate) -> float:
FILE: credsweeper/ml_model/features/search_in_attribute.py
class SearchInAttribute (line 7) | class SearchInAttribute(Feature):
method __init__ (line 10) | def __init__(self, pattern: str, attribute: str):
method extract (line 15) | def extract(self, candidate: Candidate) -> float:
FILE: credsweeper/ml_model/features/word_in.py
class WordIn (line 10) | class WordIn(Feature):
method __init__ (line 13) | def __init__(self, words: List[str]):
method extract (line 22) | def extract(self, candidate: Candidate) -> Any:
method zero (line 26) | def zero(self) -> np.ndarray:
method word_in_ (line 30) | def word_in_(self, iterable_data: Union[str, List[str], Set[str]]) -> ...
FILE: credsweeper/ml_model/features/word_in_path.py
class WordInPath (line 11) | class WordInPath(WordIn):
method __call__ (line 14) | def __call__(self, candidates: List[Candidate]) -> np.ndarray:
method extract (line 25) | def extract(self, candidate: Candidate) -> Any:
FILE: credsweeper/ml_model/features/word_in_postamble.py
class WordInPostamble (line 8) | class WordInPostamble(WordIn):
method extract (line 11) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/word_in_preamble.py
class WordInPreamble (line 8) | class WordInPreamble(WordIn):
method extract (line 11) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/word_in_transition.py
class WordInTransition (line 7) | class WordInTransition(WordIn):
method extract (line 10) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/word_in_value.py
class WordInValue (line 7) | class WordInValue(WordIn):
method extract (line 10) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/features/word_in_variable.py
class WordInVariable (line 7) | class WordInVariable(WordIn):
method extract (line 10) | def extract(self, candidate: Candidate) -> np.ndarray:
FILE: credsweeper/ml_model/ml_validator.py
class MlValidator (line 19) | class MlValidator:
method __init__ (line 29) | def __init__(
method __reduce__ (line 112) | def __reduce__(self):
method session (line 118) | def session(self) -> InferenceSession:
method encode (line 126) | def encode(self, text: str, limit: int) -> np.ndarray:
method encode_line (line 140) | def encode_line(self, text: str, position: int):
method encode_value (line 149) | def encode_value(self, text: str) -> np.ndarray:
method _call_model (line 154) | def _call_model(self, line_input: np.ndarray, variable_input: np.ndarr...
method extract_common_features (line 167) | def extract_common_features(self, candidates: List[Candidate]) -> np.n...
method extract_unique_features (line 179) | def extract_unique_features(self, candidates: List[Candidate]) -> np.n...
method get_group_features (line 196) | def get_group_features(self, candidates: List[Candidate]) -> Tuple[np....
method extract_features (line 218) | def extract_features(self, candidates: List[Candidate]) -> np.ndarray:
method _batch_call_model (line 226) | def _batch_call_model(self, line_input_list, variable_input_list, valu...
method validate_groups (line 237) | def validate_groups(self, group_list: List[Tuple[CandidateKey, List[Ca...
FILE: credsweeper/rules/rule.py
class Rule (line 18) | class Rule:
method __init__ (line 58) | def __init__(self, config: Config, rule_dict: Dict) -> None:
method _malformed_rule_error (line 90) | def _malformed_rule_error(self, rule_dict: Dict, field: str):
method rule_name (line 96) | def rule_name(self) -> str:
method rule_type (line 101) | def rule_type(self) -> RuleType:
method severity (line 106) | def severity(self) -> Severity:
method confidence (line 111) | def confidence(self) -> Confidence:
method filters (line 116) | def filters(self) -> List[Filter]:
method _get_arg (line 121) | def _get_arg(arg: str) -> Union[int, float, str]:
method _init_filters (line 129) | def _init_filters(self, filter_type: Union[None, str, List[str]]) -> L...
method _init_patterns (line 164) | def _init_patterns(self, _values: List[str]) -> List[re.Pattern]:
method patterns (line 196) | def patterns(self) -> List[re.Pattern]:
method use_ml (line 201) | def use_ml(self) -> bool:
method _verify_rule_config (line 206) | def _verify_rule_config(rule_config: Dict) -> None:
method required_substrings (line 222) | def required_substrings(self) -> Set[str]:
method has_required_substrings (line 227) | def has_required_substrings(self) -> bool:
method required_regex (line 232) | def required_regex(self) -> Optional[re.Pattern]:
method min_line_len (line 237) | def min_line_len(self) -> int:
method target (line 242) | def target(self) -> List[str]:
FILE: credsweeper/scanner/scan_type/multi_pattern.py
class MultiPattern (line 15) | class MultiPattern(ScanType):
method run (line 26) | def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> Li...
method get_line_positions (line 58) | def get_line_positions(cls, line_pos: int, target: AnalysisTarget) -> ...
method _scan (line 98) | def _scan(cls, config: Config, candidate: Candidate, candi_line_pos: i...
FILE: credsweeper/scanner/scan_type/pem_key_pattern.py
class PemKeyPattern (line 15) | class PemKeyPattern(ScanType):
method run (line 19) | def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> Li...
FILE: credsweeper/scanner/scan_type/scan_type.py
class ScanType (line 16) | class ScanType(ABC):
method run (line 25) | def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> Li...
method filtering (line 41) | def filtering(cls, target: AnalysisTarget, line_data: LineData, filter...
method get_line_data_list (line 69) | def get_line_data_list(
method _get_candidates (line 148) | def _get_candidates(cls, config: Config, rule: Rule, target: AnalysisT...
method _aux_scan (line 189) | def _aux_scan(cls, config: Config, rule: Rule, target: AnalysisTarget,...
FILE: credsweeper/scanner/scan_type/single_pattern.py
class SinglePattern (line 11) | class SinglePattern(ScanType):
method run (line 15) | def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> Li...
FILE: credsweeper/scanner/scanner.py
class Scanner (line 25) | class Scanner:
method __init__ (line 39) | def __init__(self, config: Config, rule_path: Union[None, str, Path]) ...
method keywords_required_substrings_check (line 52) | def keywords_required_substrings_check(self, text: str) -> bool:
method _get_required_substrings (line 56) | def _get_required_substrings(self, rule_type: RuleType) -> Set[str]:
method _substring_check (line 64) | def _substring_check(substrings: Set[str], text: str) -> bool:
method _set_rules_scanners (line 71) | def _set_rules_scanners(self, rules_path: Union[None, str, Path]) -> N...
method _is_available (line 104) | def _is_available(self, rule: Rule) -> bool:
method yield_rule_scanner (line 116) | def yield_rule_scanner(
method scan (line 132) | def scan(self, provider: ContentProvider) -> List[Candidate]:
method get_scanner (line 204) | def get_scanner(rule: Rule) -> Type[ScanType]:
FILE: credsweeper/utils/hop_stat.py
class HopStat (line 5) | class HopStat:
method __init__ (line 64) | def __init__(self):
method __get_xyz (line 81) | def __get_xyz(c: str) -> Tuple[int, int, int]:
method stat (line 93) | def stat(self, value: str) -> Tuple[float, float]:
FILE: credsweeper/utils/pem_key_detector.py
class PemKeyDetector (line 16) | class PemKeyDetector:
method __init__ (line 35) | def __init__(self, config: Config):
method cut_barrier (line 41) | def cut_barrier(self, line: str) -> str:
method set_barrier (line 50) | def set_barrier(self, line: str, start=0, end=MAX_LINE_LENGTH):
method detect_pem_key (line 60) | def detect_pem_key(self, first_line: LineData, target: AnalysisTarget)...
method finalize (line 137) | def finalize(line_data_list: List[LineData], key_data_list: List[str],...
method sanitize_line_data_list (line 167) | def sanitize_line_data_list(line_data_list: List[LineData], key_data_l...
method sanitize_line (line 203) | def sanitize_line(line: str, recurse_level: int = 5) -> str:
method is_leading_config_line (line 254) | def is_leading_config_line(line: str) -> bool:
FILE: credsweeper/utils/util.py
class Util (line 38) | class Util:
method get_extension (line 42) | def get_extension(file_path: str, lower=True) -> str:
method get_regex_combine_or (line 48) | def get_regex_combine_or(re_strs: List[str]) -> str:
method get_shannon_entropy (line 62) | def get_shannon_entropy(data: Union[str, bytes]) -> float:
method get_min_data_entropy (line 84) | def get_min_data_entropy(x: int) -> float:
method is_ascii_entropy_validate (line 105) | def is_ascii_entropy_validate(data: bytes) -> bool:
method is_binary (line 146) | def is_binary(data: Union[bytes, bytearray]) -> bool:
method is_latin1 (line 161) | def is_latin1(data: Union[bytes, bytearray]) -> bool:
method read_file (line 172) | def read_file(path: Union[str, Path], encodings: Optional[List[str]] =...
method decode_text (line 192) | def decode_text(content: Optional[bytes], encodings: Optional[List[str...
method split_text (line 241) | def split_text(text: str) -> List[str]:
method decode_bytes (line 246) | def decode_bytes(content: Optional[bytes], encodings: Optional[List[st...
method get_asn1_size (line 267) | def get_asn1_size(data: Union[bytes, bytearray]) -> int:
method read_data (line 298) | def read_data(path: Union[str, Path]) -> Optional[bytes]:
method get_xml_from_lines (line 320) | def get_xml_from_lines(xml_lines: List[str]) -> Tuple[Optional[List[st...
method extract_element_data (line 344) | def extract_element_data(element: Any, attr: str) -> str:
method json_load (line 363) | def json_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) ...
method json_dump (line 373) | def json_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_...
method yaml_load (line 382) | def yaml_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) ...
method yaml_dump (line 392) | def yaml_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_...
method parse_python (line 401) | def parse_python(source: str) -> List[Any]:
method decode_base64 (line 413) | def decode_base64(text: str, padding_safe: bool = False, urlsafe_detec...
method load_pk (line 429) | def load_pk(data: bytes, password: Optional[bytes] = None) -> Optional...
method check_pk (line 444) | def check_pk(pkey: PrivateKeyTypes) -> bool:
method get_chunks (line 463) | def get_chunks(line_len: int) -> List[Tuple[int, int]]:
method subtext (line 482) | def subtext(text: str, pos: int, hunk_size: int) -> str:
method get_excel_column_name (line 515) | def get_excel_column_name(column_index: int) -> str:
FILE: experiment/data_loader.py
function transform_to_meta_path (line 20) | def transform_to_meta_path(file_path: pathlib.Path):
function read_detected_data (line 34) | def read_detected_data(file_path: pathlib.Path) -> Dict[identifier, Dict]:
function read_metadata (line 65) | def read_metadata(meta_dir: str) -> Dict[identifier, Dict]:
function get_colored_line (line 123) | def get_colored_line(line_data: Dict[str, Any]) -> str:
function join_label (line 146) | def join_label(detected_data: Dict[identifier, Dict], meta_data: Dict[id...
function get_y_labels (line 250) | def get_y_labels(df: pd.DataFrame) -> np.ndarray:
FILE: experiment/evaluate_model.py
function evaluate_model (line 8) | def evaluate_model(thresholds: dict, keras_model: Model, x_data: List[np...
FILE: experiment/features.py
class CustomLineData (line 13) | class CustomLineData(LineData):
method __init__ (line 16) | def __init__(
function get_candidates (line 40) | def get_candidates(line_data: dict):
function get_features (line 64) | def get_features(line_data: Union[dict, pd.Series],
function prepare_data (line 93) | def prepare_data(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.n...
FILE: experiment/log_callback.py
class LogCallback (line 7) | class LogCallback(Callback):
method __init__ (line 9) | def __init__(self):
method get_memory_info (line 13) | def get_memory_info():
method on_epoch_end (line 18) | def on_epoch_end(self, epoch, logs=None):
FILE: experiment/main.py
function main (line 9) | def main(argv) -> int:
FILE: experiment/ml_model.py
class MlModel (line 14) | class MlModel(kt.HyperModel):
method __init__ (line 17) | def __init__(self, line_shape: tuple, variable_shape: tuple, value_sha...
method get_hyperparam (line 24) | def get_hyperparam(self, param_name: str, hp=None) -> Any:
method build (line 38) | def build(self, hp: Optional[Any]) -> Model:
FILE: experiment/model_config_preprocess.py
function model_config_preprocess (line 12) | def model_config_preprocess(df_all: pd.DataFrame, doc_target: bool) -> D...
FILE: experiment/plot.py
function save_plot (line 14) | def save_plot(stamp: str, title: str, history: History, dir_path: pathli...
function stamp_plot (line 44) | def stamp_plot(stamp: str, dir_path: pathlib.Path, info: str):
FILE: experiment/prepare_data.py
function execute_scanner (line 15) | def execute_scanner(dataset_location: str, report_file_str: str, train_r...
function data_checksum (line 35) | def data_checksum(dir_path: Path) -> str:
function prepare_train_data (line 45) | def prepare_train_data(cred_data_location: str, jobs: int, doc_target: b...
FILE: experiment/tools/base64_test.py
function gen_token (line 11) | def gen_token(pad: int, txt: bytes) -> bytes:
function main (line 16) | def main(argv):
FILE: experiment/tools/entropy_test.py
function pool_initializer (line 16) | def pool_initializer() -> None:
function evaluate_avg (line 20) | def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]:
FILE: experiment/tools/morpheme_test.py
class KeywordChecklistTest (line 15) | class KeywordChecklistTest(KeywordChecklist):
method calc (line 17) | def calc(self, line_lower: str) -> int:
function pool_initializer (line 28) | def pool_initializer() -> None:
function evaluate_avg (line 32) | def evaluate_avg(_args: Tuple[int, float, float]) -> Tuple[float, float]:
FILE: experiment/tools/strength_test.py
function pool_initializer (line 17) | def pool_initializer() -> None:
function evaluate_avg (line 21) | def evaluate_avg(size) -> Tuple[Tuple[float, float], Tuple[float, float]]:
FILE: experiment/train.py
function train (line 31) | def train(
FILE: fuzz/__main__.py
function fuzz_credsweeper_scan (line 49) | def fuzz_credsweeper_scan(data: bytes):
function main (line 95) | def main():
FILE: fuzz/auxilary.py
function main (line 12) | def main(argv):
FILE: tests/common/test_confidence.py
class TestConfidence (line 6) | class TestConfidence(unittest.TestCase):
method test_severity_p (line 8) | def test_severity_p(self):
method test_severity_n (line 14) | def test_severity_n(self):
method test_severity_comparison_p (line 23) | def test_severity_comparison_p(self):
method test_severity_comparison_n (line 31) | def test_severity_comparison_n(self):
FILE: tests/common/test_keyword_checklist.py
class TestKeywordChecklist (line 7) | class TestKeywordChecklist(TestCase):
method test_keyword_set_p (line 9) | def test_keyword_set_p(self):
method test_morpheme_set_p (line 15) | def test_morpheme_set_p(self):
method test_keyword_set_n (line 22) | def test_keyword_set_n(self):
method test_morpheme_set_n (line 42) | def test_morpheme_set_n(self):
FILE: tests/common/test_keyword_pattern.py
class TestKeywordPattern (line 10) | class TestKeywordPattern:
method test_separator_n (line 13) | def test_separator_n(self, config: Config, file_path: pytest.fixture, ...
method test_separator_p (line 26) | def test_separator_p(self, config: Config, file_path: pytest.fixture, ...
method test_keyword_pattern_p (line 203) | def test_keyword_pattern_p(self, config: Config, file_path: pytest.fix...
method test_keyword_pattern_n (line 220) | def test_keyword_pattern_n(self, config: Config, file_path: pytest.fix...
FILE: tests/common/test_regex.py
class TestRegex (line 6) | class TestRegex:
method test_regex_n (line 22) | def test_regex_n(self, text: str):
method test_regex_p (line 38) | def test_regex_p(self, text: str):
FILE: tests/common/test_severity.py
class TestSeverity (line 6) | class TestSeverity(unittest.TestCase):
method test_severity_p (line 8) | def test_severity_p(self):
method test_severity_n (line 16) | def test_severity_n(self):
method test_severity_comparison_p (line 25) | def test_severity_comparison_p(self):
method test_severity_comparison_n (line 40) | def test_severity_comparison_n(self):
FILE: tests/config/test_config.py
class ConfigTest (line 7) | class ConfigTest(TestCase):
method test_extension_check_p (line 9) | def test_extension_check_p(self):
FILE: tests/conftest.py
function python_file_path (line 17) | def python_file_path() -> str:
function file_path (line 22) | def file_path() -> str:
function args (line 27) | def args() -> Namespace:
function config (line 33) | def config() -> Config:
function rule (line 52) | def rule(rule_name: str, config: Config, rule_path: str) -> Optional[Rule]:
function rule_path (line 61) | def rule_path() -> str:
function scanner (line 66) | def scanner(rule: Rule, config: Config, rule_path: str) -> Scanner:
function scanner_without_filters (line 73) | def scanner_without_filters(rule: Rule, config: Config, rule_path: str):
FILE: tests/credentials/test_augment_candidates.py
class TestAugmentCandidates (line 11) | class TestAugmentCandidates(unittest.TestCase):
method test_augment_candidates_p (line 13) | def test_augment_candidates_p(self):
method test_augment_candidates_n (line 29) | def test_augment_candidates_n(self):
FILE: tests/credentials/test_credential_manager.py
class TestCredentialManager (line 7) | class TestCredentialManager:
method test_groups_p (line 11) | def test_groups_p(self, line):
method test_groups_n (line 24) | def test_groups_n(self, line):
FILE: tests/credentials/test_line_data.py
class TestLineData (line 14) | class TestLineData:
method test_url_params_p (line 23) | def test_url_params_p(self, file_path: pytest.fixture, rule: pytest.fi...
method test_simple_case_p (line 38) | def test_simple_case_p(self, file_path: pytest.fixture, rule: pytest.f...
method test_multiple_word_variable_name_p (line 53) | def test_multiple_word_variable_name_p(self, file_path: pytest.fixture...
method test_multiple_word_variable_name_n (line 64) | def test_multiple_word_variable_name_n(self, file_path: pytest.fixture...
method test_function_call_p (line 76) | def test_function_call_p(self, file_path: pytest.fixture, rule: pytest...
method test_function_argument_p (line 92) | def test_function_argument_p(self, file_path: pytest.fixture, rule: py...
method test_cli_arguments_p (line 112) | def test_cli_arguments_p(self, file_path: pytest.fixture, rule: pytest...
method test_cli_arguments_n (line 125) | def test_cli_arguments_n(self, file_path: pytest.fixture, rule: pytest...
class TestLineDataStartEnd (line 134) | class TestLineDataStartEnd(unittest.TestCase):
method test_start_end_p (line 136) | def test_start_end_p(self) -> None:
method test_search_start_end_p (line 144) | def test_search_start_end_p(self) -> None:
method test_part_url_sanitize_p (line 153) | def test_part_url_sanitize_p(self) -> None:
method test_hash_text_n (line 160) | def test_hash_text_n(self):
method test_hash_text_p (line 163) | def test_hash_text_p(self):
method test_sub_text_n (line 168) | def test_sub_text_n(self):
method test_sub_text_p (line 172) | def test_sub_text_p(self):
method test_toml_parenthesis_sanitize_n (line 179) | def test_toml_parenthesis_sanitize_n(self) -> None:
method test_toml_parenthesis_sanitize_p (line 190) | def test_toml_parenthesis_sanitize_p(self) -> None:
method test_toml_parenthesis_pass_sanitize_p (line 196) | def test_toml_parenthesis_pass_sanitize_p(self) -> None:
method test_toml_quoted_sanitize_p (line 206) | def test_toml_quoted_sanitize_p(self) -> None:
method test_toml_curly_brackets_sanitize_n (line 215) | def test_toml_curly_brackets_sanitize_n(self) -> None:
method test_toml_square_brackets_sanitize_n (line 221) | def test_toml_square_brackets_sanitize_n(self) -> None:
method test_toml_extra_sanitize_n (line 227) | def test_toml_extra_sanitize_n(self) -> None:
method test_tag_sanitize_n (line 234) | def test_tag_sanitize_n(self) -> None:
method test_tag_sanitize_p (line 256) | def test_tag_sanitize_p(self) -> None:
FILE: tests/deep_scanner/test_abstract_scanner.py
class TestAbstractScanner (line 8) | class TestAbstractScanner(unittest.TestCase):
method test_structure_processing_n (line 10) | def test_structure_processing_n(self):
method test_structure_processing_p (line 24) | def test_structure_processing_p(self):
method test_key_value_combination_n (line 40) | def test_key_value_combination_n(self):
method test_key_value_combination_p (line 61) | def test_key_value_combination_p(self):
FILE: tests/deep_scanner/test_bzip2_scanner.py
class TestBzip2Scanner (line 6) | class TestBzip2Scanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 13) | def test_match_n(self):
FILE: tests/deep_scanner/test_crx_scanner.py
class TestCrxScanner (line 7) | class TestCrxScanner(unittest.TestCase):
method test_match_p (line 9) | def test_match_p(self):
method test_match_n (line 13) | def test_match_n(self):
method test_zip_extract_p (line 23) | def test_zip_extract_p(self):
method test_zip_extract_n (line 27) | def test_zip_extract_n(self):
FILE: tests/deep_scanner/test_csv_scanner.py
class TestCsvScanner (line 9) | class TestCsvScanner(unittest.TestCase):
method setUp (line 11) | def setUp(self):
method test_match_n (line 14) | def test_match_n(self):
method test_match_p (line 23) | def test_match_p(self):
method test_get_structure_n (line 28) | def test_get_structure_n(self):
method test_get_structure_from_sample_n (line 46) | def test_get_structure_from_sample_n(self):
method test_get_structure_p (line 51) | def test_get_structure_p(self):
FILE: tests/deep_scanner/test_deb_scanner.py
class TestDebScanner (line 6) | class TestDebScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 13) | def test_match_n(self):
FILE: tests/deep_scanner/test_deep_scanner.py
class TestDeepScanner (line 10) | class TestDeepScanner(unittest.TestCase):
method test_get_deep_scanners_n (line 12) | def test_get_deep_scanners_n(self):
method test_get_deep_scanners_n (line 18) | def test_get_deep_scanners_n(self, data):
method test_is_media_n (line 24) | def test_is_media_n(self):
method test_is_media_p (line 37) | def test_is_media_p(self):
FILE: tests/deep_scanner/test_eml_scanner.py
class TestEmlScanner (line 6) | class TestEmlScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 17) | def test_match_n(self):
FILE: tests/deep_scanner/test_encoder_scanner.py
class TestEncoderScanner (line 9) | class TestEncoderScanner(unittest.TestCase):
method test_match_n (line 11) | def test_match_n(self):
method test_match_p (line 29) | def test_match_p(self):
FILE: tests/deep_scanner/test_gzip_scanner.py
class TestGzipScanner (line 6) | class TestGzipScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 12) | def test_match_n(self):
FILE: tests/deep_scanner/test_html_scanner.py
class TestHtmlScanner (line 6) | class TestHtmlScanner(unittest.TestCase):
method test_match_n (line 8) | def test_match_n(self):
method test_match_p (line 13) | def test_match_p(self):
FILE: tests/deep_scanner/test_jclass_scanner.py
class TestJclassScanner (line 32) | class TestJclassScanner(unittest.TestCase):
method setUp (line 34) | def setUp(self):
method test_get_utf8_constants_n (line 37) | def test_get_utf8_constants_n(self):
method test_get_utf8_constants_p (line 44) | def test_get_utf8_constants_p(self):
method test_match_p (line 60) | def test_match_p(self):
method test_match_n (line 65) | def test_match_n(self):
FILE: tests/deep_scanner/test_jks_scanner.py
class TestJksScanner (line 6) | class TestJksScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 13) | def test_match_n(self):
FILE: tests/deep_scanner/test_lzma_scanner.py
class TestLzmaScanner (line 6) | class TestLzmaScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 15) | def test_match_n(self):
FILE: tests/deep_scanner/test_mxfile_scanner.py
class TestMxfileScanner (line 6) | class TestMxfileScanner(unittest.TestCase):
method test_match_n (line 8) | def test_match_n(self):
method test_match_p (line 16) | def test_match_p(self):
FILE: tests/deep_scanner/test_pdf_scanner.py
class TestPdfScanner (line 6) | class TestPdfScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 12) | def test_match_n(self):
FILE: tests/deep_scanner/test_png_scanner.py
class TestPngScanner (line 13) | class TestPngScanner(unittest.TestCase):
method setUp (line 15) | def setUp(self):
method test_match_n (line 18) | def test_match_n(self):
method test_match_p (line 23) | def test_match_p(self):
method test_yield_png_chunks_p (line 27) | def test_yield_png_chunks_p(self):
FILE: tests/deep_scanner/test_rtf_scanner.py
class TestRtfScanner (line 12) | class TestRtfScanner(unittest.TestCase):
method setUp (line 14) | def setUp(self):
method test_get_lines_n (line 17) | def test_get_lines_n(self):
method test_get_lines_p (line 20) | def test_get_lines_p(self):
FILE: tests/deep_scanner/test_sqlite3_scanner.py
class TestSqlite3Scanner (line 7) | class TestSqlite3Scanner(unittest.TestCase):
method setUp (line 9) | def setUp(self):
method test_walk_n (line 12) | def test_walk_n(self):
method test_walk_p (line 16) | def test_walk_p(self):
FILE: tests/deep_scanner/test_strings_scanner.py
class TestStringsScanner (line 8) | class TestStringsScanner(unittest.TestCase):
method setUp (line 10) | def setUp(self):
method test_get_lines_hypothesis_n (line 14) | def test_get_lines_hypothesis_n(self, data):
method test_get_lines_n (line 17) | def test_get_lines_n(self):
method test_get_lines_p (line 23) | def test_get_lines_p(self):
FILE: tests/deep_scanner/test_struct_scanner.py
class TestStructScanner (line 12) | class TestStructScanner(unittest.TestCase):
method setUp (line 14) | def setUp(self):
method test_scan_n (line 28) | def test_scan_n(self):
method test_scan_p (line 32) | def test_scan_p(self):
FILE: tests/deep_scanner/test_tar_scanner.py
class TestTarScanner (line 8) | class TestTarScanner(unittest.TestCase):
method test_match_p (line 10) | def test_match_p(self):
method test_match_n (line 18) | def test_match_n(self):
FILE: tests/deep_scanner/test_tmx_scanner.py
class TestTmxScanner (line 6) | class TestTmxScanner(unittest.TestCase):
method test_match_p (line 8) | def test_match_p(self):
method test_match_n (line 21) | def test_match_n(self):
FILE: tests/deep_scanner/test_xml_scanner.py
class TestXmlScanner (line 7) | class TestXmlScanner(unittest.TestCase):
method test_match_n (line 9) | def test_match_n(self):
method test_match_p (line 22) | def test_match_p(self):
FILE: tests/deep_scanner/test_zip_scanner.py
class TestZipScanner (line 7) | class TestZipScanner(unittest.TestCase):
method test_match_p (line 9) | def test_match_p(self):
method test_match_n (line 16) | def test_match_n(self):
FILE: tests/deep_scanner/test_zlib_scanner.py
class TestZlibScanner (line 18) | class TestZlibScanner(unittest.TestCase):
method setUp (line 20) | def setUp(self):
method test_match_hypothesis_n (line 24) | def test_match_hypothesis_n(self, data):
method test_match_p (line 28) | def test_match_p(self):
method test_decompress_hypothesis_n (line 34) | def test_decompress_hypothesis_n(self, data):
method test_decompress_static_n (line 39) | def test_decompress_static_n(self):
method test_decompress_static_p (line 51) | def test_decompress_static_p(self):
method test_decompress_n (line 56) | def test_decompress_n(self):
method test_decompress_p (line 74) | def test_decompress_p(self):
FILE: tests/file_handler/test_byte_content_provider.py
class TestByteContentProvider (line 13) | class TestByteContentProvider:
method test_get_analysis_target_p (line 17) | def test_get_analysis_target_p(self, lines_as_bytes: bytes, lines: Lis...
method test_byte_content_provider_p (line 29) | def test_byte_content_provider_p(self) -> None:
method test_free_n (line 43) | def test_free_n(self) -> None:
FILE: tests/file_handler/test_data_content_provider.py
class DataContentProviderTest (line 18) | class DataContentProviderTest(unittest.TestCase):
method test_wrong_xml_n (line 21) | def test_wrong_xml_n(self) -> None:
method test_scan_wrong_provider_n (line 35) | def test_scan_wrong_provider_n(self) -> None:
method test_scan_bottom_reach_n (line 41) | def test_scan_bottom_reach_n(self) -> None:
method test_scan_wrong_zip_data_n (line 46) | def test_scan_wrong_zip_data_n(self) -> None:
method test_scan_empty_zip_n (line 51) | def test_scan_empty_zip_n(self) -> None:
method test_scan_zipfile_n (line 57) | def test_scan_zipfile_n(self) -> None:
method test_scan_zipfile_p (line 73) | def test_scan_zipfile_p(self) -> None:
method test_scan_zipfile_size_limit_n (line 139) | def test_scan_zipfile_size_limit_n(self) -> None:
method test_scan_zipfile_size_limit_p (line 144) | def test_scan_zipfile_size_limit_p(self) -> None:
method test_scan_zipfile_bomb_1_n (line 149) | def test_scan_zipfile_bomb_1_n(self) -> None:
method test_scan_zipfile_bomb_2_n (line 156) | def test_scan_zipfile_bomb_2_n(self) -> None:
method test_free_n (line 163) | def test_free_n(self) -> None:
FILE: tests/file_handler/test_diff_content_provider.py
class TestDiffContentProvider (line 9) | class TestDiffContentProvider(unittest.TestCase):
method test_get_analysis_target_p (line 11) | def test_get_analysis_target_p(self) -> None:
method test_get_analysis_target_n (line 41) | def test_get_analysis_target_n(self) -> None:
method test_parse_lines_data_p (line 64) | def test_parse_lines_data_p(self) -> None:
method test_parse_lines_data_n (line 74) | def test_parse_lines_data_n(self) -> None:
method test_free_n (line 84) | def test_free_n(self) -> None:
method test_data_n (line 106) | def test_data_n(self) -> None:
FILE: tests/file_handler/test_file_path_extractor.py
class TestFilePathExtractor (line 16) | class TestFilePathExtractor(unittest.TestCase):
method setUp (line 18) | def setUp(self):
method tearDown (line 60) | def tearDown(self):
method test_apply_gitignore_p (line 63) | def test_apply_gitignore_p(self) -> None:
method test_apply_gitignore_n (line 69) | def test_apply_gitignore_n(self) -> None:
method assert_true_check_exclude_file (line 90) | def assert_true_check_exclude_file(self, paths: List[str]):
method assert_false_check_exclude_file (line 94) | def assert_false_check_exclude_file(self, paths: List[str]):
method test_check_exclude_file_p (line 98) | def test_check_exclude_file_p(self) -> None:
method test_check_exclude_file_n (line 126) | def test_check_exclude_file_n(self) -> None:
method test_find_by_ext_file_p (line 148) | def test_find_by_ext_file_p(self) -> None:
method test_find_by_ext_file_n (line 155) | def test_find_by_ext_file_n(self) -> None:
method test_check_file_size_p (line 163) | def test_check_file_size_p(self, mock_getsize) -> None:
method test_check_file_size_n (line 169) | def test_check_file_size_n(self, mock_getsize) -> None:
method test_skip_symlink_n (line 176) | def test_skip_symlink_n(self) -> None:
FILE: tests/file_handler/test_files_provider.py
class TestFilesProvider (line 13) | class TestFilesProvider(unittest.TestCase):
method test_get_scannable_files_io_p (line 15) | def test_get_scannable_files_io_p(self) -> None:
method test_get_scannable_files_io_n (line 78) | def test_get_scannable_files_io_n(self) -> None:
FILE: tests/file_handler/test_patches_provider.py
class TestPatchesProvider (line 13) | class TestPatchesProvider:
method test_load_patch_data_p (line 15) | def test_load_patch_data_p(self, config: Config) -> None:
method test_load_patch_data_io_p (line 39) | def test_load_patch_data_io_p(self, config: Config) -> None:
method test_load_patch_data_utf16_p (line 64) | def test_load_patch_data_utf16_p(self, config: Config) -> None:
method test_load_patch_data_western_n (line 86) | def test_load_patch_data_western_n(self, config: Config) -> None:
method test_load_patch_data_n (line 111) | def test_load_patch_data_n(self, config: Config) -> None:
method test_oversize_n (line 137) | def test_oversize_n(self, config: Config) -> None:
method test_memory_error_n (line 151) | def test_memory_error_n(self, config: Config) -> None:
method test_overflow_error_n (line 178) | def test_overflow_error_n(self, config: Config) -> None:
FILE: tests/file_handler/test_string_content_provider.py
class TestStringContentProvider (line 9) | class TestStringContentProvider(unittest.TestCase):
method test_get_analysis_target_p (line 11) | def test_get_analysis_target_p(self) -> None:
method test_get_analysis_target_n (line 31) | def test_get_analysis_target_n(self) -> None:
method test_free_n (line 53) | def test_free_n(self) -> None:
method test_data_n (line 60) | def test_data_n(self) -> None:
FILE: tests/file_handler/test_struct_content_provider.py
class TestStructContentProvider (line 6) | class TestStructContentProvider(unittest.TestCase):
method test_free_n (line 8) | def test_free_n(self) -> None:
method test_data_n (line 15) | def test_data_n(self) -> None:
FILE: tests/file_handler/test_text_content_provider.py
class TestTextContentProvider (line 11) | class TestTextContentProvider(unittest.TestCase):
method test_get_analysis_target_p (line 13) | def test_get_analysis_target_p(self) -> None:
method test_get_analysis_target_n (line 52) | def test_get_analysis_target_n(self) -> None:
method test_free_n (line 72) | def test_free_n(self) -> None:
FILE: tests/filters/conftest.py
function success_line (line 25) | def success_line(request) -> str:
FILE: tests/filters/test_line_git_binary_check.py
class TestLineGitBinaryCheck (line 9) | class TestLineGitBinaryCheck:
method test_line_specific_key_check_p (line 13) | def test_line_specific_key_check_p(self, file_path: pytest.fixture, li...
method test_line_specific_key_check_n (line 20) | def test_line_specific_key_check_n(self, file_path: pytest.fixture, li...
FILE: tests/filters/test_line_specific_key_check.py
class TestLineSpecificKeyCheck (line 9) | class TestLineSpecificKeyCheck:
method test_line_specific_key_check_p (line 14) | def test_line_specific_key_check_p(self, file_path: pytest.fixture, li...
method test_line_specific_key_check_n (line 23) | def test_line_specific_key_check_n(self, file_path: pytest.fixture, li...
FILE: tests/filters/test_line_uue_part_check.py
class TestLineUUEPartCheck (line 9) | class TestLineUUEPartCheck(TestCase):
method test_line_uue_part_check_short_n (line 11) | def test_line_uue_part_check_short_n(self):
method test_line_uue_part_check_uue__n (line 17) | def test_line_uue_part_check_uue__n(self):
method test_line_uue_part_single_n (line 27) | def test_line_uue_part_single_n(self):
method test_line_uue_part_check_n (line 33) | def test_line_uue_part_check_n(self):
method test_line_uue_part_check_p (line 39) | def test_line_uue_part_check_p(self):
FILE: tests/filters/test_value_allowlist_check.py
class TestValueAllowlistCheck (line 8) | class TestValueAllowlistCheck:
method test_value_allowlist_check_p (line 30) | def test_value_allowlist_check_p(self, file_path: pytest.fixture, line...
method test_value_allowlist_check_n (line 55) | def test_value_allowlist_check_n(self, file_path: pytest.fixture, line...
FILE: tests/filters/test_value_array_dictionary_check.py
class TestValueArrayDictionaryCheck (line 9) | class TestValueArrayDictionaryCheck:
method token_rule (line 12) | def token_rule(self, config) -> Rule:
method test_value_array_dictionary_p (line 27) | def test_value_array_dictionary_p(self, token_rule: Rule, file_path: p...
method test_value_array_dictionary_n (line 38) | def test_value_array_dictionary_n(self, token_rule: Rule, file_path: p...
method test_array_assignment_n (line 54) | def test_array_assignment_n(self, token_rule: Rule, file_path: pytest....
FILE: tests/filters/test_value_atlassian_token_check.py
class TestValueAtlassianTokenCheck (line 12) | class TestValueAtlassianTokenCheck:
method test_value_structured_token_check_p (line 15) | def test_value_structured_token_check_p(self, file_path: pytest.fixtur...
method test_value_structured_token_check_n (line 28) | def test_value_structured_token_check_n(self, file_path: pytest.fixtur...
FILE: tests/filters/test_value_azure_token_check.py
class TestValueAzureTokenCheck (line 8) | class TestValueAzureTokenCheck(unittest.TestCase):
method test_value_AzureToken_check_p (line 10) | def test_value_AzureToken_check_p(self):
method test_value_AzureToken_check_n (line 21) | def test_value_AzureToken_check_n(self):
FILE: tests/filters/test_value_base32_data_check.py
class TestValueBase32DataCheck (line 8) | class TestValueBase32DataCheck:
method test_value_entropy_check_p (line 11) | def test_value_entropy_check_p(self, file_path: pytest.fixture, line: ...
method test_value_entropy_check_n (line 16) | def test_value_entropy_check_n(self, file_path: pytest.fixture, line: ...
FILE: tests/filters/test_value_base64_data_check.py
class TestValueBase64DataCheck (line 8) | class TestValueBase64DataCheck:
method test_value_entropy_check_p (line 11) | def test_value_entropy_check_p(self, file_path: pytest.fixture, line: ...
method test_value_entropy_check_n (line 16) | def test_value_entropy_check_n(self, file_path: pytest.fixture, line: ...
FILE: tests/filters/test_value_base64_key_check.py
class TestValueBase64KeyCheck (line 9) | class TestValueBase64KeyCheck(unittest.TestCase):
method test_value_check_n (line 13) | def test_value_check_n(self) -> None:
method test_value_check_p (line 24) | def test_value_check_p(self) -> None:
FILE: tests/filters/test_value_base64_part_check.py
class TestValueBase64PartCheck (line 9) | class TestValueBase64PartCheck(unittest.TestCase):
method test_value_check_n (line 11) | def test_value_check_n(self) -> None:
method test_value_check_p (line 47) | def test_value_check_p(self) -> None:
FILE: tests/filters/test_value_basic_auth_check.py
class TestValueBasicAuthCheck (line 9) | class TestValueBasicAuthCheck(unittest.TestCase):
method test_value_check_n (line 11) | def test_value_check_n(self) -> None:
method test_value_check_p (line 28) | def test_value_check_p(self) -> None:
FILE: tests/filters/test_value_blocklist_check.py
class TestValueBlocklistCheck (line 8) | class TestValueBlocklistCheck:
method test_value_blocklist_p (line 10) | def test_value_blocklist_p(self, file_path: pytest.fixture, success_li...
method test_value_blocklist_n (line 15) | def test_value_blocklist_n(self, file_path: pytest.fixture, line: str)...
FILE: tests/filters/test_value_camel_case_check.py
class TestValueCamelCaseCheck (line 8) | class TestValueCamelCaseCheck:
method test_value_camelcase_p (line 10) | def test_value_camelcase_p(self, file_path: pytest.fixture, success_li...
method test_value_camelcase_n (line 15) | def test_value_camelcase_n(self, file_path: pytest.fixture, line: str)...
FILE: tests/filters/test_value_dictionary_keyword_check.py
class TestValueDictionaryKeywordCheck (line 8) | class TestValueDictionaryKeywordCheck:
method test_value_dictionary_keyword_check_p (line 10) | def test_value_dictionary_keyword_check_p(self, file_path: pytest.fixt...
method test_value_dictionary_keyword_check_n (line 15) | def test_value_dictionary_keyword_check_n(self, file_path: pytest.fixt...
FILE: tests/filters/test_value_entropy_base32_check.py
class TestValueEntropyBase32Check (line 10) | class TestValueEntropyBase32Check:
method test_value_entropy_check_p (line 13) | def test_value_entropy_check_p(self, file_path: pytest.fixture, line: ...
method test_value_entropy_check_n (line 18) | def test_value_entropy_check_n(self, file_path: pytest.fixture, line: ...
method test_get_min_data_entropy_p (line 28) | def test_get_min_data_entropy_p(self, size: int, entropy: float, devia...
method test_get_min_data_entropy_n (line 36) | def test_get_min_data_entropy_n(self, size: int) -> None:
FILE: tests/filters/test_value_entropy_base36_check.py
class TestValueEntropyBase36Check (line 10) | class TestValueEntropyBase36Check:
method test_value_entropy_check_p (line 13) | def test_value_entropy_check_p(self, file_path: pytest.fixture, line: ...
method test_value_entropy_check_n (line 18) | def test_value_entropy_check_n(self, file_path: pytest.fixture, line: ...
method test_get_min_data_entropy_p (line 29) | def test_get_min_data_entropy_p(self, size: int, entropy: float, devia...
method test_get_min_data_entropy_n (line 37) | def test_get_min_data_entropy_n(self, size: int) -> None:
FILE: tests/filters/test_value_entropy_base64_check.py
class TestValueEntropyBase64Check (line 10) | class TestValueEntropyBase64Check:
method test_value_entropy_check_p (line 13) | def test_value_entropy_check_p(self, file_path: pytest.fixture, line: ...
method test_value_entropy_check_n (line 18) | def test_value_entropy_check_n(self, file_path: pytest.fixture, line: ...
method test_get_min_data_entropy_p (line 37) | def test_get_min_data_entropy_p(self, size: int, entropy: float, devia...
method test_get_min_data_entropy_n (line 45) | def test_get_min_data_entropy_n(self, size: int) -> None:
FILE: tests/filters/test_value_file_path_check.py
class TestValueFilePathCheck (line 8) | class TestValueFilePathCheck:
method test_value_file_path_check_p (line 15) | def test_value_file_path_check_p(self, file_path: pytest.fixture, line...
method test_value_file_path_check_n (line 39) | def test_value_file_path_check_n(self, file_path: pytest.fixture, line...
FILE: tests/filters/test_value_github_check.py
class TestValueGitHubCheck (line 8) | class TestValueGitHubCheck:
method test_value_github_p (line 12) | def test_value_github_p(self, file_path: pytest.fixture, line: str) ->...
method test_value_github_n (line 18) | def test_value_github_n(self, file_path: pytest.fixture, line: str) ->...
FILE: tests/filters/test_value_grafana_check.py
class TestValueGrafanaCheck (line 10) | class TestValueGrafanaCheck:
method test_value_grafana_token_p (line 13) | def test_value_grafana_token_p(self, file_path: pytest.fixture, line: ...
method test_value_grafana_key_p (line 19) | def test_value_grafana_key_p(self, file_path: pytest.fixture, line: st...
method test_value_grafana_n (line 25) | def test_value_grafana_n(self, file_path: pytest.fixture, line: str) -...
FILE: tests/filters/test_value_grafana_service_check.py
class TestValueGrafanaServiceCheck (line 8) | class TestValueGrafanaServiceCheck:
method test_value_sgrafana_service_check_p (line 11) | def test_value_sgrafana_service_check_p(self, file_path: pytest.fixtur...
method test_value_sgrafana_service_check_n (line 16) | def test_value_sgrafana_service_check_n(self, file_path: pytest.fixtur...
FILE: tests/filters/test_value_hex_number_check.py
class TestValueHexNumberCheck (line 8) | class TestValueHexNumberCheck:
method test_value_number_check_p (line 11) | def test_value_number_check_p(self, file_path: pytest.fixture, line: s...
method test_value_number_check_n (line 16) | def test_value_number_check_n(self, file_path: pytest.fixture, line: s...
FILE: tests/filters/test_value_json_web_key_check.py
class TestValueJsonWebKeyCheck (line 9) | class TestValueJsonWebKeyCheck(unittest.TestCase):
method test_value_jwk_check_n (line 11) | def test_value_jwk_check_n(self):
method test_value_jwt_check_p (line 23) | def test_value_jwt_check_p(self):
FILE: tests/filters/test_value_json_web_token_check.py
class TestValueJsonWebTokenCheck (line 8) | class TestValueJsonWebTokenCheck(unittest.TestCase):
method test_value_jwt_check_p (line 10) | def test_value_jwt_check_p(self):
method test_value_jwt_check_n (line 27) | def test_value_jwt_check_n(self):
FILE: tests/filters/test_value_last_word_check.py
class TestValueLastWordCheck (line 8) | class TestValueLastWordCheck:
method test_value_last_word_check_p (line 10) | def test_value_last_word_check_p(self, file_path: pytest.fixture, succ...
method test_value_last_word_check_n (line 15) | def test_value_last_word_check_n(self, file_path: pytest.fixture, line...
FILE: tests/filters/test_value_length_check.py
class TestValueLengthCheck (line 8) | class TestValueLengthCheck:
method test_value_length_check_p (line 10) | def test_value_length_check_p(self, file_path: pytest.fixture, success...
method test_value_length_check_n (line 15) | def test_value_length_check_n(self, file_path: pytest.fixture, line: s...
FILE: tests/filters/test_value_method_check.py
class TestValueMethodCheck (line 8) | class TestValueMethodCheck:
method test_value_method_check_p (line 10) | def test_value_method_check_p(self, file_path: pytest.fixture, success...
method test_value_method_check_n (line 15) | def test_value_method_check_n(self, file_path: pytest.fixture, line: s...
FILE: tests/filters/test_value_morphemes_check.py
class TestValueMorphemesCheck (line 14) | class TestValueMorphemesCheck(unittest.TestCase):
method setUp (line 16) | def setUp(self) -> None:
method test_min_patter_len_n (line 19) | def test_min_patter_len_n(self) -> None:
method test_init_n (line 23) | def test_init_n(self) -> None:
method test_init_p (line 27) | def test_init_p(self) -> None:
method test_run_small_n (line 31) | def test_run_small_n(self) -> None:
method test_run_oversize_n (line 35) | def test_run_oversize_n(self) -> None:
method test_run_true_p (line 42) | def test_run_true_p(self) -> None:
method test_run_false_p (line 46) | def test_run_false_p(self) -> None:
class TestValueMorphemesCheckFixture (line 51) | class TestValueMorphemesCheckFixture:
method test_value_couple_keyword_check_p (line 62) | def test_value_couple_keyword_check_p(self, file_path: pytest.fixture,...
method test_value_couple_keyword_check_n (line 86) | def test_value_couple_keyword_check_n(self, file_path: pytest.fixture,...
method test_value_couple_keyword_check_arg_n (line 91) | def test_value_couple_keyword_check_arg_n(self, file_path: pytest.fixt...
method test_value_couple_keyword_check_arg_p (line 96) | def test_value_couple_keyword_check_arg_p(self, file_path: pytest.fixt...
FILE: tests/filters/test_value_not_allowed_pattern.py
class TestValueLastWordCheck (line 8) | class TestValueLastWordCheck:
method test_value_last_word_check_p (line 10) | def test_value_last_word_check_p(self, file_path: pytest.fixture, succ...
method test_value_last_word_check_n (line 15) | def test_value_last_word_check_n(self, file_path: pytest.fixture, line...
FILE: tests/filters/test_value_not_part_encoded.py
class TestValueNotPartEncodedCheck (line 10) | class TestValueNotPartEncodedCheck:
method test_value_not_part_encoded_p (line 12) | def test_value_not_part_encoded_p(self, config: pytest.fixture) -> None:
method test_value_not_part_encoded_n (line 21) | def test_value_not_part_encoded_n(self, config: pytest.fixture) -> None:
FILE: tests/filters/test_value_number_check.py
class TestValueNumberCheck (line 8) | class TestValueNumberCheck:
method test_value_number_check_p (line 11) | def test_value_number_check_p(self, file_path: pytest.fixture, line: s...
method test_value_number_check_n (line 19) | def test_value_number_check_n(self, file_path: pytest.fixture, line: s...
FILE: tests/filters/test_value_pattern_check.py
class TestValuePatternCheck (line 13) | class TestValuePatternCheck(unittest.TestCase):
method setUp (line 15) | def setUp(self) -> None:
method test_min_patter_len_n (line 18) | def test_min_patter_len_n(self) -> None:
method test_init_n (line 22) | def test_init_n(self) -> None:
method test_init_p (line 27) | def test_init_p(self) -> None:
method test_duple_pattern_check_n (line 32) | def test_duple_pattern_check_n(self) -> None:
method test_duple_pattern_check_p (line 39) | def test_duple_pattern_check_p(self) -> None:
method test_equal_pattern_check_n (line 44) | def test_equal_pattern_check_n(self) -> None:
method test_equal_pattern_check_p (line 50) | def test_equal_pattern_check_p(self) -> None:
method test_ascending_pattern_check_n (line 55) | def test_ascending_pattern_check_n(self) -> None:
method test_ascending_pattern_check_p (line 60) | def test_ascending_pattern_check_p(self) -> None:
method test_descending_pattern_check_n (line 64) | def test_descending_pattern_check_n(self) -> None:
method test_descending_pattern_check_p (line 68) | def test_descending_pattern_check_p(self) -> None:
class TestValuePatternCheckFixture (line 73) | class TestValuePatternCheckFixture:
method test_value_similarity_check_p (line 75) | def test_value_similarity_check_p(self, file_path: pytest.fixture, con...
method test_value_similarity_check_n (line 85) | def test_value_similarity_check_n(self, file_path: pytest.fixture, con...
FILE: tests/filters/test_value_sealed_secret_check.py
class TestValueSealedSecretCheck (line 12) | class TestValueSealedSecretCheck(unittest.TestCase):
method test_value_search_check_n (line 14) | def test_value_search_check_n(self):
method test_value_search_check_p (line 22) | def test_value_search_check_p(self):
FILE: tests/filters/test_value_search_check.py
class TestValueSearchCheck (line 11) | class TestValueSearchCheck(unittest.TestCase):
method test_value_search_check_n (line 13) | def test_value_search_check_n(self):
method test_value_search_check_p (line 24) | def test_value_search_check_p(self):
FILE: tests/filters/test_value_similarity_check.py
class TestValueSimilarityCheck (line 9) | class TestValueSimilarityCheck:
method password_rule (line 12) | def password_rule(self, config) -> Rule:
method test_value_similarity_check_p (line 26) | def test_value_similarity_check_p(self, password_rule: Rule, file_path...
method test_value_similarity_check_n (line 33) | def test_value_similarity_check_n(self, password_rule: Rule, file_path...
FILE: tests/filters/test_value_split_keyword_check.py
class TestValueSplitKeywordCheck (line 8) | class TestValueSplitKeywordCheck:
method test_value_split_keyword_check_p (line 11) | def test_value_split_keyword_check_p(self, file_path: pytest.fixture, ...
method test_value_split_keyword_check_n (line 16) | def test_value_split_keyword_check_n(self, file_path: pytest.fixture, ...
FILE: tests/filters/test_value_string_type_check.py
class TestValueStringTypeCheck (line 9) | class TestValueStringTypeCheck:
method test_value_string_type_check_p (line 11) | def test_value_string_type_check_p(self, config: Config, success_line:...
method test_value_string_type_check_n (line 17) | def test_value_string_type_check_n(self, config: Config, line: str) ->...
method test_value_string_type_check_none_path_n (line 22) | def test_value_string_type_check_none_path_n(self, config: Config, suc...
method test_value_string_type_check_not_quoted_source_file_n (line 29) | def test_value_string_type_check_not_quoted_source_file_n(self, line: ...
FILE: tests/filters/test_value_token_base32_check.py
class TestValueTokenBase32Check (line 8) | class TestValueTokenBase32Check:
method test_value_token_base32_check_p (line 11) | def test_value_token_base32_check_p(self, line: str) -> None:
method test_value_token_base32_check_n (line 17) | def test_value_token_base32_check_n(self, line: str) -> None:
FILE: tests/filters/test_value_token_base36_check.py
class TestValueTokenBase36Check (line 8) | class TestValueTokenBase36Check:
method test_value_token_base36_check_p (line 18) | def test_value_token_base36_check_p(self, line: str) -> None:
method test_value_token_base36_check_n (line 32) | def test_value_token_base36_check_n(self, line: str) -> None:
FILE: tests/filters/test_value_token_base64_check.py
class TestValueTokenBase64Check (line 8) | class TestValueTokenBase64Check:
method test_value_token_base64_check_p (line 18) | def test_value_token_base64_check_p(self, line: str) -> None:
method test_value_token_base64_check_n (line 31) | def test_value_token_base64_check_n(self, line: str) -> None:
FILE: tests/filters/test_value_token_check.py
class TestValueTokenCheck (line 8) | class TestValueTokenCheck:
method test_value_token_check_p (line 10) | def test_value_token_check_p(self, file_path: pytest.fixture, success_...
method test_value_token_check_n (line 15) | def test_value_token_check_n(self, file_path: pytest.fixture, line: st...
FILE: tests/ml_model/test_features.py
class TestFeatures (line 28) | class TestFeatures(TestCase):
method init_feature_search_comment (line 31) | def init_feature_search_comment(comment: str) -> SearchInAttribute:
method setUp (line 42) | def setUp(self):
method test_entropy_evaluation_n (line 59) | def test_entropy_evaluation_n(self):
method test_entropy_evaluation_p (line 66) | def test_entropy_evaluation_p(self):
method test_file_extension_n (line 79) | def test_file_extension_n(self):
method test_file_extension_p (line 82) | def test_file_extension_p(self):
method test_length_attribute_unsupported_n (line 85) | def test_length_attribute_unsupported_n(self):
method test_length_attribute_empty_n (line 89) | def test_length_attribute_empty_n(self):
method test_length_attribute_oversize_n (line 95) | def test_length_attribute_oversize_n(self):
method test_length_attribute_p (line 101) | def test_length_attribute_p(self):
method test_word_in_path_empty_n (line 106) | def test_word_in_path_empty_n(self):
method test_word_in_path_n (line 110) | def test_word_in_path_n(self):
method test_word_in_path_p (line 113) | def test_word_in_path_p(self):
method test_word_in_value_empty_n (line 116) | def test_word_in_value_empty_n(self):
method test_word_in_value_n (line 120) | def test_word_in_value_n(self):
method test_word_in_value_p (line 123) | def test_word_in_value_p(self):
method test_word_in_variable_empty_n (line 127) | def test_word_in_variable_empty_n(self):
method test_word_in_variable_n (line 132) | def test_word_in_variable_n(self):
method test_word_in_variable_p (line 136) | def test_word_in_variable_p(self):
method test_word_in_preamble_dup_n (line 140) | def test_word_in_preamble_dup_n(self):
method test_word_in_preamble_empty_n (line 144) | def test_word_in_preamble_empty_n(self):
method test_word_in_preamble_n (line 150) | def test_word_in_preamble_n(self):
method test_word_in_preamble_p (line 154) | def test_word_in_preamble_p(self):
method test_word_in_transition_dup_n (line 158) | def test_word_in_transition_dup_n(self):
method test_word_in_transition_empty_n (line 162) | def test_word_in_transition_empty_n(self):
method test_word_in_transition_n (line 168) | def test_word_in_transition_n(self):
method test_word_in_transition_p (line 172) | def test_word_in_transition_p(self):
method test_word_in_postamble_dup_n (line 176) | def test_word_in_postamble_dup_n(self):
method test_word_in_postamble_empty_n (line 180) | def test_word_in_postamble_empty_n(self):
method test_word_in_postamble_n (line 186) | def test_word_in_postamble_n(self):
method test_word_in_postamble_p (line 190) | def test_word_in_postamble_p(self):
method test_has_html_tag_empty_n (line 194) | def test_has_html_tag_empty_n(self):
method test_has_html_tag_n (line 200) | def test_has_html_tag_n(self):
method test_has_html_tag_p (line 204) | def test_has_html_tag_p(self):
method test_is_secret_numeric_empty_n (line 211) | def test_is_secret_numeric_empty_n(self):
method test_is_secret_numeric_n (line 216) | def test_is_secret_numeric_n(self):
method test_is_secret_numeric_p (line 220) | def test_is_secret_numeric_p(self):
method test_search_in_attribute_line_empty_n (line 232) | def test_search_in_attribute_line_empty_n(self):
method test_search_in_attribute_variable_empty_n (line 236) | def test_search_in_attribute_variable_empty_n(self):
method test_search_in_attribute_value_empty_n (line 242) | def test_search_in_attribute_value_empty_n(self):
method test_search_in_attribute_n (line 246) | def test_search_in_attribute_n(self):
method test_search_in_attribute_p (line 251) | def test_search_in_attribute_p(self):
method test_morpheme_dense_n (line 257) | def test_morpheme_dense_n(self):
method test_morpheme_dense_p (line 263) | def test_morpheme_dense_p(self):
method test_rule_name_n (line 272) | def test_rule_name_n(self):
method test_rule_name_p (line 275) | def test_rule_name_p(self):
method test_style_n (line 314) | def test_style_n(self):
method test_style_p (line 323) | def test_style_p(self):
method test_rule_severity_n (line 332) | def test_rule_severity_n(self):
method test_rule_severity_p (line 339) | def test_rule_severity_p(self):
FILE: tests/ml_model/test_ml_validator.py
class TestMlValidator (line 19) | class TestMlValidator(unittest.TestCase):
method setUp (line 21) | def setUp(self):
method validate (line 36) | def validate(self, _candidate: Candidate) -> Tuple[bool, float]:
method test_ml_validator_simple_n (line 43) | def test_ml_validator_simple_n(self):
method test_ml_validator_auxiliary_p (line 69) | def test_ml_validator_auxiliary_p(self):
method test_ml_validator_auxiliary_n (line 105) | def test_ml_validator_auxiliary_n(self):
method test_extract_features_n (line 130) | def test_extract_features_n(self):
method test_extract_features_p (line 140) | def test_extract_features_p(self):
method testVariableNotAllowedPatternCheck_n (line 170) | def testVariableNotAllowedPatternCheck_n(self):
method test_extract_features_normalized_n (line 184) | def test_extract_features_normalized_n(self):
method test_encode_n (line 200) | def test_encode_n(self):
method test_encode_p (line 205) | def test_encode_p(self):
FILE: tests/rules/common.py
class BaseTestRule (line 8) | class BaseTestRule:
method test_scan_p (line 10) | def test_scan_p(self, file_path: pytest.fixture, lines: pytest.fixture,
method test_scan_n (line 18) | def test_scan_n(self, file_path: pytest.fixture, lines: List[str], sca...
class BaseTestNoQuotesRule (line 24) | class BaseTestNoQuotesRule:
method test_scan_quote_p (line 33) | def test_scan_quote_p(self, file_path: pytest.fixture, lines: pytest.f...
method test_scan_quote_n (line 38) | def test_scan_quote_n(self, python_file_path: pytest.fixture, lines: p...
class BaseTestCommentRule (line 45) | class BaseTestCommentRule:
method test_scan_comment_p (line 54) | def test_scan_comment_p(self, python_file_path: pytest.fixture, lines:...
method test_scan_comment_n (line 60) | def test_scan_comment_n(self, python_file_path: pytest.fixture, lines:...
class BaseTestMultiRule (line 68) | class BaseTestMultiRule:
method test_scan_line_data_p (line 70) | def test_scan_line_data_p(self, file_path: pytest.fixture, lines: pyte...
method test_scan_line_data_n (line 76) | def test_scan_line_data_n(self, file_path: pytest.fixture, scanner: py...
FILE: tests/rules/test_api.py
class TestApi (line 8) | class TestApi(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_auth.py
class TestAuth (line 8) | class TestAuth(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method empty_line (line 15) | def empty_line(self, request) -> List[str]:
method rule_name (line 19) | def rule_name(self) -> str:
FILE: tests/rules/test_aws_key.py
class TestAwsKey (line 8) | class TestAwsKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_aws_multi.py
class TestAwsMulti (line 8) | class TestAwsMulti(BaseTestRule, BaseTestMultiRule):
method lines (line 13) | def lines(self, request) -> List[str]:
method rule_name (line 17) | def rule_name(self) -> str:
FILE: tests/rules/test_aws_mws_key.py
class TestAwsMwsKey (line 8) | class TestAwsMwsKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_credential.py
class TestCredential (line 8) | class TestCredential(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_dynatrace_api_token.py
class TestStripeApiKey (line 8) | class TestStripeApiKey(BaseTestRule):
method lines (line 13) | def lines(self, request) -> List[str]:
method rule_name (line 17) | def rule_name(self) -> str:
FILE: tests/rules/test_facebook_key.py
class TestFacebookKey (line 8) | class TestFacebookKey(BaseTestRule):
method lines (line 14) | def lines(self, request) -> List[str]:
method rule_name (line 18) | def rule_name(self) -> str:
FILE: tests/rules/test_firebase_domain.py
class TestFirebasDomain (line 8) | class TestFirebasDomain(BaseTestRule):
method lines (line 14) | def lines(self, request) -> List[str]:
method rule_name (line 18) | def rule_name(self) -> str:
FILE: tests/rules/test_github_classic_token.py
class TestClassicToken (line 8) | class TestClassicToken(BaseTestRule):
method lines (line 13) | def lines(self, request) -> List[str]:
method rule_name (line 17) | def rule_name(self) -> str:
FILE: tests/rules/test_github_fine_granted_token.py
class TestGithubFineGrantedToken (line 8) | class TestGithubFineGrantedToken(BaseTestRule):
method lines (line 13) | def lines(self, request) -> List[str]:
method rule_name (line 17) | def rule_name(self) -> str:
FILE: tests/rules/test_google_api_key.py
class TestGoogleApiKey (line 8) | class TestGoogleApiKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_google_multi.py
class TestGoogleMulti (line 8) | class TestGoogleMulti(BaseTestRule, BaseTestMultiRule):
method lines (line 12) | def lines(self, request) -> List[str]:
method rule_name (line 16) | def rule_name(self) -> str:
FILE: tests/rules/test_google_oauth_key.py
class TestGoogleOAuthKey (line 8) | class TestGoogleOAuthKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_instagram_access_token.py
class TestInstagramAccessToken (line 10) | class TestInstagramAccessToken(BaseTestRule):
method lines (line 14) | def lines(self, request) -> List[str]:
method rule_name (line 18) | def rule_name(self) -> str:
FILE: tests/rules/test_jwt.py
class TestJwt (line 8) | class TestJwt(BaseTestRule):
method lines (line 18) | def lines(self, request) -> List[str]:
method rule_name (line 22) | def rule_name(self) -> str:
FILE: tests/rules/test_key.py
class TestKey (line 8) | class TestKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method empty_line (line 15) | def empty_line(self, request) -> List[str]:
method rule_name (line 19) | def rule_name(self) -> str:
FILE: tests/rules/test_mailchimp_key.py
class TestMailChimpKey (line 8) | class TestMailChimpKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_nonce.py
class TestNone (line 8) | class TestNone(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method empty_line (line 15) | def empty_line(self, request) -> List[str]:
method rule_name (line 19) | def rule_name(self) -> str:
FILE: tests/rules/test_password.py
class TestPassword (line 8) | class TestPassword(BaseTestRule):
method lines (line 12) | def lines(self, request) -> List[str]:
method rule_name (line 16) | def rule_name(self) -> str:
class TestPasswordNoQuotes (line 20) | class TestPasswordNoQuotes(BaseTestNoQuotesRule):
method lines (line 23) | def lines(self, request) -> List[str]:
method rule_name (line 27) | def rule_name(self) -> str:
class TestPasswordComment (line 31) | class TestPasswordComment(BaseTestCommentRule):
method lines (line 34) | def lines(self, request) -> List[str]:
method rule_name (line 38) | def rule_name(self) -> str:
FILE: tests/rules/test_paypal_key.py
class TestPayPalKey (line 8) | class TestPayPalKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_pem_key.py
class TestPemKey (line 9) | class TestPemKey(BaseTestRule):
method lines (line 62) | def lines(self, request) -> List[str]:
method rule_name (line 66) | def rule_name(self) -> str:
class TestEmptyPemKey (line 70) | class TestEmptyPemKey:
method lines (line 82) | def lines(self, request) -> List[str]:
method rule_name (line 86) | def rule_name(self) -> str:
method test_scan_no_division_by_zero_exception_n (line 89) | def test_scan_no_division_by_zero_exception_n(self, file_path: pytest....
FILE: tests/rules/test_picatic_key.py
class TestPicaticKey (line 8) | class TestPicaticKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_pypi_api_token.py
class TestPyPiApiToken (line 8) | class TestPyPiApiToken(BaseTestRule):
method lines (line 24) | def lines(self, request) -> List[str]:
method rule_name (line 28) | def rule_name(self) -> str:
FILE: tests/rules/test_rule.py
class TestRuleConfigParsing (line 12) | class TestRuleConfigParsing:
method rule_config (line 40) | def rule_config(self, request: str) -> Any:
method test_create_from_config_p (line 43) | def test_create_from_config_p(self, config: Config, rule_config: pytes...
method test_create_from_malformed_config_n (line 51) | def test_create_from_malformed_config_n(self, config: Config, rule_con...
method test_create_from_missing_fields_n (line 57) | def test_create_from_missing_fields_n(self, config: Config) -> None:
FILE: tests/rules/test_salt.py
class TestSalt (line 8) | class TestSalt(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method empty_line (line 15) | def empty_line(self, request) -> List[str]:
method rule_name (line 19) | def rule_name(self) -> str:
FILE: tests/rules/test_secret.py
class TestSecret (line 8) | class TestSecret(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
class TestSecretNoQuotes (line 19) | class TestSecretNoQuotes(BaseTestNoQuotesRule):
method lines (line 22) | def lines(self, request) -> List[str]:
method rule_name (line 26) | def rule_name(self) -> str:
class TestSecretComment (line 30) | class TestSecretComment(BaseTestCommentRule):
method lines (line 33) | def lines(self, request) -> List[str]:
method rule_name (line 37) | def rule_name(self) -> str:
FILE: tests/rules/test_sendgrid_api_key_token.py
class TestSendGridApiKey (line 8) | class TestSendGridApiKey(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_shopify_token.py
class TestShopifyToken (line 8) | class TestShopifyToken(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_slack_token.py
class TestSlackToken (line 8) | class TestSlackToken(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_slack_webhook.py
class TestSlackToken (line 8) | class TestSlackToken(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_square_access_token.py
class TestSquareAccessToken (line 8) | class TestSquareAccessToken(BaseTestRule):
method lines (line 11) | def lines(self, request) -> List[str]:
method rule_name (line 15) | def rule_name(self) -> str:
FILE: tests/rules/test_telegram_bot_api_token.py
class TestTelegramBotApiToken (line 8) | class TestTelegramBotApiToken(BaseTestRule):
method lines (line 14) | def lines(self, request) -> List[str]:
method rule_name (line 18) | def rule_name(self) -> str:
FILE: tests/rules/test_token.py
class TestToken (line 9) | class TestToken(BaseTestRule):
method lines (line 12) | def lines(self, request) -> List[str]:
method rule_name (line 16) | def rule_name(self) -> str:
class TestTokenNoQuotes (line 20) | class TestTokenNoQuotes(BaseTestNoQuotesRule):
method lines (line 23) | def lines(self, request) -> List[str]:
method rule_name (line 27) | def rule_name(self) -> str:
class TestTokenComment (line 31) | class TestTokenComment(BaseTestCommentRule):
method lines (line 34) | def lines(self, request) -> List[str]:
method rule_name (line 38) | def rule_name(self) -> str:
class TestTokenWhitespaceBeforeQuote (line 42) | class TestTokenWhitespaceBeforeQuote:
method lines (line 45) | def lines(self) -> List[str]:
method rule_name (line 50) | def rule_name(self) -> str:
method test_scan_whitespace_before_quote_p (line 53) | def test_scan_whitespace_before_quote_p(self, file_path: pytest.fixtur...
FILE: tests/rules/test_url_credentials.py
class TestUrlCredentials (line 8) | class TestUrlCredentials(BaseTestRule):
method lines (line 14) | def lines(self, request) -> List[str]:
method rule_name (line 18) | def rule_name(self) -> str:
FILE: tests/scanner/scan_type/test_multipattern.py
class TestMultiPattern (line 13) | class TestMultiPattern(unittest.TestCase):
method setUp (line 15) | def setUp(self) -> None:
method test_oversize_line_n (line 33) | def test_oversize_line_n(self) -> None:
method test_oversize_line_p (line 41) | def test_oversize_line_p(self) -> None:
method test_get_line_positions_n (line 48) | def test_get_line_positions_n(self):
method test_get_line_positions_p (line 54) | def test_get_line_positions_p(self):
FILE: tests/scanner/scan_type/test_pem_key_pattern.py
class TestPemKeyPattern (line 6) | class TestPemKeyPattern(unittest.TestCase):
method test_remove_leading_config_lines_p (line 8) | def test_remove_leading_config_lines_p(self):
method test_remove_leading_config_lines_n (line 13) | def test_remove_leading_config_lines_n(self):
method test_sanitize_line_p (line 22) | def test_sanitize_line_p(self):
method test_sanitize_line_n (line 36) | def test_sanitize_line_n(self):
FILE: tests/test_app.py
class TestApp (line 30) | class TestApp(TestCase):
method setUp (line 32) | def setUp(self):
method _m_credsweeper (line 36) | def _m_credsweeper(args) -> Tuple[str, str]:
method test_it_works_p (line 54) | def test_it_works_p(self) -> None:
method test_huge_diff_p (line 77) | def test_huge_diff_p(self) -> None:
method test_it_works_with_patch_p (line 103) | def test_it_works_with_patch_p(self) -> None:
method test_it_works_with_multiline_in_patch_p (line 136) | def test_it_works_with_multiline_in_patch_p(self) -> None:
method test_it_works_with_patch_color_p (line 182) | def test_it_works_with_patch_color_p(self) -> None:
method test_it_works_n (line 199) | def test_it_works_n(self) -> None:
method test_log_p (line 255) | def test_log_p(self) -> None:
method test_log_n (line 282) | def test_log_n(self) -> None:
method test_help_p (line 301) | def test_help_p(self) -> None:
method test_version_p (line 321) | def test_version_p(self) -> None:
method test_banner_p (line 330) | def test_banner_p(self) -> None:
method test_patch_save_json_p (line 359) | def test_patch_save_json_p(self) -> None:
method test_patch_save_json_n (line 370) | def test_patch_save_json_n(self) -> None:
method test_export_config_p (line 383) | def test_export_config_p(self) -> None:
method test_import_config_p (line 391) | def test_import_config_p(self) -> None:
method test_import_config_n (line 417) | def test_import_config_n(self) -> None:
method test_export_log_config_p (line 437) | def test_export_log_config_p(self) -> None:
method test_import_log_config_p (line 445) | def test_import_log_config_p(self) -> None:
method test_find_by_ext_p (line 460) | def test_find_by_ext_p(self) -> None:
method test_find_by_ext_n (line 495) | def test_find_by_ext_n(self) -> None:
method test_depth_p (line 511) | def test_depth_p(self) -> None:
method test_depth_n (line 555) | def test_depth_n(self) -> None:
method test_denylist_p (line 569) | def test_denylist_p(self) -> None:
method test_denylist_n (line 595) | def test_denylist_n(self) -> None:
method test_rules_ml_p (line 612) | def test_rules_ml_p(self) -> None:
method test_rules_ml_n (line 638) | def test_rules_ml_n(self) -> None:
method test_no_filters_p (line 660) | def test_no_filters_p(self) -> None:
method test_severity_patch_xlsx_n (line 680) | def test_severity_patch_xlsx_n(self) -> None:
method test_severity_patch_xlsx_p (line 702) | def test_severity_patch_xlsx_p(self) -> None:
method test_doc_n (line 733) | def test_doc_n(self) -> None:
method test_external_ml_n (line 751) | def test_external_ml_n(self) -> None:
method test_external_ml_p (line 777) | def test_external_ml_p(self) -> None:
FILE: tests/test_doc.py
class TestDoc (line 11) | class TestDoc(unittest.TestCase):
method setUp (line 13) | def setUp(self) -> None:
method test_secret_pair_p (line 18) | def test_secret_pair_p(self) -> None:
method test_passwd_pair_p (line 26) | def test_passwd_pair_p(self) -> None:
method test_ip_id_passwd_triple_p (line 34) | def test_ip_id_passwd_triple_p(self) -> None:
method test_id_pair_passwd_pair_p (line 42) | def test_id_pair_passwd_pair_p(self) -> None:
method test_id_passwd_pair_p (line 50) | def test_id_passwd_pair_p(self) -> None:
FILE: tests/test_git.py
class TestGit (line 19) | class TestGit(unittest.TestCase):
method setUp (line 21) | def setUp(self):
method tearDown (line 256) | def tearDown(self):
method test_git_n (line 266) | def test_git_n(self, mock_get_arguments) -> None:
method test_git_p (line 303) | def test_git_p(self, mock_get_arguments) -> None:
FILE: tests/test_main.py
class TestMain (line 36) | class TestMain(unittest.TestCase):
method setUp (line 38) | def setUp(self):
method tearDown (line 41) | def tearDown(self):
method test_ml_validation_p (line 44) | def test_ml_validation_p(self) -> None:
method test_ml_validation_n (line 50) | def test_ml_validation_n(self) -> None:
method test_use_filters_p (line 56) | def test_use_filters_p(self) -> None:
method test_use_filters_n (line 65) | def test_use_filters_n(self) -> None:
method test_rules_dub_n (line 74) | def test_rules_dub_n(self) -> None:
method test_rules_dub_p (line 102) | def test_rules_dub_p(self) -> None:
method test_main_n (line 129) | def test_main_n(self, mock_get_arguments, mock_scan) -> None:
method test_main_path_p (line 147) | def test_main_path_p(self, mock_get_arguments) -> None:
method test_binary_patch_p (line 182) | def test_binary_patch_p(self, mock_get_arguments) -> None:
method test_report_p (line 216) | def test_report_p(self, mock_get_arguments) -> None:
method test_parse_args_n (line 270) | def test_parse_args_n(self, mock_parse) -> None:
method test_positive_int_p (line 276) | def test_positive_int_p(self):
method test_positive_int_n (line 282) | def test_positive_int_n(self):
method test_threshold_or_float_or_zero_p (line 289) | def test_threshold_or_float_or_zero_p(self):
method test_threshold_or_float_or_zero_n (line 296) | def test_threshold_or_float_or_zero_n(self):
method test_wrong_severity_n (line 302) | def test_wrong_severity_n(self) -> None:
method test_scan_bytes_p (line 308) | def test_scan_bytes_p(self) -> None:
method test_scan_bytes_n (line 320) | def test_scan_bytes_n(self) -> None:
method test_colored_line_p (line 329) | def test_colored_line_p(self) -> None:
method test_string_content_provider_n (line 346) | def test_string_content_provider_n(self) -> None:
method test_find_by_ext_and_not_ignore_p (line 357) | def test_find_by_ext_and_not_ignore_p(self) -> None:
method test_multi_jobs_n (line 381) | def test_multi_jobs_n(self) -> None:
method test_multi_jobs_p (line 407) | def test_multi_jobs_p(self) -> None:
method test_find_by_ext_n (line 438) | def test_find_by_ext_n(self) -> None:
method test_tar_n (line 461) | def test_tar_n(self) -> None:
method test_tar_p (line 469) | def test_tar_p(self) -> None:
method test_bad_tar_n (line 477) | def test_bad_tar_n(self) -> None:
method test_png_p (line 492) | def test_png_p(self) -> None:
method test_aws_multi_p (line 500) | def test_aws_multi_p(self) -> None:
method test_depth_p (line 514) | def test_depth_p(self) -> None:
method test_depth_n (line 529) | def test_depth_n(self) -> None:
method test_bzip2_p (line 537) | def test_bzip2_p(self) -> None:
method test_bzip2_n (line 546) | def test_bzip2_n(self) -> None:
method test_eml_p (line 564) | def test_eml_p(self) -> None:
method test_pdf_p (line 574) | def test_pdf_p(self) -> None:
method test_pdf_n (line 588) | def test_pdf_n(self) -> None:
method test_py_n (line 596) | def test_py_n(self) -> None:
method test_py_p (line 604) | def test_py_p(self) -> None:
method test_json_p (line 704) | def test_json_p(self) -> None:
method test_json_n (line 716) | def test_json_n(self) -> None:
method test_yaml_p (line 726) | def test_yaml_p(self) -> None:
method test_yaml_n (line 739) | def test_yaml_n(self) -> None:
method test_encoded_p (line 749) | def test_encoded_p(self) -> None:
method test_docx_p (line 761) | def test_docx_p(self) -> None:
method test_docx_n (line 786) | def test_docx_n(self) -> None:
method test_html_p (line 796) | def test_html_p(self) -> None:
method test_html_n (line 822) | def test_html_n(self) -> None:
method test_exclude_value_p (line 831) | def test_exclude_value_p(self) -> None:
method test_exclude_value_n (line 840) | def test_exclude_value_n(self) -> None:
method test_exclude_line_p (line 849) | def test_exclude_line_p(self) -> None:
method test_exclude_line_n (line 858) | def test_exclude_line_n(self) -> None:
method test_doc_p (line 867) | def test_doc_p(self) -> None:
method test_doc_n (line 884) | def test_doc_n(self) -> None:
method test_fallback_n (line 893) | def test_fallback_n(self) -> None:
method test_data_p (line 905) | def test_data_p(self) -> None:
method test_param_n (line 979) | def test_param_n(self) -> None:
method test_param_p (line 1018) | def test_param_p(self) -> None:
method test_random_p (line 1099) | def test_random_p(self) -> None:
method test_hashed_n (line 1114) | def test_hashed_n(self) -> None:
FILE: tests/test_utils/dummy_line_data.py
function config (line 11) | def config() -> Config:
function get_line_data (line 23) | def get_line_data(test_config: Config = config(),
FILE: tests/utils/test_hop_stat.py
class TestHopStat (line 6) | class TestHopStat(unittest.TestCase):
method test_hop_stat_n (line 8) | def test_hop_stat_n(self):
method test_hop_stat_p (line 17) | def test_hop_stat_p(self):
FILE: tests/utils/test_util.py
class TestUtils (line 20) | class TestUtils(unittest.TestCase):
method test_asn1_n (line 112) | def test_asn1_n(self):
method test_asn1_p (line 127) | def test_asn1_p(self):
method test_get_extension_n (line 140) | def test_get_extension_n(self):
method test_get_extension_p (line 151) | def test_get_extension_p(self):
method test_colon_os_n (line 161) | def test_colon_os_n(self):
method test_get_shannon_entropy_hypothesis_n (line 176) | def test_get_shannon_entropy_hypothesis_n(self, data):
method test_get_shannon_entropy_n (line 179) | def test_get_shannon_entropy_n(self):
method test_get_shannon_entropy_p (line 186) | def test_get_shannon_entropy_p(self):
method test_util_read_file_n (line 202) | def test_util_read_file_n(self):
method test_util_read_file_p (line 216) | def test_util_read_file_p(self):
method test_util_read_utf8_bin_p (line 229) | def test_util_read_utf8_bin_p(self):
method test_util_read_utf16le_bin_p (line 280) | def test_util_read_utf16le_bin_p(self):
method test_util_read_utf16le_txt_p (line 314) | def test_util_read_utf16le_txt_p(self):
method test_util_read_utf16be_txt_p (line 345) | def test_util_read_utf16be_txt_p(self):
method test_decode_text_n (line 377) | def test_decode_text_n(self):
method test_decode_text_p (line 381) | def test_decode_text_p(self):
method test_is_binary_n (line 390) | def test_is_binary_n(self):
method test_is_binary_p (line 400) | def test_is_binary_p(self):
method test_is_latin1_n (line 409) | def test_is_latin1_n(self):
method test_is_latin1_p (line 417) | def test_is_latin1_p(self):
method test_is_ascii_entropy_validate_p (line 425) | def test_is_ascii_entropy_validate_p(self):
method test_is_ascii_entropy_validate_n (line 437) | def test_is_ascii_entropy_validate_n(self):
method test_read_bin_file_n (line 448) | def test_read_bin_file_n(self):
method test_read_data_n (line 457) | def test_read_data_n(self):
method test_split_text_n (line 461) | def test_split_text_n(self, text):
method test_get_xml_data_p (line 464) | def test_get_xml_data_p(self):
method test_get_xml_data_n (line 484) | def test_get_xml_data_n(self):
method test_json_load_p (line 490) | def test_json_load_p(self):
method test_json_load_n (line 535) | def test_json_load_n(self):
method test_json_dump_p (line 544) | def test_json_dump_p(self):
method test_json_dump_n (line 570) | def test_json_dump_n(self):
method test_parse_py_p (line 579) | def test_parse_py_p(self):
method test_parse_py_n (line 586) | def test_parse_py_n(self):
method test_decode_base64_p (line 595) | def test_decode_base64_p(self):
method test_decode_base64_n (line 606) | def test_decode_base64_n(self):
method test_get_chunks_n (line 620) | def test_get_chunks_n(self):
method test_get_chunks_p (line 633) | def test_get_chunks_p(self):
method test_get_chunks_coverage_n (line 663) | def test_get_chunks_coverage_n(self):
method test_subtext_n (line 676) | def test_subtext_n(self):
method test_subtext_p (line 680) | def test_subtext_p(self):
method test_get_excel_column_name_n (line 701) | def test_get_excel_column_name_n(self):
method test_get_excel_column_name_p (line 706) | def test_get_excel_column_name_p(self):
method test_load_pk_n (line 714) | def test_load_pk_n(self):
method test_load_pk_p (line 721) | def test_load_pk_p(self):
method test_check_pk_n (line 743) | def test_check_pk_n(self):
method test_check_pk_p (line 746) | def test_check_pk_p(self):
Condensed preview — 387 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (5,864K chars).
[
{
"path": "LICENSE",
"chars": 1065,
"preview": "Copyright (c) 2021 SAMSUNG\n\nPermission is hereby granted, free of charge, to any person obtaining a copy \nof this softwa"
},
{
"path": "README.md",
"chars": 9253,
"preview": "# CredSweeper\n\n[](https://g"
},
{
"path": "SECURITY.md",
"chars": 308,
"preview": "# Security Policy\n\n## Supported Versions\n\n| Version | Supported |\n|---------|--------------------|\n| 1.15.x | "
},
{
"path": "action.yml",
"chars": 1580,
"preview": "name: \"CredSweeper action\"\ndescription: \"CredSweeper checks files\"\nauthor: \"r.babenko@samsung.com\"\nbranding:\n icon: \"te"
},
{
"path": "credsweeper/__init__.py",
"chars": 992,
"preview": "from credsweeper.app import CredSweeper\nfrom credsweeper.common.constants import ThresholdPreset, Severity, Confidence\nf"
},
{
"path": "credsweeper/__main__.py",
"chars": 95,
"preview": "import sys\n\nfrom credsweeper.main import main\n\nif __name__ == \"__main__\":\n sys.exit(main())\n"
},
{
"path": "credsweeper/app.py",
"chars": 20995,
"preview": "import json\nimport logging\nimport multiprocessing\nimport signal\nfrom pathlib import Path\nfrom typing import Any, List, O"
},
{
"path": "credsweeper/common/__init__.py",
"chars": 184,
"preview": "from credsweeper.common.keyword_checklist import KeywordChecklist\n\n# use the variable to avoid singleton creation and ma"
},
{
"path": "credsweeper/common/constants.py",
"chars": 5538,
"preview": "import string\nimport typing\nfrom enum import Enum\nfrom typing import Optional, Union\n\n\nclass Severity(Enum):\n \"\"\"Seve"
},
{
"path": "credsweeper/common/keyword_checklist.py",
"chars": 2258,
"preview": "from functools import cached_property\nfrom typing import Set, List\n\nfrom credsweeper.app import APP_PATH\n\n\nclass Keyword"
},
{
"path": "credsweeper/common/keyword_checklist.txt",
"chars": 7169,
"preview": "1234\nabort\nabout\nabove\nabsolute\nabstract\naccent\naccept\naccess\naccount\naction\nactive\nactivity\nactor\nactual\nadded\nadding\na"
},
{
"path": "credsweeper/common/keyword_pattern.py",
"chars": 3499,
"preview": "import re\n\n\nclass KeywordPattern:\n \"\"\"Pattern set of keyword types\"\"\"\n directive = r\"(?P<directive>(?:\" \\\n "
},
{
"path": "credsweeper/common/morpheme_checklist.txt",
"chars": 9017,
"preview": "../\n.com\n.org\n/bin\n/dev\n/etc\n/lib\n/mnt\n/opt\n/sbin\n/srv\n/tmp\n/usr\n/var\n000\n111\n14159265\n18284590\n222\n333\n444\n555\n65358979"
},
{
"path": "credsweeper/config/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/config/config.py",
"chars": 2694,
"preview": "import re\nfrom typing import Dict, List, Optional, Set, Any\n\nfrom humanfriendly import parse_size\n\nfrom credsweeper.comm"
},
{
"path": "credsweeper/credentials/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/credentials/augment_candidates.py",
"chars": 812,
"preview": "from typing import List\n\nfrom credsweeper.credentials.candidate import Candidate\n\n\ndef augment_candidates(candidates: Li"
},
{
"path": "credsweeper/credentials/candidate.py",
"chars": 5471,
"preview": "import copy\nimport re\nfrom json.encoder import py_encode_basestring_ascii\nfrom typing import Any, Dict, List, Optional\n\n"
},
{
"path": "credsweeper/credentials/candidate_group_generator.py",
"chars": 1243,
"preview": "from typing import Dict, List, Tuple\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.credentia"
},
{
"path": "credsweeper/credentials/candidate_key.py",
"chars": 917,
"preview": "from typing import Tuple\n\nfrom credsweeper.credentials.line_data import LineData\n\n\nclass CandidateKey:\n \"\"\"Class used"
},
{
"path": "credsweeper/credentials/credential_manager.py",
"chars": 4204,
"preview": "import logging\nfrom multiprocessing import Manager\nfrom typing import List, Dict, Tuple\n\nfrom credsweeper.credentials.ca"
},
{
"path": "credsweeper/credentials/line_data.py",
"chars": 22477,
"preview": "import contextlib\nimport hashlib\nimport re\nimport string\nfrom functools import cached_property\nfrom typing import Any, D"
},
{
"path": "credsweeper/deep_scanner/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/deep_scanner/abstract_scanner.py",
"chars": 15862,
"preview": "import contextlib\nimport datetime\nimport logging\nfrom abc import abstractmethod, ABC\nfrom typing import List, Optional, "
},
{
"path": "credsweeper/deep_scanner/byte_scanner.py",
"chars": 1159,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.credentials.candidate import Cand"
},
{
"path": "credsweeper/deep_scanner/bzip2_scanner.py",
"chars": 1995,
"preview": "import bz2\nimport logging\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfrom credsweep"
},
{
"path": "credsweeper/deep_scanner/crx_scanner.py",
"chars": 1946,
"preview": "import logging\nimport struct\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.credentials.candida"
},
{
"path": "credsweeper/deep_scanner/csv_scanner.py",
"chars": 3714,
"preview": "import csv\nimport io\nimport logging\nimport re\nfrom abc import ABC\nfrom typing import List, Optional, Dict, Any\n\nfrom cre"
},
{
"path": "credsweeper/deep_scanner/deb_scanner.py",
"chars": 2723,
"preview": "import logging\nimport struct\nfrom abc import ABC\nfrom typing import List, Optional, Generator, Tuple\n\nfrom credsweeper.c"
},
{
"path": "credsweeper/deep_scanner/deep_scanner.py",
"chars": 12785,
"preview": "import logging\nimport re\nfrom typing import List, Any, Tuple, Union, Dict\n\nfrom credsweeper.common.constants import MIN_"
},
{
"path": "credsweeper/deep_scanner/docx_scanner.py",
"chars": 4127,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nimport docx\nfrom docx.document import Do"
},
{
"path": "credsweeper/deep_scanner/eml_scanner.py",
"chars": 4082,
"preview": "import email\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.credentials.candidat"
},
{
"path": "credsweeper/deep_scanner/encoder_scanner.py",
"chars": 2597,
"preview": "import contextlib\nimport logging\nimport re\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.commo"
},
{
"path": "credsweeper/deep_scanner/gzip_scanner.py",
"chars": 1935,
"preview": "import gzip\nimport io\nimport logging\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfro"
},
{
"path": "credsweeper/deep_scanner/html_scanner.py",
"chars": 2410,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.common.constants import MAX_LINE_"
},
{
"path": "credsweeper/deep_scanner/jclass_scanner.py",
"chars": 3349,
"preview": "import io\nimport logging\nimport struct\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.common.co"
},
{
"path": "credsweeper/deep_scanner/jks_scanner.py",
"chars": 2549,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nimport jks\n\nfrom credsweeper.common.constants impo"
},
{
"path": "credsweeper/deep_scanner/lang_scanner.py",
"chars": 1350,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.credentials.candidate import Cand"
},
{
"path": "credsweeper/deep_scanner/lzma_scanner.py",
"chars": 1982,
"preview": "import logging\nimport lzma\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfrom credswee"
},
{
"path": "credsweeper/deep_scanner/mxfile_scanner.py",
"chars": 2522,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom bs4 import BeautifulSoup\nfrom lxml import etr"
},
{
"path": "credsweeper/deep_scanner/patch_scanner.py",
"chars": 2238,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.common.constants import"
},
{
"path": "credsweeper/deep_scanner/pdf_scanner.py",
"chars": 3300,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom pdfminer.high_level import extract_"
},
{
"path": "credsweeper/deep_scanner/pkcs_scanner.py",
"chars": 2244,
"preview": "import base64\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional, Union\n\nfrom credsweeper.common.const"
},
{
"path": "credsweeper/deep_scanner/png_scanner.py",
"chars": 4297,
"preview": "import logging\nimport struct\nfrom abc import ABC\nfrom typing import List, Optional, Generator, Tuple\n\nfrom credsweeper.c"
},
{
"path": "credsweeper/deep_scanner/pptx_scanner.py",
"chars": 1843,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom pptx import Presentation\n\nfrom cred"
},
{
"path": "credsweeper/deep_scanner/rpm_scanner.py",
"chars": 2544,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nimport rpmfile\n\nfrom credsweeper.credent"
},
{
"path": "credsweeper/deep_scanner/rtf_scanner.py",
"chars": 1895,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom striprtf import striprtf\n\nfrom credsweeper.cr"
},
{
"path": "credsweeper/deep_scanner/sqlite3_scanner.py",
"chars": 3770,
"preview": "import logging\nimport os.path\nimport sqlite3\nimport sys\nimport tempfile\nfrom abc import ABC\nfrom typing import List, Opt"
},
{
"path": "credsweeper/deep_scanner/strings_scanner.py",
"chars": 2403,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional, Tuple\n\nfrom credsweeper.common.constants import MI"
},
{
"path": "credsweeper/deep_scanner/tar_scanner.py",
"chars": 3166,
"preview": "import contextlib\nimport io\nimport logging\nimport tarfile\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom cr"
},
{
"path": "credsweeper/deep_scanner/tmx_scanner.py",
"chars": 2542,
"preview": "import logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom lxml import etree\n\nfrom credsweeper.common.co"
},
{
"path": "credsweeper/deep_scanner/xlsx_scanner.py",
"chars": 3048,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\n\nimport pandas as pd\n\nfrom credsweeper.cr"
},
{
"path": "credsweeper/deep_scanner/xml_scanner.py",
"chars": 2167,
"preview": "import logging\nimport re\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.common.constants import"
},
{
"path": "credsweeper/deep_scanner/zip_scanner.py",
"chars": 3086,
"preview": "import io\nimport logging\nfrom abc import ABC\nfrom typing import List, Optional\nfrom zipfile import ZipFile\n\nfrom credswe"
},
{
"path": "credsweeper/deep_scanner/zlib_scanner.py",
"chars": 2520,
"preview": "import logging\nimport zlib\nfrom abc import ABC\nfrom typing import List, Optional\n\nfrom credsweeper.credentials.candidate"
},
{
"path": "credsweeper/file_handler/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/file_handler/abstract_provider.py",
"chars": 1324,
"preview": "import io\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nfrom typing import Union, Tuple, Sequence\n\nfrom c"
},
{
"path": "credsweeper/file_handler/analysis_target.py",
"chars": 2730,
"preview": "from functools import cached_property\nfrom typing import List, Optional\n\nfrom credsweeper.file_handler.descriptor import"
},
{
"path": "credsweeper/file_handler/byte_content_provider.py",
"chars": 2433,
"preview": "import logging\nfrom functools import cached_property\nfrom typing import List, Optional, Generator\n\nfrom credsweeper.file"
},
{
"path": "credsweeper/file_handler/content_provider.py",
"chars": 3906,
"preview": "import logging\nfrom abc import ABC, abstractmethod\nfrom functools import cached_property\nfrom typing import List, Option"
},
{
"path": "credsweeper/file_handler/data_content_provider.py",
"chars": 16534,
"preview": "import json\nimport logging\nimport warnings\nfrom functools import cached_property\nfrom typing import List, Optional, Any,"
},
{
"path": "credsweeper/file_handler/descriptor.py",
"chars": 185,
"preview": "from dataclasses import dataclass\n\n\n@dataclass(frozen=True)\nclass Descriptor:\n \"\"\"Descriptor for file - optimize memo"
},
{
"path": "credsweeper/file_handler/diff_content_provider.py",
"chars": 8060,
"preview": "import logging\nfrom dataclasses import dataclass\nfrom functools import cached_property\nfrom typing import List, Tuple, G"
},
{
"path": "credsweeper/file_handler/file_path_extractor.py",
"chars": 6967,
"preview": "import io\nimport logging\nimport os\nfrom pathlib import Path\nfrom typing import List, Dict, Union, Tuple\n\nfrom git import"
},
{
"path": "credsweeper/file_handler/files_provider.py",
"chars": 2539,
"preview": "import io\nimport logging\nfrom pathlib import Path\nfrom typing import List, Optional, Union, Tuple, Sequence\n\nfrom credsw"
},
{
"path": "credsweeper/file_handler/patches_provider.py",
"chars": 3017,
"preview": "import io\nimport logging\nfrom pathlib import Path\nfrom typing import List, Union, Tuple, Sequence\n\nfrom credsweeper.comm"
},
{
"path": "credsweeper/file_handler/string_content_provider.py",
"chars": 2481,
"preview": "from functools import cached_property\nfrom typing import List, Optional, Generator\n\nfrom credsweeper.file_handler.analys"
},
{
"path": "credsweeper/file_handler/struct_content_provider.py",
"chars": 1595,
"preview": "import logging\nfrom functools import cached_property\nfrom typing import Optional, Any, Generator\n\nfrom credsweeper.file_"
},
{
"path": "credsweeper/file_handler/text_content_provider.py",
"chars": 3368,
"preview": "import io\nimport logging\nfrom functools import cached_property\nfrom pathlib import Path\nfrom typing import List, Optiona"
},
{
"path": "credsweeper/filters/__init__.py",
"chars": 3467,
"preview": "from credsweeper.filters.line_git_binary_check import LineGitBinaryCheck\nfrom credsweeper.filters.line_specific_key_chec"
},
{
"path": "credsweeper/filters/filter.py",
"chars": 927,
"preview": "from abc import abstractmethod, ABC\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom creds"
},
{
"path": "credsweeper/filters/group/__init__.py",
"chars": 502,
"preview": "from credsweeper.filters.group.general_keyword import GeneralKeyword\nfrom credsweeper.filters.group.general_pattern impo"
},
{
"path": "credsweeper/filters/group/general_keyword.py",
"chars": 474,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/general_pattern.py",
"chars": 303,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/group.py",
"chars": 2661,
"preview": "from abc import ABC\nfrom typing import List\n\nfrom credsweeper.common.constants import GroupType\nfrom credsweeper.config."
},
{
"path": "credsweeper/filters/group/password_keyword.py",
"chars": 776,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/token_pattern.py",
"chars": 580,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/url_credentials_group.py",
"chars": 1444,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/weird_base36_token.py",
"chars": 672,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/group/weird_base64_token.py",
"chars": 860,
"preview": "from credsweeper.common.constants import GroupType\nfrom credsweeper.config.config import Config\nfrom credsweeper.filters"
},
{
"path": "credsweeper/filters/line_git_binary_check.py",
"chars": 1657,
"preview": "import base64\nimport contextlib\nimport re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom"
},
{
"path": "credsweeper/filters/line_specific_key_check.py",
"chars": 1603,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.common.constants import ML_HUNK\nfrom credsweeper.config.config i"
},
{
"path": "credsweeper/filters/line_uue_part_check.py",
"chars": 1581,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_allowlist_check.py",
"chars": 2255,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_array_dictionary_check.py",
"chars": 1409,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_atlassian_token_check.py",
"chars": 2953,
"preview": "import binascii\nimport contextlib\nfrom typing import Optional\n\nfrom credsweeper.common.constants import LATIN_1, ASCII\nf"
},
{
"path": "credsweeper/filters/value_azure_token_check.py",
"chars": 2013,
"preview": "import contextlib\nimport json\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper"
},
{
"path": "credsweeper/filters/value_base32_data_check.py",
"chars": 1550,
"preview": "import base64\nimport contextlib\nimport string\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\n"
},
{
"path": "credsweeper/filters/value_base64_data_check.py",
"chars": 1516,
"preview": "import contextlib\nimport string\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweep"
},
{
"path": "credsweeper/filters/value_base64_encoded_pem_check.py",
"chars": 2529,
"preview": "import logging\nfrom typing import Optional\n\nfrom credsweeper.common.constants import ASCII, PEM_BEGIN_PATTERN, MAX_LINE_"
},
{
"path": "credsweeper/filters/value_base64_key_check.py",
"chars": 1971,
"preview": "import contextlib\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials"
},
{
"path": "credsweeper/filters/value_base64_part_check.py",
"chars": 4999,
"preview": "import contextlib\nimport re\nimport statistics\nfrom itertools import takewhile\nfrom typing import Optional\n\nfrom credswee"
},
{
"path": "credsweeper/filters/value_basic_auth_check.py",
"chars": 1395,
"preview": "import contextlib\nfrom typing import Optional\n\nfrom credsweeper.common.constants import DEFAULT_PATTERN_LEN, UTF_8\nfrom "
},
{
"path": "credsweeper/filters/value_blocklist_check.py",
"chars": 1224,
"preview": "from typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_data import "
},
{
"path": "credsweeper/filters/value_camel_case_check.py",
"chars": 1296,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.common import static_keyword_checklist\nfrom credsweeper.config.c"
},
{
"path": "credsweeper/filters/value_dictionary_keyword_check.py",
"chars": 1326,
"preview": "from typing import Optional\n\nfrom credsweeper.common import static_keyword_checklist\nfrom credsweeper.config.config impo"
},
{
"path": "credsweeper/filters/value_discord_bot_check.py",
"chars": 1547,
"preview": "import contextlib\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials"
},
{
"path": "credsweeper/filters/value_entropy_base32_check.py",
"chars": 642,
"preview": "import math\nfrom functools import cache\n\nfrom credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck\n"
},
{
"path": "credsweeper/filters/value_entropy_base36_check.py",
"chars": 651,
"preview": "import math\nfrom functools import cache\n\nfrom credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck\n"
},
{
"path": "credsweeper/filters/value_entropy_base64_check.py",
"chars": 812,
"preview": "import math\nfrom functools import cache\n\nfrom credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck\n"
},
{
"path": "credsweeper/filters/value_entropy_base_check.py",
"chars": 1308,
"preview": "from abc import abstractmethod\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweepe"
},
{
"path": "credsweeper/filters/value_file_path_check.py",
"chars": 3787,
"preview": "from typing import Optional\n\nfrom credsweeper.common import static_keyword_checklist\nfrom credsweeper.common.constants i"
},
{
"path": "credsweeper/filters/value_github_check.py",
"chars": 1630,
"preview": "import binascii\nimport contextlib\nfrom typing import Optional\n\nimport base62\n\nfrom credsweeper.common.constants import A"
},
{
"path": "credsweeper/filters/value_grafana_check.py",
"chars": 1573,
"preview": "import contextlib\nimport json\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper"
},
{
"path": "credsweeper/filters/value_grafana_service_check.py",
"chars": 1195,
"preview": "import binascii\nimport contextlib\nimport struct\nfrom typing import Optional\n\nfrom credsweeper.common.constants import AS"
},
{
"path": "credsweeper/filters/value_hex_number_check.py",
"chars": 1038,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_jfrog_token_check.py",
"chars": 1723,
"preview": "import contextlib\nimport re\nfrom typing import Optional\n\nimport base58\n\nfrom credsweeper.common.constants import ASCII\nf"
},
{
"path": "credsweeper/filters/value_json_web_key_check.py",
"chars": 1392,
"preview": "import contextlib\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials"
},
{
"path": "credsweeper/filters/value_json_web_token_check.py",
"chars": 2878,
"preview": "import contextlib\nimport json\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper"
},
{
"path": "credsweeper/filters/value_last_word_check.py",
"chars": 963,
"preview": "from typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_data import "
},
{
"path": "credsweeper/filters/value_length_check.py",
"chars": 1156,
"preview": "from typing import Optional\n\nfrom credsweeper.common.constants import MIN_VALUE_LENGTH, MAX_LINE_LENGTH\nfrom credsweeper"
},
{
"path": "credsweeper/filters/value_method_check.py",
"chars": 1175,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_morphemes_check.py",
"chars": 1958,
"preview": "from typing import Optional\n\nfrom credsweeper.common import static_keyword_checklist\nfrom credsweeper.common.constants i"
},
{
"path": "credsweeper/filters/value_not_allowed_pattern_check.py",
"chars": 1245,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_not_part_encoded_check.py",
"chars": 3849,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.common import static_keyword_checklist\nfrom credsweeper.config.c"
},
{
"path": "credsweeper/filters/value_number_check.py",
"chars": 1192,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_pattern_check.py",
"chars": 7148,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH, MI"
},
{
"path": "credsweeper/filters/value_sealed_secret_check.py",
"chars": 2241,
"preview": "from typing import Optional\n\nfrom credsweeper.common.constants import MAX_LINE_LENGTH\nfrom credsweeper.config.config imp"
},
{
"path": "credsweeper/filters/value_search_check.py",
"chars": 1182,
"preview": "from typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_data import "
},
{
"path": "credsweeper/filters/value_similarity_check.py",
"chars": 1575,
"preview": "from difflib import SequenceMatcher\nfrom typing import Optional\n\nfrom credsweeper.common.constants import MIN_VALUE_LENG"
},
{
"path": "credsweeper/filters/value_split_keyword_check.py",
"chars": 1175,
"preview": "from typing import Optional\nfrom typing import Union\n\nfrom credsweeper.common import static_keyword_checklist\nfrom creds"
},
{
"path": "credsweeper/filters/value_string_type_check.py",
"chars": 2198,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/filters/value_token_base32_check.py",
"chars": 1883,
"preview": "from typing import Tuple\n\nfrom credsweeper.filters.value_token_base_check import ValueTokenBaseCheck\n\n\nclass ValueTokenB"
},
{
"path": "credsweeper/filters/value_token_base36_check.py",
"chars": 1893,
"preview": "from typing import Tuple\n\nfrom credsweeper.filters.value_token_base_check import ValueTokenBaseCheck\n\n\nclass ValueTokenB"
},
{
"path": "credsweeper/filters/value_token_base64_check.py",
"chars": 1899,
"preview": "from typing import Tuple\n\nfrom credsweeper.filters.value_token_base_check import ValueTokenBaseCheck\n\n\nclass ValueTokenB"
},
{
"path": "credsweeper/filters/value_token_base_check.py",
"chars": 2083,
"preview": "import contextlib\nfrom abc import abstractmethod\nfrom typing import Optional\nfrom typing import Tuple\n\nfrom credsweeper."
},
{
"path": "credsweeper/filters/value_token_check.py",
"chars": 1473,
"preview": "import re\nfrom typing import Optional\n\nfrom credsweeper.config.config import Config\nfrom credsweeper.credentials.line_da"
},
{
"path": "credsweeper/logger/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/logger/logger.py",
"chars": 1833,
"preview": "import logging\nimport logging.config\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom credsweeper.app import A"
},
{
"path": "credsweeper/main.py",
"chars": 22575,
"preview": "import binascii\nimport contextlib\nimport logging\nimport os\nimport sys\nimport time\nfrom argparse import ArgumentParser, A"
},
{
"path": "credsweeper/ml_model/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/ml_model/features/__init__.py",
"chars": 1089,
"preview": "from credsweeper.ml_model.features.entropy_evaluation import EntropyEvaluation\nfrom credsweeper.ml_model.features.file_e"
},
{
"path": "credsweeper/ml_model/features/entropy_evaluation.py",
"chars": 2515,
"preview": "import math\nfrom typing import Dict, List, Set\n\nimport numpy as np\n\nfrom credsweeper.common.constants import Chars, ML_H"
},
{
"path": "credsweeper/ml_model/features/feature.py",
"chars": 667,
"preview": "from abc import ABC, abstractmethod\nfrom typing import List, Any\n\nimport numpy as np\n\nfrom credsweeper.credentials.candi"
},
{
"path": "credsweeper/ml_model/features/file_extension.py",
"chars": 699,
"preview": "from typing import List, Any\n\nimport numpy as np\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweep"
},
{
"path": "credsweeper/ml_model/features/has_html_tag.py",
"chars": 1159,
"preview": "from credsweeper.common.constants import CHUNK_SIZE\nfrom credsweeper.credentials.candidate import Candidate\nfrom credswe"
},
{
"path": "credsweeper/ml_model/features/is_secret_numeric.py",
"chars": 431,
"preview": "import contextlib\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.ml_model.features.feature im"
},
{
"path": "credsweeper/ml_model/features/length_of_attribute.py",
"chars": 1176,
"preview": "import numpy as np\n\nfrom credsweeper.common.constants import ML_HUNK\nfrom credsweeper.credentials.candidate import Candi"
},
{
"path": "credsweeper/ml_model/features/morpheme_dense.py",
"chars": 1102,
"preview": "from credsweeper.common import static_keyword_checklist\nfrom credsweeper.credentials.candidate import Candidate\nfrom cre"
},
{
"path": "credsweeper/ml_model/features/rule_name.py",
"chars": 679,
"preview": "from typing import List, Any\n\nimport numpy as np\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweep"
},
{
"path": "credsweeper/ml_model/features/rule_severity.py",
"chars": 749,
"preview": "from credsweeper.common.constants import Severity\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweep"
},
{
"path": "credsweeper/ml_model/features/search_in_attribute.py",
"chars": 701,
"preview": "import re\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.ml_model.features.feature import Fea"
},
{
"path": "credsweeper/ml_model/features/word_in.py",
"chars": 1254,
"preview": "from abc import abstractmethod\nfrom typing import List, Any, Set, Union\n\nimport numpy as np\n\nfrom credsweeper.credential"
},
{
"path": "credsweeper/ml_model/features/word_in_path.py",
"chars": 1080,
"preview": "import os.path\nfrom pathlib import Path\nfrom typing import List, Any\n\nimport numpy as np\n\nfrom credsweeper.credentials.c"
},
{
"path": "credsweeper/ml_model/features/word_in_postamble.py",
"chars": 865,
"preview": "import numpy as np\n\nfrom credsweeper.common.constants import ML_HUNK\nfrom credsweeper.credentials.candidate import Candi"
},
{
"path": "credsweeper/ml_model/features/word_in_preamble.py",
"chars": 1213,
"preview": "import numpy as np\n\nfrom credsweeper.common.constants import ML_HUNK\nfrom credsweeper.credentials.candidate import Candi"
},
{
"path": "credsweeper/ml_model/features/word_in_transition.py",
"chars": 817,
"preview": "import numpy as np\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.ml_model.features.word_in i"
},
{
"path": "credsweeper/ml_model/features/word_in_value.py",
"chars": 522,
"preview": "import numpy as np\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.ml_model.features.word_in i"
},
{
"path": "credsweeper/ml_model/features/word_in_variable.py",
"chars": 497,
"preview": "import numpy as np\n\nfrom credsweeper.credentials.candidate import Candidate\nfrom credsweeper.ml_model.features.word_in i"
},
{
"path": "credsweeper/ml_model/ml_config.json",
"chars": 21186,
"preview": "{\n \"char_set\": \"\\u001b\\t\\n\\r !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuv"
},
{
"path": "credsweeper/ml_model/ml_validator.py",
"chars": 12877,
"preview": "import hashlib\nimport json\nimport logging\nfrom pathlib import Path\nfrom typing import List, Tuple, Union, Optional, Dict"
},
{
"path": "credsweeper/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/rules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/rules/config.yaml",
"chars": 46822,
"preview": "- name: DOC_GET\n severity: medium\n confidence: moderate\n type: pattern\n values:\n - (?P<variable>(\\w*(?i:비밀번호|비번|패"
},
{
"path": "credsweeper/rules/rule.py",
"chars": 10262,
"preview": "import contextlib\nimport logging\nimport re\nfrom functools import cached_property\nfrom typing import Dict, List, Optional"
},
{
"path": "credsweeper/scanner/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/scanner/scan_type/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/scanner/scan_type/multi_pattern.py",
"chars": 5660,
"preview": "import copy\nimport re\nfrom typing import List\n\nfrom credsweeper.common.constants import RuleType, MAX_LINE_LENGTH\nfrom c"
},
{
"path": "credsweeper/scanner/scan_type/pem_key_pattern.py",
"chars": 1592,
"preview": "import logging\nfrom typing import List\n\nfrom credsweeper.common.constants import RuleType\nfrom credsweeper.config.config"
},
{
"path": "credsweeper/scanner/scan_type/scan_type.py",
"chars": 9425,
"preview": "import logging\nimport re\nfrom abc import ABC, abstractmethod\nfrom typing import List\n\nfrom credsweeper.common.constants "
},
{
"path": "credsweeper/scanner/scan_type/single_pattern.py",
"chars": 1297,
"preview": "from typing import List\n\nfrom credsweeper.common.constants import RuleType\nfrom credsweeper.config.config import Config\n"
},
{
"path": "credsweeper/scanner/scanner.py",
"chars": 10587,
"preview": "import logging\nimport re\nfrom pathlib import Path\nfrom typing import List, Type, Tuple, Union, Dict, Generator, Set\n\nfro"
},
{
"path": "credsweeper/secret/config.json",
"chars": 3846,
"preview": "{\n \"exclude\": {\n \"pattern\": [],\n \"containers\": [\n \".aar\",\n \".apk\",\n \"."
},
{
"path": "credsweeper/secret/log.yaml",
"chars": 952,
"preview": "---\n\nversion: 1\n\ndisable_existing_loggers: False\n\nignore: [git, pdfminer]\n\nformatters:\n simple:\n format: \"%(as"
},
{
"path": "credsweeper/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "credsweeper/utils/hop_stat.py",
"chars": 3010,
"preview": "import statistics\nfrom typing import Tuple, Dict\n\n\nclass HopStat:\n \"\"\"Statistical check distances between symbols seq"
},
{
"path": "credsweeper/utils/pem_key_detector.py",
"chars": 12658,
"preview": "import contextlib\nimport logging\nimport re\nimport string\nfrom typing import List\n\nfrom credsweeper.common.constants impo"
},
{
"path": "credsweeper/utils/util.py",
"chars": 20509,
"preview": "import ast\nimport base64\nimport contextlib\nimport json\nimport logging\nimport math\nimport os\nimport random\nimport re\nimpo"
},
{
"path": "docs/Makefile",
"chars": 644,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "docs/README.md",
"chars": 478,
"preview": "# Documentation of CredSweeper\n\nThe directory is used for documentation of CredSweeper with using [sphinx](https://www.s"
},
{
"path": "docs/howto/how-to-contribute.md",
"chars": 6694,
"preview": "# Contributing\n\nThank you for your interest in contributing to the CredSweeper tool!\n\nThe document covers the process fo"
},
{
"path": "docs/make.bat",
"chars": 804,
"preview": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sp"
},
{
"path": "docs/requirements.txt",
"chars": 66,
"preview": "myst_parser[linkify]==4.0.1\nsphinx==8.1.3\nsphinx_rtd_theme==3.0.2\n"
},
{
"path": "docs/source/api.rst",
"chars": 504,
"preview": "API\n===\n\nThis part of the documentation covers all the interfaces of CredSweeper.\n\n.. toctree::\n :maxdepth: 2\n\n cred"
},
{
"path": "docs/source/apps_config.rst",
"chars": 182,
"preview": "CredSweeper Credential Analyzer Configuration\n=============================================\n\n.. literalinclude:: ../../c"
},
{
"path": "docs/source/conf.py",
"chars": 4189,
"preview": "# Configuration file for the Sphinx documentation builder.\n#\n# For the full list of built-in configuration values, see t"
},
{
"path": "docs/source/credsweeper.common.rst",
"chars": 765,
"preview": "credsweeper.common package\n==========================\n\nSubmodules\n----------\n\ncredsweeper.common.constants module\n------"
},
{
"path": "docs/source/credsweeper.config.rst",
"chars": 364,
"preview": "credsweeper.config package\n==========================\n\nSubmodules\n----------\n\ncredsweeper.config.config module\n---------"
},
{
"path": "docs/source/credsweeper.credentials.rst",
"chars": 1469,
"preview": "credsweeper.credentials package\n===============================\n\nSubmodules\n----------\n\ncredsweeper.credentials.augment\\"
},
{
"path": "docs/source/credsweeper.deep_scanner.rst",
"chars": 6766,
"preview": "credsweeper.deep\\_scanner package\n=================================\n\nSubmodules\n----------\n\ncredsweeper.deep\\_scanner.ab"
},
{
"path": "docs/source/credsweeper.file_handler.rst",
"chars": 3127,
"preview": "credsweeper.file\\_handler package\n=================================\n\nSubmodules\n----------\n\ncredsweeper.file\\_handler.ab"
},
{
"path": "docs/source/credsweeper.filters.group.rst",
"chars": 1940,
"preview": "credsweeper.filters.group package\n=================================\n\nSubmodules\n----------\n\ncredsweeper.filters.group.ge"
},
{
"path": "docs/source/credsweeper.filters.rst",
"chars": 10969,
"preview": "credsweeper.filters package\n===========================\n\nSubpackages\n-----------\n\n.. toctree::\n :maxdepth: 4\n\n creds"
},
{
"path": "docs/source/credsweeper.logger.rst",
"chars": 364,
"preview": "credsweeper.logger package\n==========================\n\nSubmodules\n----------\n\ncredsweeper.logger.logger module\n---------"
},
{
"path": "docs/source/credsweeper.ml_model.features.rst",
"chars": 4087,
"preview": "credsweeper.ml\\_model.features package\n======================================\n\nSubmodules\n----------\n\ncredsweeper.ml\\_mo"
},
{
"path": "docs/source/credsweeper.ml_model.rst",
"chars": 489,
"preview": "credsweeper.ml\\_model package\n=============================\n\nSubpackages\n-----------\n\n.. toctree::\n :maxdepth: 4\n\n c"
},
{
"path": "docs/source/credsweeper.rst",
"chars": 183,
"preview": "Credsweeper package\n===================\n\nCredSweeper\n-----------\n\n.. toctree::\n :maxdepth: 4\n\n.. automodule:: credswee"
},
{
"path": "docs/source/credsweeper.rules.rst",
"chars": 352,
"preview": "credsweeper.rules package\n=========================\n\nSubmodules\n----------\n\ncredsweeper.rules.rule module\n--------------"
},
{
"path": "docs/source/credsweeper.scanner.rst",
"chars": 462,
"preview": "credsweeper.scanner package\n===========================\n\nSubpackages\n-----------\n\n.. toctree::\n :maxdepth: 4\n\n creds"
},
{
"path": "docs/source/credsweeper.scanner.scan_type.rst",
"chars": 1122,
"preview": "credsweeper.scanner.scan\\_type package\n======================================\n\nSubmodules\n----------\n\ncredsweeper.scanne"
},
{
"path": "docs/source/credsweeper.utils.rst",
"chars": 716,
"preview": "credsweeper.utils package\n=========================\n\nSubmodules\n----------\n\ncredsweeper.utils.hop\\_stat module\n---------"
},
{
"path": "docs/source/develop.rst",
"chars": 377,
"preview": "Develop\n=======\n\nTests\n-----\n\nTo run all tests:\n\n.. code-block:: bash\n\n python -m pytest -s tests/\n\nBenchmark\n-------"
},
{
"path": "docs/source/guide.rst",
"chars": 12665,
"preview": "How To Use\n==========\n\nRun\n---\n\nGet all argument list:\n\n.. code-block:: bash\n\n python -m credsweeper --help\n\n\n.. code"
},
{
"path": "docs/source/how_to_contribute.rst",
"chars": 112,
"preview": "How To Contribute\n=================\n\n.. include:: ../howto/how-to-contribute.md\n :parser: myst_parser.sphinx_\n"
},
{
"path": "docs/source/index.rst",
"chars": 1259,
"preview": ".. |CredSweeper_logo| image:: ../images/Logo.png\n :width: 100\n :alt: Alternative text\n\n|CredSweeper_logo|\n\nWelcome t"
},
{
"path": "docs/source/install.rst",
"chars": 3531,
"preview": "Installation\n============\n\nCurrently `CredSweeper` requires the following prerequisites:\n\n* Python version 3.10, 3.11, 3"
},
{
"path": "docs/source/overall_architecture.rst",
"chars": 10605,
"preview": "Overall Architecture\n====================\n\nCredSweeper is largely composed of 3 parts as follows. (Pre-processing_, Scan"
},
{
"path": "docs/source/rules_config.rst",
"chars": 129,
"preview": "Rules Configuration\n===================\n\n.. literalinclude:: ../../credsweeper/rules/config.yaml\n :language: yaml\n :"
},
{
"path": "experiment/README.md",
"chars": 1133,
"preview": "# Train credential detection model\n\nThis code will allow you to retrain model on the CredData dataset\n\n## Preparation\n\n-"
},
{
"path": "experiment/__init__.py",
"chars": 0,
"preview": ""
}
]
// ... and 187 more files (download for full content)
About this extraction
This page contains the full source code of the Samsung/CredSweeper GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 387 files (16.1 MB), approximately 1.4M tokens, and a symbol index with 1456 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.