Repository: t0sche/cvss-bt
Branch: main
Commit: 38d2a16d6563
Files: 11
Total size: 73.3 MB

Directory structure:
gitextract_sryycckm/
├── .github/
│   └── workflows/
│       ├── cvss-bt.yml
│       └── epss.yml
├── .gitignore
├── LICENSE
├── README.md
├── code/
│   ├── enrich_nvd.py
│   ├── last_run.txt
│   ├── process_nvd.py
│   └── requirements.txt
├── cvss-bt.csv
└── test.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/cvss-bt.yml
================================================
name: Run Enrichment Process

on:
  workflow_run:
    workflows: ["Check for new EPSS data"]
    types:
      - completed
  workflow_dispatch:

permissions:
  contents: write

jobs:
  publish:
    runs-on: ubuntu-latest
    # Run after a successful "Check for new EPSS data" run, or when started
    # manually. On a manual run github.event.workflow_run is empty, so the
    # conclusion check alone would always skip the job.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Empty on manual runs, in which case checkout uses the triggering ref.
          ref: ${{ github.event.workflow_run.head_branch }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r code/requirements.txt

      - name: Grab Needed Data
        env:
          VULNCHECK_API_KEY: ${{ secrets.VULNCHECK_API_KEY }}
        run: |
          rm -f *.zip
          RESPONSE=$(curl --request GET \
            --url https://api.vulncheck.com/v3/backup/nist-nvd \
            --header 'Accept: application/json' \
            --header "Authorization: Bearer $VULNCHECK_API_KEY")
          url=$(echo "$RESPONSE" | jq -r '.data[0].url')
          # jq prints the literal string "null" when the field is missing
          # (for example on an authentication error), so fail here with a
          # clear message instead of downloading garbage.
          if [ -z "$url" ] || [ "$url" = "null" ]; then
            echo "VulnCheck backup response did not contain a download URL" >&2
            exit 1
          fi
          curl -L -o nvd.zip "$url"
          unzip -o "*.zip"
          rm -f *.zip

      - name: Run Enrichment Process
        timeout-minutes: 60
        env:
          VULNCHECK_API_KEY: ${{ secrets.VULNCHECK_API_KEY }}
        run: |
          python -u code/process_nvd.py

      - name: setup git config
        run: |
          git config user.name "GitHub Actions Bot"
          git config user.email "<>"

      - name: Generate tag
        id: generate_tag
        run: |
          # The ::set-output workflow command is deprecated; step outputs are
          # written to the $GITHUB_OUTPUT file instead.
          echo "tag=v$(date +%Y.%m.%d)" >> "$GITHUB_OUTPUT"

      - name: Commit and push if there are changes
        run: |
          git add cvss-bt.csv code/last_run.txt
          # "exit 0" ends only this step when there is nothing to commit;
          # the release steps below still run.
          git commit -m "Updated CVSS-BT data $(date +%Y-%m-%d)" -a || exit 0
          git push origin HEAD:main
          tag="${{ steps.generate_tag.outputs.tag }}"
          git tag -a "$tag" -m "Updated CVSS-BT data $(date +%Y-%m-%d)"
          git push origin "$tag"

      # NOTE(review): actions/create-release and actions/upload-release-asset
      # appear to be unmaintained upstream -- consider replacing both steps
      # with `gh release create`; confirm before changing.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.generate_tag.outputs.tag }}
          release_name: "Release ${{ steps.generate_tag.outputs.tag }}"
          draft: false
          prerelease: false

      - name: Upload Release Asset
        id: upload-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./cvss-bt.csv
          asset_name: cvss-bt.csv
          asset_content_type: text/plain


================================================
FILE: .github/workflows/epss.yml
================================================
name: Check for new EPSS data

on:
  schedule:
    - cron: "0 11-15 * * *" # Hourly from 11:00 to 15:00 UTC (07:00-11:00 EDT / 06:00-10:00 EST)
  push:
    branches:
      - dev
  workflow_dispatch:

permissions:
  contents: write

jobs:
  check-epss:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code dir
        uses: actions/checkout@v4

      # A non-zero exit marks this workflow as failed, which keeps the
      # "Run Enrichment Process" workflow (gated on conclusion == 'success')
      # from running.
      - name: Read and Compare Date in Last Run File
        run: |
          LAST_RUN_DATE=$(cut -d'T' -f1 code/last_run.txt)
          CURRENT_DATE=$(date -u +"%Y-%m-%d")
          echo "Last Run Date: $LAST_RUN_DATE"
          echo "Current Date: $CURRENT_DATE"
          if [[ "$LAST_RUN_DATE" == "$CURRENT_DATE" ]]; then
            echo "The enrichment has already run today. Exiting."
            exit 1
          else
            echo "The enrichment has not run today. Proceeding with further steps."
          fi

      - name: Check for new EPSS scores
        run: |
          today=$(TZ=America/New_York date +%Y-%m-%d)
          url="https://epss.empiricalsecurity.com/epss_scores-${today}.csv.gz"
          echo "$url"
          response=$(curl -s -o /dev/null -w "%{http_code}" "$url")
          echo "$response"
          if [ "$response" -eq 200 ]; then
            echo "EPSS scores available for today"
          else
            echo "EPSS scores not available yet for today"
            exit 1
          fi


================================================
FILE: .gitignore
================================================
.venv
.DS_Store
.local
*.json
*.zip
code/__pycache__/


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2023 Stephen Shaffer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README.md
================================================
# cvss-bt

This project enriches the NVD CVSS scores to include Temporal/Threat Metrics, and publishes a CSV file daily with the CVSS-BT scores and information sources.

## Overview

The Common Vulnerability Scoring System (CVSS) is an industry standard for assessing the severity of computer system security vulnerabilities. CVSS attempts to establish a measure of how severe a vulnerability is based on its attributes.

The National Vulnerability Database includes CVSS Base scores in its catalog, but base scores are not enough to effectively prioritize or contextualize vulnerabilities.

In this repository I continuously enrich the CVSS score by using the Exploit Code Maturity/Exploitability (E) Temporal Metric.

### Temporal Metric - Exploit Code Maturity/Exploitability (E)

Sources:

- https://www.first.org/cvss/v4-0/cvss-v40-specification.pdf
- https://www.first.org/cvss/v3.1/specification-document
- https://www.first.org/cvss/v3.0/specification-document
- https://www.first.org/cvss/v2/guide

| Value | Description | CVE Present In |
|---------------------------|-------------|-------------|
| Attacked (A) (v4.0) | Based on available threat intelligence either of the following must apply: Attacks targeting this vulnerability (attempted or successful) have been reported. Solutions to simplify attempts to exploit the vulnerability are publicly or privately available (such as exploit toolkits) | [CISA KEV](https://www.cisa.gov/known-exploited-vulnerabilities-catalog), [VulnCheck KEV](https://vulncheck.com/kev), [EPSS](https://www.first.org/epss/) > Threshold, [Metasploit](https://www.metasploit.com/) |
| High (H) (v3.1/3.0/2.0)| Functional autonomous code exists, or no exploit is required (manual trigger) and details are widely available. Exploit code works in every situation, or is actively being delivered via an autonomous agent (such as a worm or virus). Network-connected systems are likely to encounter scanning or exploitation attempts. Exploit development has reached the level of reliable, widely available, easy-to-use automated tools. | [CISA KEV](https://www.cisa.gov/known-exploited-vulnerabilities-catalog), [VulnCheck KEV](https://vulncheck.com/kev), [EPSS](https://www.first.org/epss/) > Threshold, [Metasploit](https://www.metasploit.com/) |
| Functional (F) (v3.1/3.0/2.0) | Functional exploit code is available. The code works in most situations where the vulnerability exists. | [Nuclei](https://github.com/projectdiscovery/nuclei) |
| Proof-of-Concept (P) (v4.0/3.1/3.0/2.0) | Proof-of-concept exploit code is available. The code might not work in all situations. | [ExploitDB](https://www.exploit-db.com/), [PoC-in-GitHub](https://github.com/nomi-sec/PoC-in-GitHub) |
| Unproven (U) (v4.0/3.1/3.0/2.0) | No exploit code is available, or an exploit is theoretical. | CVE not present in any threat intelligence source above. |
| Not Defined (X) (v4.0/3.1/3.0/2.0) | Assigning this value to the metric will not influence the score. It means the user does not have enough information to assign a score. | We drop this value since we have information to assign a score. |

## Features

This repository continuously enriches and publishes CVSS Temporal Scores based on the following threat intelligence:

- CISA KEV
- VulnCheck KEV
- EPSS
- Metasploit
- Nuclei
- ExploitDB
- PoC-in-GitHub

### Steps

- Fetches EPSS scores every morning
- Fetches CVSS scores from NVD if there are new EPSS scores.
- Calculates the Exploit Code Maturity/Exploitability (E) Metric when new data is found.
- Provides a resulting CVSS-BT score for each CVE

## Caveats

- In the event that the NVD calculated score is using a lesser version than a secondary source, I use the higher CVSS version.
- The EPSS threshold for returning an `E:H` or `E:A` value is .36, or 36%.
This is based on the F1 score of the model and the 37% threshold where most CVEs have weaponized exploit code. - I do not recommend using this percentage as a general threshold to prioritize on. ## CVSS Visual Mapping This data visualization provides a breakdown of how the CVSS-B, CVSS-BT and CVSS enriched temporal metrics map to the defined OSINT sources as of November 25th, 2023 ![CVSS-BT Mapping](CVSS-BT-Enrichment.png) ## Acknowledgements This product uses VulnCheck KEV. This product uses EPSS scores but is not endoresed or certified by the EPSS SIG. # Support this project If you'd like to financially support this project, feel free to donate below. [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/stephenshaffer) ================================================ FILE: code/enrich_nvd.py ================================================ import requests import os import re import pandas as pd import cvss EPSS_CSV = 'data/epss/epss_scores.csv' METASPLOIT_JSON = 'https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json' NUCLEI_JSON = 'https://raw.githubusercontent.com/projectdiscovery/nuclei-templates/main/cves.json' EXPLOITDB_CSV = 'https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv' KEV_JSON = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" POC_GITHUB = "https://raw.githubusercontent.com/nomi-sec/PoC-in-GitHub/master/README.md" VULNCHECK_KEV = 'https://api.vulncheck.com/v3/index/vulncheck-kev' VULNCHECK_API_KEY = os.environ.get('VULNCHECK_API_KEY') EPSS_THRESHOLD = 0.36 """ 36% is the threshold correlated to the F1 score of EPSSv3 model At ~37%, the CVE is very likely to have weaponized exploit code """ def enrich(df, epss_df): """ Enrich CVE data with EPSS, KEV, ExploitDB, Metasploit, and Nuclei data """ #Load KEV Data response = requests.get(KEV_JSON) kev_json_data = response.json() 
kev_cve_list = [] for vuln in kev_json_data.get('vulnerabilities'): kev_cve_list.append(vuln.get('cveID')) kev_df = pd.DataFrame(kev_cve_list, columns=['cve']) kev_df['cisa_kev'] = True #Load VulnCheck KEV vulncheck_kev = get_vulncheck_data() vulncheck_kev_df = pd.DataFrame(vulncheck_kev, columns=['cve']) vulncheck_kev_df['cve'] = vulncheck_kev_df['cve'].apply(lambda x: ', '.join(map(str, x))) vulncheck_kev_df['vulncheck_kev'] = True #Load ExploitDB exploitdb_df = pd.read_csv(EXPLOITDB_CSV, usecols=['codes']).rename(columns={"codes": "cve"}) exploitdb_df.drop_duplicates(inplace=True) exploitdb_df = exploitdb_df['cve'].str.extract(r"(CVE-\d{4}-\d{4,7})", expand=False).dropna().values exploitdb_df = pd.DataFrame(exploitdb_df, columns = ['cve']) exploitdb_df['exploitdb'] = True #Load Metasploit response = requests.get(METASPLOIT_JSON) ms_json_data = response.json() ms_cve_list = [] for item in ms_json_data: if 'references' in ms_json_data[item]: cve_references = [ref for ref in ms_json_data[item]['references'] if ref.startswith('CVE-')] ms_cve_list.extend(cve_references) metasploit_df = pd.DataFrame(ms_cve_list, columns=['cve']) metasploit_df['metasploit'] = True #Load Nuclei nuclei_df = pd.read_json(NUCLEI_JSON, lines=True) nuclei_df.rename(columns={"ID": "cve"}, inplace=True) nuclei_df = nuclei_df.drop(columns=['Info', 'file_path']) nuclei_df['nuclei'] = True #Load Poc-in-GitHub poc_githib_df = pd.DataFrame(extract_cves_from_github(POC_GITHUB), columns=['cve']) poc_githib_df['poc_github'] = True print('Mapping EPSS Data') df = pd.merge(df, epss_df, on='cve', how='left') print('Mapping KEV Data') df = pd.merge(df, kev_df, on='cve', how='left') print('Mapping VulnCheck KEV Data') df = pd.merge(df, vulncheck_kev_df, on='cve', how='left') print('Mapping ExploitDB Data') df = pd.merge(df, exploitdb_df, on='cve', how='left') print('Mapping Metasploit Data') df = pd.merge(df, metasploit_df, on='cve', how='left') print('Mapping Nuclei Data') df = pd.merge(df, nuclei_df, 
on='cve', how='left') print('Mapping Poc-in-GitHub Data') df = pd.merge(df, poc_githib_df, on='cve', how='left') df = df.drop_duplicates(subset='cve') # Fill NaN values appropriately for each column type bool_columns = ['cisa_kev', 'vulncheck_kev', 'exploitdb', 'metasploit', 'nuclei', 'poc_github'] df[bool_columns] = df[bool_columns].fillna(False) df['epss'] = df['epss'].fillna(0.0) return df def extract_cves_from_github(url): response = requests.get(url) if response.status_code == 200: content = response.text else: content = "" print("Failed to fetch README file") cve_pattern = r"CVE-\d{4}-\d{4,7}" cve_matches = re.findall(cve_pattern, content) unique_cves = set(cve_matches) return list(unique_cves) def get_vulncheck_data(): data = [] headers = { "accept": "application/json", "authorization": f"Bearer {VULNCHECK_API_KEY}" } response = requests.get(VULNCHECK_KEV, headers=headers) response = response.json() current_page = response.get('_meta').get('page') total_pages = response.get('_meta').get('total_pages') data.extend(response.get('data')) while current_page < total_pages: current_page += 1 response = requests.get(f"{VULNCHECK_KEV}?page={current_page}", headers=headers) response = response.json() data.extend(response.get('data')) return data def update_temporal_score(df, epss_threshold): """ Update temporal score and severity based on exploit maturity """ df['exploit_maturity'] = 'E:U' # Default value condition_ea = (df['cisa_kev']) | (df['epss'] >= epss_threshold) | (df['vulncheck_kev']) | (df['metasploit']) condition_ep4 = (~condition_ea) & ((df['nuclei']) | (df['exploitdb'] | df['poc_github'])) # First condition for 'E:H' condition_eh = (df['cisa_kev']) | (df['epss'] >= epss_threshold) | (df['vulncheck_kev']) # Next condition for 'E:F' condition_ef = (~condition_eh) & ((df['nuclei']) | (df['metasploit'])) # Last condition for 'E:P' condition_ep = (~condition_eh) & (~condition_ef) & (df['exploitdb'] | df['poc_github']) df.loc[condition_eh & 
(df['cvss_version'].astype(str) != '4.0'), 'exploit_maturity'] = 'E:H' df.loc[condition_ea & (df['cvss_version'].astype(str) == '4.0'), 'exploit_maturity'] = 'E:A' #Updated to Attacked for 4.0 df.loc[condition_ef & (df['cvss_version'].astype(str) != '4.0'), 'exploit_maturity'] = 'E:F' df.loc[condition_ep & (df['cvss_version'].astype(str) == '2.0'), 'exploit_maturity'] = 'E:POC' df.loc[condition_ep & (df['cvss_version'].astype(str) != '2.0') & (df['cvss_version'].astype(str) != '4.0'), 'exploit_maturity'] = 'E:P' df.loc[condition_ep4 & (df['cvss_version'].astype(str) == '4.0'), 'exploit_maturity'] = 'E:P' # Update vector with exploit maturity #Remove "E:X" from base vector if it exists df['cvss-bt_vector'] = df.apply(lambda row: f"{row['base_vector']}/{row['exploit_maturity']}" if 'E:X' not in row['base_vector'] and row['base_vector'] != 'N/A' \ else row['base_vector'].replace('/E:X', f"/{row['exploit_maturity']}") if row['base_vector'] != 'N/A' \ else row['base_vector'], axis=1) # Apply CVSS computation def compute_cvss(row): try: if 'N/A' in str(row['cvss_version']): return 'UNKNOWN', 'UNKNOWN' elif '4' in str(row['cvss_version']): c = cvss.CVSS4(row['cvss-bt_vector']) return c.base_score, str(c.severity).upper() elif '3' in str(row['cvss_version']): c = cvss.CVSS3(row['cvss-bt_vector']) return c.temporal_score, str(c.severities()[1]).upper() elif '2' in str(row['cvss_version']): c = cvss.CVSS2(row['cvss-bt_vector']) return c.temporal_score, str(c.severities()[1]).upper() else: raise ValueError(f"Unknown CVSS version: {row['cvss_version']}") except Exception as e: print(f"Error occurred while computing CVSS for {row['cve']}: {e}") return 'UNKNOWN', 'UNKNOWN' # Extracting CVSS scores and severities print('Computing CVSS-BT scores and severities') df[['cvss-bt_score', 'cvss-bt_severity']] = df.apply(compute_cvss, axis=1, result_type='expand') #Apply function to each row return df ================================================ FILE: code/last_run.txt 
================================================ 2026-05-12T13:44:04Z ================================================ FILE: code/process_nvd.py ================================================ from datetime import datetime, date from pathlib import Path import pandas as pd import enrich_nvd import ijson EPSS_CSV = f'https://epss.empiricalsecurity.com/epss_scores-{date.today()}.csv.gz' TIMESTAMP_FILE = './code/last_run.txt' def process_nvd_files(): """ Processes the NVD JSON files incrementally using a streaming parser (ijson). """ nvd_dict = [] for file_path in Path('.').glob('*.json'): print(f'Processing {file_path.name}') with file_path.open('r', encoding='utf-8') as file: # Stream over each item in the CVE_Items array for entry in ijson.items(file, 'CVE_Items.item'): if not entry['cve']['description']['description_data'][0]['value'].startswith('**'): cve = entry['cve']['CVE_data_meta']['ID'] if 'metricV40' in entry['impact']: cvss_version = '4.0' base_score = entry['impact']['metricV40']['baseScore'] base_severity = entry['impact']['metricV40']['baseSeverity'] base_vector = entry['impact']['metricV40']['vectorString'] elif 'baseMetricV3' in entry['impact']: cvss_version = entry['impact']['baseMetricV3']['cvssV3']['version'] base_score = entry['impact']['baseMetricV3']['cvssV3']['baseScore'] base_severity = entry['impact']['baseMetricV3']['cvssV3']['baseSeverity'] base_vector = entry['impact']['baseMetricV3']['cvssV3']['vectorString'] else: cvss_version = entry['impact'].get('baseMetricV2', {}).get('cvssV2', {}).get('version', 'N/A') base_score = entry['impact'].get('baseMetricV2', {}).get('cvssV2', {}).get('baseScore', 'N/A') base_severity = entry['impact'].get('baseMetricV2', {}).get('severity', 'N/A') base_vector = entry['impact'].get('baseMetricV2', {}).get('cvssV2', {}).get('vectorString', 'N/A') assigner = entry['cve']['CVE_data_meta']['ASSIGNER'] published_date = entry['publishedDate'] description = 
entry['cve']['description']['description_data'][0]['value'] dict_entry = { 'cve': cve, 'cvss_version': cvss_version, 'base_score': base_score, 'base_severity': base_severity, 'base_vector': base_vector, 'assigner': assigner, 'published_date': published_date, 'description': description } nvd_dict.append(dict_entry) nvd_df = pd.DataFrame(nvd_dict) print('CVEs with CVSS scores from NVD:', nvd_df['cve'].nunique()) return nvd_df def enrich_df(nvd_df): """ Enriches the dataframe with exploit maturity and temporal scores. """ print('Enriching data') enriched_df = enrich_nvd.enrich(nvd_df, pd.read_csv(EPSS_CSV, comment='#', compression='gzip')) cvss_bt_df = enrich_nvd.update_temporal_score(enriched_df, enrich_nvd.EPSS_THRESHOLD) columns = [ 'cve', 'cvss-bt_score', 'cvss-bt_severity', 'cvss-bt_vector', 'cvss_version', 'base_score', 'base_severity', 'base_vector', 'assigner', 'published_date', 'epss', 'cisa_kev', 'vulncheck_kev', 'exploitdb', 'metasploit', 'nuclei', 'poc_github' ] cvss_bt_df = cvss_bt_df[columns] cvss_bt_df = cvss_bt_df.sort_values(by=['published_date']) cvss_bt_df = cvss_bt_df.reset_index(drop=True) cvss_bt_df.to_csv('cvss-bt.csv', index=False, mode='w') def save_last_run_timestamp(filename='last_run.txt'): """ Save the current timestamp as the last run timestamp in a file. Args: filename (str): The name of the file to save the timestamp. Default is 'last_run.txt'. 
""" with open(filename, 'w', encoding='utf-8') as f: f.write(datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')) enrich_df(process_nvd_files()) save_last_run_timestamp(TIMESTAMP_FILE) ================================================ FILE: code/requirements.txt ================================================ cvss pandas requests ijson ================================================ FILE: cvss-bt.csv ================================================ [File too large to display: 73.3 MB] ================================================ FILE: test.sh ================================================ today=$(TZ=America/New_York date +%Y-%m-%d) year=$(date +%Y) url="https://epss.empiricalsecurity.com/epss_scores-${today}.csv.gz" echo $url response=$(curl -s -o /dev/null -w "%{http_code}" "$url") echo $response if [ "$response" -eq 200 ]; then echo "EPSS scores available for today" else echo "EPSS scores not available yet for today" exit 1 fi pip3 install -r code/requirements.txt rm -f *.zip RESPONSE=$(curl --request GET \ --url https://api.vulncheck.com/v3/backup/nist-nvd \ --header 'Accept: application/json' \ --header "Authorization: Bearer $VULNCHECK_API_KEY") url=$(echo "$RESPONSE" | jq -r '.data[0].url') curl -L -o nvd.zip $url unzip -o "*.zip" rm -f *.zip python3 -u code/process_nvd.py