Repository: FZJ-IEK3-VSA/tsam
Branch: develop
Commit: ca2eb74b5901
Files: 267
Total size: 64.6 MB
Directory structure:
gitextract_t_0zf53m/
├── .github/
│ ├── pull_request_template.md
│ ├── renovate.json
│ └── workflows/
│ ├── ci-develop.yaml
│ ├── ci-master.yaml
│ ├── coverage.yaml
│ ├── pr-title.yaml
│ ├── publish.yaml
│ ├── push_to_jugit.yml
│ └── release.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── .release-please-config.json
├── .release-please-manifest.json
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── benchmarks/
│ ├── bench.py
│ └── benchmark_tuning.py
├── codecov.yml
├── docs/
│ ├── api/
│ │ ├── SUMMARY.md
│ │ └── tsam/
│ │ ├── api.md
│ │ ├── config.md
│ │ ├── exceptions.md
│ │ ├── hyperparametertuning.md
│ │ ├── periodAggregation.md
│ │ ├── plot.md
│ │ ├── representations.md
│ │ ├── result.md
│ │ ├── timeseriesaggregation.md
│ │ ├── tuning.md
│ │ └── utils/
│ │ ├── durationRepresentation.md
│ │ ├── k_maxoids.md
│ │ ├── k_medoids_contiguity.md
│ │ ├── k_medoids_exact.md
│ │ └── segmentation.md
│ ├── background/
│ │ └── math.md
│ ├── further-reading.md
│ ├── gen_ref_pages.py
│ ├── getting-started.md
│ ├── glossary.md
│ ├── index.md
│ ├── installation.md
│ ├── javascripts/
│ │ └── mathjax.js
│ ├── legal-notice.md
│ ├── migration-guide.md
│ ├── notebooks/
│ │ ├── building_energy_system.ipynb
│ │ ├── clustering_methods.ipynb
│ │ ├── clustering_transfer.ipynb
│ │ ├── disaggregation.ipynb
│ │ ├── k_maxoids.ipynb
│ │ ├── optimization_input.ipynb
│ │ ├── pareto_optimization.ipynb
│ │ ├── quickstart.ipynb
│ │ ├── representations.ipynb
│ │ ├── segmentation.ipynb
│ │ ├── testdata.csv
│ │ ├── tuning.ipynb
│ │ └── visualization.ipynb
│ ├── overrides/
│ │ ├── .gitkeep
│ │ └── partials/
│ │ └── footer.html
│ └── stylesheets/
│ └── extra.css
├── environment.yml
├── mkdocs.yml
├── pyproject.toml
├── src/
│ └── tsam/
│ ├── __init__.py
│ ├── api.py
│ ├── config.py
│ ├── exceptions.py
│ ├── hyperparametertuning.py
│ ├── periodAggregation.py
│ ├── plot.py
│ ├── py.typed
│ ├── representations.py
│ ├── result.py
│ ├── timeseriesaggregation.py
│ ├── tuning.py
│ └── utils/
│ ├── __init__.py
│ ├── durationRepresentation.py
│ ├── k_maxoids.py
│ ├── k_medoids_contiguity.py
│ ├── k_medoids_exact.py
│ └── segmentation.py
└── test/
├── _configs.py
├── _old_new_equivalence.py
├── conftest.py
├── data/
│ ├── clustering_e2e/
│ │ ├── expected_contiguous_medoid_8clusters.csv
│ │ ├── expected_hierarchical_distribution_8clusters.csv
│ │ ├── expected_hierarchical_mean_8clusters.csv
│ │ ├── expected_hierarchical_medoid_8clusters.csv
│ │ ├── expected_hierarchical_medoid_8clusters_12segments.csv
│ │ ├── expected_hierarchical_medoid_8clusters_12segments_extremes_append.csv
│ │ ├── expected_hierarchical_medoid_8clusters_12segments_extremes_replace.csv
│ │ ├── expected_hierarchical_medoid_8clusters_6segments.csv
│ │ ├── expected_hierarchical_medoid_8clusters_6segments_extremes_newcluster.csv
│ │ ├── expected_hierarchical_medoid_8clusters_extremes_append.csv
│ │ ├── expected_hierarchical_medoid_8clusters_extremes_newcluster.csv
│ │ ├── expected_hierarchical_medoid_8clusters_extremes_replace.csv
│ │ ├── expected_kmaxoids_maxoid_8clusters.csv
│ │ ├── expected_kmeans_mean_8clusters.csv
│ │ ├── expected_kmedoids_medoid_8clusters.csv
│ │ ├── meta_contiguous_medoid_8clusters.json
│ │ ├── meta_hierarchical_distribution_8clusters.json
│ │ ├── meta_hierarchical_mean_8clusters.json
│ │ ├── meta_hierarchical_medoid_8clusters.json
│ │ ├── meta_hierarchical_medoid_8clusters_12segments.json
│ │ ├── meta_hierarchical_medoid_8clusters_12segments_extremes_append.json
│ │ ├── meta_hierarchical_medoid_8clusters_12segments_extremes_replace.json
│ │ ├── meta_hierarchical_medoid_8clusters_6segments.json
│ │ ├── meta_hierarchical_medoid_8clusters_6segments_extremes_newcluster.json
│ │ ├── meta_hierarchical_medoid_8clusters_extremes_append.json
│ │ ├── meta_hierarchical_medoid_8clusters_extremes_newcluster.json
│ │ ├── meta_hierarchical_medoid_8clusters_extremes_replace.json
│ │ ├── meta_kmaxoids_maxoid_8clusters.json
│ │ ├── meta_kmeans_mean_8clusters.json
│ │ └── meta_kmedoids_medoid_8clusters.json
│ ├── golden/
│ │ ├── averaging/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── averaging_segmentation/
│ │ │ └── testdata.csv
│ │ ├── contiguous/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── contiguous_extremes_append/
│ │ │ └── testdata.csv
│ │ ├── contiguous_segmentation/
│ │ │ └── testdata.csv
│ │ ├── distribution_global/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── distribution_minmax_global/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_append/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_constant/
│ │ │ └── constant.csv
│ │ ├── extremes_max_period/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_min_period/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_min_value/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_multi/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_new_cluster/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_replace/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_replace_segmentation/
│ │ │ └── testdata.csv
│ │ ├── extremes_wide_multi/
│ │ │ └── wide.csv
│ │ ├── extremes_with_segmentation/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ ├── extremes_zero_column/
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_default/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_distribution/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_distribution_minmax/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_duration_curves/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_eval_sum_periods/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_maxoid/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_mean/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_no_rescale/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_rescale_exclude/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_round/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_segmentation/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── hierarchical_weighted/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_duration_curves/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_extremes/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_no_rescale/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_rescale_exclude/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_samemean/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_segmentation/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_segmentation_distribution/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_segmentation_extremes/
│ │ │ └── testdata.csv
│ │ ├── hierarchical_weighted_segmentation_samemean/
│ │ │ └── testdata.csv
│ │ ├── kmaxoids/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── kmaxoids_segmentation/
│ │ │ └── testdata.csv
│ │ ├── kmaxoids_weighted/
│ │ │ └── testdata.csv
│ │ ├── kmeans/
│ │ │ ├── constant.csv
│ │ │ ├── testdata.csv
│ │ │ ├── wide.csv
│ │ │ └── with_zero_column.csv
│ │ ├── kmeans_distribution/
│ │ │ └── testdata.csv
│ │ ├── kmeans_duration_curves/
│ │ │ └── testdata.csv
│ │ ├── kmeans_extremes_append/
│ │ │ └── testdata.csv
│ │ ├── kmeans_segmentation/
│ │ │ └── testdata.csv
│ │ ├── kmeans_weighted/
│ │ │ └── testdata.csv
│ │ ├── kmeans_weighted_distribution/
│ │ │ └── testdata.csv
│ │ ├── kmeans_weighted_segmentation/
│ │ │ └── testdata.csv
│ │ ├── kmedoids/
│ │ │ └── testdata.csv
│ │ ├── kmedoids_segmentation/
│ │ │ └── testdata.csv
│ │ ├── kmedoids_weighted/
│ │ │ └── testdata.csv
│ │ ├── minmaxmean/
│ │ │ └── testdata.csv
│ │ ├── segmentation_distribution_global/
│ │ │ ├── testdata.csv
│ │ │ └── with_zero_column.csv
│ │ └── segmentation_samemean/
│ │ └── testdata.csv
│ ├── preprocessed_wind.csv
│ ├── testperiods_hierarchical.csv
│ ├── testperiods_kmedoids.csv
│ ├── testperiods_predefClusterOrder.csv
│ ├── testperiods_predefClusterOrderAndClusterCenters.csv
│ ├── testperiods_segmentation.csv
│ └── wide.csv
├── generate_golden.py
├── same_cluster_as_input_data.py
├── test_accuracyIndicators.py
├── test_adjacent_periods.py
├── test_aggregate_hiearchical.py
├── test_api_equivalence.py
├── test_assert_raises.py
├── test_averaging.py
├── test_cluster_order.py
├── test_clustering_e2e.py
├── test_disaggregate.py
├── test_durationCurve.py
├── test_durationRepresentation.py
├── test_extremePeriods.py
├── test_golden_regression.py
├── test_hierarchical.py
├── test_hypertuneAggregation.py
├── test_k_maxoids.py
├── test_k_medoids.py
├── test_k_medoids_contiguity.py
├── test_minmaxRepresentation.py
├── test_new_api.py
├── test_plot.py
├── test_preprocess.py
├── test_properties.py
├── test_reconstruct_samemean_segmentation.py
├── test_samemean.py
├── test_segmentation.py
├── test_segmentation_weight_bug.py
├── test_subhourlyResolution.py
├── test_subhourly_periods.py
└── test_weightingFactors.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/pull_request_template.md
================================================
## Description
<!-- Describe your changes in detail -->
## Motivation and Context
<!-- Why is this change required? What problem does it solve? -->
<!-- If it fixes an open issue, please link to the issue here. -->
## Type of Change
<!-- Please check the relevant options -->
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
## Checklist
<!-- Please check the relevant options -->
- [ ] My code follows the project's code style (run `ruff check` and `ruff format`)
- [ ] I have added tests that prove my fix/feature works
- [ ] All new and existing tests pass (`pytest test/`)
- [ ] I have updated the documentation if needed
================================================
FILE: .github/renovate.json
================================================
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:recommended"
],
"labels": [
"dependencies"
],
"schedule": [
"before 5am on monday"
],
"timezone": "Europe/Berlin",
"commitMessagePrefix": "chore(deps):",
"rangeStrategy": "widen",
"enabledManagers": [
"pep621",
"custom.regex",
"github-actions"
],
"pep621": {
"managerFilePatterns": [
"/(^|/)pyproject\\.toml$/"
]
},
"customManagers": [
{
"customType": "regex",
"managerFilePatterns": [
"/(^|/)environment\\.yml$/"
],
"matchStrings": [
"- (?<depName>[a-zA-Z0-9_.-]+)[^\\n]*?(?<currentValue><=?\\d[^\\n,]*)"
],
"packageNameTemplate": "conda-forge/{{{depName}}}",
"datasourceTemplate": "conda",
"versioningTemplate": "pep440"
}
],
"packageRules": [
{
"description": "One PR per dependency, grouping updates across pyproject.toml and environment.yml",
"matchManagers": [
"pep621",
"custom.regex"
],
"groupName": "{{depName}}"
},
{
"description": "Group GitHub Actions updates",
"matchManagers": [
"github-actions"
],
"groupName": "github-actions",
"commitMessageTopic": "GitHub Actions"
},
{
"description": "Replace (don't widen) Python version in setup-python",
"matchDepNames": ["python"],
"matchManagers": ["github-actions"],
"rangeStrategy": "replace"
}
],
"vulnerabilityAlerts": {
"enabled": true,
"labels": [
"security"
]
},
"prConcurrentLimit": 20,
"prHourlyLimit": 10
}
================================================
FILE: .github/workflows/ci-develop.yaml
================================================
name: CI (develop)
on:
workflow_dispatch:
pull_request:
branches: [develop]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
lint:
name: Lint
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
- uses: astral-sh/ruff-action@v4.0.0
- run: ruff check --output-format=github --config=pyproject.toml
- run: ruff format --diff --config=pyproject.toml
test:
name: Test (Python ${{ matrix.python-version }}, ${{ matrix.os }})
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [self-hosted]
python-version: ["3.10", "3.14"]
include:
- os: macos-latest
python-version: "3.12"
- os: windows-latest
python-version: "3.12"
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install
run: uv pip install --system ".[develop]"
- name: Run tests
run: pytest -n auto -vv
# Renovate: test max versions of changed dependencies
detect-renovate-changes:
name: Detect changed dependencies
if: startsWith(github.head_ref, 'renovate/')
uses: FZJ-IEK3-VSA/.github/.github/workflows/detect_renovate_changes.yml@v1
with:
files: "environment.yml pyproject.toml"
extract-renovate-versions:
name: Extract max versions of changed libraries
needs: detect-renovate-changes
if: startsWith(github.head_ref, 'renovate/') && needs.detect-renovate-changes.outputs.libraries != ''
uses: FZJ-IEK3-VSA/.github/.github/workflows/extract_versions.yml@v1
with:
libraries: ${{ needs.detect-renovate-changes.outputs.libraries }}
version_type: max
env_file: environment.yml
test-renovate:
name: Test ${{ matrix.dependencies.library_name }} ${{ matrix.dependencies.version }} (max)
needs: extract-renovate-versions
if: startsWith(github.head_ref, 'renovate/')
strategy:
fail-fast: false
matrix:
runner_tag: ["self-hosted"]
dependencies: ${{ fromJSON(needs.extract-renovate-versions.outputs.matrix) }}
uses: FZJ-IEK3-VSA/.github/.github/workflows/run_conda_forge_test.yml@v1
with:
runner_tag: ${{ matrix.runner_tag }}
requirements_file_name: environment.yml
library_name: ${{ matrix.dependencies.library_name }}
library_version: ${{ matrix.dependencies.version }}
dependency_position_env_file: ${{ matrix.dependencies.yaml_position }}
multiprocessing_pytest_string: "-n auto"
docs_build_command: "mkdocs build --strict"
docs:
name: Build docs
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install
run: uv pip install --system ".[develop]"
- name: Build MkDocs
run: mkdocs build --strict
ci-success:
name: CI Success
if: always()
needs: [lint, test, docs]
runs-on: ubuntu-24.04
steps:
- if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
run: exit 1
================================================
FILE: .github/workflows/ci-master.yaml
================================================
name: CI (master)
on:
workflow_dispatch:
pull_request:
branches: [master]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
lint:
name: Lint
if: ${{ !startsWith(github.head_ref, 'release-please--') }}
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
- uses: astral-sh/ruff-action@v4.0.0
- run: ruff check --output-format=github --config=pyproject.toml
- run: ruff format --diff --config=pyproject.toml
# Extended PyPI cross-platform testing
# (6 OS variants × all Python versions)
test:
name: Test (Python ${{ matrix.python-version }}, ${{ matrix.os }})
if: ${{ !startsWith(github.head_ref, 'release-please--') }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, ubuntu-22.04, macos-latest, macos-15-intel, windows-latest, windows-2022]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install
run: uv pip install --system ".[develop]"
- name: Run tests
run: pytest -n auto -vv
# Conda-forge testing
extract-python-versions:
if: ${{ github.actor != 'renovate[bot]' && !startsWith(github.head_ref, 'release-please--') }}
uses: FZJ-IEK3-VSA/.github/.github/workflows/extract_versions.yml@v1
with:
libraries: python
env_file: environment.yml
test-conda-forge:
name: Test conda-forge (Python ${{ matrix.python_version }}, ${{ matrix.runner_tag }})
needs: extract-python-versions
if: ${{ github.actor != 'renovate[bot]' && !startsWith(github.head_ref, 'release-please--') }}
strategy:
fail-fast: false
matrix:
runner_tag: ["self-hosted"]
python_version: ${{ fromJSON(needs.extract-python-versions.outputs.versions) }}
uses: FZJ-IEK3-VSA/.github/.github/workflows/run_conda_forge_test.yml@v1
with:
runner_tag: ${{ matrix.runner_tag }}
requirements_file_name: environment.yml
library_name: "python"
library_version: ${{ matrix.python_version }}
dependency_position_env_file: ${{ needs.extract-python-versions.outputs.dependency_position }}
multiprocessing_example_string: "-n auto"
# Min/max dependency boundary testing
extract-versions:
if: ${{ !startsWith(github.head_ref, 'release-please--') }}
uses: FZJ-IEK3-VSA/.github/.github/workflows/extract_versions.yml@v1
with:
libraries: "scikit-learn,pandas,numpy,pyomo,networkx,tqdm,highspy"
version_type: both
env_file: environment.yml
test-dependency-bounds:
name: Test ${{ matrix.dependencies.library_name }} ${{ matrix.dependencies.version }} (${{ matrix.dependencies.version_type }})
if: ${{ !startsWith(github.head_ref, 'release-please--') }}
needs: extract-versions
strategy:
fail-fast: false
matrix:
runner_tag: ["self-hosted"]
dependencies: ${{ fromJSON(needs.extract-versions.outputs.matrix) }}
uses: FZJ-IEK3-VSA/.github/.github/workflows/run_conda_forge_test.yml@v1
with:
runner_tag: ${{ matrix.runner_tag }}
requirements_file_name: environment.yml
library_version: ${{ matrix.dependencies.version }}
dependency_position_env_file: ${{ matrix.dependencies.yaml_position }}
library_name: ${{ matrix.dependencies.library_name }}
multiprocessing_example_string: "-n auto"
multiprocessing_pytest_string: "-n auto"
ipynb_example_folder: docs/notebooks/
docs:
name: Build docs
if: ${{ !startsWith(github.head_ref, 'release-please--') }}
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install
run: uv pip install --system ".[develop]"
- name: Build MkDocs
run: mkdocs build --strict
ci-success:
name: CI Success
if: ${{ always() && !startsWith(github.head_ref, 'release-please--') }}
needs: [lint, test, docs]
runs-on: ubuntu-24.04
steps:
- if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
run: exit 1
================================================
FILE: .github/workflows/coverage.yaml
================================================
name: Coverage
on:
workflow_dispatch:
push:
branches: [develop]
jobs:
coverage:
name: Upload coverage to Codecov
runs-on: self-hosted
steps:
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install
run: uv pip install --system ".[develop]"
- name: Run tests with coverage
run: pytest -n auto --cov=tsam --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v6
with:
files: ./coverage.xml
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
================================================
FILE: .github/workflows/pr-title.yaml
================================================
name: PR Title
on:
push:
branches: ['release-please--**']
pull_request:
types: [opened, edited, synchronize, reopened]
jobs:
validate:
name: Validate conventional commit format
runs-on: ubuntu-24.04
steps:
- if: ${{ github.event_name != 'push' }}
uses: amannn/action-semantic-pull-request@v6
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
types: |
feat
fix
refactor
test
docs
chore
ci
perf
build
requireScope: false
# Prevent e.g. "fix(ci):" — use "ci:" as the type instead.
# These types are hidden from the changelog, but only when used
# as the type, not as a scope on another type like "fix".
disallowScopes: |
ci
build
chore
test
refactor
subjectPattern: ^.+$
subjectPatternError: "PR title must have a description after the type"
- if: ${{ github.event_name == 'push' }}
run: echo "Release Please PRs always use valid conventional commit format"
================================================
FILE: .github/workflows/publish.yaml
================================================
name: Publish
on:
push:
tags: ["v*"]
permissions:
contents: write
id-token: write
jobs:
publish:
name: Build & publish to PyPI
runs-on: ubuntu-24.04
timeout-minutes: 10
environment:
name: pypi
url: https://pypi.org/project/tsam
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.ref_name }}
fetch-depth: 0
- uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Build
run: uv build
- uses: pypa/gh-action-pypi-publish@release/v1
github-release:
name: Create GitHub Release
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v6
with:
ref: ${{ github.ref_name }}
- name: Create GitHub Release
run: gh release create "$TAG" --generate-notes || echo "Release $TAG already exists, skipping."
env:
GH_TOKEN: ${{ github.token }}
TAG: ${{ github.ref_name }}
================================================
FILE: .github/workflows/push_to_jugit.yml
================================================
name: Mirror to JuGit
on:
workflow_dispatch:
push:
branches:
- develop
- master
jobs:
mirror:
name: Push ${{ github.ref_name }} to JuGit
uses: FZJ-IEK3-VSA/.github/.github/workflows/sync_jugit.yml@v1
with:
jugit_repo_path: iek-3/shared-code/tsam
secrets:
JUGIT_ACCESS_TOKEN: ${{ secrets.JUGIT_ACCESS_TOKEN }}
================================================
FILE: .github/workflows/release.yaml
================================================
name: Release
on:
push:
branches: [master]
permissions:
contents: write
pull-requests: write
jobs:
release-please:
name: Release Please
runs-on: ubuntu-24.04
outputs:
release_created: ${{ steps.release.outputs.release_created }}
tag_name: ${{ steps.release.outputs.tag_name }}
steps:
- uses: actions/create-github-app-token@v3
id: app-token
with:
app-id: ${{ secrets.RELEASE_APP_ID }}
private-key: ${{ secrets.RELEASE_APP_PRIVATE_KEY }}
- uses: googleapis/release-please-action@v5
id: release
with:
config-file: .release-please-config.json
manifest-file: .release-please-manifest.json
target-branch: master
token: ${{ steps.app-token.outputs.token }}
sync-develop:
name: Sync develop with master
needs: release-please
if: needs.release-please.outputs.release_created
runs-on: ubuntu-24.04
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@v6
with:
ref: master
fetch-depth: 0
- name: Open PR to merge master into develop
run: |
gh pr create \
--base develop \
--head master \
--title "chore: sync develop with master after ${{ needs.release-please.outputs.tag_name }}" \
--body "Automated PR to merge release ${{ needs.release-please.outputs.tag_name }} back into develop." \
|| echo "PR already exists or branches are already in sync"
env:
GH_TOKEN: ${{ github.token }}
- name: Enable auto-merge (merge commit) for sync PR
run: |
gh pr merge master \
--repo ${{ github.repository }} \
--merge \
--auto \
|| echo "Auto-merge already enabled or PR already merged"
env:
GH_TOKEN: ${{ github.token }}
================================================
FILE: .gitignore
================================================
# Python
__pycache__/
src/tsam/_version.py
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
.venv/
venv/
ENV/
env/
# IDEs
.idea/
.vscode/
*.swp
*.swo
*~
# Testing
.pytest_cache/
.benchmarks/
.coverage
htmlcov/
.tox/
.nox/
# Type checking
.mypy_cache/
.dmypy.json
dmypy.json
# Linting
.ruff_cache/
# Jupyter
.ipynb_checkpoints/
*/.ipynb_checkpoints/
# Documentation
docs/build/
site/
# OS
.DS_Store
Thumbs.db
# Project specific
*.log
*.lp
*.glp
*.mp4
trash/
# Notebook output artifacts (regenerated when notebooks run)
docs/notebooks/results/
docs/notebooks/clustering.json
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
args: ['--unsafe']
- id: check-added-large-files
args: ['--maxkb=4000']
- id: check-merge-conflict
- id: debug-statements
- repo: https://github.com/kynan/nbstripout
rev: 0.8.1
hooks:
- id: nbstripout
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.0
hooks:
- id: ruff
args: [--fix]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.13.0
hooks:
- id: mypy
additional_dependencies:
- pandas-stubs
args: [--ignore-missing-imports]
exclude: ^(docs/|test/|benchmarks/)
================================================
FILE: .readthedocs.yml
================================================
# Required
version: 2
# Build environment
build:
os: ubuntu-22.04
tools:
python: "3.12"
apt_packages:
- pandoc
jobs:
post_install:
- pip install -e ".[develop]"
# Build documentation with MkDocs
mkdocs:
configuration: mkdocs.yml
================================================
FILE: .release-please-config.json
================================================
{
"$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
"include-component-in-tag": false,
"changelog-sections": [
{"type": "feat", "section": "Features"},
{"type": "fix", "section": "Bug Fixes"},
{"type": "perf", "section": "Performance"},
{"type": "docs", "section": "Documentation"},
{"type": "ci", "hidden": true},
{"type": "build", "hidden": true},
{"type": "chore", "hidden": true},
{"type": "refactor", "hidden": true},
{"type": "test", "hidden": true}
],
"packages": {
".": {
"release-type": "python",
"package-name": "tsam",
"changelog-path": "CHANGELOG.md"
}
}
}
================================================
FILE: .release-please-manifest.json
================================================
{
".": "3.3.0"
}
================================================
FILE: CHANGELOG.md
================================================
# ETHOS.TSAM Change Log
All notable changes to this project will be documented in this file.
New entries are automatically added by [release-please](https://github.com/googleapis/release-please) from conventional commit messages.
## [3.3.0](https://github.com/FZJ-IEK3-VSA/tsam/compare/v3.2.1...v3.3.0) (2026-03-30)
### Features
* AccuracyMetrics now exposes weighted_rmse, weighted_mae, and weighted_rmse_duration as pre-computed scalars ([#238](https://github.com/FZJ-IEK3-VSA/tsam/issues/238)) ([b70b819](https://github.com/FZJ-IEK3-VSA/tsam/commit/b70b81998c03473cc494834f5589ba7364cf5ff9))
* add disaggregate() method ([#245](https://github.com/FZJ-IEK3-VSA/tsam/issues/245)) ([b24e32e](https://github.com/FZJ-IEK3-VSA/tsam/commit/b24e32e4263b5d97c127b3e881d5d845228c01b9))
### Bug Fixes
* make LegacyAPIWarning visible by default before v4 removal ([#236](https://github.com/FZJ-IEK3-VSA/tsam/issues/236)) ([#237](https://github.com/FZJ-IEK3-VSA/tsam/issues/237)) ([37ff3d8](https://github.com/FZJ-IEK3-VSA/tsam/commit/37ff3d88a1f28b64bcb4615ba9842f81d8d8bb43))
## [3.2.1](https://github.com/FZJ-IEK3-VSA/tsam/compare/v3.2.0...v3.2.1) (2026-03-25)
### Bug Fixes
* use column weights in tuning RMSE objective ([#227](https://github.com/FZJ-IEK3-VSA/tsam/issues/227)) ([1ceee5c](https://github.com/FZJ-IEK3-VSA/tsam/commit/1ceee5c69856b61aed9eae3f5d5f713be8ac85e9)), closes [#226](https://github.com/FZJ-IEK3-VSA/tsam/issues/226)
## [3.2.0](https://github.com/FZJ-IEK3-VSA/tsam/compare/v3.1.2...v3.2.0) (2026-03-24)
This release moves the `weights` argument out of `ClusterConfig` and into `aggregate` (and similar methods), while deprecating the old usage inside `ClusterConfig`. The parameter affects all aggregation steps and is now placed accordingly. Further, we added a new plotting method that lets you inspect cluster members and their representation.
### Features
* Interactive cluster member visualization ([#159](https://github.com/FZJ-IEK3-VSA/tsam/issues/159)) ([61c6296](https://github.com/FZJ-IEK3-VSA/tsam/commit/61c6296e2a9c616b36af42ad8d22181652d5d291))
* Move weights to top-level aggregate() parameter ([#195](https://github.com/FZJ-IEK3-VSA/tsam/issues/195)) ([4f177d0](https://github.com/FZJ-IEK3-VSA/tsam/commit/4f177d0792e06373c23ab1eefc2f0794c7990675))
### Documentation
* Add ETHOS.TSAM branding, FZJ theme, and documentation update ([#194](https://github.com/FZJ-IEK3-VSA/tsam/issues/194)) ([d24a0a3](https://github.com/FZJ-IEK3-VSA/tsam/commit/d24a0a39971c8cf0c597956a9a3c4b64bc263e1d))
* extract glossary into standalone file ([d24a0a3](https://github.com/FZJ-IEK3-VSA/tsam/commit/d24a0a39971c8cf0c597956a9a3c4b64bc263e1d))
* improve codeblock in Getting Started: ([d24a0a3](https://github.com/FZJ-IEK3-VSA/tsam/commit/d24a0a39971c8cf0c597956a9a3c4b64bc263e1d))
* remove integrated software section and update legal notice ([#218](https://github.com/FZJ-IEK3-VSA/tsam/issues/218)) ([4c9cc71](https://github.com/FZJ-IEK3-VSA/tsam/commit/4c9cc71621b2fa9fd690211f6186b7bf5d9d2444))
* update images to README_assets v1.0.0 and add missing publication ([#215](https://github.com/FZJ-IEK3-VSA/tsam/issues/215)) ([e56a686](https://github.com/FZJ-IEK3-VSA/tsam/commit/e56a686ab621cdc14e3837a1095c678e7c4ec19f))
## [3.1.1](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v3.1.1)
ETHOS.TSAM v3.1.1 is the first stable v3 release (versions 3.0.0 and 3.1.0 were yanked from PyPI).
It introduces a modern functional API alongside significant improvements to performance,
plotting, hyperparameter tuning, and overall code quality.
See the [migration guide](migration-guide.md) for a complete guide on upgrading from v2.
### Breaking Changes
* **New functional API**: The primary interface is now `tsam.aggregate()` which returns an `AggregationResult` object
* **Configuration objects**: Clustering and segmentation options are now configured via `ClusterConfig`, `SegmentConfig`, and `ExtremeConfig` dataclasses
* **Segment representation default**: In v2, omitting `segmentRepresentationMethod` caused segments
to silently inherit the cluster `representationMethod` (e.g. distribution). In v3,
`SegmentConfig(representation=...)` defaults to `"mean"` independently. If you relied on the
implicit inheritance, pass the representation explicitly:
`SegmentConfig(n_segments=12, representation=Distribution(scope="global"))`
* **Removed methods**: The `reconstruct()` method has been removed; use the `reconstructed` property on `AggregationResult` instead
* **Renamed parameters**: Parameters have been renamed for consistency:
| Old (v2) | New (v3) |
|----------|----------|
| `noTypicalPeriods` | `n_clusters` |
| `hoursPerPeriod` | `period_duration` |
| `resolution` | `temporal_resolution` |
| `clusterMethod` | `cluster=ClusterConfig(method=...)` |
| `representationMethod` | `cluster=ClusterConfig(representation=...)` |
| `segmentation` + `noSegments` | `segments=SegmentConfig(n_segments=...)` |
| `sameMean` | `cluster=ClusterConfig(normalize_column_means=...)` |
| `rescaleClusterPeriods` | `preserve_column_means` |
| `sortValues` | `cluster=ClusterConfig(use_duration_curves=...)` |
| `evalSumPeriods` | `cluster=ClusterConfig(include_period_sums=...)` |
| `weightDict` | `weights` (top-level parameter) |
| `addPeakMax/Min`, etc. | `extremes=ExtremeConfig(max_value=..., ...)` |
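To make the renames concrete, here is a v2 call next to its v3 equivalent. This is a sketch: the input file name is a placeholder, and the parameter mapping follows the table above:
```python
import pandas as pd
import tsam
from tsam import ClusterConfig, SegmentConfig
import tsam.timeseriesaggregation as tsam_legacy

raw = pd.read_csv("testdata.csv", index_col=0, parse_dates=True)  # placeholder input

# v2 (deprecated): class-based API with camelCase parameters
aggregation = tsam_legacy.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=8,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    segmentation=True,
    noSegments=12,
)
typical_periods = aggregation.createTypicalPeriods()

# v3: functional API with config dataclasses
result = tsam.aggregate(
    raw,
    n_clusters=8,
    period_duration=24,
    cluster=ClusterConfig(method="hierarchical"),
    segments=SegmentConfig(n_segments=12),
)
typical_periods = result.cluster_representatives
```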
### New Features
* **Modern functional API**: New `tsam.aggregate()` function returns an `AggregationResult` with properties:
- `cluster_representatives`: DataFrame with aggregated typical periods
- `cluster_assignments`: Which cluster each original period belongs to
- `cluster_weights`: Occurrence count per cluster
- `accuracy`: `AccuracyMetrics` object with RMSE, MAE, and duration curve RMSE
- `reconstructed`: Reconstructed time series (cached property)
- `residuals`: Difference between original and reconstructed
- `original`: Access to original input data
- `clustering`: `ClusteringResult` for serialization and transfer
* **Clustering transfer and serialization**: New `ClusteringResult` enables:
- Save/load clustering via `to_json()` / `from_json()`
- Apply same clustering to different data via `apply()`
- Transfer clustering from one dataset to another (e.g., cluster on wind, apply to all columns); see the sketch after this feature list
* **Integrated plotting** via `result.plot` accessor with Plotly (replaces matplotlib):
- `result.plot.compare()`: Compare original vs reconstructed (overlay, side-by-side, or duration curves)
- `result.plot.residuals()`: Visualize reconstruction errors (time series, histogram, by period, or by timestep)
- `result.plot.cluster_representatives()`: Plot typical periods with cluster weights
- `result.plot.cluster_members()`: All original periods per cluster with representative highlighted, interactive slider
- `result.plot.cluster_weights()`: Cluster weight distribution
- `result.plot.accuracy()`: Accuracy metrics (RMSE, MAE, duration RMSE) per column
- `result.plot.segment_durations()`: Average segment durations (when using segmentation)
* **Hyperparameter tuning module** `tsam.tuning` with:
- `find_optimal_combination()`: Find best n_clusters/n_segments combination
- `find_pareto_front()`: Compute Pareto front of accuracy vs. complexity
- Support for parallel execution
- New parameters: `segment_representation`, `extremes`, `preserve_column_means`, `round_decimals`, `numerical_tolerance`
* **Accuracy metrics**: `AccuracyMetrics` class with `.summary` property for convenient DataFrame output
* **Utility functions**: `tsam.unstack_to_periods()` for reshaping time series for heatmap visualization
* `Distribution` and `MinMaxMean` **representation objects** for `ClusterConfig` and
`SegmentConfig`, providing a structured alternative to plain string representation names
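A minimal sketch tying several of these features together. The input file is a placeholder, and the exact signatures of `to_json()` and `apply()` are assumptions to be checked against the API reference:
```python
import pandas as pd
import tsam
from tsam import ClusterConfig

raw = pd.read_csv("testdata.csv", index_col=0, parse_dates=True)  # placeholder input

result = tsam.aggregate(
    raw,
    n_clusters=8,
    period_duration="24h",
    cluster=ClusterConfig(method="hierarchical"),
)

# Accuracy metrics as a DataFrame via the .summary property
print(result.accuracy.summary)

# Interactive Plotly comparison of original vs. reconstructed data
result.plot.compare()

# Serialize the clustering and re-apply it to other data
# (assumed: to_json() returns a JSON string, and apply() takes a
# DataFrame with the same period layout)
payload = result.clustering.to_json()
transferred = result.clustering.apply(raw)
```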
### Improvements
* Segment center preservation for better accuracy when using medoid/maxoid segment representation
* Consistent semantic naming across the entire codebase
* Better handling of extreme periods with `n_clusters` edge cases
* Lazy loading of optional modules (`plot`, `tuning`) to reduce import time
### Bug Fixes
The following bugs existed in v2.3.9 and are fixed here:
* Fixed rescaling with segmentation (was applying rescaling twice)
* Fixed `predictOriginalData()` denormalization when using `sameMean=True` with segmentation
* Fixed segment label ordering bug: `AgglomerativeClustering` produces arbitrary cluster labels,
which caused `durationRepresentation()` with `distributionPeriodWise=False` to allocate
the global distribution differently when transferring a clustering. Segment clusters are now
relabelled to temporal order after `fit_predict()`.
* Fixed non-deterministic sorting in `durationRepresentation()` across both code paths
by using `kind="stable"` and `np.round(mean, 10)` before `argsort`, ensuring
identical tie-breaking across platforms.
### Result consistency
The stable sort fix guarantees cross-platform reproducibility but changes tie-breaking
compared to v2.3.9. Four distribution-related configurations (`hierarchical_distribution`,
`hierarchical_distribution_minmax`, `distribution_global`, `distribution_minmax_global`)
produce slightly different results, but will be consistent across systems from now on. All statistical properties are preserved. The remaining
23 configurations are bit-for-bit identical to v2.3.9. See the
[migration guide](migration-guide.md) for details.
### Known Limitations
* **Clustering transfer with 'replace' extreme method**: The 'replace' extreme method
creates a hybrid cluster representation where some columns use the medoid values
and others use the extreme period values. This hybrid representation cannot be
perfectly reproduced during transfer via `ClusteringResult.apply()`. Warnings
are issued when saving (`to_json()`) or applying such a clustering. For exact
transfer with extreme periods, use 'append' or 'new_cluster' extreme methods instead.
### Performance
Multiple vectorization optimizations replace pandas loops with numpy array operations,
providing **35--77x** end-to-end speedups over v2.3.9 for most configurations.
**Benchmarked across 27 configurations x 4 datasets against v2.3.9:**
* Hierarchical methods on real-world data: **35--60x faster**
* Distribution representation (cluster-wise): **35--55x faster**
* Averaging: up to **77x faster**
* Contiguous clustering: **50--54x faster**
* Distribution representation (global scope): **7--16x faster**
* Iterative methods (kmeans, kmedoids, kmaxoids): **1--6x faster** (core solver dominates)
**Key function-level optimizations:**
* **`predictOriginalData()`**: Vectorized indexing replaces per-period
`.unstack()` loop (~290x function speedup).
* **`durationRepresentation()`**: Vectorized numpy 3D operations replace
nested pandas loops (~8x function speedup).
* **`_rescaleClusterPeriods()`**: numpy 3D arrays replace pandas
MultiIndex operations (~11x function speedup).
* **`_clusterSortedPeriods()`**: numpy 3D reshape + sort replaces
per-column DataFrame sorting loop (~12x function speedup).
### Testing
* Regression test suite: 296 old/new API equivalence tests + 148 golden-file tests
comparing both APIs against baselines generated with tsam v2.3.9.
* Benchmark suite (`benchmarks/bench.py`) for performance comparison across versions
using pytest-benchmark.
### Deprecations
* **TimeSeriesAggregation class**: The legacy class-based API now emits a `LegacyAPIWarning` when instantiated. It will be removed in a future version. Users should migrate to the new `tsam.aggregate()` function.
* **unstackToPeriods function**: Deprecated in favor of `tsam.unstack_to_periods()`.
* **HyperTunedAggregations class**: The legacy hyperparameter tuning class in `tsam.hyperparametertuning` is deprecated. Use `tsam.tuning.find_optimal_combination()` or `tsam.tuning.find_pareto_front()` instead.
* **getNoPeriodsForDataReduction / getNoSegmentsForDataReduction**: Helper functions deprecated along with `HyperTunedAggregations`.
* To suppress warnings during migration:
```python
import warnings
from tsam import LegacyAPIWarning
warnings.filterwarnings("ignore", category=LegacyAPIWarning)
```
### Legacy API
The class-based API remains available for backward compatibility but is deprecated:
```python
import tsam.timeseriesaggregation as tsam_legacy
aggregation = tsam_legacy.TimeSeriesAggregation(
raw,
noTypicalPeriods=8,
hoursPerPeriod=24,
clusterMethod='hierarchical',
)
typical_periods = aggregation.createTypicalPeriods()
```
## [2.3.9](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.9)
* Improved time series aggregation speed with segmentation (issue #96)
* Fixed issue #99
## [2.3.8](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.8)
* Enhanced time series aggregation speed with segmentation (issue #96)
## [2.3.7](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.7)
* Added Python 3.13 support
* Updated GitHub Actions workflow (ubuntu-20.04 to ubuntu-22.04)
* Resolved invalid escape sequence error (issue #90)
## [2.3.6](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.6)
* Migrated from `setup.py` to `pyproject.toml`
* Changed project layout from flat to source structure
* Updated installation documentation
* Fixed deprecation and future warnings (issue #91)
## [2.3.5](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.5)
* Re-release of v2.3.4 to fix GitHub/PyPI synchronization
## [2.3.4](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.4)
* Extended reporting for time series tolerance exceedances
* Added option to silence tolerance warnings (default threshold: 1e-13)
## [2.3.3](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.3)
* Dropped support for Python versions below 3.9
* Fixed deprecation warnings
## [2.3.2](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.2)
* Limited pandas version to below 3.0
* Silenced deprecation warnings
## [2.3.1](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.1)
* Accelerated rescale cluster periods functionality
* Updated documentation with autodeployment features
## [2.3.0](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.3.0)
* Fixed deprecated pandas functions
* Corrected distribution representation sum calculations
* Added segment representation capability
* Extended default example
* Switched CI infrastructure from Travis to GitHub workflows
## [2.2.2](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.2.2)
* Fixed Hypertuning class
* Adjusted the default MILP solver
* Reworked documentation
## [2.1.0](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.1.0)
* Added hyperparameter tuning meta class for identifying optimal time series aggregation parameters
## [2.0.1](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.0.1)
* Changed dependency of scikit-learn to make tsam conda-forge compatible
## [2.0.0](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v2.0.0)
* A new comprehensive structure that allows for free cross-combination of clustering algorithms and cluster representations (e.g., centroids or medoids)
* A novel cluster representation method that precisely replicates the original time series value distribution based on [Hoffmann, Kotzur and Stolten (2021)](https://arxiv.org/abs/2111.12072)
* Maxoids as a representation algorithm that represents time series by outliers only, based on Sifa and Bauckhage (2017): "Online k-Maxoids clustering"
* K-medoids contiguity: An algorithm based on Oehrlein and Hauner (2017) that accounts for contiguity constraints
## [1.1.2](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v1.1.2)
* Added first version of the k-medoid contiguity algorithm
## [1.1.1](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v1.1.1)
* Significantly increased test coverage
* Separation between clustering and representation (e.g., for Ward's hierarchical clustering, the representation by medoids or centroids can now be freely chosen)
## [1.1.0](https://github.com/FZJ-IEK3-VSA/tsam/releases/tag/v1.1.0)
* Segmentation (clustering of adjacent time steps) according to Pineda et al. (2018)
* k-MILP: Extension of MILP-based k-medoids clustering for automatic identification of extreme periods according to Zatti et al. (2019)
* Option to dynamically choose whether clusters should be represented by their centroid or medoid
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to ETHOS.TSAM
Thank you for your interest in contributing to ETHOS.TSAM! This document provides guidelines and instructions for contributing.
## Development Setup
### Prerequisites
- Python 3.10 or higher
- [uv](https://docs.astral.sh/uv/) (recommended) or pip
### Setting Up Your Development Environment
1. **Clone the repository**
```bash
git clone https://github.com/FZJ-IEK3-VSA/tsam.git
cd tsam
```
2. **Create a virtual environment and install dependencies**
Using uv (recommended):
```bash
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv pip install -e ".[develop]"
```
Using pip:
```bash
python -m venv .venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
pip install -e ".[develop]"
```
3. **Set up pre-commit hooks**
```bash
pre-commit install
```
This will automatically run linting and formatting checks before each commit.
## Code Quality
We use modern Python tools to maintain code quality:
### Linting and Formatting
[Ruff](https://docs.astral.sh/ruff/) is used for both linting and formatting:
```bash
# Check for linting issues
ruff check src/ test/
# Auto-fix linting issues
ruff check src/ test/ --fix
# Format code
ruff format src/ test/
```
### Type Checking
[Mypy](https://mypy.readthedocs.io/) is used for static type checking:
```bash
mypy src/tsam/
```
### Running All Checks
You can run all checks at once using pre-commit:
```bash
pre-commit run --all-files
```
## Testing
We use [pytest](https://docs.pytest.org/) for testing:
```bash
# Run all tests
uv run pytest test/
# Run tests with coverage
uv run pytest test/ --cov=tsam
# Run tests in parallel (faster)
uv run pytest test/ -n auto
# Run a specific test file
uv run pytest test/test_averaging.py
# Run tests matching a pattern
uv run pytest test/ -k "test_k_means"
```
### Writing Tests
- Place tests in the `test/` directory
- Name test files with the `test_` prefix
- Name test functions with the `test_` prefix
- Use descriptive names that explain what is being tested
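For example, a minimal test following these conventions. This is an illustrative sketch, not part of the suite: the synthetic data is made up, and it assumes `cluster_assignments` behaves like a sequence with one label per original period:
```python
# test/test_cluster_count.py (illustrative sketch)
import pandas as pd
import tsam


def test_aggregate_assigns_each_period_to_a_cluster():
    # Four days of hourly data with a simple rising pattern
    index = pd.date_range("2030-01-01", periods=96, freq="h")
    raw = pd.DataFrame({"load": range(96)}, index=index)

    result = tsam.aggregate(raw, n_clusters=2, period_duration="24h")

    # One assignment per original day, spread over the requested clusters
    assert len(result.cluster_assignments) == 4
    assert len(set(result.cluster_assignments)) == 2
```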
## Making Changes
### Branching Strategy
1. Create a new branch from `develop`:
```bash
git checkout develop
git pull origin develop
git checkout -b feature/your-feature-name
```
2. Make your changes and commit them with clear messages
3. Push your branch and create a pull request to `develop`
### Commit Messages
We use [Conventional Commits](https://www.conventionalcommits.org/). PR titles are validated against this format and used to generate the changelog.
Common prefixes:
| Prefix | When to use | Example |
|---|---|---|
| `feat:` | New feature | `feat: add hourly resolution support` |
| `fix:` | Bug fix | `fix: correct weight normalization` |
| `docs:` | Documentation only | `docs: update installation guide` |
| `build:` | Build system / dependencies | `build: bump pandas to 2.2` |
| `ci:` | CI configuration | `ci: add Python 3.13 to matrix` |
| `refactor:` | Code change that neither fixes a bug nor adds a feature | `refactor: extract clustering logic` |
| `test:` | Adding or updating tests | `test: add segmentation edge cases` |
Use `!` after the prefix (e.g. `feat!:`) for breaking changes.
### Pull Request Guidelines
1. Ensure all tests pass
2. Ensure linting and type checks pass
3. Update documentation if needed
4. Add tests for new functionality
5. Keep pull requests focused on a single change
## Code Style
- Follow [PEP 8](https://peps.python.org/pep-0008/) guidelines (enforced by Ruff)
- Use meaningful variable and function names
- Add docstrings to public functions and classes
- Keep functions focused and reasonably sized
## Documentation
Documentation is built using [MkDocs](https://www.mkdocs.org/) with [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) and hosted on [Read the Docs](https://tsam.readthedocs.io/).
### Building Documentation Locally
```bash
mkdocs serve
```
The documentation will be available at `http://127.0.0.1:8000/`.
## Releasing
Releases are automated via [release-please](https://github.com/googleapis/release-please) and GitHub Actions.
### Regular releases
1. Squash-merge PRs with conventional commit titles into `develop`
2. Merge `develop` into `master` (regular merge commit)
3. release-please automatically opens a PR with version bump + CHANGELOG update
4. Merge the release-please PR → a git tag is created → package is published to PyPI
### Pre-releases
Pre-releases can be published from any branch by pushing a tag:
```bash
git tag v4.1.0-rc.1
git push origin v4.1.0-rc.1
```
This creates a GitHub Release marked as pre-release and publishes to PyPI.
### Hotfix / manual releases
Tag any commit and push it to trigger a release:
```bash
git tag v4.0.1
git push origin v4.0.1
```
Note: manual releases skip the CHANGELOG update (which is managed by release-please).
## Questions?
If you have questions, feel free to:
- Open an issue on GitHub
- Check existing issues and discussions
- Review the [documentation](https://tsam.readthedocs.io/)
## License
By contributing to ETHOS.TSAM, you agree that your contributions will be licensed under the MIT License.
================================================
FILE: LICENSE.txt
================================================
MIT License
Copyright (c) 2017-2026 Forschungszentrum Jülich GmbH (ICE-2)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
[](https://pypi.python.org/pypi/tsam) [](https://anaconda.org/conda-forge/tsam) [](https://tsam.readthedocs.io/en/latest/) [](https://github.com/FZJ-IEK3-VSA/tsam/blob/master/LICENSE.txt) [](https://codecov.io/gh/FZJ-IEK3-VSA/tsam)
<p align="left">
<a href="https://tsam.readthedocs.io/en/latest/">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="docs/assets/tsam-logo-dark.svg">
<source media="(prefers-color-scheme: light)" srcset="docs/assets/tsam-logo-light.svg">
<img src="docs/assets/tsam-logo-light.svg" alt="ETHOS.TSAM Logo" height="80px">
</picture>
</a>
<a href="https://www.fz-juelich.de/en/ice/ice-2"><img src="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/ICE2_Logos/JSA-Header.svg" alt="Jülich System Analysis Header" height="80px"></a>
</p>
# ETHOS.TSAM - Time Series Aggregation Module
ETHOS.TSAM is a Python package that uses different machine learning algorithms to aggregate time series. The aggregation can be performed along two freely combinable dimensions: by representing the time series with a user-defined number of typical periods or by decreasing the temporal resolution.
ETHOS.TSAM was originally designed to reduce the computational load of large-scale energy system optimization models by aggregating their input data, but it is applicable to all kinds of time series, e.g., weather data, load data, both simultaneously, or other arbitrary groups of time series.
ETHOS.TSAM is part of the [Energy Transformation PatHway Optimization Suite (ETHOS) at ICE-2](https://www.fz-juelich.de/de/ice/ice-2/leistungen/model-services). It is tightly integrated into [ETHOS.FINE](https://github.com/FZJ-IEK3-VSA/FINE) to reduce the temporal complexity of energy system models.
The documentation of the ETHOS.TSAM code can be found [**here**](https://tsam.readthedocs.io/).
## Features
* flexible handling of multidimensional time-series via the pandas module
* different aggregation methods implemented (averaging, k-means, exact k-medoids, hierarchical, k-maxoids, k-medoids with contiguity), which are based on scikit-learn, or self-programmed with pyomo
* hypertuning of aggregation parameters to find the optimal combination of the number of segments inside a period and the number of typical periods
* novel representation methods, keeping statistical attributes, such as the distribution
* flexible integration of extreme periods as own cluster centers
* weighting for the case of multidimensional time-series to represent their relevance
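For example, the weighting feature listed above is exposed as a top-level `weights` parameter (the successor of v2's `weightDict`). A sketch, assuming `weights` maps column names to weighting factors and using placeholder file and column names:
```python
import pandas as pd
import tsam

raw = pd.read_csv("testdata.csv", index_col=0, parse_dates=True)  # placeholder input

# Give the "wind" column twice the influence of "load" during clustering
result = tsam.aggregate(
    raw,
    n_clusters=8,
    period_duration="24h",
    weights={"wind": 2.0, "load": 1.0},
)
```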
## Installation
To avoid dependency conflicts, it is recommended to install ETHOS.TSAM in its own environment. You can use either [uv](https://docs.astral.sh/uv/) or [conda/mamba](https://conda-forge.org/download/) to manage environments and installations; install one of them before proceeding.
**Quick Install with uv**
```bash
uv venv tsam_env
source tsam_env/bin/activate  # On Windows: tsam_env\Scripts\activate
uv pip install tsam
```
Or from conda-forge:
```bash
conda create -n tsam_env -c conda-forge tsam
```
conda and mamba can be used interchangeably.
### Development Installation
```bash
git clone https://github.com/FZJ-IEK3-VSA/tsam.git
cd tsam
```
**Using uv (recommended)**
```bash
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv pip install -e ".[develop]"
```
**Using conda-forge**
```bash
conda env create -n tsam_env --file=environment.yml
conda activate tsam_env
pip install -e . --no-deps
```
**Set up pre-commit hooks**
```bash
pre-commit install
```
See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed development guidelines.
### MILP Solver for k-medoids
[HiGHS](https://github.com/ERGO-Code/HiGHS) is installed by default. For better performance on large problems, the commercial solvers Gurobi and CPLEX are recommended if you have a license.
## Examples
### Basic workflow
A small example of how ETHOS.TSAM can be used:
```python
import pandas as pd
import tsam
```
Read in the time series data set with pandas:
```python
raw = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
```
Run the aggregation - specify the number of typical periods and configure clustering/segmentation options:
```python
from tsam import ClusterConfig, SegmentConfig
result = tsam.aggregate(
raw,
n_clusters=8,
period_duration='24h', # or 24, '1d'
cluster=ClusterConfig(
method='hierarchical',
representation='distribution_minmax',
),
segments=SegmentConfig(n_segments=8),
)
```
Access the results:
```python
# Get the typical periods DataFrame
cluster_representatives = result.cluster_representatives
# Check accuracy metrics
print(f"RMSE: {result.accuracy.rmse.mean():.4f}")
# Reconstruct the original time series from typical periods
reconstructed = result.reconstructed
# Save results
cluster_representatives.to_csv('cluster_representatives.csv')
```
### Legacy API
For backward compatibility, the class-based API of TSAM Version 2 is still available.
```python
import tsam.timeseriesaggregation as tsam_legacy
aggregation = tsam_legacy.TimeSeriesAggregation(
raw,
noTypicalPeriods=8,
hoursPerPeriod=24,
segmentation=True,
noSegments=8,
representationMethod="distributionAndMinMaxRepresentation",
clusterMethod='hierarchical'
)
cluster_representatives = aggregation.createTypicalPeriods()
```
### Detailed examples
Detailed examples can be found in [docs/notebooks/](/docs/notebooks/).
A [**quickstart example**](/docs/notebooks/quickstart.ipynb) shows the capabilities of ETHOS.TSAM as a Jupyter notebook.
A [**second example**](/docs/notebooks/optimization_input.ipynb) shows in more detail how to access the relevant aggregation results required for parameterizing e.g. an optimization.
The example time series are based on a department [publication](https://www.mdpi.com/1996-1073/10/3/361) and the [test reference years of the DWD](https://www.dwd.de/DE/leistungen/testreferenzjahre/testreferenzjahre.html).
## License
[MIT License](LICENSE.txt)
## Citing and further reading
If you want to use ETHOS.TSAM in a published work, **please kindly cite** our latest journal article:
* Hoffmann et al. (2022):\
[**The Pareto-Optimal Temporal Aggregation of Energy System Models**](https://www.sciencedirect.com/science/article/abs/pii/S0306261922004342)
If you are further interested in the impact of time series aggregation on the cost-optimal results of different energy system use cases, you can find a publication which validates the methods and describes their capabilities via the following [**link**](https://www.sciencedirect.com/science/article/pii/S0960148117309783). A second publication introduces a method for modeling state variables (e.g., the state of charge of energy storage components) between the aggregated typical periods, which can be found [**here**](https://www.sciencedirect.com/science/article/pii/S0306261918300242). Last but not least, the potential of time series aggregation to simplify mixed-integer linear problems is investigated [**here**](https://www.mdpi.com/1996-1073/12/14/2825).
The publications about time series aggregation for energy system optimization models published alongside the development of ETHOS.TSAM are listed below:
* Hoffmann et al. (2022):\
[**The Pareto-Optimal Temporal Aggregation of Energy System Models**](https://www.sciencedirect.com/science/article/abs/pii/S0306261922004342)
* Hoffmann et al. (2021):\
[**Typical periods or typical time steps? A multi-model analysis to determine the optimal temporal aggregation for energy system models**](https://www.sciencedirect.com/science/article/abs/pii/S0306261921011545)
* Hoffmann et al. (2020):\
[**A Review on Time Series Aggregation Methods for Energy System Models**](https://www.mdpi.com/1996-1073/13/3/641)
* Kannengießer et al. (2019):\
[**Reducing Computational Load for Mixed Integer Linear Programming: An Example for a District and an Island Energy System**](https://www.mdpi.com/1996-1073/12/14/2825)
* Kotzur et al. (2018):\
[**Time series aggregation for energy system design: Modeling seasonal storage**](https://www.sciencedirect.com/science/article/pii/S0306261918300242)\
(open access manuscript to be found [**here**](https://arxiv.org/abs/1710.07593))
* Kotzur et al. (2018):\
[**Impact of different time series aggregation methods on optimal energy system design**](https://www.sciencedirect.com/science/article/abs/pii/S0960148117309783)\
(open access manuscript to be found [**here**](https://arxiv.org/abs/1708.00420))
## About Us
We are the <a href="https://www.fz-juelich.de/en/ice/ice-2">Institute of Climate and Energy Systems – Jülich Systems Analysis (ICE-2)</a> at the <a href="https://www.fz-juelich.de/en"> Forschungszentrum Jülich</a>.
Our work focuses on independent, interdisciplinary research in energy, bioeconomy, infrastructure, and sustainability. We support a just, greenhouse gas–neutral transformation through open models and policy-relevant science.
## Code of Conduct
Please respect our [code of conduct](https://github.com/FZJ-IEK3-VSA/README_assets/blob/main/CODE_CONDUCT.md).
## Acknowledgement
This work is supported by the Helmholtz Association under the Joint Initiative ["Energy System 2050 - A Contribution of the Research Field Energy"](https://www.helmholtz.de/en/research/energy/energy_system_2050/) and the program ["Energy System Design"](https://www.esd.kit.edu/index.php) and within the [BMWi/BMWK](https://www.bmwk.de/Navigation/DE/Home/home.html) funded project **METIS**.
<a href="https://www.helmholtz.de/en/">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/Helmholtz_Logos/Helmholtz-Logo-White-RGB.svg">
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/Helmholtz_Logos/Helmholtz-Logo-Dark-Blue-RGB.svg">
<img src="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/Helmholtz_Logos/Helmholtz-Logo-Dark-Blue-RGB.svg" alt="Helmholtz Logo" width="200px" style="float:right">
</picture>
</a>
================================================
FILE: benchmarks/bench.py
================================================
"""Benchmarks for tsam using pytest-benchmark.
Self-contained: only imports ``tsam.timeseriesaggregation`` so it works with
both the current dev version and older releases (e.g. tsam==2.3.9).
Usage::
# Benchmark an old version
uv pip install tsam==2.3.9
pytest benchmarks/bench.py --benchmark-save=v2.3.9
# Switch back to dev and compare
uv pip install -e .
pytest benchmarks/bench.py --benchmark-compare='*v2.3.9'
# Compare two saved snapshots
pytest-benchmark compare '*v2.3.9' '*v3.0.0' --group-by=name
"""
from __future__ import annotations
import sys
from pathlib import Path
import numpy as np
import pytest
# Add test/ to sys.path so we can import _configs
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "test"))
import tsam.timeseriesaggregation as tsam
from _configs import build_old_cases, case_ids, get_data
# ---------------------------------------------------------------------------
# Build parametrized cases from shared configs
# ---------------------------------------------------------------------------
OLD_CASES = build_old_cases()
_IDS = case_ids(OLD_CASES)
# ---------------------------------------------------------------------------
# Runner
# ---------------------------------------------------------------------------
def _run(case) -> None:
data = get_data(case.dataset)
if case.seed is not None:
np.random.seed(case.seed)
agg = tsam.TimeSeriesAggregation(timeSeries=data, **case.old_kwargs)
agg.createTypicalPeriods()
agg.predictOriginalData()
# ---------------------------------------------------------------------------
# Benchmark tests
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("case", OLD_CASES, ids=_IDS)
def test_bench(case, benchmark):
benchmark(lambda: _run(case))
================================================
FILE: benchmarks/benchmark_tuning.py
================================================
#!/usr/bin/env python
"""Benchmark script for tuning parallelization.
Compares sequential vs parallel (file-based) execution.
The file-based approach saves data to a temp file and workers load from disk,
avoiding any DataFrame pickling - safe for sensitive/corporate data.
Usage:
uv run python benchmark_tuning.py
uv run python benchmark_tuning.py --workers 4 # Specific worker count
uv run python benchmark_tuning.py --reduction 0.05 # Different reduction target
"""
from __future__ import annotations
import argparse
import math
import os
import time
from pathlib import Path
import pandas as pd
from tsam.tuning import find_optimal_combination
def main() -> None:
parser = argparse.ArgumentParser(description="Benchmark tuning parallelization")
parser.add_argument(
"--workers",
type=int,
default=-1,
help="Number of workers (-1 for all CPUs)",
)
parser.add_argument(
"--reduction",
type=float,
default=0.02,
help="Data reduction target (default: 0.02 = 2%%)",
)
args = parser.parse_args()
# Load test data
data_path = Path(__file__).parent.parent / "docs/notebooks/testdata.csv"
if not data_path.exists():
print(f"Error: Test data not found at {data_path}")
print("Please ensure the examples_notebooks directory exists.")
return
print("Loading test data...")
raw = pd.read_csv(data_path, index_col=0, parse_dates=True)
print(f" Shape: {raw.shape}")
print(f" Columns: {list(raw.columns)}")
print()
n_workers = args.workers if args.workers > 0 else (os.cpu_count() or 1)
# Benchmark sequential
print("=" * 60)
print("Benchmark: Sequential (n_jobs=1)")
print("=" * 60)
start = time.perf_counter()
result_seq = find_optimal_combination(
raw,
data_reduction=args.reduction,
n_jobs=1,
show_progress=True,
)
time_sequential = time.perf_counter() - start
print(f" Time: {time_sequential:.2f}s")
print(
f" Optimal: {result_seq.n_clusters} periods, {result_seq.n_segments} segments"
)
print(f" RMSE: {result_seq.rmse:.4f}")
print(f" Configs tested: {len(result_seq.history)}")
print()
# Benchmark parallel (file-based)
print("=" * 60)
print(f"Benchmark: Parallel file-based (n_jobs={n_workers})")
print(" Data saved to temp file, workers load from disk")
print(" No DataFrame pickling - safe for sensitive data")
print("=" * 60)
start = time.perf_counter()
result_par = find_optimal_combination(
raw,
data_reduction=args.reduction,
n_jobs=args.workers,
show_progress=True,
)
time_parallel = time.perf_counter() - start
print(f" Time: {time_parallel:.2f}s")
print(f" Speedup vs sequential: {time_sequential / time_parallel:.2f}x")
print()
# Validation
assert math.isclose(result_par.rmse, result_seq.rmse, rel_tol=1e-6), (
"Parallel and sequential results differ (RMSE mismatch)"
)
assert result_par.n_clusters == result_seq.n_clusters, (
"Parallel and sequential results differ (n_clusters mismatch)"
)
assert result_par.n_segments == result_seq.n_segments, (
"Parallel and sequential results differ (n_segments mismatch)"
)
# Summary
print("=" * 60)
print("SUMMARY")
print("=" * 60)
print(
f" Optimal: {result_seq.n_clusters} periods, "
f"{result_seq.n_segments} segments, "
f"RMSE: {result_seq.rmse:.4f}"
)
print()
print(f" Sequential: {time_sequential:6.2f}s (baseline)")
print(
f" Parallel: {time_parallel:6.2f}s "
f"({time_sequential / time_parallel:.2f}x speedup)"
)
print()
print(" Security: No DataFrame pickling - only file paths passed to workers")
if __name__ == "__main__":
main()
================================================
FILE: codecov.yml
================================================
coverage:
status:
project:
default:
target: auto
threshold: 5%
comment:
layout: "reach,diff,flags,files"
behavior: default
require_changes: false
================================================
FILE: docs/api/SUMMARY.md
================================================
* tsam
* [api](tsam/api.md)
* [config](tsam/config.md)
* [exceptions](tsam/exceptions.md)
* [hyperparametertuning](tsam/hyperparametertuning.md)
* [periodAggregation](tsam/periodAggregation.md)
* [plot](tsam/plot.md)
* [representations](tsam/representations.md)
* [result](tsam/result.md)
* [timeseriesaggregation](tsam/timeseriesaggregation.md)
* [tuning](tsam/tuning.md)
* utils
* [durationRepresentation](tsam/utils/durationRepresentation.md)
* [k_maxoids](tsam/utils/k_maxoids.md)
* [k_medoids_contiguity](tsam/utils/k_medoids_contiguity.md)
* [k_medoids_exact](tsam/utils/k_medoids_exact.md)
* [segmentation](tsam/utils/segmentation.md)
================================================
FILE: docs/api/tsam/api.md
================================================
# tsam.api
::: tsam.api
================================================
FILE: docs/api/tsam/config.md
================================================
# tsam.config
::: tsam.config
================================================
FILE: docs/api/tsam/exceptions.md
================================================
# tsam.exceptions
::: tsam.exceptions
================================================
FILE: docs/api/tsam/hyperparametertuning.md
================================================
# tsam.hyperparametertuning
::: tsam.hyperparametertuning
================================================
FILE: docs/api/tsam/periodAggregation.md
================================================
# tsam.periodAggregation
::: tsam.periodAggregation
================================================
FILE: docs/api/tsam/plot.md
================================================
# tsam.plot
::: tsam.plot
================================================
FILE: docs/api/tsam/representations.md
================================================
# tsam.representations
::: tsam.representations
================================================
FILE: docs/api/tsam/result.md
================================================
# tsam.result
::: tsam.result
================================================
FILE: docs/api/tsam/timeseriesaggregation.md
================================================
# tsam.timeseriesaggregation
::: tsam.timeseriesaggregation
================================================
FILE: docs/api/tsam/tuning.md
================================================
# tsam.tuning
::: tsam.tuning
================================================
FILE: docs/api/tsam/utils/durationRepresentation.md
================================================
# tsam.utils.durationRepresentation
::: tsam.utils.durationRepresentation
================================================
FILE: docs/api/tsam/utils/k_maxoids.md
================================================
# tsam.utils.k_maxoids
::: tsam.utils.k_maxoids
================================================
FILE: docs/api/tsam/utils/k_medoids_contiguity.md
================================================
# tsam.utils.k_medoids_contiguity
::: tsam.utils.k_medoids_contiguity
================================================
FILE: docs/api/tsam/utils/k_medoids_exact.md
================================================
# tsam.utils.k_medoids_exact
::: tsam.utils.k_medoids_exact
================================================
FILE: docs/api/tsam/utils/segmentation.md
================================================
# tsam.utils.segmentation
::: tsam.utils.segmentation
================================================
FILE: docs/background/math.md
================================================
# Mathematical Description
The description of ETHOS.TSAM presented in the following is based on the review on time series aggregation methods by
[Hoffmann et al. (2020)](https://www.mdpi.com/1996-1073/13/3/641).
ETHOS.TSAM aggregates time series by reducing the number of time steps. Generally, time series can also be aggregated by
grouping similar time series, as illustrated in the upper right part of the figure below. Instead, ETHOS.TSAM decreases
the amount of time series data by merging adjacent time steps based on their similarity (segmentation) or by forming time
periods along the time axis and clustering those based on their similarity. This is shown in the middle right part and the
lower part of the figure below. The number of attributes to be clustered is thus not changed, and accordingly, ETHOS.TSAM is also
capable of clustering multi-dimensional time series without changing their dimensionality.
<div style="text-align: center">
<a href="https://www.mdpi.com/energies/energies-13-00641/article_deploy/html/images/energies-13-00641-g004.png">
<img src="https://www.mdpi.com/energies/energies-13-00641/article_deploy/html/images/energies-13-00641-g004.png" alt="Review Figure 1">
</a>
</div>
The process of clustering applied in tsam includes four different steps: preprocessing, clustering, adding extreme periods, and
backscaling. This is shown in the figure below.
<div style="text-align: center">
<a href="https://www.mdpi.com/energies/energies-13-00641/article_deploy/html/images/energies-13-00641-g009.png">
<img src="https://www.mdpi.com/energies/energies-13-00641/article_deploy/html/images/energies-13-00641-g009.png" alt="Review Figure 2">
</a>
</div>
The preprocessing mainly consists of an attribute-wise normalization of all time series in order to avoid overweighting of attributes
with larger scales during the clustering process:
$$
x_{a,s}=\frac{x'_{a,s}-\min{x'_a}}{\max{x'_a}-\min{x'_a}}
$$
Then, all time steps within the chosen periods (e.g. hourly time steps within daily periods) are realigned in such a way that each
period becomes its own row vector, or hyperdimensional point, whose dimensions are formed by the number of time steps within the period
for each attribute.
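As a minimal illustration of these two steps (a sketch with random data, not tsam's internal code), a single hourly attribute can be normalized and realigned into daily period vectors as follows:
```python
import numpy as np

x_raw = np.random.rand(8760) * 40.0        # one year of hourly values

# attribute-wise min-max normalization
x = (x_raw - x_raw.min()) / (x_raw.max() - x_raw.min())

# realign into periods: one row vector of 24 time steps per day
periods = x.reshape(365, 24)               # shape (N_p, N_t)
```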
Then, clustering is applied to these hyperdimensional points. Clustering generally strives to group data points in such a way that
points within a cluster are more similar to each other than data points from different clusters. An example for this is the k-means
clustering algorithm with the objective function to minimize the sum of all distances of all data points to their cluster centers as
given by:
$$
\min{\sum_{k=1}^{N_k}\sum_{p\in\mathbb{C}_k}}\text{dist}(x_p,c_k)^2
$$
With:
$$
\text{dist}(x_p,c_k)=\sqrt{\sum_{a=1}^{N_a}\sum_{t=1}^{N_t}(x_{p,a,t}-c_{k,a,t})^2}
$$
And:
$$
c_{k,a,t}=\frac{1}{\left | \mathbb{C}_k \right |}\sum_{p\in\mathbb{C}_k}x_{p,a,t}
$$
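For instance, the k-means step above can be reproduced with scikit-learn on the period matrix from the previous sketch (illustrative only, not tsam's internal implementation):
```python
from sklearn.cluster import KMeans

# minimize the sum of squared distances of all periods to their centers
km = KMeans(n_clusters=8, n_init=10, random_state=0).fit(periods)

labels = km.labels_             # cluster assignment of each period
centers = km.cluster_centers_   # shape (N_k, N_t): the centroids c_k
```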
Since some of the clustering methods in tsam do not preserve the average value of each time series, an optional
attribute-wise rescaling step can then be performed according to the following equation:
$$
c^*_{k,a,t}=c_{k,a,t}\frac{\sum_{p=1}^{N_p}\sum_{t=1}^{N_t}x_{p,a,t}}{\sum_{k=1}^{N_k}\left ( \left | \mathbb{C}_k \right |\sum_{t=1}^{N_t}c_{k,a,t} \right )} \qquad \forall \qquad k,a,t
$$
In an additional intermediate step the temporal resolution of the periods can also be decreased using segmentation.
In the end all time series are scaled back to their original scale:
$$
c'^*_{k,a,t}=c^*_{k,a,t}\left ( \max{x'_a}-\min{x'_a} \right ) + \min{x'_a} \qquad \forall \qquad a
$$
The output of tsam thus consists of clustered periods with different numbers of occurrences, made up of time segments with different lengths.
================================================
FILE: docs/further-reading.md
================================================
# Further Reading
ETHOS.TSAM was originally designed to reduce the computational load of large-scale energy system optimization models. If you are interested
in this purpose of time series aggregation, you can find a detailed review about the topic [here](https://www.mdpi.com/1996-1073/13/3/641).
If you are further interested in the impact of time series aggregation on the cost-optimal results of different energy system use cases,
you can find a publication which validates the methods and describes their capabilities via the following [link](https://www.sciencedirect.com/science/article/abs/pii/S0960148117309783).
A second publication introduces a method for modeling state variables (e.g. the state of charge of energy storage components) between the
aggregated typical periods, which can be found [here](https://www.sciencedirect.com/science/article/pii/S0306261918300242).
Last but not least, the potential of time series aggregation to simplify mixed-integer linear problems is investigated [here](https://www.mdpi.com/1996-1073/12/14/2825).
The publications about time series aggregation for energy system optimization models published alongside the development of ETHOS.TSAM are listed below:
* Kotzur et al. (2018):
[Impact of different time series aggregation methods on optimal energy system design](https://www.sciencedirect.com/science/article/abs/pii/S0960148117309783)
(open access: [arXiv:1708.00420](https://arxiv.org/abs/1708.00420))
* Kotzur et al. (2018):
[Time series aggregation for energy system design: Modeling seasonal storage](https://www.sciencedirect.com/science/article/pii/S0306261918300242)
(open access: [arXiv:1710.07593](https://arxiv.org/abs/1710.07593))
* Kannengiesser et al. (2019):
[Reducing Computational Load for Mixed Integer Linear Programming: An Example for a District and an Island Energy System](https://www.mdpi.com/1996-1073/12/14/2825)
* Hoffmann et al. (2020):
[A Review on Time Series Aggregation Methods for Energy System Models](https://www.mdpi.com/1996-1073/13/3/641)
* Hoffmann et al. (2021):
[Typical periods or typical time steps? A multi-model analysis to determine the optimal temporal aggregation for energy system models](https://www.sciencedirect.com/science/article/abs/pii/S0306261921011545)
* Hoffmann et al. (2022):
[The Pareto-Optimal Temporal Aggregation of Energy System Models](https://www.sciencedirect.com/science/article/abs/pii/S0306261922004342)
================================================
FILE: docs/gen_ref_pages.py
================================================
"""Auto-generate API reference pages from Python source modules."""
from pathlib import Path
import mkdocs_gen_files
nav = mkdocs_gen_files.Nav()
src = Path("src")
for path in sorted(src.rglob("*.py")):
module_path = path.relative_to(src).with_suffix("")
doc_path = path.relative_to(src).with_suffix(".md")
full_doc_path = Path("api", doc_path)
parts = tuple(module_path.parts)
# Skip __init__ and __pycache__
if parts[-1] == "__init__" or "__pycache__" in parts:
continue
# Skip private modules
if any(part.startswith("_") and part != "__init__" for part in parts):
continue
nav[parts] = doc_path.as_posix()
with mkdocs_gen_files.open(full_doc_path, "w") as fd:
identifier = ".".join(parts)
fd.write(f"# {identifier}\n\n::: {identifier}\n")
mkdocs_gen_files.set_edit_path(full_doc_path, path)
with mkdocs_gen_files.open("api/SUMMARY.md", "w") as nav_file:
nav_file.writelines(nav.build_literate_nav())
================================================
FILE: docs/getting-started.md
================================================
# Getting started
## Basic Workflow
Run the aggregation and access the results:
=== "v3"
```python
import pandas as pd
import tsam
from tsam import ClusterConfig, SegmentConfig
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
# Configure and run aggregation
result = tsam.aggregate(
df,
n_clusters=8,
period_duration='1D',
cluster=ClusterConfig(
method='hierarchical',
representation='distribution_minmax',
),
segments=SegmentConfig(n_segments=8),
)
# Access results
cluster_representatives = result.cluster_representatives
print(f"RMSE: {result.accuracy.rmse.mean():.4f}")
reconstructed = result.reconstructed
cluster_representatives.to_csv('cluster_representatives.csv')
```
=== "Legacy"
```python
import pandas as pd
import tsam.timeseriesaggregation as tsam_legacy
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
# Configure and run aggregation
aggregation = tsam_legacy.TimeSeriesAggregation(
df,
noTypicalPeriods=8,
hoursPerPeriod=24,
clusterMethod='hierarchical',
representationMethod="distributionAndMinMaxRepresentation",
segmentation=True, noSegments=8,
)
# Access results
cluster_representatives = aggregation.createTypicalPeriods()
print(f"RMSE: {aggregation.accuracyIndicators()['RMSE'].mean():.4f}")
reconstructed = aggregation.predictOriginalData()
cluster_representatives.to_csv('cluster_representatives.csv')
```
## Hypertuned Aggregation
If you don't know which number of periods or segments to choose, specify a target data reduction and let the tuner search for the best configuration:
=== "v3"
```python
import pandas as pd
from tsam.tuning import find_optimal_combination
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
result = find_optimal_combination(
df,
data_reduction=0.02,
period_duration='1D',
n_jobs=-1,
show_progress=True,
)
print(f"Optimal: {result.n_clusters} periods x {result.n_segments} segments")
print(f"RMSE: {result.rmse:.4f}")
cluster_representatives = result.best_result.cluster_representatives
```
=== "Legacy"
```python
import pandas as pd
import tsam.timeseriesaggregation as tsam_legacy
from tsam.hyperparametertuning import HyperTunedAggregations
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
aggregation = tsam_legacy.TimeSeriesAggregation(
df,
hoursPerPeriod=24,
segmentation=True,
)
tuner = HyperTunedAggregations(aggregation)
noSegments, noTypicalPeriods, RMSE = tuner.identifyOptimalSegmentPeriodCombination(
dataReduction=0.02,
)
print(f"Optimal: {noTypicalPeriods} periods x {noSegments} segments")
print(f"RMSE: {RMSE:.4f}")
```
!!! tip
    Tuning can be time-consuming. Run it once, save the resulting period and segment counts, and use them as fixed values in production.
For exploring the full Pareto front of period/segment combinations:
=== "v3"
!!! tip
Use `timesteps` to only evaluate specific timestep counts instead of the full search space for huge speedups.
```python
import pandas as pd
from tsam.tuning import find_pareto_front
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
pareto = find_pareto_front(
df,
timesteps=range(10, 500, 50),
n_jobs=-1,
)
pareto.summary
pareto.plot()
```
=== "Legacy"
```python
import pandas as pd
import tsam.timeseriesaggregation as tsam_legacy
from tsam.hyperparametertuning import HyperTunedAggregations
df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
aggregation = tsam_legacy.TimeSeriesAggregation(
df,
hoursPerPeriod=24,
segmentation=True,
)
tuner = HyperTunedAggregations(aggregation)
tuner.identifyParetoOptimalAggregation(untilTotalTimeSteps=500)
# Results in tuner._periodHistory, tuner._segmentHistory, tuner._RMSEHistory
```
See the [tuning notebook](notebooks/tuning.ipynb) for a detailed walkthrough, and the [scientific paper](https://www.sciencedirect.com/science/article/abs/pii/S0306261922004342) for the methodology behind it.
================================================
FILE: docs/glossary.md
================================================
# Glossary
Key concepts used in the ETHOS.TSAM API:
| Concept | Description |
|---------|-------------|
| **Period** | A fixed-length time window (e.g., 24 hours = 1 day). The original time series is divided into periods for clustering. |
| **Typical Period** | A representative period selected or computed to represent a cluster of similar periods. |
| **Cluster** | A group of similar original periods. Each cluster is represented by one typical period. |
| **Segment** | A subdivision within a period. Consecutive timesteps are grouped into segments to reduce temporal resolution. |
| **Timestep** | A single time point within a period (e.g., one hour in a 24-hour period). |
| **Duration Curve** | A sorted representation of values within a period (highest to lowest). Used with `use_duration_curves=True` to cluster by value distribution rather than temporal pattern. |
| `n_clusters` | Number of clusters to create. Each cluster is represented by one typical period. |
| `n_segments` | Number of segments per period. If not specified, equals timesteps per period (no segmentation). |
| `period_duration` | Length of each period. Accepts int/float (hours) or pandas Timedelta strings (e.g., `24`, `'24h'`, `'1d'`). |
| `temporal_resolution` | Time resolution of input data. Accepts float (hours) or pandas Timedelta strings (e.g., `1.0`, `'1h'`, `'15min'`). If not provided, inferred from the datetime index. |
| `cluster_assignments` | Array mapping each original period to its cluster index (0 to n_clusters-1). |
| `cluster_weights` | Dictionary mapping cluster index to occurrence count (how many original periods each cluster represents). |
| `segment_durations` | Nested tuple with duration (in timesteps) for each segment in each typical period. |
| `cluster_representatives` | MultiIndex DataFrame with aggregated data. Index levels are (cluster, timestep) or (cluster, segment) if segmented. |
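To see how these terms map onto the objects returned by `tsam.aggregate()`, a short sketch (assuming the test data set used elsewhere in the documentation):
```python
import pandas as pd
import tsam

df = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
result = tsam.aggregate(df, n_clusters=8, period_duration='24h')

result.cluster_assignments             # one cluster index per original period
result.cluster_weights                 # {cluster index: occurrence count}
result.cluster_representatives.loc[0]  # all timesteps of typical period 0
```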
================================================
FILE: docs/index.md
================================================
<div class="landing-logos">
<div class="tsam-logo-light">
<img src="assets/tsam-logo-light.svg" alt="ETHOS.TSAM">
</div>
<div class="tsam-logo-dark">
<img src="assets/tsam-logo-dark.svg" alt="ETHOS.TSAM">
</div>
<div class="jsa-logo">
<a href="https://www.fz-juelich.de/en/iek/iek-3">
<img src="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/ICE2_Logos/JSA-Header.svg" alt="Jülich System Analysis">
</a>
</div>
</div>
# Introduction
## ETHOS.TSAM - Time Series Aggregation Module
ETHOS.TSAM is a Python package that uses different machine learning algorithms to aggregate time series into typical periods. It is applicable
to all types of time series, whether weather data, price data, behavioral data, electrical load data, or all of them together. The module can significantly reduce input
time series for mathematical models, and thereby the models' complexity and computational time.
To use ETHOS.TSAM, first [install ETHOS.TSAM](installation.md) and then [start aggregating time series](getting-started.md).
ETHOS.TSAM is available open source on [GitHub](https://github.com/FZJ-IEK3-VSA/tsam)
and is open for collaboration, help requests, etc.
In case you use ETHOS.TSAM in a scientific publication, we kindly request you to cite one of our publications listed in the
[Further Reading Section](further-reading.md).
ETHOS.TSAM is part of the [Energy Transformation PatHway Optimization Suite (ETHOS) at ICE-2](https://www.fz-juelich.de/de/ice/ice-2/leistungen/model-services). It is tightly integrated into [ETHOS.FINE](https://github.com/FZJ-IEK3-VSA/FINE) to reduce the temporal complexity of energy system models.
================================================
FILE: docs/installation.md
================================================
# Installation
It is recommended to install ETHOS.TSAM within its own environment. If you are not familiar with python environments, please consider reading some [external documentation](https://realpython.com/python-virtual-environments-a-primer/).
**Quick Install (Recommended)**
The fastest way to install ETHOS.TSAM is using [uv](https://docs.astral.sh/uv/):
```bash
uv pip install tsam
```
**Alternative Installation Methods**
Using pip:
```bash
pip install tsam
```
Using conda-forge:
```bash
conda install tsam -c conda-forge
```
**Creating an Isolated Environment**
With uv (recommended):
```bash
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv pip install tsam
```
With conda/mamba:
```bash
mamba create -n tsam_env python pip
mamba activate tsam_env
pip install tsam
```
## Local Installation for Development
Clone the repository:
```bash
git clone https://github.com/FZJ-IEK3-VSA/tsam.git
cd tsam
```
**Using uv (Recommended)**
```bash
uv venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv pip install -e ".[develop]"
```
**Using conda**
```bash
conda env create --file=environment.yml
conda activate tsam_dev
pip install -e ".[develop]"
```
## Development Tools
ETHOS.TSAM uses modern Python development tools for code quality:
**Linting and Formatting with Ruff**
[Ruff](https://docs.astral.sh/ruff/) is used for fast linting and formatting:
```bash
# Check for issues
ruff check src/ test/
# Auto-fix issues
ruff check src/ test/ --fix
# Format code
ruff format src/ test/
```
**Type Checking with Mypy**
[Mypy](https://mypy.readthedocs.io/) is used for static type checking:
```bash
mypy src/tsam/
```
**Pre-commit Hooks**
Pre-commit hooks automatically run linting and formatting on every commit:
```bash
# Install pre-commit
uv pip install pre-commit
# Set up hooks (run once after cloning)
pre-commit install
# Run manually on all files
pre-commit run --all-files
```
**Running Tests**
Tests are run using pytest:
```bash
# Run all tests
uv run pytest test/
# Run tests with coverage
uv run pytest test/ --cov=tsam
# Run tests in parallel
uv run pytest test/ -n auto
```
## Installation of an Optimization Solver
Some clustering algorithms in ETHOS.TSAM are based on Mixed-Integer Linear Programming. An appropriate solver accessible by [Pyomo](https://github.com/Pyomo/pyomo/) is required.
**HiGHS (Default)**
[HiGHS](https://github.com/ERGO-Code/HiGHS) is installed by default and works well for most use cases.
**Commercial Solvers**
For better performance on large problems, commercial solvers are recommended if you have access to a license:
* [Gurobi](https://www.gurobi.com/)
* [CPLEX](https://www.ibm.com/products/ilog-cplex-optimization-studio)
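To verify that a solver is reachable before running a MILP-based clustering, Pyomo can be queried directly (a sketch; `appsi_highs` and `gurobi` are the usual Pyomo names for HiGHS and Gurobi):
```python
from pyomo.environ import SolverFactory

# Both calls return a truthy value if Pyomo can locate the solver
print(SolverFactory('appsi_highs').available())
print(SolverFactory('gurobi').available())
```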
================================================
FILE: docs/javascripts/mathjax.js
================================================
window.MathJax = {
tex: {
inlineMath: [["\\(", "\\)"]],
displayMath: [["\\[", "\\]"]],
processEscapes: true,
processEnvironments: true,
},
options: {
ignoreHtmlClass: ".*|",
processHtmlClass: "arithmatex",
},
};
// Re-typeset on instant navigation (needed for navigation.instant)
document$.subscribe(function () {
MathJax.startup.output.clearCache();
MathJax.typesetClear();
MathJax.texReset();
MathJax.typesetPromise();
});
================================================
FILE: docs/legal-notice.md
================================================
# Legal Notice
## License
ETHOS.TSAM is published under the [MIT License](https://opensource.org/licenses/MIT).
## Acknowledgement
The authors acknowledge the financial support by the Federal Ministry
for Economic Affairs and Energy of Germany in the project METIS (project number 03ET4064A).
<div style="text-align: center">
<a href="https://www.bmwi.de/Navigation/EN/Home/home.html">
<img src="https://www.bmwi.de/SiteGlobals/BMWI/StyleBundles/Bilder/bmwi_logo_en.svgz?__blob=normal&v=10" width="200" alt="METIS Logo">
</a>
</div>
Further, this work was supported by the Helmholtz Association under the Joint Initiative ["Energy System 2050 - A Contribution of
the Research Field Energy"](https://www.helmholtz.de/en/research/energy/energy_system_2050/).
<div style="text-align: center">
<a href="https://www.helmholtz.de/en/">
<img src="https://raw.githubusercontent.com/FZJ-IEK3-VSA/README_assets/v.1.0.0/Helmholtz_Logos/Helmholtz-Logo-Dark-Blue-RGB.svg" width="200" alt="Helmholtz Logo">
</a>
</div>
================================================
FILE: docs/migration-guide.md
================================================
# Migrating from ETHOS.TSAM v2 to v3 { #migration-guide }
ETHOS.TSAM v3 replaces the class-based API with a functional API.
The old `TimeSeriesAggregation` class still works but is deprecated
and will be removed in a future release.
This guide covers every change you need to make.
## Quick before-and-after
=== "v3 (new)"
```python
import tsam
from tsam import ClusterConfig, SegmentConfig, ExtremeConfig
result = tsam.aggregate(
df,
n_clusters=8,
period_duration=24,
cluster=ClusterConfig(
method='hierarchical',
representation='distribution_minmax',
),
segments=SegmentConfig(n_segments=12),
preserve_column_means=True,
extremes=ExtremeConfig(max_value=['demand']),
)
representatives = result.cluster_representatives
reconstructed = result.reconstructed
accuracy = result.accuracy.summary
```
=== "v2 (old)"
```python
import tsam.timeseriesaggregation as tsam
agg = tsam.TimeSeriesAggregation(
df,
noTypicalPeriods=8,
hoursPerPeriod=24,
clusterMethod='hierarchical',
representationMethod='distributionAndMinMaxRepresentation',
segmentation=True,
noSegments=12,
rescaleClusterPeriods=True,
addPeakMax=['demand'],
)
representatives = agg.createTypicalPeriods()
reconstructed = agg.predictOriginalData()
accuracy = agg.accuracyIndicators()
```
## Parameter mapping
The table below maps every old parameter to its v3 equivalent.
| Old (v2) | New (v3) | Notes |
|----------|----------|-------|
| `timeSeries` | `data` | Renamed. |
| `noTypicalPeriods` | `n_clusters` | |
| `hoursPerPeriod` | `period_duration` | Also accepts strings (`'24h'`, `'1d'`). |
| `resolution` | `temporal_resolution` | Also accepts strings (`'1h'`, `'15min'`). |
| `clusterMethod` | `ClusterConfig(method=...)` | See [cluster method values](#cluster-method-values). |
| `representationMethod` | `ClusterConfig(representation=...)` | See [representation values](#representation-method-values). |
| `weightDict` | `weights` | Top-level kwarg of `aggregate()`. |
| `sameMean` | `ClusterConfig(normalize_column_means=...)` | |
| `sortValues` | `ClusterConfig(use_duration_curves=...)` | |
| `evalSumPeriods` | `ClusterConfig(include_period_sums=...)` | |
| `solver` | `ClusterConfig(solver=...)` | |
| `segmentation` | Pass a `SegmentConfig` or omit it. | No boolean flag needed. |
| `noSegments` | `SegmentConfig(n_segments=...)` | |
| `segmentRepresentationMethod` | `SegmentConfig(representation=...)` | Uses short names (see below). |
| `rescaleClusterPeriods` | `preserve_column_means` | Top-level kwarg of `aggregate()`. |
| `rescaleExcludeColumns` | `rescale_exclude_columns` | |
| `roundOutput` | `round_decimals` | |
| `numericalTolerance` | `numerical_tolerance` | |
| `extremePeriodMethod` | `ExtremeConfig(method=...)` | See [extreme method values](#extreme-method-values). |
| `addPeakMax` | `ExtremeConfig(max_value=...)` | |
| `addPeakMin` | `ExtremeConfig(min_value=...)` | |
| `addMeanMax` | `ExtremeConfig(max_period=...)` | |
| `addMeanMin` | `ExtremeConfig(min_period=...)` | |
| `distributionPeriodWise` | `Distribution(scope="cluster"\|"global")` | See [representation objects](#typed-representation-objects). |
| `representationDict` | `MinMaxMean(max_columns=[...], min_columns=[...])` | See [representation objects](#typed-representation-objects). |
### Cluster method values { #cluster-method-values }
| Old (v2) | New (v3) |
|----------|----------|
| `'averaging'` | `'averaging'` |
| `'k_means'` | `'kmeans'` |
| `'k_medoids'` | `'kmedoids'` |
| `'k_maxoids'` | `'kmaxoids'` |
| `'hierarchical'` | `'hierarchical'` |
| `'adjacent_periods'` | `'contiguous'` |
### Representation method values { #representation-method-values }
| Old (v2) | New (v3) |
|----------|----------|
| `'meanRepresentation'` | `'mean'` |
| `'medoidRepresentation'` | `'medoid'` |
| `'maxoidRepresentation'` | `'maxoid'` |
| `'distributionRepresentation'` | `'distribution'` |
| `'durationRepresentation'` | `'distribution'` (both old parameters meant the same) |
| `'distributionAndMinMaxRepresentation'` | `'distribution_minmax'` |
| `'minmaxmeanRepresentation'` | `'minmax_mean'` |
### Typed representation objects { #typed-representation-objects }
For `distribution`, `distribution_minmax`, and `minmax_mean`
representations, v3 offers typed objects that expose options previously
controlled by separate parameters (`distributionPeriodWise`,
`representationDict`). Plain string shortcuts still work for the
common cases.
**Distribution with global scope** (`distributionPeriodWise=False`):
=== "v3 (new)"
```python
from tsam import Distribution
result = tsam.aggregate(
df,
n_clusters=8,
cluster=ClusterConfig(
representation=Distribution(scope="global"),
),
)
```
=== "v2 (old)"
```python
agg = tsam.TimeSeriesAggregation(
df,
noTypicalPeriods=8,
representationMethod='distributionRepresentation',
distributionPeriodWise=False,
)
```
**Distribution with min/max preservation and global scope**:
=== "v3 (new)"
```python
from tsam import Distribution
result = tsam.aggregate(
df,
n_clusters=8,
cluster=ClusterConfig(
representation=Distribution(scope="global", preserve_minmax=True),
),
)
```
=== "v2 (old)"
```python
agg = tsam.TimeSeriesAggregation(
df,
noTypicalPeriods=8,
representationMethod='distributionAndMinMaxRepresentation',
distributionPeriodWise=False,
)
```
**Per-column min/max/mean** (`representationDict`):
=== "v3 (new)"
```python
from tsam import MinMaxMean
result = tsam.aggregate(
df,
n_clusters=8,
cluster=ClusterConfig(
representation=MinMaxMean(
max_columns=['GHI'],
min_columns=['T', 'Load'],
),
),
)
```
=== "v2 (old)"
```python
agg = tsam.TimeSeriesAggregation(
df,
noTypicalPeriods=8,
representationMethod='minmaxmeanRepresentation',
representationDict={'GHI': 'max', 'T': 'min', 'Wind': 'mean', 'Load': 'min'},
)
```
Columns not listed in `max_columns` or `min_columns` default to mean.
!!! note
The string shortcuts `"distribution"`, `"distribution_minmax"`, and
`"minmax_mean"` remain valid and are equivalent to:
- `"distribution"` -> `Distribution()`
- `"distribution_minmax"` -> `Distribution(preserve_minmax=True)`
- `"minmax_mean"` -> `MinMaxMean()` (all columns default to mean)
### Extreme method values { #extreme-method-values }
| Old (v2) | New (v3) |
|----------|----------|
| `'None'` | Omit the `extremes` parameter entirely. |
| `'append'` | `'append'` |
| `'replace_cluster_center'` | `'replace'` |
| `'new_cluster_center'` | `'new_cluster'` |
## Default changes { #migration-defaults }
| Parameter | Old default | New default | Impact |
|-----------|-------------|-------------|--------|
| `n_clusters` | 10 | *required* | Code that relied on the default must now pass a value explicitly. |
| `SegmentConfig(representation=...)` | Inherited from `representationMethod` | `"mean"` | In v2, omitting `segmentRepresentationMethod` caused segments to inherit the cluster representation (e.g. distribution). In v3, `SegmentConfig` always defaults to `"mean"`. If you relied on the implicit inheritance, pass the representation explicitly: `SegmentConfig(n_segments=12, representation=Distribution(scope="global"))` |
## Accessing results
The old API returned raw DataFrames and arrays from methods you had to
call in sequence. The new API returns a single `AggregationResult`
object with everything attached.
| Old (v2) | New (v3) |
|----------|----------|
| `agg.createTypicalPeriods()` | `result.cluster_representatives` |
| `agg.predictOriginalData()` | `result.reconstructed` |
| `agg.accuracyIndicators()` | `result.accuracy.summary` |
| `agg.clusterOrder` | `result.cluster_assignments` |
| `agg.clusterPeriodNoOccur` | `result.cluster_weights` |
| `agg.clusterCenterIndices` | `result.clustering.cluster_centers` |
| `agg.timeSeries` | `result.original` |
| *(no equivalent)* | `result.residuals` |
| *(no equivalent)* | `result.plot.compare()` |
The `cluster_representatives` DataFrame now uses a
`MultiIndex(cluster, timestep)` instead of
`MultiIndex(PeriodNum, TimeStep)`.
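For example, selecting a single typical period from the new index layout (a sketch assuming the level names `cluster` and `timestep` shown above):
```python
reps = result.cluster_representatives

reps.loc[0]                    # all timesteps (or segments) of cluster 0
reps.xs(0, level="cluster")    # the same selection, by level name
```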
## Clustering transfer
Reusing a clustering on new data used to require manually passing
`predefClusterOrder`, `predefClusterCenterIndices`, etc.
In v3 this is a single method call:
=== "v3 (new)"
```python
# Cluster on one dataset
result = tsam.aggregate(df_wind, n_clusters=8)
# Apply same clustering to another dataset
result_all = result.clustering.apply(df_all)
# Save and load clusterings
result.clustering.to_json("clustering.json")
from tsam import ClusteringResult
clustering = ClusteringResult.from_json("clustering.json")
result = clustering.apply(df)
```
=== "v2 (old)"
```python
# Required manually passing multiple parameters
agg2 = tsam.TimeSeriesAggregation(
df_all,
predefClusterOrder=agg.clusterOrder,
predefClusterCenterIndices=agg.clusterCenterIndices,
...
)
```
## Plotting
Plotting has moved from `matplotlib` to `plotly`.
Instead of calling separate functions, use the `result.plot` accessor:
```python
result.plot.compare() # Duration curves: original vs reconstructed
result.plot.residuals() # Reconstruction errors
result.plot.heatmap() # Heatmap of cluster representatives
result.plot.cluster_assignments() # Period-to-cluster mapping
result.plot.cluster_weights() # Cluster occurrence counts
result.plot.accuracy() # Accuracy metrics bar chart
```
## Hyperparameter tuning
The `HyperTunedAggregations` class is replaced by two functions in
`tsam.tuning`.
### `identifyOptimalSegmentPeriodCombination` -> `find_optimal_combination`
=== "v3 (new)"
```python
import tsam
from tsam import ClusterConfig
result = tsam.tuning.find_optimal_combination(
df,
data_reduction=0.01,
period_duration=24,
cluster=ClusterConfig(method="hierarchical"),
segment_representation="mean",
)
segments = result.n_segments
periods = result.n_clusters
rmse = result.rmse
best = result.best_result # AggregationResult
```
=== "v2 (old)"
```python
from tsam.hyperparametertuning import HyperTunedAggregations
import tsam.timeseriesaggregation as tsam_legacy
agg = HyperTunedAggregations(
tsam_legacy.TimeSeriesAggregation(
df,
hoursPerPeriod=24,
clusterMethod="hierarchical",
representationMethod="meanRepresentation",
segmentation=True,
)
)
segments, periods, rmse = agg.identifyOptimalSegmentPeriodCombination(
dataReduction=0.01,
)
```
### `identifyParetoOptimalAggregation` -> `find_pareto_front`
=== "v3 (new)"
```python
pareto = tsam.tuning.find_pareto_front(
df,
period_duration=24,
max_timesteps=500,
cluster=ClusterConfig(method="hierarchical"),
segment_representation="mean",
)
print(pareto.summary) # DataFrame of all tested configs
pareto.plot() # Interactive Plotly visualization
```
=== "v2 (old)"
```python
agg.identifyParetoOptimalAggregation(untilTotalTimeSteps=500)
for a in agg.aggregationHistory:
print(a.totalAccuracyIndicators()["RMSE"])
```
The `TuningResult` returned by both functions also supports
`find_by_timesteps(target)` and `find_by_rmse(threshold)` for
querying specific configurations, and iteration via `for r in result`.
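A short sketch of these helpers (assuming each entry exposes the same `n_clusters`, `n_segments`, and `rmse` fields as the optimum):
```python
# Configuration closest to 500 total timesteps
near_500 = result.find_by_timesteps(500)

# Configuration selected for an RMSE threshold of 0.05
good_enough = result.find_by_rmse(0.05)

# Iterate over all tested configurations
for r in result:
    print(r.n_clusters, r.n_segments, r.rmse)
```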
### Helper functions
| Old (v2) | New (v3) |
|----------|----------|
| `getNoPeriodsForDataReduction(n, segs, red)` | `tsam.tuning.find_clusters_for_reduction(n, segs, red)` |
| `getNoSegmentsForDataReduction(n, periods, red)` | `tsam.tuning.find_segments_for_reduction(n, periods, red)` |
### New capabilities
- **Parallel execution**: Pass `n_jobs=-1` to use all CPU cores.
- **Targeted exploration**: `find_pareto_front` accepts a `timesteps`
sequence (e.g., `range(10, 500, 10)`) for faster targeted search
instead of full steepest descent.
- **Built-in visualization**: `result.plot()` shows an interactive
RMSE-vs-timesteps chart.
## Performance
tsam v3 is significantly faster than v2.3.9, primarily due to replacing
pandas loops with vectorized numpy operations.
| Configuration | constant | testdata | wide | with_zero_col |
|---------------|----------|----------|------|---------------|
| hierarchical (default) | 2x | 44x | 25x | 42x |
| hierarchical (distribution) | 5x | 55x | 35x | 51x |
| averaging | 5x | 77x | 66x | 74x |
| contiguous | 5x | 54x | 50x | 53x |
| distribution (global) | 2x | 16x | 7x | 13x |
| kmeans | 1.4x | 4x | 6x | 6x |
| kmaxoids | 1.3x | 1.4x | 1.4x | 1.4x |
??? info "Key optimizations"
- **`predictOriginalData()`**: Vectorized indexing replaces per-period
`.unstack()` loop (~290x function speedup).
- **`durationRepresentation()`**: numpy 3D operations replace nested
pandas loops (~8x function speedup, contributing to the distribution
config gains above).
- **`_rescaleClusterPeriods()`**: numpy 3D arrays replace pandas
MultiIndex operations (~11x function speedup).
Iterative methods (kmeans, kmedoids, kmaxoids) show modest gains because
the solver itself dominates runtime.
Use `benchmarks/bench.py` to run your own comparisons:
```bash
pytest benchmarks/bench.py --benchmark-save=my_run
```
## Result consistency and reproducibility
??? info "Cross-platform reproducibility"
v2.3.9 used numpy's default unstable sort (`introsort`) in
`durationRepresentation()`, which does not guarantee a specific order
for tied values. In practice, this caused different results on different
platforms (macOS vs Linux vs Windows) for distribution representations.
v3 fixes this by using `kind="stable"` (mergesort) for all sorting
operations and rounding floating-point means to 10 decimal places before
tie-breaking. This guarantees **identical results across macOS, Linux,
and Windows** for all configurations.
??? info "Consistency with v2.3.9"
As a consequence of the stable sort fix, 4 distribution-related
configurations produce slightly different results compared to v2.3.9:
- `hierarchical_distribution`
- `hierarchical_distribution_minmax`
- `distribution_global`
- `distribution_minmax_global`
The stable sort breaks ties by position rather than arbitrarily, and
rounding absorbs ~1e-16 floating-point noise that previously created
artificial ordering among effectively-equal means. This changes the
assignment of representative values to time steps, but preserves all
statistical properties (same distribution, same min/max, same weighted
mean).
All other 23 configurations (hierarchical with medoid/mean/maxoid,
averaging, contiguous, kmeans, kmedoids, kmaxoids, minmaxmean,
segmentation, extremes) are bit-for-bit identical to v2.3.9.
Result stability is enforced by two test layers:
1. **Golden regression tests** (`test/test_golden_regression.py`):
148 tests compare both APIs against stored CSV baselines. Any code
change that alters output values will fail these tests.
2. **Old/new API equivalence tests** (`test/test_old_new_equivalence.py`):
296 tests verify that the legacy `TimeSeriesAggregation` class and
the new `tsam.aggregate()` function produce identical results.
If a future release intentionally changes results (e.g., improved
algorithm), the golden files will be regenerated and the change
documented in the changelog.
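The tie-breaking difference can be illustrated with plain numpy (a standalone illustration, not tsam code):
```python
import numpy as np

a = np.array([1.0, 2.0, 1.0, 2.0, 1.0])

# The default introsort gives no ordering guarantee for the tied 1.0s ...
np.argsort(a)
# ... while a stable sort always keeps their original positions:
np.argsort(a, kind="stable")    # -> array([0, 2, 4, 1, 3])
```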
## Suppressing warnings
During migration you can silence the deprecation warnings:
```python
import warnings
from tsam import LegacyAPIWarning
warnings.filterwarnings("ignore", category=LegacyAPIWarning)
```
## Removed parameters
`prepareEnersysInput()`
: Removed. Access result properties directly instead.
================================================
FILE: docs/notebooks/building_energy_system.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Building Energy System\n",
"\n",
"Optimal combination of segments and periods for building energy supply systems.\n",
"\n",
"Author: Leander Kotzur"
]
},
{
"cell_type": "markdown",
"id": "1",
"metadata": {},
"source": [
"Import pandas and the relevant time series aggregation class"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig, SegmentConfig\n",
"from tsam.tuning import find_pareto_front\n",
"\n",
"pio.renderers.default = \"notebook_connected\""
]
},
{
"cell_type": "markdown",
"id": "3",
"metadata": {},
"source": [
"### Input data "
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"Read in time series from testdata.csv with pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)\n",
"raw = raw.rename(\n",
" columns={\n",
" \"T\": \"Temperature [°C]\",\n",
" \"Load\": \"Load [kW]\",\n",
" \"Wind\": \"Wind [m/s]\",\n",
" \"GHI\": \"Solar [W/m²]\",\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"raw = raw.drop(\n",
" columns=[\n",
" \"Wind [m/s]\",\n",
" ],\n",
")"
]
},
{
"cell_type": "markdown",
"id": "7",
"metadata": {},
"source": [
"Use tsam's built-in heatmap plotting for visual comparison of the time series"
]
},
{
"cell_type": "markdown",
"id": "8",
"metadata": {},
"source": [
"Plot an example series - in this case the temperature"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"# Original data heatmaps using tsam.unstack_to_periods() with plotly\n",
"unstacked = tsam.unstack_to_periods(raw, period_duration=24)\n",
"for col in raw.columns:\n",
" px.imshow(\n",
" unstacked[col].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": col},\n",
" title=f\"Original {col}\",\n",
" aspect=\"auto\",\n",
" ).show()"
]
},
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"### Tune a hierarchical aggregation with segments in combination with distribution representation\n",
"\n",
"Use the new `find_pareto_front()` function to explore the Pareto-optimal combinations."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"pareto_results = find_pareto_front(\n",
" raw,\n",
" period_duration=24,\n",
" max_timesteps=100,\n",
" cluster=ClusterConfig(\n",
" method=\"hierarchical\",\n",
" representation=\"distribution\",\n",
" ),\n",
" n_jobs=-1,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "12",
"metadata": {},
"source": [
"And determine the pareto optimal aggregation up to 100 total time steps. This may take some time..."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13",
"metadata": {},
"outputs": [],
"source": [
"# Show the last result in the Pareto front\n",
"last_result = pareto_results[-1]\n",
"print(\n",
" f\"Final configuration: {last_result.n_clusters} periods, {last_result.n_segments} segments\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "14",
"metadata": {},
"source": [
"And show the results for the last aggregation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
"source": [
"# Reconstruct the data from the last Pareto result\n",
"reconstructed = last_result.reconstructed"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16",
"metadata": {},
"outputs": [],
"source": [
"# Reconstructed data heatmaps from last tuned aggregation\n",
"unstacked_recon = tsam.unstack_to_periods(reconstructed, period_duration=24)\n",
"for col in reconstructed.columns:\n",
" px.imshow(\n",
" unstacked_recon[col].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": col},\n",
" title=f\"Reconstructed {col}\",\n",
" aspect=\"auto\",\n",
" ).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17",
"metadata": {},
"outputs": [],
"source": [
"last_result.n_segments"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
"source": [
"last_result.n_clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19",
"metadata": {},
"outputs": [],
"source": [
"# Example with specific configuration using distribution_minmax representation\n",
"result = tsam.aggregate(\n",
" raw,\n",
" n_clusters=14,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(\n",
" method=\"hierarchical\",\n",
" representation=\"distribution_minmax\",\n",
" ),\n",
" segments=SegmentConfig(n_segments=8),\n",
" preserve_column_means=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"metadata": {},
"outputs": [],
"source": [
"# Reconstructed data heatmaps with 8 segments and 14 periods\n",
"recon = result.reconstructed\n",
"unstacked_recon2 = tsam.unstack_to_periods(recon, period_duration=24)\n",
"for col in recon.columns:\n",
" px.imshow(\n",
" unstacked_recon2[col].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": col},\n",
" title=f\"Reconstructed {col} (8 seg, 14 periods)\",\n",
" aspect=\"auto\",\n",
" ).show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/clustering_methods.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clustering Methods\n",
"\n",
"This notebook demonstrates all clustering methods and configuration options available in tsam.\n",
"\n",
"## Available Methods\n",
"\n",
"| Method | Description | Best For |\n",
"|--------|-------------|----------|\n",
"| `hierarchical` | Agglomerative hierarchical clustering | General purpose, recommended default |\n",
"| `kmeans` | K-means with centroids | Fast clustering, large datasets |\n",
"| `kmedoids` | K-medoids (MILP exact) | Optimal solution, smaller datasets (slow) |\n",
"| `kmaxoids` | Selects most dissimilar periods | Capturing extremes |\n",
"| `contiguous` | Hierarchical with temporal constraint | Storage modeling, seasonal patterns |\n",
"| `averaging` | Sequential period averaging | Simple baseline |\n",
"\n",
"**Tip:** For medoid-based clustering on large datasets, use `hierarchical` with `representation=\"medoid\"` instead of `kmedoids`.\n",
"\n",
"## Key Configuration Options\n",
"\n",
"| Option | Description |\n",
"|--------|-------------|\n",
"| `weights` | Per-column importance weights (top-level parameter of `aggregate()`) |\n",
"| `representation` | How to represent cluster centers (mean, medoid, maxoid, distribution, distribution_minmax) |\n",
"| `normalize_column_means` | Normalize columns to same mean before clustering |\n",
"| `use_duration_curves` | Match by value distribution rather than timing |"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig\n",
"\n",
"pio.renderers.default = \"notebook_connected\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Input Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The test dataset contains hourly time series for one year with four columns:\n",
"- **GHI**: Global Horizontal Irradiance (solar)\n",
"- **T**: Temperature\n",
"- **Wind**: Wind speed\n",
"- **Load**: Electrical load"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)\n",
"print(f\"Shape: {raw.shape} ({raw.shape[0]} hours = {raw.shape[0] // 24} days)\")\n",
"raw.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Hierarchical Clustering (Recommended Default)\n",
"\n",
"Agglomerative hierarchical clustering builds a tree of clusters and cuts it at the desired number. It's the recommended default because it:\n",
"- Produces consistent results (deterministic)\n",
"- Works well with various representations\n",
"- Handles multi-variate data effectively"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_hierarchical = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
")\n",
"print(f\"Accuracy: RMSE = {result_hierarchical.accuracy.rmse.mean():.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. K-Means Clustering\n",
"\n",
"K-means is fast and widely used. It computes cluster centroids (averages), which may not correspond to actual periods in the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_kmeans = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"kmeans\"),\n",
")\n",
"print(f\"Accuracy: RMSE = {result_kmeans.accuracy.rmse.mean():.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. K-Medoids-like Clustering\n",
"\n",
"K-medoids selects actual periods as cluster centers (medoids) rather than computing averages. This preserves realistic patterns.\n",
"\n",
"**Note:** The true `kmedoids` method uses an exact MILP solver which can be slow for large datasets. For most use cases, `hierarchical` with `representation=\"medoid\"` gives similar results much faster."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use hierarchical with medoid representation (fast alternative to kmedoids)\n",
"result_kmedoids = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"medoid\"),\n",
")\n",
"print(f\"Accuracy: RMSE = {result_kmedoids.accuracy.rmse.mean():.4f}\")"
]
},
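{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the exact `kmedoids` method can still be run on a reduced slice of the data so the MILP stays small. The sketch below uses January only and four clusters (illustrative choices); runtime depends on the MILP solver available in your environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: exact MILP-based k-medoids on a single month to keep the problem small.\n",
"# The data slice and cluster count are illustrative choices.\n",
"result_exact = tsam.aggregate(\n",
"    raw.iloc[: 24 * 31],  # January only\n",
"    n_clusters=4,\n",
"    period_duration=24,\n",
"    cluster=ClusterConfig(method=\"kmedoids\"),\n",
")\n",
"print(f\"Accuracy: RMSE = {result_exact.accuracy.rmse.mean():.4f}\")"
]
},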
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. K-Maxoids Clustering\n",
"\n",
"K-maxoids selects the most dissimilar periods as cluster centers. This is useful for capturing extreme conditions.\n",
"\n",
"**Note:** We set `preserve_column_means=False` below because mean preservation adjusts typical period values to match the original data's mean. For k-maxoids, where the goal is to preserve extreme values, this would diminish the very extremes we're trying to capture. Use `preserve_column_means=True` (default) when mean preservation is more important than extreme value preservation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_kmaxoids = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"kmaxoids\"),\n",
" preserve_column_means=False, # Don't rescale to preserve extreme values\n",
")\n",
"print(f\"Accuracy: RMSE = {result_kmaxoids.accuracy.rmse.mean():.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5. Contiguous Clustering\n",
"\n",
"Contiguous clustering enforces temporal continuity - adjacent typical periods must come from adjacent original periods. This is important for:\n",
"- **Storage modeling**: State-of-charge must be continuous\n",
"- **Seasonal patterns**: Preserving the natural progression of seasons"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_contiguous = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"contiguous\"),\n",
")\n",
"print(f\"Accuracy: RMSE = {result_contiguous.accuracy.rmse.mean():.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Comparison of Methods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Collect all results for comparison\n",
"results = {\n",
" \"Original\": raw,\n",
" \"Hierarchical\": result_hierarchical.reconstructed,\n",
" \"K-Means\": result_kmeans.reconstructed,\n",
" \"K-Medoids\": result_kmedoids.reconstructed,\n",
" \"K-Maxoids\": result_kmaxoids.reconstructed,\n",
" \"Contiguous\": result_contiguous.reconstructed,\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Duration Curve Comparison\n",
"\n",
"Duration curves show how well each method preserves the value distribution."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Duration curve comparison - Load\n",
"frames = []\n",
"for name, df in results.items():\n",
" sorted_vals = df[\"Load\"].sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"long_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Duration Curve Comparison - Load\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Duration curve comparison - GHI\n",
"frames = []\n",
"for name, df in results.items():\n",
" sorted_vals = df[\"GHI\"].sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"GHI\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"long_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"px.line(\n",
" long_df, x=\"Hour\", y=\"GHI\", color=\"Method\", title=\"Duration Curve Comparison - GHI\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Accuracy Comparison"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compare RMSE across methods\n",
"accuracy_comparison = pd.DataFrame(\n",
" {\n",
" \"Method\": [\"Hierarchical\", \"K-Means\", \"K-Medoids\", \"K-Maxoids\", \"Contiguous\"],\n",
" \"Mean RMSE\": [\n",
" result_hierarchical.accuracy.rmse.mean(),\n",
" result_kmeans.accuracy.rmse.mean(),\n",
" result_kmedoids.accuracy.rmse.mean(),\n",
" result_kmaxoids.accuracy.rmse.mean(),\n",
" result_contiguous.accuracy.rmse.mean(),\n",
" ],\n",
" }\n",
")\n",
"accuracy_comparison.sort_values(\"Mean RMSE\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7. Configuration Options\n",
"\n",
"### Using Weights\n",
"\n",
"When clustering multi-variate time series, you can assign different importance to each column using the `weights` parameter of `aggregate()`. This is useful when one variable is more critical for your application. Weights influence all pipeline stages (clustering, segmentation, representation, rescaling)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Prioritize Load over other columns (e.g., for demand-focused energy systems)\n",
"result_weighted = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
" weights={\"Load\": 3.0, \"GHI\": 1.0, \"T\": 1.0, \"Wind\": 1.0},\n",
")\n",
"print(f\"Load RMSE (weighted): {result_weighted.accuracy.rmse['Load']:.4f}\")\n",
"print(f\"Load RMSE (unweighted): {result_hierarchical.accuracy.rmse['Load']:.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using Duration Curves for Clustering\n",
"\n",
"By default, clustering matches periods by their temporal patterns. Setting `use_duration_curves=True` matches periods by their value distributions instead, ignoring timing."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cluster by value distribution rather than temporal pattern\n",
"result_duration_curves = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(\n",
" method=\"hierarchical\",\n",
" use_duration_curves=True,\n",
" ),\n",
")\n",
"print(f\"RMSE with duration curves: {result_duration_curves.accuracy.rmse.mean():.4f}\")"
]
},
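{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Normalizing Column Means\n",
"\n",
"When columns differ strongly in magnitude, `normalize_column_means` (listed in the options table above) rescales them to a common mean before clustering so that no single column dominates the distance metric. A minimal sketch, assuming it is a `ClusterConfig` field as the options table suggests:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: normalize columns to the same mean before clustering.\n",
"# Assumes normalize_column_means is a ClusterConfig field, per the options table.\n",
"result_normalized = tsam.aggregate(\n",
"    raw,\n",
"    n_clusters=8,\n",
"    period_duration=24,\n",
"    cluster=ClusterConfig(\n",
"        method=\"hierarchical\",\n",
"        normalize_column_means=True,\n",
"    ),\n",
")\n",
"print(f\"RMSE with normalized column means: {result_normalized.accuracy.rmse.mean():.4f}\")"
]
},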
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Distribution-Preserving Representation\n",
"\n",
"The `distribution_minmax` representation preserves both the value distribution AND the min/max values. This is excellent for energy system optimization where both the shape and extremes matter."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use distribution_minmax representation\n",
"result_dist_minmax = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(\n",
" method=\"hierarchical\",\n",
" representation=\"distribution_minmax\",\n",
" ),\n",
")\n",
"\n",
"# Compare min/max preservation\n",
"print(\"Original data range:\")\n",
"print(f\" Load: {raw['Load'].min():.2f} - {raw['Load'].max():.2f}\")\n",
"\n",
"reconstructed_standard = result_hierarchical.reconstructed\n",
"reconstructed_dist = result_dist_minmax.reconstructed\n",
"\n",
"print(\"\\nStandard medoid representation:\")\n",
"print(\n",
" f\" Load: {reconstructed_standard['Load'].min():.2f} - {reconstructed_standard['Load'].max():.2f}\"\n",
")\n",
"\n",
"print(\"\\nDistribution + MinMax representation:\")\n",
"print(\n",
" f\" Load: {reconstructed_dist['Load'].min():.2f} - {reconstructed_dist['Load'].max():.2f}\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Comparison: Standard vs Distribution-Preserving"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Comparison: Standard vs Distribution-Preserving\n",
"comparison_dist = {\n",
" \"Original\": raw,\n",
" \"Medoid (standard)\": reconstructed_standard,\n",
" \"Distribution + MinMax\": reconstructed_dist,\n",
"}\n",
"\n",
"frames = []\n",
"for name, df in comparison_dist.items():\n",
" sorted_vals = df[\"Load\"].sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"long_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Effect of Distribution-Preserving Representation\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"| Use Case | Recommended Method | Key Options |\n",
"|----------|-------------------|-------------|\n",
"| General purpose | `hierarchical` | Default settings |\n",
"| Fast clustering | `kmeans` | - |\n",
"| Preserve realistic patterns | `hierarchical` | `representation=\"medoid\"` |\n",
"| Capture extremes | `kmaxoids` | `preserve_column_means=False` |\n",
"| Storage modeling | `contiguous` | - |\n",
"| Demand-focused | `hierarchical` | `weights={\"Load\": 3.0, ...}` (top-level) |\n",
"| Preserve distribution | `hierarchical` | `representation=\"distribution_minmax\"` |\n",
"\n",
"**Note:** The `kmedoids` method uses an exact MILP solver and can be slow for datasets with many periods (365+ days). Use `hierarchical` with `representation=\"medoid\"` for similar results with much better performance."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: docs/notebooks/clustering_transfer.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Clustering Transfer\n",
"\n",
"This notebook demonstrates the **cluster once, apply many times** workflow."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig, ClusteringResult\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"# Ensure results directory exists\n",
"RESULTS_DIR = Path(\"results\")\n",
"RESULTS_DIR.mkdir(exist_ok=True)\n",
"\n",
"raw = pd.read_csv(\"testdata.csv\", index_col=0) # 4 columns: GHI, T, Wind, Load"
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": [
"## Two Paths to Aggregate\n",
"\n",
"**Path A:** Cluster on ALL variables\n",
"**Path B:** Cluster on WIND only, then apply to all variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"# Path A: Cluster on ALL variables\n",
"result_all = tsam.aggregate(\n",
" raw, n_clusters=8, cluster=ClusterConfig(method=\"hierarchical\")\n",
")\n",
"\n",
"# Path B: Cluster on WIND only, then transfer to all\n",
"result_wind = tsam.aggregate(\n",
" raw[[\"Wind\"]], n_clusters=8, cluster=ClusterConfig(method=\"hierarchical\")\n",
")\n",
"result_transferred = result_wind.clustering.apply(raw)"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"## Comparing Cluster Assignments\n",
"\n",
"- **Row 1** (all variables) differs from **Row 2** (wind only)\n",
"- **Row 2** and **Row 3** are **identical** - the transfer preserves the clustering!"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"px.imshow(\n",
" pd.DataFrame(\n",
" {\n",
" \"Clustered on ALL variables\": result_all.cluster_assignments,\n",
" \"Clustered on WIND only\": result_wind.cluster_assignments,\n",
" \"Transferred to all variables\": result_transferred.cluster_assignments,\n",
" },\n",
" index=pd.RangeIndex(\n",
" start=0, stop=len(result_all.cluster_assignments), name=\"Original Period\"\n",
" ),\n",
" ).T,\n",
" color_continuous_scale=\"viridis\",\n",
" title=\"Cluster Assignments: Row 2 and 3 are IDENTICAL!\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "6",
"metadata": {},
"source": [
"## Verifying the Transfer\n",
"\n",
"The wind typical periods are **identical** - but now we also have GHI, T, and Load:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7",
"metadata": {},
"outputs": [],
"source": [
"# Wind typical periods: wind-only vs transferred\n",
"wind_only = result_wind.cluster_representatives[\"Wind\"]\n",
"wind_transferred = result_transferred.cluster_representatives[\"Wind\"]\n",
"\n",
"print(\n",
" \"Wind typical periods IDENTICAL after transfer:\", wind_only.equals(wind_transferred)\n",
")\n",
"print(f\"Max difference: {(wind_only - wind_transferred).abs().max()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8",
"metadata": {},
"outputs": [],
"source": [
"# Verify: cluster assignments are equal\n",
"print(\n",
" \"Cluster assignments equal:\",\n",
" np.array_equal(\n",
" result_wind.cluster_assignments, result_transferred.cluster_assignments\n",
" ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"# But now we have ALL columns!\n",
"print(\"Wind-only result columns:\", result_wind.cluster_representatives.columns.tolist())\n",
"print(\n",
" \"Transferred result columns:\",\n",
" result_transferred.cluster_representatives.columns.tolist(),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"## Use Case: Save and Reload Clustering"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"# Save clustering to file\n",
"result_wind.clustering.to_json(\"clustering.json\")\n",
"\n",
"# Later: load and apply to any data\n",
"clustering = ClusteringResult.from_json(\"clustering.json\")\n",
"result_reloaded = clustering.apply(raw)\n",
"\n",
"print(\n",
" \"Reloaded result identical:\",\n",
" result_transferred.cluster_representatives.equals(\n",
" result_reloaded.cluster_representatives\n",
" ),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "12",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"```python\n",
"# Cluster on subset\n",
"result_wind = tsam.aggregate(data[[\"Wind\"]], n_clusters=8)\n",
"\n",
"# Apply to all variables - wind stays identical!\n",
"result_all = result_wind.clustering.apply(data)\n",
"\n",
"# Save for later\n",
"result_wind.clustering.to_json(\"clustering.json\")\n",
"```"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/disaggregation.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Disaggregation\n",
"\n",
"How to expand typical-period results back to the original time series length.\n",
"\n",
"**Use case:** You aggregate a year of hourly data into 8 typical days, run an optimization on those 8 days, and then need the results mapped back to all 365 days.\n",
"\n",
"`disaggregate()` does exactly this — it takes any DataFrame with the same `(cluster, timestep)` structure as `cluster_representatives` and expands it using the stored cluster assignments."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"import tsam\n",
"from tsam import ClusteringResult, SegmentConfig\n",
"\n",
"raw = pd.read_csv(\"testdata.csv\", index_col=0, parse_dates=True)"
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": [
"## Basic Disaggregation\n",
"\n",
"Aggregate, then disaggregate the typical periods back to the full year. The result matches `.reconstructed` exactly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"result = tsam.aggregate(raw, n_clusters=8)\n",
"\n",
"print(f\"Original: {result.original.shape}\")\n",
"print(f\"Cluster representatives: {result.cluster_representatives.shape}\")\n",
"\n",
"expanded = result.disaggregate(result.cluster_representatives)\n",
"print(f\"Disaggregated: {expanded.shape}\")\n",
"print(f\"Matches .reconstructed: {expanded.equals(result.reconstructed)}\")"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"## Disaggregating Arbitrary Data\n",
"\n",
"The real value: disaggregate data that tsam didn't produce. Here we simulate optimization results — a \"dispatch\" column computed from the typical periods — and expand it back to the full year."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"# Simulate optimization: compute \"dispatch\" as a function of the typical periods\n",
"reps = result.cluster_representatives\n",
"dispatch = pd.DataFrame(\n",
" {\"Dispatch\": reps[\"Load\"] - 0.5 * reps[\"GHI\"] - 0.3 * reps[\"Wind\"]},\n",
" index=reps.index,\n",
")\n",
"\n",
"print(f\"Dispatch (typical periods): {dispatch.shape}\")\n",
"dispatch.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"# Disaggregate back to full year\n",
"full_year_dispatch = result.disaggregate(dispatch)\n",
"\n",
"print(f\"Full year dispatch: {full_year_dispatch.shape}\")\n",
"\n",
"fig = px.line(full_year_dispatch, labels={\"index\": \"Time\", \"value\": \"Dispatch\"})\n",
"fig.update_layout(title=\"Disaggregated Dispatch Over Full Year\", showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "7",
"metadata": {},
"source": [
"## Survives IO\n",
"\n",
"Save the clustering to JSON, load it later, and disaggregate without the original `AggregationResult`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8",
"metadata": {},
"outputs": [],
"source": [
"# Save clustering\n",
"result.clustering.to_json(\"clustering.json\")\n",
"\n",
"# Later: load and disaggregate\n",
"clustering = ClusteringResult.from_json(\"clustering.json\")\n",
"full_year_from_disk = clustering.disaggregate(dispatch)\n",
"\n",
"print(f\"Shape: {full_year_from_disk.shape}\")\n",
"print(\n",
" f\"Matches original disaggregation: {full_year_dispatch.values[:8760].tolist() == full_year_from_disk.values.tolist()}\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "9",
"metadata": {},
"source": [
"Note: `ClusteringResult.disaggregate()` returns an integer-indexed DataFrame (it doesn't have access to the original datetime index). `AggregationResult.disaggregate()` restores the datetime index automatically."
]
},
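{
"cell_type": "markdown",
"id": "9a",
"metadata": {},
"source": [
"If a datetime index is needed on the `ClusteringResult` output, it can be reattached manually. A minimal sketch, assuming the original DataFrame `raw` is still at hand and covers the same timesteps:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: reattach the original datetime index to the integer-indexed output.\n",
"# Assumes the disaggregated frame spans the same timesteps as `raw`.\n",
"full_year_from_disk.index = raw.index[: len(full_year_from_disk)]\n",
"full_year_from_disk.head()"
]
},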
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"## Segmented Data\n",
"\n",
"With segmentation, `cluster_representatives` has a `(cluster, segment, duration)` index. Disaggregation expands segments to full timesteps, placing values at the start of each segment and NaN elsewhere. Use `.ffill()` for a step function."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"result_seg = tsam.aggregate(raw, n_clusters=8, segments=SegmentConfig(n_segments=4))\n",
"\n",
"print(f\"Cluster representatives: {result_seg.cluster_representatives.shape}\")\n",
"print(f\"Index levels: {result_seg.cluster_representatives.index.names}\")\n",
"result_seg.cluster_representatives.head(8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12",
"metadata": {},
"outputs": [],
"source": [
"expanded_seg = result_seg.disaggregate(result_seg.cluster_representatives)\n",
"\n",
"print(f\"Disaggregated shape: {expanded_seg.shape}\")\n",
"print(f\"NaN count: {expanded_seg.isna().sum().sum()} (segment gaps)\")\n",
"print(f\"Non-NaN count: {expanded_seg.notna().sum().sum()} (segment starts)\")\n",
"\n",
"# Show a single day: values at segment starts, NaN in between\n",
"expanded_seg[\"GHI\"].iloc[:24]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13",
"metadata": {},
"outputs": [],
"source": [
"# Forward-fill for a step function\n",
"filled = expanded_seg.ffill()\n",
"\n",
"fig = px.line(\n",
" pd.DataFrame(\n",
" {\"Original\": result_seg.original[\"GHI\"], \"Disaggregated (ffill)\": filled[\"GHI\"]}\n",
" ),\n",
" labels={\"index\": \"Time\", \"value\": \"GHI\"},\n",
").update_layout(\n",
" title=\"Segmented Disaggregation: Original vs Reconstructed (first 2 weeks)\"\n",
")\n",
"fig.update_xaxes(range=[raw.index[0], raw.index[24 * 14]])\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "14",
"metadata": {},
"source": [
"## Summary\n",
"\n",
"```python\n",
"# Aggregate\n",
"result = tsam.aggregate(df, n_clusters=8)\n",
"\n",
"# Run optimization on typical periods\n",
"optimized = my_optimizer(result.cluster_representatives)\n",
"\n",
"# Expand back to full year (with datetime index)\n",
"full_year = result.disaggregate(optimized)\n",
"\n",
"# Or via saved clustering (integer index)\n",
"clustering = ClusteringResult.from_json(\"clustering.json\")\n",
"full_year = clustering.disaggregate(optimized)\n",
"\n",
"# For segmented data: .ffill() gives a step function\n",
"full_year = result.disaggregate(segmented_data).ffill()\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/k_maxoids.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-Maxoids Clustering\n",
"Example comparing k-means and k-maxoids clustering methods.\n",
"\n",
"K-maxoids automatically preserves extreme periods by selecting points closest to the convex hull.\n",
"\n",
"Author: Maximilian Hoffmann"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import pandas and the relevant time series aggregation class"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"# Ensure results directory exists\n",
"RESULTS_DIR = Path(\"results\")\n",
"RESULTS_DIR.mkdir(exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Input data "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read in time series from testdata.csv with pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show a slice of the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the shape of the raw input data: 4 types of timeseries (GHI, Temperature, Wind and Load) for every hour in a year"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a plot function for the temperature for a visual comparison of the time series"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use tsam.unstack_to_periods() with plotly for heatmap visualization\n",
"# px.imshow(unstacked[\"column\"].values.T) creates interactive heatmaps"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot an example series - in this case the temperature"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Original temperature heatmap\n",
"unstacked = tsam.unstack_to_periods(raw, period_duration=24)\n",
"px.imshow(\n",
" unstacked[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"Original Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Simple k-mean aggregation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialize an aggregation class object with k-means as method for eight typical days, without any integration of extreme periods. Alternative methods are 'averaging', 'hierarchical', 'kmedoids' and 'kmaxoids'."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"kmeans\"),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives = result.cluster_representatives"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show shape of typical periods: 4 types of timeseries for 8*24 hours"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save typical periods to .csv file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives.to_csv(RESULTS_DIR / \"testperiods_kmeans.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Repredict the original time series based on the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed = result.reconstructed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the repredicted data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# K-means predicted temperature heatmap\n",
"unstacked_kmeans = tsam.unstack_to_periods(reconstructed, period_duration=24)\n",
"px.imshow(\n",
" unstacked_kmeans[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"K-means Predicted Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As seen, they days with the minimal temperature are excluded. In case that they are required they can be added to the aggregation as follow."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### k-maxoids aggregation including extreme periods"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialize a time series aggregation based on k-maxoids, which automatically searches for points closest to the convex hull."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_maxoids = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"kmaxoids\"),\n",
" preserve_column_means=False,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives_maxoids = result_maxoids.cluster_representatives"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The aggregation can also be evaluated by indicators"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_maxoids.accuracy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Repredict the original time series based on the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed_maxoids = result_maxoids.reconstructed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot repredicted data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# K-maxoids predicted temperature heatmap\n",
"unstacked_maxoids = tsam.unstack_to_periods(reconstructed_maxoids, period_duration=24)\n",
"px.imshow(\n",
" unstacked_maxoids[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"K-maxoids Predicted Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here bigger biggest values and lower lowest values can be observed compared to k-means clustering."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Comparison of the aggregations \n",
"It was shown for the temperature, but both times all four time series have been aggregated. Therefore, we compare here also the duration curves of the electrical load for the original time series, the aggregation with k-mean, and the k-maxoids aggregation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Duration curve comparison using plotly express\n",
"comparison_data = {\n",
" \"Original\": raw,\n",
" \"8 typ days (Centroids)\": reconstructed,\n",
" \"8 typ days (Maxoids)\": reconstructed_maxoids,\n",
"}\n",
"\n",
"frames = []\n",
"for name, df in comparison_data.items():\n",
" sorted_vals = df[\"Load\"].sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"long_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Duration Curve Comparison - Load\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or as unsorted time series for an example week"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Time slice comparison using plotly express\n",
"frames = []\n",
"for name, df in comparison_data.items():\n",
" sliced = df.loc[\"20100210\":\"20100218\", [\"Load\"]].copy()\n",
" sliced[\"Method\"] = name\n",
" frames.append(sliced)\n",
"long_df = pd.concat(frames).reset_index(names=\"Time\")\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Time\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Time Slice Comparison - Load (Feb 10-18)\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: docs/notebooks/optimization_input.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Optimization Input\n",
"\n",
"How to access aggregation results for energy system optimization models.\n",
"\n",
"This notebook shows how to access:\n",
"- Cluster weights (occurrence counts)\n",
"- Cluster assignments (period ordering)\n",
"- Typical period data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import pandas and the relevant time series aggregation class"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig, ExtremeConfig"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Input data "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read in time series from testdata.csv with pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Transform the index to a datetime index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw.index = pd.to_datetime(raw.index)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot raw data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = px.line(\n",
" raw, facet_row=\"variable\", labels={\"index\": \"Time\", \"value\": \"\"}\n",
").update_yaxes(matches=None)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Aggregate the data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Aggregate to typical weeks, including days with minimum temperature and maximum load as extreme periods."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result = tsam.aggregate(\n",
" raw,\n",
" n_clusters=5,\n",
" period_duration=24 * 7, # Weekly periods\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
" extremes=ExtremeConfig(\n",
" method=\"new_cluster\",\n",
" min_value=[\"T\"],\n",
" max_value=[\"Load\"],\n",
" ),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives = result.cluster_representatives\n",
"cluster_representatives.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show the resulting order of aggregated periods"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculates how the original index is represented by the old index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Advanced: Access the internal aggregation object for features not exposed in the public API.\n",
"# Note: The _aggregation attribute is internal and may change in future versions.\n",
"index_matching = result._aggregation.indexMatching()\n",
"index_matching.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the appearance of the 5+2 aggregated periods in the original timeframe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"visualization_df = pd.DataFrame(\n",
" 0, index=index_matching.index, columns=result.period_index\n",
")\n",
"for col in visualization_df.columns:\n",
" visualization_df.loc[index_matching[\"PeriodNum\"] == col, col] = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = px.area(\n",
" visualization_df,\n",
" labels={\"index\": \"Time\", \"value\": \"Occurrence\"},\n",
" color_discrete_sequence=px.colors.sample_colorscale(\n",
" \"Viridis\", len(visualization_df.columns)\n",
" ),\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get input for potential energy system optimization"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**i. cluster_weights** - The occurrence count of each typical period for weighting in the objective function.\n",
"\n",
"Note: Period three is only partially evaluated since its appearance at the end of the year exceeds the original time series."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result.cluster_weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"weights = pd.Series(result.cluster_weights)\n",
"fig = px.bar(\n",
" x=weights.index,\n",
" y=weights.values,\n",
" labels={\"x\": \"Period index\", \"y\": \"Number of occurence\"},\n",
")\n",
"fig.show()"
]
},
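{
"cell_type": "markdown",
"metadata": {},
"source": [
"In an optimization model, these weights typically scale per-period operating costs in the objective function. A minimal sketch of such a weighted annual sum (the fuel price and the cost model are illustrative assumptions, not tsam outputs):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: weight per-period cost by the occurrence count of each typical period.\n",
"# fuel_price and the cost model are illustrative assumptions.\n",
"fuel_price = 30.0  # currency per unit of load\n",
"reps = result.cluster_representatives\n",
"period_cost = reps[\"Load\"].groupby(level=0).sum() * fuel_price  # per typical period\n",
"annual_cost = sum(\n",
"    weight * period_cost[cluster_id]\n",
"    for cluster_id, weight in pd.Series(result.cluster_weights).items()\n",
")\n",
"print(f\"Weighted annual cost estimate: {annual_cost:.0f}\")"
]
},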
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ii. Accessing period data by index**\n",
"<br>Access aggregated time series values using period and time step indices. This uses internal API methods that may change in future versions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Advanced: Access internal dictionary-style data access.\n",
"# Note: The _aggregation attribute and its methods are internal and may change in future versions.\n",
"agg = result._aggregation\n",
"agg.clusterPeriodDict[\"GHI\"][(agg.clusterPeriodIdx[3], agg.stepIdx[12])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively this is given as data frame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result.cluster_representatives.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**iii. cluster_assignments**\n",
"<br> The order of the typical periods to represent the original time series, e.g., to model seasonal storage."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result.cluster_assignments"
]
}
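,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"These assignments allow chaining a storage state of charge through the year in period order. A minimal sketch (the net-charge model is an illustrative assumption, not a tsam output):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: chain a storage state of charge across the year in period order.\n",
"# The net-charge model below is an illustrative assumption.\n",
"reps = result.cluster_representatives\n",
"net_charge = (reps[\"GHI\"] - reps[\"Load\"]).groupby(level=0).sum()  # per typical period\n",
"\n",
"soc = 0.0\n",
"soc_trace = []\n",
"for cluster_id in result.cluster_assignments:\n",
"    soc += net_charge[cluster_id]\n",
"    soc_trace.append(soc)\n",
"print(f\"State of charge after the first 5 periods: {soc_trace[:5]}\")"
]
}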
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: docs/notebooks/pareto_optimization.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Pareto Optimization\n",
"\n",
"Determine the optimal combination of segments and periods for time series aggregation.\n",
"\n",
"Author: Leander Kotzur"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig\n",
"from tsam.tuning import find_pareto_front\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"# Ensure results directory exists\n",
"RESULTS_DIR = Path(\"results\")\n",
"RESULTS_DIR.mkdir(exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": [
"## Input data\n",
"\n",
"Read in time series from testdata.csv with pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)\n",
"raw = raw.rename(\n",
" columns={\"T\": \"Temperature\", \"Load\": \"Demand\", \"Wind\": \"Wind\", \"GHI\": \"Solar\"}\n",
")\n",
"period_duration = 24"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"Plot the original data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"# Original data heatmaps using tsam.unstack_to_periods() with plotly\n",
"unstacked = tsam.unstack_to_periods(raw, period_duration=period_duration)\n",
"for col in raw.columns:\n",
" px.imshow(\n",
" unstacked[col].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": col},\n",
" title=f\"Original {col}\",\n",
" aspect=\"auto\",\n",
" ).show()"
]
},
{
"cell_type": "markdown",
"id": "6",
"metadata": {},
"source": [
"## Find Pareto-optimal aggregations\n",
"\n",
"Use `find_pareto_front()` to explore the Pareto-optimal combinations of periods and segments."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7",
"metadata": {},
"outputs": [],
"source": [
"pareto_results = find_pareto_front(\n",
" raw,\n",
" period_duration=period_duration,\n",
" timesteps=np.geomspace(5, 8760, 50).astype(int).tolist(),\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"distribution\"),\n",
" n_jobs=-1,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "8",
"metadata": {},
"source": [
"Visualize the Pareto front - the trade-off between compression and accuracy."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"pareto_df = pd.DataFrame(\n",
" [\n",
" {\n",
" \"timesteps\": r.n_clusters * r.n_segments,\n",
" \"periods\": r.n_clusters,\n",
" \"segments\": r.n_segments,\n",
" \"rmse\": r.accuracy.rmse.mean(),\n",
" }\n",
" for r in pareto_results\n",
" ]\n",
")\n",
"\n",
"fig = px.line(\n",
" pareto_df,\n",
" x=\"timesteps\",\n",
" y=\"rmse\",\n",
" markers=True,\n",
" labels={\"timesteps\": \"Timesteps (periods x segments)\", \"rmse\": \"RMSE\"},\n",
" title=\"Pareto Front: Compression vs Accuracy\",\n",
" hover_data=[\"periods\", \"segments\"],\n",
" range_y=(0, None),\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"Show the final result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"last_result = pareto_results[-1]\n",
"print(\n",
" f\"Final: {last_result.n_clusters} periods, {last_result.n_segments} segments, RMSE: {last_result.accuracy.rmse.mean():.4f}\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12",
"metadata": {},
"outputs": [],
"source": [
"# Reconstructed data heatmaps\n",
"reconstructed = last_result.reconstructed\n",
"unstacked_recon = tsam.unstack_to_periods(\n",
" reconstructed, period_duration=period_duration\n",
")\n",
"for col in reconstructed.columns:\n",
" px.imshow(\n",
" unstacked_recon[col].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": col},\n",
" title=f\"Reconstructed {col}\",\n",
" aspect=\"auto\",\n",
" ).show()"
]
},
{
"cell_type": "markdown",
"id": "13",
"metadata": {},
"source": [
"## Animated visualization\n",
"\n",
"Animate through all Pareto-optimal aggregations to visualize the trade-off between compression and accuracy."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14",
"metadata": {},
"outputs": [],
"source": [
"n_days = len(raw) // period_duration\n",
"n_vars = len(raw.columns)\n",
"\n",
"# Get normalization parameters from original data\n",
"raw_min = raw.min()\n",
"raw_range = raw.max() - raw.min()\n",
"\n",
"frames_data, labels = [], []\n",
"for result in reversed(pareto_results):\n",
" p, s = result.n_clusters, result.n_segments\n",
" labels.append(f\"{round((1 - s * p / len(raw)) * 100, 1)}% ({p}p x {s}s)\")\n",
"\n",
" # Normalize at DataFrame level, then reshape\n",
" reconstructed = result.reconstructed\n",
" normalized = (reconstructed - raw_min) / raw_range\n",
" data = normalized.values.reshape(n_days, period_duration, n_vars).transpose(2, 1, 0)\n",
"\n",
" frames_data.append(data.reshape(-1, n_days))\n",
"\n",
"img_stack = np.stack(frames_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
"source": [
"fig = px.imshow(\n",
" img_stack,\n",
" animation_frame=0,\n",
" color_continuous_scale=\"RdYlBu_r\",\n",
" aspect=\"auto\",\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\"},\n",
" title=\"Time Series Aggregation\",\n",
")\n",
"\n",
"for i, step in enumerate(fig.layout.sliders[0].steps):\n",
" step[\"label\"] = labels[i]\n",
"\n",
"tickvals = [period_duration * i + period_duration // 2 for i in range(n_vars)]\n",
"fig.update_yaxes(tickvals=tickvals, ticktext=list(raw.columns))\n",
"fig.update_layout(height=600, coloraxis_showscale=False)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "16",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17",
"metadata": {},
"outputs": [],
"source": [
"pareto_df.to_csv(RESULTS_DIR / \"paretoOptimalAggregation.csv\")\n",
"fig.write_html(RESULTS_DIR / \"animation.html\")"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/quickstart.ipynb
================================================
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic Example\n",
"\n",
"Example usage of the time series aggregation module (tsam).\n",
"\n",
"This notebook demonstrates:\n",
"1. Basic k-means aggregation\n",
"2. Hierarchical aggregation with extreme periods\n",
"3. Advanced aggregation with segmentation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import pandas and the relevant time series aggregation class"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig, ExtremeConfig, SegmentConfig\n",
"\n",
"pio.renderers.default = \"notebook_connected\"\n",
"\n",
"# Ensure results directory exists\n",
"RESULTS_DIR = Path(\"results\")\n",
"RESULTS_DIR.mkdir(exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Input data "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read in time series from testdata.csv with pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0, parse_dates=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show a slice of the dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the shape of the raw input data: 4 types of timeseries (GHI, Temperature, Wind and Load) for every hour in a year"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the original temperature data as a heatmap"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use tsam.unstack_to_periods() with plotly for heatmaps\n",
"unstacked = tsam.unstack_to_periods(raw, period_duration=24)\n",
"px.imshow(\n",
" unstacked[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"Original Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use the `aggregate()` function with k-means clustering for eight typical days."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_kmeans = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"kmeans\"),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Access the typical periods from the result object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives = result_kmeans.cluster_representatives\n",
"cluster_representatives.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show shape of typical periods: 4 types of timeseries for 8*24 hours"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"Shape: {cluster_representatives.shape}\")\n",
"print(\n",
" f\"Periods: {result_kmeans.n_clusters}, Timesteps per period: {result_kmeans.n_timesteps_per_period}\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save typical periods to .csv file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives.to_csv(RESULTS_DIR / \"testperiods_kmeans.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reconstruct the original time series based on the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed = result_kmeans.reconstructed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the repredicted data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# K-means reconstructed temperature heatmap\n",
"unstacked_kmeans = tsam.unstack_to_periods(reconstructed, period_duration=24)\n",
"px.imshow(\n",
" unstacked_kmeans[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"K-means Reconstructed Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As seen, they days with the minimal temperature are excluded. In case that they are required they can be added to the aggregation as follow."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hierarchical aggregation including extreme periods"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use hierarchical clustering with extreme period preservation. This ensures the day with the minimum temperature and maximum load are included."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_hier = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
" extremes=ExtremeConfig(\n",
" method=\"new_cluster\",\n",
" min_value=[\"T\"], # Preserve day with minimum temperature\n",
" max_value=[\"Load\"], # Preserve day with maximum load\n",
" ),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives = result_hier.cluster_representatives\n",
"cluster_representatives.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The aggregation can also be evaluated by indicators"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# View accuracy metrics\n",
"print(result_hier.accuracy)\n",
"print(\"\\nRMSE per column:\")\n",
"print(result_hier.accuracy.rmse)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save typical periods to .csv file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cluster_representatives.to_csv(RESULTS_DIR / \"testperiods_hierarchical.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Repredict the original time series based on the typical periods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed_extremes = result_hier.reconstructed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot repredicted data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hierarchical with extremes reconstructed temperature heatmap\n",
"unstacked_hier = tsam.unstack_to_periods(reconstructed_extremes, period_duration=24)\n",
"px.imshow(\n",
" unstacked_hier[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"Hierarchical + Extremes Reconstructed Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now also the days with the minimal temperature are integrated into the typical periods."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Advanced aggregation method"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Combining hierarchical clustering with segmentation (reduced temporal resolution) and distribution-preserving representation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_advanced = tsam.aggregate(\n",
" raw,\n",
" n_clusters=24,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(\n",
" method=\"hierarchical\",\n",
" representation=\"distribution_minmax\",\n",
" ),\n",
" segments=SegmentConfig(n_segments=8),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed_advanced = result_advanced.reconstructed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Advanced method reconstructed temperature heatmap\n",
"unstacked_adv = tsam.unstack_to_periods(reconstructed_advanced, period_duration=24)\n",
"px.imshow(\n",
" unstacked_adv[\"T\"].values.T,\n",
" labels={\"x\": \"Day\", \"y\": \"Hour\", \"color\": \"Temperature\"},\n",
" title=\"Advanced Method Reconstructed Temperature\",\n",
" aspect=\"auto\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Comparison of the aggregations \n",
"It was shown for the temperature, but both times all four time series have been aggregated. Therefore, we compare here also the duration curves of the electrical load for the original time series, the aggregation with k-mean, and the hierarchical aggregation including peak periods."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Duration curve comparison using plotly express\n",
"comparison_data = {\n",
" \"Original\": raw,\n",
" \"8 typ days\": reconstructed,\n",
" \"8 typ days + peak\": reconstructed_extremes,\n",
" \"24 typ days + 8 seg\": reconstructed_advanced,\n",
"}\n",
"\n",
"# Build long-form DataFrame for px.line\n",
"frames = []\n",
"for name, df in comparison_data.items():\n",
" sorted_vals = df[\"Load\"].sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"long_df = pd.concat(frames, ignore_index=True)\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Duration Curve Comparison - Load\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or as unsorted time series for an example week"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Time slice comparison - Load\n",
"frames = []\n",
"for name, df in comparison_data.items():\n",
" sliced = df.loc[\"20100210\":\"20100218\", [\"Load\"]].copy()\n",
" sliced[\"Method\"] = name\n",
" frames.append(sliced)\n",
"long_df = pd.concat(frames).reset_index(names=\"Time\")\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Time\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Time Slice Comparison - Load (Feb 10-18)\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Time slice comparison - GHI\n",
"frames = []\n",
"for name, df in comparison_data.items():\n",
" sliced = df.loc[\"20100210\":\"20100218\", [\"GHI\"]].copy()\n",
" sliced[\"Method\"] = name\n",
" frames.append(sliced)\n",
"long_df = pd.concat(frames).reset_index(names=\"Time\")\n",
"\n",
"px.line(\n",
" long_df,\n",
" x=\"Time\",\n",
" y=\"GHI\",\n",
" color=\"Method\",\n",
" title=\"Time Slice Comparison - Solar Irradiance (Feb 10-18)\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: docs/notebooks/representations.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Representation Methods\n",
"\n",
"Comparison of different cluster representation methods: medoid, maxoid, mean, minmax, and duration.\n",
"\n",
"Author: Maximilian Hoffmann"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig\n",
"\n",
"pio.renderers.default = \"notebook_connected\""
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": [
"### Input data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)\n",
"raw"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"### Medoid representation\n",
"\n",
"Picks the actual observed period closest to each cluster centroid."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"result_medoid = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"medoid\"),\n",
")\n",
"result_medoid.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"result_medoid.plot.cluster_members()"
]
},
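{
"cell_type": "markdown",
"id": "25",
"metadata": {},
"source": [
"Because the medoid is an actual observed period, each representative stands in for a whole group of similar days. As a quick sketch, the `cluster_assignments` array (which maps every original period to its cluster) tells us how many days each representative covers:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: number of original days represented by each cluster.\n",
"pd.Series(result_medoid.cluster_assignments).value_counts().sort_index()"
]
},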
{
"cell_type": "markdown",
"id": "7",
"metadata": {},
"source": [
"### Maxoid representation\n",
"\n",
"Picks the period that maximizes the sum of all column values in each cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8",
"metadata": {},
"outputs": [],
"source": [
"result_maxoid = tsam.aggregate(\n",
" raw,\n",
" n_clusters=8,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"maxoid\"),\n",
" preserve_column_means=False,\n",
")\n",
"result_maxoid.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"result_maxoid.plot.cluster_members()"
]
},
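{
"cell_type": "markdown",
"id": "27",
"metadata": {},
"source": [
"Since each cluster is represented by its highest-sum member and rescaling is switched off, the reconstruction tends to sit above the original level. A minimal check is the ratio of reconstructed to original column means, where values above 1 indicate overshoot:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: ratio of reconstructed to original column means; with the maxoid\n",
"# representation these ratios typically lie above 1.\n",
"result_maxoid.reconstructed.mean() / raw.mean()"
]
},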
{
"cell_type": "markdown",
"id": "10",
"metadata": {},
"source": [
"### Mean representation\n",
"\n",
"Averages all member periods of each cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"result_mean = tsam.aggregate(\n",
" raw,\n",
" n_clusters=20,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"mean\"),\n",
")\n",
"result_mean.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12",
"metadata": {},
"outputs": [],
"source": [
"result_mean.plot.cluster_members()"
]
},
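{
"cell_type": "markdown",
"id": "29",
"metadata": {},
"source": [
"Averaging smooths out within-cluster variability, so the reconstruction is typically less volatile than the original data. A minimal sketch compares the per-column standard deviations:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: mean representation tends to reduce variability; compare the\n",
"# per-column standard deviations of the original and the reconstruction.\n",
"pd.DataFrame({\"Original\": raw.std(), \"Mean (20)\": result_mean.reconstructed.std()})"
]
},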
{
"cell_type": "markdown",
"id": "13",
"metadata": {},
"source": [
"### MinMax Mean representation\n",
"\n",
"Like mean, but preserves the minimum and maximum values per column."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14",
"metadata": {},
"outputs": [],
"source": [
"result_minmax = tsam.aggregate(\n",
" raw,\n",
" n_clusters=20,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"minmax_mean\"),\n",
" preserve_column_means=False,\n",
")\n",
"result_minmax.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
"source": [
"result_minmax.plot.cluster_members()"
]
},
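{
"cell_type": "markdown",
"id": "31",
"metadata": {},
"source": [
"A direct check of the claim above (a sketch; exact agreement depends on the configuration): compare the per-column extremes of the original data and the minmax-mean reconstruction."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: per-column extremes of the original vs. the minmax-mean reconstruction.\n",
"pd.DataFrame(\n",
"    {\n",
"        \"orig min\": raw.min(),\n",
"        \"rec min\": result_minmax.reconstructed.min(),\n",
"        \"orig max\": raw.max(),\n",
"        \"rec max\": result_minmax.reconstructed.max(),\n",
"    }\n",
")"
]
},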
{
"cell_type": "markdown",
"id": "16",
"metadata": {},
"source": [
"### Distribution representation\n",
"\n",
"Preserves the full value distribution (duration curve) within each cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17",
"metadata": {},
"outputs": [],
"source": [
"result_duration = tsam.aggregate(\n",
" raw,\n",
" n_clusters=20,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\", representation=\"distribution\"),\n",
" preserve_column_means=False,\n",
")\n",
"result_duration.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
"source": [
"result_duration.plot.cluster_members()"
]
},
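{
"cell_type": "markdown",
"id": "33",
"metadata": {},
"source": [
"A sketch of what preserving the distribution means in practice: selected quantiles of the original and reconstructed GHI should lie close together."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: compare selected quantiles of the original vs. reconstructed GHI.\n",
"q = [0.0, 0.25, 0.5, 0.75, 0.95, 1.0]\n",
"pd.DataFrame(\n",
"    {\n",
"        \"Original\": raw[\"GHI\"].quantile(q),\n",
"        \"Distribution (20)\": result_duration.reconstructed[\"GHI\"].quantile(q),\n",
"    }\n",
")"
]
},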
{
"cell_type": "markdown",
"id": "19",
"metadata": {},
"source": [
"### Comparison\n",
"\n",
"Compare all representation methods via duration curves, heatmaps, and time slices."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"metadata": {},
"outputs": [],
"source": [
"results = {\n",
" \"Medoid (8)\": result_medoid,\n",
" \"Maxoid (8)\": result_maxoid,\n",
" \"Mean (20)\": result_mean,\n",
" \"Minmax (20)\": result_minmax,\n",
" \"Distribution (20)\": result_duration,\n",
"}\n",
"\n",
"# Duration curves\n",
"frames = []\n",
"for name, r in {\"Original\": None, **results}.items():\n",
" vals = (raw if r is None else r.reconstructed)[\"Load\"]\n",
" sorted_vals = vals.sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"\n",
"px.line(\n",
" pd.concat(frames, ignore_index=True),\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Duration Curve Comparison - Load\",\n",
")"
]
},
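{
"cell_type": "markdown",
"id": "35",
"metadata": {},
"source": [
"The duration curves mainly differ at the tails. As a compact numeric summary (a sketch), compare the reconstructed load peak of each method with the original one:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: peak load captured by each representation method.\n",
"pd.Series(\n",
"    {\"Original\": raw[\"Load\"].max()}\n",
"    | {name: r.reconstructed[\"Load\"].max() for name, r in results.items()},\n",
"    name=\"Peak load\",\n",
")"
]
},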
{
"cell_type": "code",
"execution_count": null,
"id": "21",
"metadata": {},
"outputs": [],
"source": [
"# Heatmap comparison\n",
"param = \"GHI\"\n",
"unstacked_orig = tsam.unstack_to_periods(raw, period_duration=24)\n",
"\n",
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"labels = [\"Original\", *list(results.keys())]\n",
"data = [unstacked_orig] + [\n",
" tsam.unstack_to_periods(r.reconstructed, period_duration=24)\n",
" for r in results.values()\n",
"]\n",
"\n",
"fig = make_subplots(\n",
" rows=len(data), cols=1, subplot_titles=labels, vertical_spacing=0.03\n",
")\n",
"for i, d in enumerate(data, 1):\n",
" fig.add_trace(go.Heatmap(z=d[param].values.T, coloraxis=\"coloraxis\"), row=i, col=1)\n",
"fig.update_layout(\n",
" height=250 * len(data),\n",
" coloraxis={\"colorscale\": \"Viridis\"},\n",
" title_text=f\"Heatmap Comparison - {param}\",\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22",
"metadata": {},
"outputs": [],
"source": [
"# Time slice comparison\n",
"frames = []\n",
"for name, r in {\"Original\": None, **results}.items():\n",
" df = raw if r is None else r.reconstructed\n",
" sliced = df.loc[\"20100210\":\"20100218\", [\"Load\"]].copy()\n",
" sliced[\"Method\"] = name\n",
" frames.append(sliced)\n",
"\n",
"px.line(\n",
" pd.concat(frames).reset_index(names=\"Time\"),\n",
" x=\"Time\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Time Slice Comparison - Load (Feb 10-18)\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "23",
"metadata": {},
"source": [
"### Validation\n",
"\n",
"Column means should be preserved (except maxoid, which uses `preserve_column_means=False`)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24",
"metadata": {},
"outputs": [],
"source": [
"means = pd.DataFrame(\n",
" {\"Original\": raw.mean()}\n",
" | {name: r.reconstructed.mean() for name, r in results.items()}\n",
")\n",
"means"
]
}
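,
{
"cell_type": "markdown",
"id": "37",
"metadata": {},
"source": [
"To make the deviations easier to read, the following sketch shows each method's column means relative to the original ones (a value of 1.0 means the mean is preserved exactly):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: reconstructed column means relative to the original means.\n",
"means.div(means[\"Original\"], axis=0)"
]
}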
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/segmentation.ipynb
================================================
{
"cells": [
{
"cell_type": "markdown",
"id": "0",
"metadata": {},
"source": [
"# Segmentation\n",
"\n",
"How to use time step segmentation to reduce the number of timesteps per period.\n",
"\n",
"Author: Maximilian Hoffmann"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.io as pio\n",
"\n",
"import tsam\n",
"from tsam import ClusterConfig, SegmentConfig\n",
"\n",
"pio.renderers.default = \"notebook_connected\""
]
},
{
"cell_type": "markdown",
"id": "2",
"metadata": {},
"source": [
"### Input data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3",
"metadata": {},
"outputs": [],
"source": [
"raw = pd.read_csv(\"testdata.csv\", index_col=0)"
]
},
{
"cell_type": "markdown",
"id": "4",
"metadata": {},
"source": [
"### 10 typical days at hourly resolution (no segmentation)\n",
"\n",
"Baseline: hierarchical clustering with medoid representation and 24 hourly timesteps."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"result = tsam.aggregate(\n",
" raw,\n",
" n_clusters=10,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
")\n",
"result.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"result.plot.cluster_members()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7",
"metadata": {},
"outputs": [],
"source": [
"result.cluster_representatives"
]
},
{
"cell_type": "markdown",
"id": "8",
"metadata": {},
"source": [
"### 20 typical days with 12 irregular segments\n",
"\n",
"Segmentation reduces the number of timesteps per period while preserving key transitions."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"metadata": {},
"outputs": [],
"source": [
"result_segmented = tsam.aggregate(\n",
" raw,\n",
" n_clusters=20,\n",
" period_duration=24,\n",
" cluster=ClusterConfig(method=\"hierarchical\"),\n",
" segments=SegmentConfig(n_segments=12),\n",
")\n",
"result_segmented.accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10",
"metadata": {},
"outputs": [],
"source": [
"result_segmented.plot.cluster_members()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"metadata": {},
"outputs": [],
"source": [
"result_segmented.plot.segment_durations()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12",
"metadata": {},
"outputs": [],
"source": [
"result_segmented.cluster_representatives"
]
},
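{
"cell_type": "markdown",
"id": "19",
"metadata": {},
"source": [
"Both configurations store the same number of values per column (10 x 24 = 240 and 20 x 12 = 240), so the comparison below is a like-for-like data reduction. A quick sketch using the `n_clusters` and `n_segments` result properties:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "20",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: compare the stored data volume of both aggregations with the\n",
"# number of original timesteps.\n",
"print(len(raw), \"original timesteps\")\n",
"print(result.n_clusters * 24, \"values per column for 10 x 24h\")\n",
"print(result_segmented.n_clusters * result_segmented.n_segments, \"values per column for 20 x 12seg\")"
]
},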
{
"cell_type": "markdown",
"id": "13",
"metadata": {},
"source": [
"### Comparison"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14",
"metadata": {},
"outputs": [],
"source": [
"results = {\n",
" \"10 x 24h\": result,\n",
" \"20 x 12seg\": result_segmented,\n",
"}\n",
"\n",
"# Duration curves\n",
"frames = []\n",
"for name, r in {\"Original\": None, **results}.items():\n",
" vals = (raw if r is None else r.reconstructed)[\"Load\"]\n",
" sorted_vals = vals.sort_values(ascending=False).reset_index(drop=True)\n",
" frames.append(\n",
" pd.DataFrame(\n",
" {\"Hour\": range(len(sorted_vals)), \"Load\": sorted_vals, \"Method\": name}\n",
" )\n",
" )\n",
"\n",
"px.line(\n",
" pd.concat(frames, ignore_index=True),\n",
" x=\"Hour\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Duration Curve Comparison - Load\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15",
"metadata": {},
"outputs": [],
"source": [
"# Heatmap comparison\n",
"param = \"GHI\"\n",
"\n",
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"labels = [\"Original\", \"10 x 24h\", \"20 x 12seg\"]\n",
"data = [\n",
" tsam.unstack_to_periods(raw, period_duration=24),\n",
" tsam.unstack_to_periods(result.reconstructed, period_duration=24),\n",
" tsam.unstack_to_periods(result_segmented.reconstructed, period_duration=24),\n",
"]\n",
"\n",
"fig = make_subplots(rows=3, cols=1, subplot_titles=labels, vertical_spacing=0.05)\n",
"for i, d in enumerate(data, 1):\n",
" fig.add_trace(go.Heatmap(z=d[param].values.T, coloraxis=\"coloraxis\"), row=i, col=1)\n",
"fig.update_layout(\n",
" height=750,\n",
" coloraxis={\"colorscale\": \"Viridis\"},\n",
" title_text=f\"Heatmap Comparison - {param}\",\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16",
"metadata": {},
"outputs": [],
"source": [
"# Time slice comparison\n",
"frames = []\n",
"for name, r in {\"Original\": None, **results}.items():\n",
" df = raw if r is None else r.reconstructed\n",
" sliced = df.loc[\"20100210\":\"20100218\", [\"Load\"]].copy()\n",
" sliced[\"Method\"] = name\n",
" frames.append(sliced)\n",
"\n",
"px.line(\n",
" pd.concat(frames).reset_index(names=\"Time\"),\n",
" x=\"Time\",\n",
" y=\"Load\",\n",
" color=\"Method\",\n",
" title=\"Time Slice Comparison - Load (Feb 10-18)\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "17",
"metadata": {},
"source": [
"### Validation\n",
"\n",
"Column means should be preserved for both approaches."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
"source": [
"means = pd.DataFrame(\n",
" {\n",
" \"Original\": raw.mean(),\n",
" \"10 x 24h\": result.reconstructed.mean(),\n",
" \"20 x 12seg\": result_segmented.reconstructed.mean(),\n",
" }\n",
")\n",
"means"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tsam_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
================================================
FILE: docs/notebooks/testdata.csv
================================================
,GHI,T,Wind,Load
2009-12-31 23:30:00,0,-2.1,7.1,375.4783938
2010-01-01 00:30:00,0,-2.8,8.6,364.5413263
2010-01-01 01:30:00,0,-3.3,9.7,357.4168443
2010-01-01 02:30:00,0,-3.2,9.8,350.1913058
2010-01-01 03:30:00,0,-3.2,9.4,345.161449
2010-01-01 04:30:00,0,-3.2,10.0,340.678216
2010-01-01 05:30:00,0,-3.2,8.0,331.0686632
2010-01-01 06:30:00,0,-3.1,9.0,326.0985216
2010-01-01 07:30:00,2,-3.0,10.0,330.3107396000001
2010-01-01 08:30:00,28,-3.0,10.0,344.8215318
2010-01-01 09:30:00,51,-2.7,10.0,359.92947589999994
2010-01-01 10:30:00,63,-2.2,9.0,381.4085719
2010-01-01 11:30:00,63,-1.8,8.0,390.8022313
2010-01-01 12:30:00,52,-2.0,8.0,395.1384731
2010-01-01 13:30:00,30,-1.5,7.0,398.7489456
2010-01-01 14:30:00,3,-1.1,6.0,393.7282758
2010-01-01 15:30:00,0,-0.9,5.0,396.9942376
2010-01-01 16:30:00,0,-0.3,4.0,419.3047529
2010-01-01 17:30:00,0,-0.2,4.0,429.14857300000006
2010-01-01 18:30:00,0,0.1,4.0,421.65661280000006
2010-01-01 19:30:00,0,0.0,5.0,408.4963027
2010-01-01 20:30:00,0,-0.1,3.0,405.1844062
2010-01-01 21:30:00,0,0.5,4.0,400.6460514
2010-01-01 22:30:00,0,0.7,5.0,381.679587
2010-01-01 23:30:00,0,0.3,5.0,363.2827137
2010-01-02 00:30:00,0,0.0,5.0,353.374585
2010-01-02 01:30:00,0,0.5,4.0,348.9970019
2010-01-02 02:30:00,0,1.1,3.0,351.8862986
2010-01-02 03:30:00,0,1.4,6.0,360.0718737
2010-01-02 04:30:00,0,1.5,3.0,373.2229968
2010-01-02 05:30:00,0,1.5,4.0,409.8605652
2010-01-02 06:30:00,0,2.0,6.0,448.43198719999987
2010-01-02 07:30:00,3,2.7,2.0,475.1154921
2010-01-02 08:30:00,28,2.8,3.0,483.6501714
2010-01-02 09:30:00,51,2.9,3.0,490.97217289999986
2010-01-02 10:30:00,64,3.0,2.0,504.83528489999986
2010-01-02 11:30:00,64,3.4,6.0,504.1370765
2010-01-02 12:30:00,52,4.1,5.0,504.6699198
2010-01-02 13:30:00,76,4.6,5.0,504.1554504
2010-01-02 14:30:00,12,5.2,6.0,502.4191163
2010-01-02 15:30:00,0,5.1,6.0,505.3405673
2010-01-02 16:30:00,0,4.8,7.0,526.8196633
2010-01-02 17:30:00,0,4.1,5.0,529.2450188999999
2010-01-02 18:30:00,0,4.8,6.0,516.0893023
2010-01-02 19:30:00,0,4.6,6.0,487.2744243
2010-01-02 20:30:00,0,4.6,4.0,468.08747310000007
2010-01-02 21:30:00,0,4.3,6.0,449.6768194
2010-01-02 22:30:00,0,4.3,3.0,421.3166956
2010-01-02 23:30:00,0,4.7,3.0,400.9905621
2010-01-03 00:30:00,0,4.6,4.0,389.6400818
2010-01-03 01:30:00,0,4.8,5.0,386.0801375
2010-01-03 02:30:00,0,4.6,5.0,383.452669
2010-01-03 03:30:00,0,4.7,5.0,397.2285049
2010-01-03 04:30:00,0,4.0,5.0,413.7971746
2010-01-03 05:30:00,0,4.0,7.0,445.1752124
2010-01-03 06:30:00,0,3.7,7.0,483.3837498
2010-01-03 07:30:00,3,3.6,4.0,507.15499050000005
2010-01-03 08:30:00,29,3.4,4.0,517.311167
2010-01-03 09:30:00,51,3.5,4.0,528.9418495
2010-01-03 10:30:00,121,3.1,5.0,538.8545717000002
2010-01-03 11:30:00,193,3.4,4.0,542.5615072
2010-01-03 12:30:00,100,3.8,4.0,539.4425367
2010-01-03 13:30:00,31,3.1,4.0,529.5527819
2010-01-03 14:30:00,4,3.4,4.0,524.1278861000002
2010-01-03 15:30:00,0,2.6,3.0,522.9014279
2010-01-03 16:30:00,0,2.1,2.0,538.1196155
2010-01-03 17:30:00,0,1.9,3.0,539.9478191000002
2010-01-03 18:30:00,0,1.4,3.0,526.2914135999998
2010-01-03 19:30:00,0,1.0,3.0,493.6639501
2010-01-03 20:30:00,0,0.6,3.0,480.8159964
2010-01-03 21:30:00,0,0.2,2.0,461.261567
2010-01-03 22:30:00,0,0.1,4.0,431.0548657
2010-01-03 23:30:00,0,0.3,4.0,397.4949265
2010-01-04 00:30:00,0,0.8,6.0,383.5904732
2010-01-04 01:30:00,0,1.1,8.0,379.837603
2010-01-04 02:30:00,0,1.2,10.0,380.4026006
2010-01-04 03:30:00,0,1.1,9.0,389.0934581
2010-01-04 04:30:00,0,1.3,9.0,405.09713010000013
2010-01-04 05:30:00,0,1.3,8.0,441.3810008
2010-01-04 06:30:00,0,1.6,9.0,469.6400681
2010-01-04 07:30:00,3,1.7,9.0,493.5169589
2010-01-04 08:30:00,29,1.9,8.0,500.02591500000005
2010-01-04 09:30:00,52,2.3,9.0,511.5693214
2010-01-04 10:30:00,64,2.5,8.0,522.1802521000002
2010-01-04 11:30:00,65,2.3,8.0,526.7461677
2010-01-04 12:30:00,53,2.0,8.0,521.7392784
2010-01-04 13:30:00,31,2.4,7.0,512.7176906000002
2010-01-04 14:30:00,5,2.7,8.0,509.6722157
2010-01-04 15:30:00,0,3.2,5.0,507.2927948
2010-01-04 16:30:00,0,4.3,7.0,518.188521
2010-01-04 17:30:00,0,4.7,6.0,510.9400152
2010-01-04 18:30:00,0,4.7,5.0,495.7815428
2010-01-04 19:30:00,0,4.3,7.0,466.746178
2010-01-04 20:30:00,0,4.0,6.0,457.0585361
2010-01-04 21:30:00,0,4.7,8.0,441.3075052
2010-01-04 22:30:00,0,4.0,6.0,411.3029168
2010-01-04 23:30:00,0,3.6,7.0,392.4696633
2010-01-05 00:30:00,0,3.4,4.0,376.0801392
2010-01-05 01:30:00,0,3.4,5.0,366.9712753
2010-01-05 02:30:00,0,3.3,5.0,363.7925896
2010-01-05 03:30:00,0,3.5,6.0,366.8977797
2010-01-05 04:30:00,0,3.7,6.0,364.9409587
2010-01-05 05:30:00,0,3.7,8.0,363.043853
2010-01-05 06:30:00,0,4.0,8.0,373.6639706
2010-01-05 07:30:00,3,4.2,8.0,399.4701214
2010-01-05 08:30:00,29,4.1,10.0,419.663044
2010-01-05 09:30:00,52,3.4,11.0,437.0998808
2010-01-05 10:30:00,65,3.6,10.0,446.530288
2010-01-05 11:30:00,65,3.5,10.0,446.3098011
2010-01-05 12:30:00,54,4.0,8.0,445.8320795
2010-01-05 13:30:00,32,4.7,8.0,438.5284519
2010-01-05 14:30:00,5,5.6,10.0,435.7172443
2010-01-05 15:30:00,0,6.2,9.0,442.1756723
2010-01-05 16:30:00,0,6.7,9.0,459.56657419999993
2010-01-05 17:30:00,0,7.2,10.0,462.80956860000015
2010-01-05 18:30:00,0,7.7,8.0,456.0617517
2010-01-05 19:30:00,0,7.9,9.0,438.9740192
2010-01-
================================================
SYMBOL INDEX (433 symbols across 53 files)
================================================
FILE: benchmarks/bench.py
function _run (line 47) | def _run(case) -> None:
function test_bench (line 62) | def test_bench(case, benchmark):
FILE: benchmarks/benchmark_tuning.py
function main (line 27) | def main() -> None:
FILE: src/tsam/__init__.py
function __getattr__ (line 47) | def __getattr__(name: str):
FILE: src/tsam/api.py
function _weighted_mean (line 28) | def _weighted_mean(
function _weighted_rms (line 53) | def _weighted_rms(
function _parse_duration_hours (line 83) | def _parse_duration_hours(value: int | float | str, param_name: str) -> ...
function aggregate (line 109) | def aggregate(
function _build_clustering_result (line 426) | def _build_clustering_result(
function _apply_representation_params (line 530) | def _apply_representation_params(
function _build_old_params (line 569) | def _build_old_params(
function unstack_to_periods (line 678) | def unstack_to_periods(
FILE: src/tsam/config.py
class Distribution (line 44) | class Distribution:
method to_dict (line 60) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 70) | def from_dict(cls, data: dict) -> Distribution:
class MinMaxMean (line 79) | class MinMaxMean:
method to_dict (line 95) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 105) | def from_dict(cls, data: dict) -> MinMaxMean:
function _resolve_representation (line 117) | def _resolve_representation(rep: Representation) -> Representation:
function _representation_to_dict (line 136) | def _representation_to_dict(rep: Representation) -> str | dict[str, Any]:
function _representation_from_dict (line 143) | def _representation_from_dict(data: str | dict) -> Representation:
function _time_index_to_dict (line 156) | def _time_index_to_dict(idx: pd.DatetimeIndex) -> dict[str, Any] | list[...
function _time_index_from_dict (line 168) | def _time_index_from_dict(
class ClusterConfig (line 178) | class ClusterConfig:
method __post_init__ (line 248) | def __post_init__(self) -> None:
method get_representation (line 258) | def get_representation(self) -> Representation:
method to_dict (line 274) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 292) | def from_dict(cls, data: dict) -> ClusterConfig:
class SegmentConfig (line 310) | class SegmentConfig:
method __post_init__ (line 336) | def __post_init__(self) -> None:
method to_dict (line 342) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 350) | def from_dict(cls, data: dict) -> SegmentConfig:
function _validate_disaggregate_input (line 359) | def _validate_disaggregate_input(
function _expand_segments_to_timesteps (line 423) | def _expand_segments_to_timesteps(
function _expand_periods (line 470) | def _expand_periods(
class ClusteringResult (line 502) | class ClusteringResult:
method __post_init__ (line 608) | def __post_init__(self) -> None:
method n_clusters (line 623) | def n_clusters(self) -> int:
method n_original_periods (line 628) | def n_original_periods(self) -> int:
method n_segments (line 633) | def n_segments(self) -> int | None:
method __repr__ (line 639) | def __repr__(self) -> str:
method to_dataframe (line 664) | def to_dataframe(self) -> pd.DataFrame:
method segment_dataframe (line 688) | def segment_dataframe(self) -> pd.DataFrame | None:
method to_dict (line 712) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 754) | def from_dict(cls, data: dict) -> ClusteringResult:
method to_json (line 800) | def to_json(self, path: str) -> None:
method from_json (line 839) | def from_json(cls, path: str) -> ClusteringResult:
method disaggregate (line 862) | def disaggregate(self, data: pd.DataFrame) -> pd.DataFrame:
method apply (line 929) | def apply(
class ExtremeConfig (line 1164) | class ExtremeConfig:
method has_extremes (line 1203) | def has_extremes(self) -> bool:
method to_dict (line 1209) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 1225) | def from_dict(cls, data: dict) -> ExtremeConfig:
FILE: src/tsam/exceptions.py
class LegacyAPIWarning (line 4) | class LegacyAPIWarning(FutureWarning):
FILE: src/tsam/hyperparametertuning.py
function getNoPeriodsForDataReduction (line 11) | def getNoPeriodsForDataReduction(noRawTimeSteps, segmentsPerPeriod, data...
function getNoSegmentsForDataReduction (line 38) | def getNoSegmentsForDataReduction(noRawTimeSteps, typicalPeriods, dataRe...
class HyperTunedAggregations (line 65) | class HyperTunedAggregations:
method __init__ (line 66) | def __init__(self, base_aggregation, saveAggregationHistory=True):
method _testAggregation (line 106) | def _testAggregation(self, noTypicalPeriods, noSegments):
method _deleteTestHistory (line 131) | def _deleteTestHistory(self, index):
method identifyOptimalSegmentPeriodCombination (line 142) | def identifyOptimalSegmentPeriodCombination(self, dataReduction):
method identifyParetoOptimalAggregation (line 212) | def identifyParetoOptimalAggregation(self, untilTotalTimeSteps=None):
FILE: src/tsam/periodAggregation.py
function aggregatePeriods (line 6) | def aggregatePeriods(
FILE: src/tsam/plot.py
function _validate_columns (line 45) | def _validate_columns(
function _duration_curve_figure (line 94) | def _duration_curve_figure(
class ResultPlotAccessor (line 125) | class ResultPlotAccessor:
method __init__ (line 140) | def __init__(self, result: AggregationResult):
method cluster_representatives (line 143) | def cluster_representatives(
method cluster_members (line 193) | def cluster_members(
method cluster_weights (line 448) | def cluster_weights(self, title: str = "Cluster Weights") -> go.Figure:
method accuracy (line 482) | def accuracy(self, title: str = "Accuracy Metrics") -> go.Figure:
method segment_durations (line 522) | def segment_durations(self, title: str = "Segment Durations") -> go.Fi...
method compare (line 581) | def compare(
method residuals (line 666) | def residuals(
FILE: src/tsam/representations.py
function representations (line 7) | def representations(
function maxoidRepresentation (line 58) | def maxoidRepresentation(candidates, clusterOrder):
function medoidRepresentation (line 83) | def medoidRepresentation(candidates, clusterOrder):
function meanRepresentation (line 108) | def meanRepresentation(candidates, clusterOrder):
function minmaxmeanRepresentation (line 129) | def minmaxmeanRepresentation(
FILE: src/tsam/result.py
class AccuracyMetrics (line 19) | class AccuracyMetrics:
method summary (line 63) | def summary(self) -> pd.DataFrame:
method __repr__ (line 83) | def __repr__(self) -> str:
class AggregationResult (line 100) | class AggregationResult:
method n_clusters (line 176) | def n_clusters(self) -> int:
method n_segments (line 186) | def n_segments(self) -> int | None:
method cluster_assignments (line 191) | def cluster_assignments(self) -> np.ndarray:
method __repr__ (line 199) | def __repr__(self) -> str:
method original (line 211) | def original(self) -> pd.DataFrame:
method reconstructed (line 228) | def reconstructed(self) -> pd.DataFrame:
method disaggregate (line 247) | def disaggregate(self, data: pd.DataFrame) -> pd.DataFrame:
method residuals (line 280) | def residuals(self) -> pd.DataFrame:
method to_dict (line 297) | def to_dict(self) -> dict:
method timestep_index (line 327) | def timestep_index(self) -> list[int]:
method period_index (line 340) | def period_index(self) -> list[int]:
method assignments (line 354) | def assignments(self) -> pd.DataFrame:
method plot (line 430) | def plot(self) -> ResultPlotAccessor:
FILE: src/tsam/timeseriesaggregation.py
function unstackToPeriods (line 27) | def unstackToPeriods(timeSeries, timeStepsPerPeriod):
class TimeSeriesAggregation (line 88) | class TimeSeriesAggregation:
method __init__ (line 119) | def __init__(
method _check_init_args (line 376) | def _check_init_args(self):
method _normalizeTimeSeries (line 575) | def _normalizeTimeSeries(self, sameMean=False):
method _unnormalizeTimeSeries (line 598) | def _unnormalizeTimeSeries(self, normalizedTimeSeries, sameMean=False):
method _preProcessTimeSeries (line 627) | def _preProcessTimeSeries(self):
method _postProcessTimeSeries (line 665) | def _postProcessTimeSeries(self, normalizedTimeSeries, applyWeighting=...
method _addExtremePeriods (line 686) | def _addExtremePeriods(
method _append_col_with (line 877) | def _append_col_with(self, column, append_with=" max."):
method _rescaleClusterPeriods (line 887) | def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extreme...
method _clusterSortedPeriods (line 986) | def _clusterSortedPeriods(
method createTypicalPeriods (line 1057) | def createTypicalPeriods(self):
method prepareEnersysInput (line 1244) | def prepareEnersysInput(self):
method stepIdx (line 1256) | def stepIdx(self):
method clusterPeriodIdx (line 1266) | def clusterPeriodIdx(self):
method clusterOrder (line 1275) | def clusterOrder(self):
method clusterPeriodNoOccur (line 1285) | def clusterPeriodNoOccur(self):
method clusterPeriodDict (line 1294) | def clusterPeriodDict(self):
method segmentDurationDict (line 1307) | def segmentDurationDict(self):
method predictOriginalData (line 1334) | def predictOriginalData(self):
method indexMatching (line 1381) | def indexMatching(self):
method accuracyIndicators (line 1428) | def accuracyIndicators(self):
method totalAccuracyIndicators (line 1462) | def totalAccuracyIndicators(self):
FILE: src/tsam/tuning.py
class _AggregateOpts (line 36) | class _AggregateOpts(TypedDict):
function _test_single_config_file (line 53) | def _test_single_config_file(
function _infer_temporal_resolution (line 112) | def _infer_temporal_resolution(data: pd.DataFrame) -> float:
function _parallel_context (line 131) | def _parallel_context(
function _test_configs (line 168) | def _test_configs(
function _get_n_workers (line 250) | def _get_n_workers(n_jobs: int | None) -> int:
class TuningResult (line 271) | class TuningResult:
method summary (line 309) | def summary(self) -> pd.DataFrame:
method find_by_timesteps (line 316) | def find_by_timesteps(self, target: int) -> AggregationResult:
method find_by_rmse (line 342) | def find_by_rmse(self, threshold: float) -> AggregationResult:
method plot (line 374) | def plot(self, show_labels: bool = True, **kwargs: object) -> object:
method __len__ (line 406) | def __len__(self) -> int:
method __getitem__ (line 409) | def __getitem__(self, index: int) -> AggregationResult:
method __iter__ (line 412) | def __iter__(self):
function find_clusters_for_reduction (line 416) | def find_clusters_for_reduction(
function find_segments_for_reduction (line 445) | def find_segments_for_reduction(
function find_optimal_combination (line 474) | def find_optimal_combination(
function find_pareto_front (line 658) | def find_pareto_front(
function _find_pareto_front_targeted (line 814) | def _find_pareto_front_targeted(
function _find_pareto_front_steepest (line 909) | def _find_pareto_front_steepest(
FILE: src/tsam/utils/durationRepresentation.py
function durationRepresentation (line 9) | def durationRepresentation(
function _representMinMax (line 151) | def _representMinMax(
FILE: src/tsam/utils/k_maxoids.py
class KMaxoids (line 10) | class KMaxoids(BaseEstimator, ClusterMixin, TransformerMixin):
method __init__ (line 21) | def __init__(
method _check_init_args (line 30) | def _check_init_args(self):
method fit (line 54) | def fit(self, X, y=None):
method _check_array (line 77) | def _check_array(self, X):
method k_maxoids (line 92) | def k_maxoids(self, X, k, numpasses=5, doLogarithmic=False, n_init=100):
FILE: src/tsam/utils/k_medoids_contiguity.py
function k_medoids_contiguity (line 20) | def k_medoids_contiguity(
function _contiguity_to_graph (line 115) | def _contiguity_to_graph(adjacency, distances=None):
FILE: src/tsam/utils/k_medoids_exact.py
class KMedoids (line 15) | class KMedoids(BaseEstimator, ClusterMixin, TransformerMixin):
method __init__ (line 35) | def __init__(
method _check_init_args (line 53) | def _check_init_args(self):
method fit (line 77) | def fit(self, X, y=None):
method _check_array (line 117) | def _check_array(self, X):
method _k_medoids_exact (line 132) | def _k_medoids_exact(self, distances, n_clusters):
function _setup_k_medoids (line 151) | def _setup_k_medoids(distances, n_clusters):
function _solve_given_pyomo_model (line 205) | def _solve_given_pyomo_model(M, solver="highs"):
FILE: src/tsam/utils/segmentation.py
function segmentation (line 8) | def segmentation(
FILE: test/_configs.py
function _make_constant (line 37) | def _make_constant() -> pd.DataFrame:
function _make_with_zero_column (line 43) | def _make_with_zero_column() -> pd.DataFrame:
function get_data (line 60) | def get_data(name: str, max_timesteps: int | None = None) -> pd.DataFrame:
class BaseConfig (line 83) | class BaseConfig:
class OldCase (line 743) | class OldCase:
function build_old_cases (line 753) | def build_old_cases(configs: list[BaseConfig] | None = None) -> list[Old...
function case_ids (line 779) | def case_ids(cases: list) -> list[str]:
FILE: test/_old_new_equivalence.py
class EquivalenceCase (line 469) | class EquivalenceCase:
function _build_cases (line 481) | def _build_cases() -> list[EquivalenceCase]:
function _run_old (line 518) | def _run_old(data: pd.DataFrame, case: EquivalenceCase):
function _run_new (line 529) | def _run_new(data: pd.DataFrame, case: EquivalenceCase):
function _suppress_windows_kmeans_warnings (line 537) | def _suppress_windows_kmeans_warnings(case: EquivalenceCase):
class TestOldNewEquivalence (line 565) | class TestOldNewEquivalence:
method test_cluster_representatives (line 569) | def test_cluster_representatives(self, case: EquivalenceCase):
method test_cluster_assignments (line 583) | def test_cluster_assignments(self, case: EquivalenceCase):
method test_accuracy (line 596) | def test_accuracy(self, case: EquivalenceCase):
method test_reconstruction (line 617) | def test_reconstruction(self, case: EquivalenceCase):
FILE: test/conftest.py
function pytest_addoption (line 26) | def pytest_addoption(parser):
function update_golden (line 36) | def update_golden(request):
FILE: test/generate_golden.py
function main (line 28) | def main() -> None:
FILE: test/same_cluster_as_input_data.py
function input_data (line 31) | def input_data() -> pd.DataFrame:
function test_same_cluster_as_input_data (line 58) | def test_same_cluster_as_input_data(
FILE: test/test_accuracyIndicators.py
function test_accuracyIndicators (line 8) | def test_accuracyIndicators():
FILE: test/test_adjacent_periods.py
function test_adjacent_periods (line 10) | def test_adjacent_periods():
FILE: test/test_aggregate_hiearchical.py
function test_aggregate_hiearchical (line 8) | def test_aggregate_hiearchical():
FILE: test/test_api_equivalence.py
function sample_data (line 31) | def sample_data():
function small_data (line 37) | def small_data(sample_data):
class TestAggregateEquivalence (line 42) | class TestAggregateEquivalence:
method test_hierarchical_default (line 45) | def test_hierarchical_default(self, sample_data):
method test_kmeans (line 88) | def test_kmeans(self, sample_data):
method test_hierarchical_with_medoid (line 125) | def test_hierarchical_with_medoid(self, sample_data):
method test_with_weights (line 151) | def test_with_weights(self, sample_data):
method test_with_segmentation (line 180) | def test_with_segmentation(self, sample_data):
method test_with_duration_curves (line 208) | def test_with_duration_curves(self, sample_data):
method test_with_extremes_append (line 234) | def test_with_extremes_append(self, sample_data):
method test_contiguous_clustering (line 262) | def test_contiguous_clustering(self, sample_data):
method test_rescale_off (line 287) | def test_rescale_off(self, sample_data):
method test_distribution_minmax_representation (line 314) | def test_distribution_minmax_representation(self, sample_data):
class TestRepresentationObjects (line 343) | class TestRepresentationObjects:
method test_distribution_global_equivalence (line 346) | def test_distribution_global_equivalence(self, sample_data):
method test_distribution_cluster_equivalence (line 373) | def test_distribution_cluster_equivalence(self, sample_data):
method test_distribution_minmax_global_equivalence (line 400) | def test_distribution_minmax_global_equivalence(self, sample_data):
method test_minmaxmean_equivalence (line 427) | def test_minmaxmean_equivalence(self, sample_data):
method test_segment_distribution_global_equivalence (line 457) | def test_segment_distribution_global_equivalence(self, sample_data):
method test_segment_distribution_global_roundtrip (line 487) | def test_segment_distribution_global_roundtrip(self, sample_data, tmp_...
method test_representation_object_json_roundtrip (line 509) | def test_representation_object_json_roundtrip(self, sample_data, tmp_p...
class TestTuningEquivalence (line 537) | class TestTuningEquivalence:
method test_find_clusters_for_reduction (line 540) | def test_find_clusters_for_reduction(self):
method test_find_segments_for_reduction (line 561) | def test_find_segments_for_reduction(self):
method test_find_optimal_combination (line 577) | def test_find_optimal_combination(self, sample_data):
method test_find_pareto_front (line 617) | def test_find_pareto_front(self, small_data):
method test_find_optimal_combination_save_all_results (line 655) | def test_find_optimal_combination_save_all_results(self, small_data):
class TestSubhourlyResolution (line 674) | class TestSubhourlyResolution:
method test_15min_resolution (line 677) | def test_15min_resolution(self):
method test_tuning_with_15min_resolution (line 714) | def test_tuning_with_15min_resolution(self):
class TestReconstructionEquivalence (line 740) | class TestReconstructionEquivalence:
method test_reconstruct_matches_old_predict (line 743) | def test_reconstruct_matches_old_predict(self, sample_data):
FILE: test/test_assert_raises.py
function test_assert_raises (line 11) | def test_assert_raises():
FILE: test/test_averaging.py
function test_averaging (line 10) | def test_averaging():
FILE: test/test_cluster_order.py
function test_cluster_order (line 10) | def test_cluster_order():
FILE: test/test_clustering_e2e.py
function _suppress_windows_kmeans_warnings (line 31) | def _suppress_windows_kmeans_warnings(case_id: str):
function set_random_seed (line 48) | def set_random_seed():
class ClusteringTestCase (line 60) | class ClusteringTestCase(NamedTuple):
function get_test_ids (line 176) | def get_test_ids():
function input_data (line 182) | def input_data():
function fixtures_dir (line 188) | def fixtures_dir():
function run_aggregation (line 193) | def run_aggregation(data: pd.DataFrame, test_case: ClusteringTestCase):
class TestClusteringE2E (line 230) | class TestClusteringE2E:
method test_cluster_representatives (line 234) | def test_cluster_representatives(
method test_cluster_weights (line 265) | def test_cluster_weights(
method test_accuracy_metrics (line 298) | def test_accuracy_metrics(
function get_transfer_test_ids (line 349) | def get_transfer_test_ids():
class TestClusteringTransfer (line 354) | class TestClusteringTransfer:
method test_apply_produces_identical_results (line 360) | def test_apply_produces_identical_results(
method test_json_roundtrip_produces_identical_results (line 382) | def test_json_roundtrip_produces_identical_results(
method test_reconstruction_shape (line 410) | def test_reconstruction_shape(self, test_case: ClusteringTestCase, inp...
method test_apply_to_different_columns (line 422) | def test_apply_to_different_columns(self, input_data):
method test_segmentation_preserved_in_transfer (line 441) | def test_segmentation_preserved_in_transfer(self, input_data, tmp_path):
function generate_fixtures (line 472) | def generate_fixtures(output_dir: Path | None = None):
FILE: test/test_disaggregate.py
function sample_data (line 12) | def sample_data():
function result (line 17) | def result(sample_data):
function result_segmented (line 22) | def result_segmented(sample_data):
class TestAggregationResultDisaggregate (line 26) | class TestAggregationResultDisaggregate:
method test_shape_matches_original (line 29) | def test_shape_matches_original(self, result):
method test_datetime_index_restored (line 33) | def test_datetime_index_restored(self, result):
method test_matches_reconstructed (line 37) | def test_matches_reconstructed(self, result):
method test_columns_preserved (line 43) | def test_columns_preserved(self, result):
method test_segmented_shape_matches_original (line 47) | def test_segmented_shape_matches_original(self, result_segmented):
method test_segmented_datetime_index (line 53) | def test_segmented_datetime_index(self, result_segmented):
method test_segmented_has_nan (line 59) | def test_segmented_has_nan(self, result_segmented):
method test_segmented_ffill_removes_nan (line 66) | def test_segmented_ffill_removes_nan(self, result_segmented):
method test_arbitrary_data (line 74) | def test_arbitrary_data(self, result):
method test_subset_columns (line 83) | def test_subset_columns(self, result):
method test_multiindex_columns (line 89) | def test_multiindex_columns(self, result):
class TestClusteringResultDisaggregate (line 105) | class TestClusteringResultDisaggregate:
method test_datetime_index_restored (line 108) | def test_datetime_index_restored(self, result):
method test_integer_index_when_no_time_index (line 116) | def test_integer_index_when_no_time_index(self, result):
method test_shape (line 124) | def test_shape(self, result):
method test_segmented (line 130) | def test_segmented(self, result_segmented):
method test_segmented_nan_count (line 138) | def test_segmented_nan_count(self, result_segmented):
method test_timestep_input_on_segmented_clustering_raises (line 149) | def test_timestep_input_on_segmented_clustering_raises(self, result_se...
method test_segmented_input_on_nonsegmented_clustering_raises (line 162) | def test_segmented_input_on_nonsegmented_clustering_raises(self, result):
method test_io_roundtrip (line 176) | def test_io_roundtrip(self, result, tmp_path):
method test_io_roundtrip_preserves_time_index (line 185) | def test_io_roundtrip_preserves_time_index(self, result, tmp_path):
method test_io_roundtrip_no_time_index (line 198) | def test_io_roundtrip_no_time_index(self, tmp_path):
method test_io_roundtrip_segmented (line 211) | def test_io_roundtrip_segmented(self, result_segmented, tmp_path):
class TestTimeIndexSerialization (line 228) | class TestTimeIndexSerialization:
method test_regular_index_compact (line 231) | def test_regular_index_compact(self):
method test_regular_index_roundtrip (line 240) | def test_regular_index_roundtrip(self):
method test_irregular_index_fallback (line 247) | def test_irregular_index_fallback(self):
method test_old_list_format_still_loads (line 256) | def test_old_list_format_still_loads(self):
class TestDisaggregateEdgeCases (line 264) | class TestDisaggregateEdgeCases:
method test_padded_last_period (line 267) | def test_padded_last_period(self, sample_data):
method test_single_cluster (line 276) | def test_single_cluster(self, sample_data):
method test_extreme_periods_append (line 285) | def test_extreme_periods_append(self, sample_data):
method test_extreme_periods_new_cluster (line 303) | def test_extreme_periods_new_cluster(self, sample_data):
method test_kmeans (line 321) | def test_kmeans(self, sample_data):
method test_segmented_nan_at_correct_positions (line 336) | def test_segmented_nan_at_correct_positions(self, result_segmented):
method test_reconstructed_unchanged_by_refactor (line 363) | def test_reconstructed_unchanged_by_refactor(self, sample_data):
class TestDisaggregateValidation (line 381) | class TestDisaggregateValidation:
method test_flat_index_raises (line 384) | def test_flat_index_raises(self, result):
method test_wrong_clusters_raises (line 388) | def test_wrong_clusters_raises(self, result):
method test_missing_cluster_raises (line 394) | def test_missing_cluster_raises(self, result):
method test_wrong_timesteps_raises (line 401) | def test_wrong_timesteps_raises(self, result):
FILE: test/test_durationCurve.py
function test_durationCurve (line 11) | def test_durationCurve():
FILE: test/test_durationRepresentation.py
function test_durationRepresentation (line 19) | def test_durationRepresentation():
function test_distributionMinMaxRepresentation (line 94) | def test_distributionMinMaxRepresentation():
function test_distributionRepresentation_keeps_mean (line 126) | def test_distributionRepresentation_keeps_mean():
FILE: test/test_extremePeriods.py
function test_extremePeriods (line 8) | def test_extremePeriods():
FILE: test/test_golden_regression.py
function _expected_warnings (line 51) | def _expected_warnings(case: EquivalenceCase):
function _expected_windows_kmeans_warnings (line 62) | def _expected_windows_kmeans_warnings(case: EquivalenceCase):
function _golden_path (line 79) | def _golden_path(case: EquivalenceCase) -> str:
function _save_golden (line 84) | def _save_golden(df: pd.DataFrame, case: EquivalenceCase) -> None:
function _load_golden (line 90) | def _load_golden(case: EquivalenceCase) -> pd.DataFrame:
class TestGoldenRegression (line 95) | class TestGoldenRegression:
method test_update_golden (line 99) | def test_update_golden(self, case: EquivalenceCase, update_golden):
method test_new_api_matches_golden (line 110) | def test_new_api_matches_golden(self, case: EquivalenceCase, update_go...
method test_old_api_matches_golden (line 136) | def test_old_api_matches_golden(self, case: EquivalenceCase, update_go...
FILE: test/test_hierarchical.py
function test_hierarchical (line 10) | def test_hierarchical():
function test_hierarchical_for_weeks (line 58) | def test_hierarchical_for_weeks():
FILE: test/test_hypertuneAggregation.py
function test_getPeriodPair (line 10) | def test_getPeriodPair():
function test_optimalPair (line 33) | def test_optimalPair():
function test_steepest_gradient_leads_to_optima (line 88) | def test_steepest_gradient_leads_to_optima():
function test_paretoOptimalAggregation (line 144) | def test_paretoOptimalAggregation():
FILE: test/test_k_maxoids.py
function test_k_maxoids (line 16) | def test_k_maxoids():
FILE: test/test_k_medoids.py
function test_k_medoids (line 10) | def test_k_medoids():
FILE: test/test_k_medoids_contiguity.py
function test_node_cuts (line 34) | def test_node_cuts():
function test_k_medoids_simple (line 42) | def test_k_medoids_simple():
function test_k_medoids_simple_contiguity (line 61) | def test_k_medoids_simple_contiguity():
FILE: test/test_minmaxRepresentation.py
function test_minmaxRepresentation (line 10) | def test_minmaxRepresentation():
FILE: test/test_new_api.py
function sample_data (line 16) | def sample_data():
class TestAggregate (line 21) | class TestAggregate:
method test_basic_aggregation (line 24) | def test_basic_aggregation(self, sample_data):
method test_with_cluster_config (line 33) | def test_with_cluster_config(self, sample_data):
method test_with_segmentation (line 47) | def test_with_segmentation(self, sample_data):
method test_with_extremes (line 58) | def test_with_extremes(self, sample_data):
method test_result_reconstructed (line 75) | def test_result_reconstructed(self, sample_data):
method test_result_to_dict (line 82) | def test_result_to_dict(self, sample_data):
method test_accuracy_metrics (line 91) | def test_accuracy_metrics(self, sample_data):
class TestValidation (line 101) | class TestValidation:
method test_invalid_n_clusters (line 104) | def test_invalid_n_clusters(self, sample_data):
method test_invalid_data_type (line 112) | def test_invalid_data_type(self):
method test_invalid_extreme_columns (line 117) | def test_invalid_extreme_columns(self, sample_data):
method test_invalid_weight_columns (line 126) | def test_invalid_weight_columns(self, sample_data):
method test_segments_exceeds_timesteps (line 135) | def test_segments_exceeds_timesteps(self, sample_data):
class TestClusterConfig (line 146) | class TestClusterConfig:
method test_default_representation (line 149) | def test_default_representation(self):
method test_explicit_representation (line 155) | def test_explicit_representation(self):
class TestImports (line 161) | class TestImports:
method test_top_level_imports (line 164) | def test_top_level_imports(self):
method test_version (line 175) | def test_version(self):
class TestAssignments (line 182) | class TestAssignments:
method test_assignments_basic (line 185) | def test_assignments_basic(self, sample_data):
method test_assignments_with_segmentation (line 204) | def test_assignments_with_segmentation(self, sample_data):
method test_assignments_values_valid (line 220) | def test_assignments_values_valid(self, sample_data):
class TestSegmentTransfer (line 241) | class TestSegmentTransfer:
method test_segment_assignments_and_durations_in_clustering (line 244) | def test_segment_assignments_and_durations_in_clustering(self, sample_...
method test_segment_transfer (line 267) | def test_segment_transfer(self, sample_data):
method test_segment_properties_none_without_segmentation (line 285) | def test_segment_properties_none_without_segmentation(self, sample_data):
class TestClusteringResult (line 293) | class TestClusteringResult:
method test_clustering_property_and_apply (line 296) | def test_clustering_property_and_apply(self, sample_data):
method test_clustering_apply_with_segments (line 314) | def test_clustering_apply_with_segments(self, sample_data):
method test_clustering_from_dict_and_json (line 332) | def test_clustering_from_dict_and_json(self, sample_data, tmp_path):
method test_clustering_includes_period_duration (line 357) | def test_clustering_includes_period_duration(self, sample_data, tmp_pa...
class TestDeterministicPreservation (line 375) | class TestDeterministicPreservation:
method test_transfer_fields_preserved_in_json (line 378) | def test_transfer_fields_preserved_in_json(self, sample_data, tmp_path):
method test_representation_method_deterministic (line 404) | def test_representation_method_deterministic(self, sample_data, tmp_pa...
method test_apply_to_different_data (line 427) | def test_apply_to_different_data(self, sample_data):
method test_segmentation_preserved_through_json (line 446) | def test_segmentation_preserved_through_json(self, sample_data, tmp_pa...
method test_preserve_column_means_setting_preserved (line 474) | def test_preserve_column_means_setting_preserved(self, sample_data, tm...
class TestSegmentConfigValidation (line 497) | class TestSegmentConfigValidation:
method test_n_segments_must_be_positive (line 500) | def test_n_segments_must_be_positive(self):
method test_valid_segment_config (line 508) | def test_valid_segment_config(self):
class TestSegmentCenters (line 515) | class TestSegmentCenters:
method test_segment_centers_with_medoid (line 518) | def test_segment_centers_with_medoid(self, sample_data):
method test_segment_centers_none_with_mean (line 543) | def test_segment_centers_none_with_mean(self, sample_data):
method test_segment_centers_preserved_in_json (line 554) | def test_segment_centers_preserved_in_json(self, sample_data, tmp_path):
method test_segment_centers_deterministic_transfer (line 580) | def test_segment_centers_deterministic_transfer(self, sample_data):
class TestDurationParsing (line 599) | class TestDurationParsing:
method test_period_duration_string (line 602) | def test_period_duration_string(self, sample_data):
method test_non_hour_period_duration (line 624) | def test_non_hour_period_duration(self):
method test_temporal_resolution_string (line 642) | def test_temporal_resolution_string(self, sample_data):
method test_invalid_duration_string (line 657) | def test_invalid_duration_string(self, sample_data):
method test_invalid_duration_type (line 662) | def test_invalid_duration_type(self, sample_data):
method test_negative_duration (line 667) | def test_negative_duration(self, sample_data):
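test/test_new_api.py exercises the functional v3 entry point, tsam.aggregate(), and its AggregationResult (both names appear elsewhere in this dump). A hedged sketch of the call shape these tests imply; the n_clusters keyword and the result accessor names are inferred from the test and config names in this index and may differ from the released signature:

```python
# Hedged sketch of the v3 functional API exercised by test/test_new_api.py.
# tsam.aggregate() and tsam.AggregationResult appear in this dump; the
# keyword n_clusters and the accessors below are inferred from the test
# names (test_result_reconstructed, test_result_to_dict) and are assumptions.
import numpy as np
import pandas as pd

import tsam

# One year of hourly dummy data with two series.
index = pd.date_range("2010-01-01", periods=8760, freq="h")
data = pd.DataFrame(
    {
        "Load": 400 + 50 * np.sin(np.arange(8760) * 2 * np.pi / 24),
        "Wind": np.random.default_rng(0).random(8760) * 10,
    },
    index=index,
)

result = tsam.aggregate(data, n_clusters=8)  # signature assumed

print(result.reconstructed.shape)  # full-length reconstruction
print(result.to_dict().keys())     # serializable summary
```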
FILE: test/test_plot.py
function sample_data (line 16) | def sample_data() -> pd.DataFrame:
function result (line 21) | def result(sample_data) -> tsam.AggregationResult:
function result_segmented (line 26) | def result_segmented(sample_data) -> tsam.AggregationResult:
class TestValidateColumns (line 35) | class TestValidateColumns:
method test_none_returns_all (line 36) | def test_none_returns_all(self):
method test_valid_subset (line 39) | def test_valid_subset(self):
method test_invalid_warns (line 42) | def test_invalid_warns(self):
method test_all_invalid_raises (line 47) | def test_all_invalid_raises(self):
class TestAccessor (line 55) | class TestAccessor:
method test_plot_returns_accessor (line 56) | def test_plot_returns_accessor(self, result):
class TestClusterRepresentatives (line 63) | class TestClusterRepresentatives:
method test_returns_figure (line 64) | def test_returns_figure(self, result):
method test_with_columns (line 68) | def test_with_columns(self, result):
class TestClusterMembers (line 77) | class TestClusterMembers:
method test_returns_figure (line 78) | def test_returns_figure(self, result):
method test_single_column (line 83) | def test_single_column(self, result):
method test_specific_clusters (line 88) | def test_specific_clusters(self, result):
method test_slider_column (line 92) | def test_slider_column(self, result):
method test_invalid_slider_raises (line 97) | def test_invalid_slider_raises(self, result):
method test_invalid_clusters_warns (line 101) | def test_invalid_clusters_warns(self, result):
method test_all_invalid_clusters_raises (line 106) | def test_all_invalid_clusters_raises(self, result):
method test_representative_trace_matches_data (line 110) | def test_representative_trace_matches_data(self, result):
method test_member_trace_contains_all_members (line 121) | def test_member_trace_contains_all_members(self, result):
method test_with_segmentation (line 137) | def test_with_segmentation(self, result_segmented):
method test_segmented_representative_expanded (line 141) | def test_segmented_representative_expanded(self, result_segmented):
class TestClusterWeights (line 156) | class TestClusterWeights:
method test_returns_figure (line 157) | def test_returns_figure(self, result):
class TestAccuracy (line 165) | class TestAccuracy:
method test_returns_figure (line 166) | def test_returns_figure(self, result):
class TestSegmentDurations (line 174) | class TestSegmentDurations:
method test_returns_figure (line 175) | def test_returns_figure(self, result_segmented):
method test_raises_without_segmentation (line 179) | def test_raises_without_segmentation(self, result):
class TestCompare (line 187) | class TestCompare:
method test_overlay (line 188) | def test_overlay(self, result):
method test_side_by_side (line 192) | def test_side_by_side(self, result):
method test_duration_curve (line 196) | def test_duration_curve(self, result):
method test_with_columns (line 200) | def test_with_columns(self, result):
method test_invalid_mode_raises (line 205) | def test_invalid_mode_raises(self, result):
class TestResiduals (line 213) | class TestResiduals:
method test_time_series (line 214) | def test_time_series(self, result):
method test_histogram (line 218) | def test_histogram(self, result):
method test_by_period (line 222) | def test_by_period(self, result):
method test_by_timestep (line 226) | def test_by_timestep(self, result):
method test_with_columns (line 230) | def test_with_columns(self, result):
method test_invalid_mode_raises (line 235) | def test_invalid_mode_raises(self, result):
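test/test_plot.py covers a plotting accessor hung off the result object (src/tsam/plot.py, "Plotting accessor for tsam aggregation results"). A sketch of how that accessor is likely used, with method and keyword names inferred from the test class and method names above (cluster_representatives, cluster_members, compare with a duration-curve mode); treat all of them as assumptions:

```python
# Hedged sketch of the plotting accessor covered by test/test_plot.py.
# result.plot returning an accessor is implied by TestAccessor; the
# method names and keywords are inferred from the test class names and
# are assumptions, not the documented API.
import numpy as np
import pandas as pd

import tsam

index = pd.date_range("2020-01-01", periods=24 * 28, freq="h")
data = pd.DataFrame(
    {"Load": np.random.default_rng(1).random(len(index))}, index=index
)

result = tsam.aggregate(data, n_clusters=4)  # signature assumed

fig = result.plot.cluster_representatives()          # one trace per cluster
fig = result.plot.cluster_members(columns=["Load"])  # members vs. representative
fig = result.plot.compare(mode="duration_curve")     # original vs. aggregated
fig.show()
```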
FILE: test/test_preprocess.py
function test_preprocess (line 8) | def test_preprocess():
FILE: test/test_properties.py
function test_properties (line 14) | def test_properties():
FILE: test/test_reconstruct_samemean_segmentation.py
function test_data (line 22) | def test_data():
class TestReconstructSameMeanSegmentation (line 36) | class TestReconstructSameMeanSegmentation:
method _check_reconstruction_bounds (line 39) | def _check_reconstruction_bounds(
method test_segments_only (line 56) | def test_segments_only(self, test_data):
method test_normalize_column_means_only (line 67) | def test_normalize_column_means_only(self, test_data):
method test_normalize_column_means_with_segments (line 78) | def test_normalize_column_means_with_segments(self, test_data):
method test_normalize_with_segments_mean_repr (line 92) | def test_normalize_with_segments_mean_repr(self, test_data):
method test_normalize_with_different_segment_counts (line 104) | def test_normalize_with_different_segment_counts(self, test_data):
FILE: test/test_samemean.py
function test_samemean (line 18) | def test_samemean():
FILE: test/test_segmentation.py
function test_segmentation (line 13) | def test_segmentation():
function test_representation_in_segmentation (line 61) | def test_representation_in_segmentation():
FILE: test/test_segmentation_weight_bug.py
function _make_data (line 19) | def _make_data():
class TestSegmentationWeightLeak (line 49) | class TestSegmentationWeightLeak:
method test_uniform_weights_equal_no_weights (line 52) | def test_uniform_weights_equal_no_weights(self):
method test_reconstructed_means_not_scaled_by_weight (line 72) | def test_reconstructed_means_not_scaled_by_weight(self):
method test_reconstructed_values_within_original_range (line 96) | def test_reconstructed_values_within_original_range(self):
method test_segmentation_samemean_weights (line 118) | def test_segmentation_samemean_weights(self):
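test_segmentation_weight_bug.py pins down an invariant worth spelling out: column weights should only steer the clustering distance, never scale the output, so uniform weights must reproduce the unweighted result exactly. A sketch of that property check under the assumed v3 signature (the weights keyword is an assumption inferred from test_invalid_weight_columns above):

```python
# The invariant behind test_uniform_weights_equal_no_weights, sketched
# with the assumed v3 signature: weights only affect the clustering
# distance, so uniform weights must reproduce the unweighted result.
import pandas as pd

import tsam


def assert_uniform_weights_are_noop(data: pd.DataFrame) -> None:
    unweighted = tsam.aggregate(data, n_clusters=8)
    weighted = tsam.aggregate(
        data,
        n_clusters=8,
        weights={col: 1.0 for col in data.columns},  # keyword assumed
    )
    # Any difference here would mean the weights leaked into the output
    # values instead of staying inside the distance metric.
    pd.testing.assert_frame_equal(
        unweighted.reconstructed, weighted.reconstructed
    )
```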
FILE: test/test_subhourlyResolution.py
function test_subhourlyResolution (line 14) | def test_subhourlyResolution():
FILE: test/test_subhourly_periods.py
function test_subhourly_periods (line 12) | def test_subhourly_periods():
FILE: test/test_weightingFactors.py
function test_weightingFactors (line 11) | def test_weightingFactors():
function test_uniform_weights_equal_no_weights (line 66) | def test_uniform_weights_equal_no_weights():
function test_reconstructed_within_original_range (line 90) | def test_reconstructed_within_original_range():
Condensed preview: 267 files, each showing path, character count, and a content snippet.
[
{
"path": ".github/pull_request_template.md",
"chars": 824,
"preview": "## Description\n\n<!-- Describe your changes in detail -->\n\n## Motivation and Context\n\n<!-- Why is this change required? W"
},
{
"path": ".github/renovate.json",
"chars": 1957,
"preview": "{\n \"$schema\": \"https://docs.renovatebot.com/renovate-schema.json\",\n \"extends\": [\n \"config:recommended\"\n "
},
{
"path": ".github/workflows/ci-develop.yaml",
"chars": 3486,
"preview": "name: CI (develop)\n\non:\n workflow_dispatch:\n pull_request:\n branches: [develop]\n\nconcurrency:\n group: ${{ github.w"
},
{
"path": ".github/workflows/ci-master.yaml",
"chars": 4562,
"preview": "name: CI (master)\n\non:\n workflow_dispatch:\n pull_request:\n branches: [master]\n\nconcurrency:\n group: ${{ github.wor"
},
{
"path": ".github/workflows/coverage.yaml",
"chars": 744,
"preview": "name: Coverage\n\non:\n workflow_dispatch:\n push:\n branches: [develop]\n\njobs:\n coverage:\n name: Upload coverage to"
},
{
"path": ".github/workflows/pr-title.yaml",
"chars": 1189,
"preview": "name: PR Title\n\non:\n push:\n branches: ['release-please--**']\n pull_request:\n types: [opened, edited, synchronize"
},
{
"path": ".github/workflows/publish.yaml",
"chars": 1066,
"preview": "name: Publish\n\non:\n push:\n tags: [\"v*\"]\n\npermissions:\n contents: write\n id-token: write\n\njobs:\n publish:\n name"
},
{
"path": ".github/workflows/push_to_jugit.yml",
"chars": 361,
"preview": "name: Mirror to JuGit\n\non:\n workflow_dispatch:\n push:\n branches:\n - develop\n - master\n\njobs:\n mirror:\n "
},
{
"path": ".github/workflows/release.yaml",
"chars": 1940,
"preview": "name: Release\n\non:\n push:\n branches: [master]\n\npermissions:\n contents: write\n pull-requests: write\n\njobs:\n releas"
},
{
"path": ".gitignore",
"chars": 788,
"preview": "# Python\r\n__pycache__/\r\nsrc/tsam/_version.py\r\n*.py[cod]\r\n*$py.class\r\n*.so\r\n.Python\r\nbuild/\r\ndevelop-eggs/\r\ndist/\r\ndownlo"
},
{
"path": ".pre-commit-config.yaml",
"chars": 817,
"preview": "repos:\n - repo: https://github.com/pre-commit/pre-commit-hooks\n rev: v4.6.0\n hooks:\n - id: trailing-whitespa"
},
{
"path": ".readthedocs.yml",
"chars": 259,
"preview": "# Required\nversion: 2\n\n# Build environment\nbuild:\n os: ubuntu-22.04\n tools:\n python: \"3.12\"\n apt_packages:\n - p"
},
{
"path": ".release-please-config.json",
"chars": 692,
"preview": "{\n \"$schema\": \"https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json\",\n \"include-compon"
},
{
"path": ".release-please-manifest.json",
"chars": 19,
"preview": "{\n \".\": \"3.3.0\"\n}\n"
},
{
"path": "CHANGELOG.md",
"chars": 16431,
"preview": "# ETHOS.TSAM Change Log\n\nAll notable changes to this project will be documented in this file.\n\nNew entries are automatic"
},
{
"path": "CONTRIBUTING.md",
"chars": 5199,
"preview": "# Contributing to ETHOS.TSAM\n\nThank you for your interest in contributing to ETHOS.TSAM! This document provides guidelin"
},
{
"path": "LICENSE.txt",
"chars": 1100,
"preview": "MIT License\n\nCopyright (c) 2017-2026 Forschungszentrum Jülich GmbH (ICE-2)\n\nPermission is hereby granted, free of charg"
},
{
"path": "README.md",
"chars": 10567,
"preview": "[](https://pypi.python.org/pypi/tsam) [ exec"
},
{
"path": "codecov.yml",
"chars": 181,
"preview": "coverage:\n status:\n project:\n default:\n target: auto\n threshold: 5%\n\ncomment:\n layout: \"reach,di"
},
{
"path": "docs/api/SUMMARY.md",
"chars": 743,
"preview": "* tsam\r\n * [api](tsam/api.md)\r\n * [config](tsam/config.md)\r\n * [exceptions](tsam/exceptions.md)\r\n * [hyperpa"
},
{
"path": "docs/api/tsam/api.md",
"chars": 28,
"preview": "# tsam.api\r\n\r\n::: tsam.api\r\n"
},
{
"path": "docs/api/tsam/config.md",
"chars": 34,
"preview": "# tsam.config\r\n\r\n::: tsam.config\r\n"
},
{
"path": "docs/api/tsam/exceptions.md",
"chars": 42,
"preview": "# tsam.exceptions\r\n\r\n::: tsam.exceptions\r\n"
},
{
"path": "docs/api/tsam/hyperparametertuning.md",
"chars": 62,
"preview": "# tsam.hyperparametertuning\r\n\r\n::: tsam.hyperparametertuning\r\n"
},
{
"path": "docs/api/tsam/periodAggregation.md",
"chars": 56,
"preview": "# tsam.periodAggregation\r\n\r\n::: tsam.periodAggregation\r\n"
},
{
"path": "docs/api/tsam/plot.md",
"chars": 30,
"preview": "# tsam.plot\r\n\r\n::: tsam.plot\r\n"
},
{
"path": "docs/api/tsam/representations.md",
"chars": 52,
"preview": "# tsam.representations\r\n\r\n::: tsam.representations\r\n"
},
{
"path": "docs/api/tsam/result.md",
"chars": 34,
"preview": "# tsam.result\r\n\r\n::: tsam.result\r\n"
},
{
"path": "docs/api/tsam/timeseriesaggregation.md",
"chars": 64,
"preview": "# tsam.timeseriesaggregation\r\n\r\n::: tsam.timeseriesaggregation\r\n"
},
{
"path": "docs/api/tsam/tuning.md",
"chars": 34,
"preview": "# tsam.tuning\r\n\r\n::: tsam.tuning\r\n"
},
{
"path": "docs/api/tsam/utils/durationRepresentation.md",
"chars": 78,
"preview": "# tsam.utils.durationRepresentation\r\n\r\n::: tsam.utils.durationRepresentation\r\n"
},
{
"path": "docs/api/tsam/utils/k_maxoids.md",
"chars": 52,
"preview": "# tsam.utils.k_maxoids\r\n\r\n::: tsam.utils.k_maxoids\r\n"
},
{
"path": "docs/api/tsam/utils/k_medoids_contiguity.md",
"chars": 74,
"preview": "# tsam.utils.k_medoids_contiguity\r\n\r\n::: tsam.utils.k_medoids_contiguity\r\n"
},
{
"path": "docs/api/tsam/utils/k_medoids_exact.md",
"chars": 64,
"preview": "# tsam.utils.k_medoids_exact\r\n\r\n::: tsam.utils.k_medoids_exact\r\n"
},
{
"path": "docs/api/tsam/utils/segmentation.md",
"chars": 58,
"preview": "# tsam.utils.segmentation\r\n\r\n::: tsam.utils.segmentation\r\n"
},
{
"path": "docs/background/math.md",
"chars": 3741,
"preview": "# Mathematical Description\n\nThe description of ETHOS.TSAM presented in the following is based on the review on time seri"
},
{
"path": "docs/further-reading.md",
"chars": 2463,
"preview": "# Further Reading\n\nETHOS.TSAM was originally designed for reducing the computational load for large-scale energy system "
},
{
"path": "docs/gen_ref_pages.py",
"chars": 995,
"preview": "\"\"\"Auto-generate API reference pages from Python source modules.\"\"\"\n\nfrom pathlib import Path\n\nimport mkdocs_gen_files\n\n"
},
{
"path": "docs/getting-started.md",
"chars": 4410,
"preview": "# Getting started\n\n## Basic Workflow\n\nRun the aggregation and access the results:\n\n=== \"v3\"\n\n ```python\n import pa"
},
{
"path": "docs/glossary.md",
"chars": 1910,
"preview": "# Glossary\n\nKey concepts used in the ETHOS.TSAM API:\n\n| Concept | Description |\n|---------|-------------|\n| **Period** |"
},
{
"path": "docs/index.md",
"chars": 1674,
"preview": "<div class=\"landing-logos\">\n <div class=\"tsam-logo-light\">\n <img src=\"assets/tsam-logo-light.svg\" alt=\"ETHOS.TSAM\">\n"
},
{
"path": "docs/installation.md",
"chars": 2768,
"preview": "# Installation\n\nIt is recommended to install ETHOS.TSAM within its own environment. If you are not familiar with python "
},
{
"path": "docs/javascripts/mathjax.js",
"chars": 466,
"preview": "window.MathJax = {\n tex: {\n inlineMath: [[\"\\\\(\", \"\\\\)\"]],\n displayMath: [[\"\\\\[\", \"\\\\]\"]],\n processEscapes: tru"
},
{
"path": "docs/legal-notice.md",
"chars": 1087,
"preview": "# Legal Notice\n\n## License\n\nETHOS.TSAM is published under the [MIT License](https://opensource.org/licenses/MIT).\n\n\n## A"
},
{
"path": "docs/migration-guide.md",
"chars": 16438,
"preview": "# Migrating from ETHOS.TSAM v2 to v3 { #migration-guide }\n\nETHOS.TSAM v3 replaces the class-based API with a functional "
},
{
"path": "docs/notebooks/building_energy_system.ipynb",
"chars": 6697,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Building Energy Sys"
},
{
"path": "docs/notebooks/clustering_methods.ipynb",
"chars": 16376,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Clustering Methods\\n\",\n \"\\n\",\n"
},
{
"path": "docs/notebooks/clustering_transfer.ipynb",
"chars": 5698,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Clustering Transfer"
},
{
"path": "docs/notebooks/disaggregation.ipynb",
"chars": 7590,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Disaggregation\\n\",\n"
},
{
"path": "docs/notebooks/k_maxoids.ipynb",
"chars": 10821,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# K-Maxoids Clustering\\n\",\n \"Exa"
},
{
"path": "docs/notebooks/optimization_input.ipynb",
"chars": 7525,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Optimization Input\\n\",\n \"\\n\",\n"
},
{
"path": "docs/notebooks/pareto_optimization.ipynb",
"chars": 7451,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Pareto Optimization"
},
{
"path": "docs/notebooks/quickstart.ipynb",
"chars": 13510,
"preview": "{\n \"cells\": [\n {\n \"attachments\": {},\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Basic Examp"
},
{
"path": "docs/notebooks/representations.ipynb",
"chars": 9031,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Representation Meth"
},
{
"path": "docs/notebooks/segmentation.ipynb",
"chars": 6969,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Segmentation\\n\",\n "
},
{
"path": "docs/notebooks/testdata.csv",
"chars": 387336,
"preview": ",GHI,T,Wind,Load\n2009-12-31 23:30:00,0,-2.1,7.1,375.4783938\n2010-01-01 00:30:00,0,-2.8,8.6,364.5413263\n2010-01-01 01:30:"
},
{
"path": "docs/notebooks/tuning.ipynb",
"chars": 7327,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Hyperparameter Tuni"
},
{
"path": "docs/notebooks/visualization.ipynb",
"chars": 28420,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"0\",\n \"metadata\": {},\n \"source\": [\n \"# Visualization & Qua"
},
{
"path": "docs/overrides/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "docs/overrides/partials/footer.html",
"chars": 3626,
"preview": "{#-\n Custom footer with FZJ / Helmholtz institutional branding\n-#}\n<footer class=\"md-footer\">\n {% if \"navigation.foote"
},
{
"path": "docs/stylesheets/extra.css",
"chars": 3862,
"preview": "/* ==========================================================================\n ETHOS.TSAM — Forschungszentrum Jülich B"
},
{
"path": "environment.yml",
"chars": 765,
"preview": "name: tsam_env\nchannels:\n - conda-forge\ndependencies:\n - python>=3.10,<=3.14.4\n - pip\n # Core dependencies\n - sciki"
},
{
"path": "mkdocs.yml",
"chars": 4015,
"preview": "site_name: ETHOS.TSAM Documentation\nsite_url: https://fzj-iek3-vsa.github.io/tsam/\nrepo_url: https://github.com/FZJ-IEK3"
},
{
"path": "pyproject.toml",
"chars": 4747,
"preview": "[build-system]\nrequires = [\"hatchling\", \"hatch-vcs\"]\nbuild-backend = \"hatchling.build\"\n\n\n[project]\nname = \"tsam\"\ndynamic"
},
{
"path": "src/tsam/__init__.py",
"chars": 2405,
"preview": "\"\"\"tsam - Time Series Aggregation Module.\n\nA Python package for aggregating time series data using clustering algorithms"
},
{
"path": "src/tsam/api.py",
"chars": 27636,
"preview": "\"\"\"New simplified API for tsam aggregation.\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nimport warnings\nfrom typin"
},
{
"path": "src/tsam/config.py",
"chars": 49074,
"preview": "\"\"\"Configuration classes for tsam aggregation.\"\"\"\n\nfrom __future__ import annotations\n\nimport warnings\nfrom dataclasses "
},
{
"path": "src/tsam/exceptions.py",
"chars": 504,
"preview": "\"\"\"Custom exceptions and warnings for tsam.\"\"\"\n\n\nclass LegacyAPIWarning(FutureWarning):\n \"\"\"Warning for deprecated ts"
},
{
"path": "src/tsam/hyperparametertuning.py",
"chars": 11328,
"preview": "import copy\nimport warnings\n\nimport numpy as np\nimport tqdm\n\nfrom tsam.exceptions import LegacyAPIWarning\nfrom tsam.time"
},
{
"path": "src/tsam/periodAggregation.py",
"chars": 5771,
"preview": "import numpy as np\n\nfrom tsam.representations import representations\n\n\ndef aggregatePeriods(\n candidates,\n n_clust"
},
{
"path": "src/tsam/plot.py",
"chars": 26015,
"preview": "\"\"\"Plotting accessor for tsam aggregation results.\n\nProvides convenient plotting methods directly on the result object f"
},
{
"path": "src/tsam/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "src/tsam/representations.py",
"chars": 7053,
"preview": "import numpy as np\nfrom sklearn.metrics.pairwise import euclidean_distances\n\nfrom tsam.utils.durationRepresentation impo"
},
{
"path": "src/tsam/result.py",
"chars": 16309,
"preview": "\"\"\"Result classes for tsam aggregation.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass, field"
},
{
"path": "src/tsam/timeseriesaggregation.py",
"chars": 60335,
"preview": "import copy\nimport time\nimport warnings\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn import preprocessing\nfrom s"
},
{
"path": "src/tsam/tuning.py",
"chars": 36317,
"preview": "\"\"\"Hyperparameter tuning for tsam aggregation.\n\nThis module provides functions for finding optimal aggregation parameter"
},
{
"path": "src/tsam/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "src/tsam/utils/durationRepresentation.py",
"chars": 9205,
"preview": "\"\"\"Orders a set of representation values to fit several candidate value sets\"\"\"\n\nimport warnings\n\nimport numpy as np\nimp"
},
{
"path": "src/tsam/utils/k_maxoids.py",
"chars": 4241,
"preview": "\"\"\"Exact K-maxoids clustering\"\"\"\n\nimport numpy as np\nimport numpy.random as rnd\nfrom sklearn.base import BaseEstimator, "
},
{
"path": "src/tsam/utils/k_medoids_contiguity.py",
"chars": 5920,
"preview": "import time\n\nimport numpy as np\n\n# switch to numpy 2.0 (restore deprecated aliases for backward compatibility)\nnp.float_"
},
{
"path": "src/tsam/utils/k_medoids_exact.py",
"chars": 6997,
"preview": "import numpy as np\nfrom sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin\nfrom sklearn.metrics.pairwise "
},
{
"path": "src/tsam/utils/segmentation.py",
"chars": 11581,
"preview": "import numpy as np\nimport pandas as pd\nfrom sklearn.cluster import AgglomerativeClustering\n\nfrom tsam.representations im"
},
{
"path": "test/_configs.py",
"chars": 23640,
"preview": "\"\"\"Shared test configuration — no new-API imports.\n\nImportable under any tsam version (including v2.3.9) because it only"
},
{
"path": "test/_old_new_equivalence.py",
"chars": 21252,
"preview": "\"\"\"Parametrized equivalence tests: old TimeSeriesAggregation vs new tsam.aggregate().\n\nEvery config is tested against ev"
},
{
"path": "test/conftest.py",
"chars": 1113,
"preview": "\"\"\"Pytest configuration and shared fixtures for tsam tests.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\n\n# Path to test "
},
{
"path": "test/data/clustering_e2e/expected_contiguous_medoid_8clusters.csv",
"chars": 13490,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,420.5346926699424,-0.8992069158245563,2.125905749296286\n0,1,0.0,409.01239615571905,-1."
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_distribution_8clusters.csv",
"chars": 12958,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,394.25496157291667,-0.9875000000000003,3.0\n0,1,0.0,378.65836095416665,-1.6812500000000"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_mean_8clusters.csv",
"chars": 13881,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,401.16275471874997,-0.5416666666666671,2.8375\n0,1,0.0,390.31434358958336,-0.8750000000"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters.csv",
"chars": 13818,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,404.7493765041648,-0.6623254244431773,3.4994512099513932\n0,1,0.0,395.3998629891593,-0."
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_12segments.csv",
"chars": 7284,
"preview": ",Segment Step,Segment Duration,GHI,Load,T,Wind\n0,0,2,0.0,400.07461974666205,-0.8094608581294735,3.966044704611579\n0,1,3,"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_12segments_extremes_append.csv",
"chars": 7872,
"preview": ",Segment Step,Segment Duration,GHI,Load,T,Wind\n0,0,2,0.0,399.9985660837293,-0.8136995791782012,3.967405281988925\n0,1,3,0"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_12segments_extremes_replace.csv",
"chars": 7037,
"preview": ",Segment Step,Segment Duration,GHI,Load,T,Wind\n0,0,2,0.0,397.614189805862,-0.8157035818384176,4.045079566775535\n0,1,3,0."
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_6segments.csv",
"chars": 3682,
"preview": ",Segment Step,Segment Duration,GHI,Load,T,Wind\n0,0,5,0.0,396.4045997763328,-1.074304638764809,3.2661544626213\n0,1,2,4.89"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_6segments_extremes_newcluster.csv",
"chars": 4038,
"preview": ",Segment Step,Segment Duration,GHI,Load,T,Wind\n0,0,5,0.0,395.53146028047877,-1.109713531470965,3.2795937682003498\n0,1,2,"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_extremes_append.csv",
"chars": 14876,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,404.67058954571735,-0.6666402901814017,3.5006517194019926\n0,1,0.0,395.3265426217413,-0"
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_extremes_newcluster.csv",
"chars": 14721,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,403.8185954813564,-0.6995748784038625,3.513850465928946\n0,1,0.0,394.53366371046013,-0."
},
{
"path": "test/data/clustering_e2e/expected_hierarchical_medoid_8clusters_extremes_replace.csv",
"chars": 13243,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,402.20052107717447,-0.6686802928894068,3.569187853037237\n0,1,0.0,393.02785853454964,-0"
},
{
"path": "test/data/clustering_e2e/expected_kmaxoids_maxoid_8clusters.csv",
"chars": 13365,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,352.5044447403267,-5.283022944630396,1.243776935651657\n0,1,0.0,327.9791515092384,-5.37"
},
{
"path": "test/data/clustering_e2e/expected_kmeans_mean_8clusters.csv",
"chars": 13658,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,366.6365925093024,15.118604651162789,1.7651162790697676\n0,1,0.0,356.19711602093025,14."
},
{
"path": "test/data/clustering_e2e/expected_kmedoids_medoid_8clusters.csv",
"chars": 13470,
"preview": ",timestep,GHI,Load,T,Wind\n0,0,0.0,394.3691140459029,-0.10536963599833105,4.125812560782106\n0,1,0.0,380.8055583367772,0.3"
},
{
"path": "test/data/clustering_e2e/meta_contiguous_medoid_8clusters.json",
"chars": 593,
"preview": "{\n \"config\": {\n \"method\": \"contiguous\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8\n },\n \"cluster_weights\""
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_distribution_8clusters.json",
"chars": 600,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"distribution\",\n \"n_clusters\": 8\n },\n \"cluster_"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_mean_8clusters.json",
"chars": 594,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"mean\",\n \"n_clusters\": 8\n },\n \"cluster_weights\""
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters.json",
"chars": 592,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8\n },\n \"cluster_weight"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_12segments.json",
"chars": 616,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"n_segments\": 12\n"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_12segments_extremes_append.json",
"chars": 704,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"n_segments\": 12,"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_12segments_extremes_replace.json",
"chars": 696,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"n_segments\": 12,"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_6segments.json",
"chars": 614,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"n_segments\": 6\n "
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_6segments_extremes_newcluster.json",
"chars": 711,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"n_segments\": 6,\n"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_extremes_append.json",
"chars": 681,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"extreme_method\":"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_extremes_newcluster.json",
"chars": 686,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"extreme_method\":"
},
{
"path": "test/data/clustering_e2e/meta_hierarchical_medoid_8clusters_extremes_replace.json",
"chars": 672,
"preview": "{\n \"config\": {\n \"method\": \"hierarchical\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8,\n \"extreme_method\":"
},
{
"path": "test/data/clustering_e2e/meta_kmaxoids_maxoid_8clusters.json",
"chars": 592,
"preview": "{\n \"config\": {\n \"method\": \"kmaxoids\",\n \"representation\": \"maxoid\",\n \"n_clusters\": 8\n },\n \"cluster_weights\": "
},
{
"path": "test/data/clustering_e2e/meta_kmeans_mean_8clusters.json",
"chars": 584,
"preview": "{\n \"config\": {\n \"method\": \"kmeans\",\n \"representation\": \"mean\",\n \"n_clusters\": 8\n },\n \"cluster_weights\": {\n "
},
{
"path": "test/data/clustering_e2e/meta_kmedoids_medoid_8clusters.json",
"chars": 585,
"preview": "{\n \"config\": {\n \"method\": \"kmedoids\",\n \"representation\": \"medoid\",\n \"n_clusters\": 8\n },\n \"cluster_weights\": "
},
{
"path": "test/data/golden/averaging/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/averaging/testdata.csv",
"chars": 527467,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,439.25464229,0.10888889,3.99777778\n2010-01-01 00:30:00,0.0,426.16342552,-0.0111"
},
{
"path": "test/data/golden/averaging/wide.csv",
"chars": 1421720,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.56330802,4392"
},
{
"path": "test/data/golden/averaging/with_zero_column.csv",
"chars": 562512,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,439.25464229,0.10888889,3.99777778,0.0\n2010-01-01 00:30:00,0.0,426.1634255"
},
{
"path": "test/data/golden/averaging_segmentation/testdata.csv",
"chars": 553092,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,428.09777944,-0.08333333,4.20407407\n2010-01-01 00:30:00,0.0,428.09777944,-0.083"
},
{
"path": "test/data/golden/contiguous/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/contiguous/testdata.csv",
"chars": 553814,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,420.53469267,-0.89920692,2.12590575\n2010-01-01 00:30:00,0.0,409.01239616,-1.099"
},
{
"path": "test/data/golden/contiguous/wide.csv",
"chars": 1357831,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.48412528,4427"
},
{
"path": "test/data/golden/contiguous/with_zero_column.csv",
"chars": 588859,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,420.53469267,-0.89920692,2.12590575,0.0\n2010-01-01 00:30:00,0.0,409.012396"
},
{
"path": "test/data/golden/contiguous_extremes_append/testdata.csv",
"chars": 555431,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,420.46401001,-0.90319309,2.12583438\n2010-01-01 00:30:00,0.0,408.94712372,-1.103"
},
{
"path": "test/data/golden/contiguous_segmentation/testdata.csv",
"chars": 565469,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,420.53469267,-0.89920692,2.12590575\n2010-01-01 00:30:00,0.0,415.50028044,-1.459"
},
{
"path": "test/data/golden/distribution_global/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/distribution_global/testdata.csv",
"chars": 496799,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,394.34413812,3.45576923,5.0\n2010-01-01 00:30:00,0.0,376.72177372,3.14615385,5.0"
},
{
"path": "test/data/golden/distribution_global/wide.csv",
"chars": 1344149,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.65351183,3776"
},
{
"path": "test/data/golden/distribution_global/with_zero_column.csv",
"chars": 531844,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,394.34413812,3.45576923,5.0,0.0\n2010-01-01 00:30:00,0.0,376.72177372,3.146"
},
{
"path": "test/data/golden/distribution_minmax_global/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/distribution_minmax_global/testdata.csv",
"chars": 550524,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,394.3842675,3.4554192,4.98124274\n2010-01-01 00:30:00,0.0,376.75621586,3.1458125"
},
{
"path": "test/data/golden/distribution_minmax_global/wide.csv",
"chars": 1344173,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.65290816,3775"
},
{
"path": "test/data/golden/distribution_minmax_global/with_zero_column.csv",
"chars": 585569,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,394.3842675,3.4554192,4.98124274,0.0\n2010-01-01 00:30:00,0.0,376.75621586,"
},
{
"path": "test/data/golden/extremes_append/testdata.csv",
"chars": 558890,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.44989363,4.33337554,5.36766597\n2010-01-01 00:30:00,0.0,367.8055737,3.941217"
},
{
"path": "test/data/golden/extremes_append/with_zero_column.csv",
"chars": 593935,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.44989363,4.33337554,5.36766597,0.0\n2010-01-01 00:30:00,0.0,367.8055737"
},
{
"path": "test/data/golden/extremes_constant/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/extremes_max_period/testdata.csv",
"chars": 559129,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.46639105,4.34653349,5.3685336\n2010-01-01 00:30:00,0.0,367.82031597,3.953988"
},
{
"path": "test/data/golden/extremes_max_period/with_zero_column.csv",
"chars": 594174,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.46639105,4.34653349,5.3685336,0.0\n2010-01-01 00:30:00,0.0,367.82031597"
},
{
"path": "test/data/golden/extremes_min_period/testdata.csv",
"chars": 559038,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.48311229,4.3377437,5.37720371\n2010-01-01 00:30:00,0.0,367.83525824,3.945457"
},
{
"path": "test/data/golden/extremes_min_period/with_zero_column.csv",
"chars": 594083,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.48311229,4.3377437,5.37720371,0.0\n2010-01-01 00:30:00,0.0,367.83525824"
},
{
"path": "test/data/golden/extremes_min_value/testdata.csv",
"chars": 555312,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.45225211,4.35143869,5.37390323\n2010-01-01 00:30:00,0.0,367.80768126,3.95874"
},
{
"path": "test/data/golden/extremes_min_value/with_zero_column.csv",
"chars": 590357,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.45225211,4.35143869,5.37390323,0.0\n2010-01-01 00:30:00,0.0,367.8076812"
},
{
"path": "test/data/golden/extremes_multi/testdata.csv",
"chars": 558037,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.3395565,4.35082993,5.37853754\n2010-01-01 00:30:00,0.0,367.70697528,3.958158"
},
{
"path": "test/data/golden/extremes_multi/with_zero_column.csv",
"chars": 593082,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.3395565,4.35082993,5.37853754,0.0\n2010-01-01 00:30:00,0.0,367.70697528"
},
{
"path": "test/data/golden/extremes_new_cluster/testdata.csv",
"chars": 554441,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,378.75745841,4.28068019,5.38790405\n2010-01-01 00:30:00,0.0,367.18680632,3.89007"
},
{
"path": "test/data/golden/extremes_new_cluster/with_zero_column.csv",
"chars": 589486,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,378.75745841,4.28068019,5.38790405,0.0\n2010-01-01 00:30:00,0.0,367.1868063"
},
{
"path": "test/data/golden/extremes_replace/testdata.csv",
"chars": 547062,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,377.44241203,4.33011153,5.47275471\n2010-01-01 00:30:00,0.0,366.01166707,3.93804"
},
{
"path": "test/data/golden/extremes_replace/with_zero_column.csv",
"chars": 582107,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,377.44241203,4.33011153,5.47275471,0.0\n2010-01-01 00:30:00,0.0,366.0116670"
},
{
"path": "test/data/golden/extremes_replace_segmentation/testdata.csv",
"chars": 563155,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,372.12039665,3.70934653,5.29429532\n2010-01-01 00:30:00,0.0,372.12039665,3.70934"
},
{
"path": "test/data/golden/extremes_wide_multi/wide.csv",
"chars": 1362617,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.54990197,3755"
},
{
"path": "test/data/golden/extremes_with_segmentation/testdata.csv",
"chars": 573044,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,374.02844035,3.71245854,5.19263338\n2010-01-01 00:30:00,0.0,374.02844035,3.71245"
},
{
"path": "test/data/golden/extremes_with_segmentation/with_zero_column.csv",
"chars": 608089,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,374.02844035,3.71245854,5.19263338,0.0\n2010-01-01 00:30:00,0.0,374.0284403"
},
{
"path": "test/data/golden/extremes_zero_column/with_zero_column.csv",
"chars": 593133,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,375.4783938,-2.1,7.1,0.0\n2010-01-01 00:30:00,0.0,364.5413263,-2.8,8.6,0.0\n"
},
{
"path": "test/data/golden/hierarchical_default/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_default/testdata.csv",
"chars": 559176,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.51392561,4.34027932,5.36582519\n2010-01-01 00:30:00,0.0,367.86279335,3.94791"
},
{
"path": "test/data/golden/hierarchical_default/wide.csv",
"chars": 1362772,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,1.54748586,3612"
},
{
"path": "test/data/golden/hierarchical_default/with_zero_column.csv",
"chars": 594221,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,379.51392561,4.34027932,5.36582519,0.0\n2010-01-01 00:30:00,0.0,367.8627933"
},
{
"path": "test/data/golden/hierarchical_distribution/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_distribution/testdata.csv",
"chars": 511597,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,398.50557972,1.93846154,3.08653846\n2010-01-01 00:30:00,0.0,383.08863594,1.65769"
},
{
"path": "test/data/golden/hierarchical_distribution/wide.csv",
"chars": 1350634,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.2810692,3842."
},
{
"path": "test/data/golden/hierarchical_distribution/with_zero_column.csv",
"chars": 546642,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,398.50557972,1.93846154,3.08653846,0.0\n2010-01-01 00:30:00,0.0,383.0886359"
},
{
"path": "test/data/golden/hierarchical_distribution_minmax/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_distribution_minmax/testdata.csv",
"chars": 552751,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,399.39087033,1.93123775,2.991787\n2010-01-01 00:30:00,0.0,383.86771734,1.6506539"
},
{
"path": "test/data/golden/hierarchical_distribution_minmax/wide.csv",
"chars": 1343781,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.26362876,3852"
},
{
"path": "test/data/golden/hierarchical_distribution_minmax/with_zero_column.csv",
"chars": 587796,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,399.39087033,1.93123775,2.991787,0.0\n2010-01-01 00:30:00,0.0,383.86771734,"
},
{
"path": "test/data/golden/hierarchical_duration_curves/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_duration_curves/testdata.csv",
"chars": 558095,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,413.93507766,6.57065572,3.99805531\n2010-01-01 00:30:00,0.0,397.84346837,6.47272"
},
{
"path": "test/data/golden/hierarchical_duration_curves/wide.csv",
"chars": 1355765,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.0,3894.627949"
},
{
"path": "test/data/golden/hierarchical_duration_curves/with_zero_column.csv",
"chars": 593140,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,413.93507766,6.57065572,3.99805531,0.0\n2010-01-01 00:30:00,0.0,397.8434683"
},
{
"path": "test/data/golden/hierarchical_eval_sum_periods/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_eval_sum_periods/testdata.csv",
"chars": 557020,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,420.46427904,2.79688611,4.10826565\n2010-01-01 00:30:00,0.0,408.64022617,2.79688"
},
{
"path": "test/data/golden/hierarchical_eval_sum_periods/wide.csv",
"chars": 1347250,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,1.58178458,3622"
},
{
"path": "test/data/golden/hierarchical_eval_sum_periods/with_zero_column.csv",
"chars": 592065,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,420.46427904,2.79688611,4.10826565,0.0\n2010-01-01 00:30:00,0.0,408.6402261"
},
{
"path": "test/data/golden/hierarchical_maxoid/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_maxoid/testdata.csv",
"chars": 550323,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,356.93016501,7.93672748,4.6008199\n2010-01-01 00:30:00,0.0,348.07359234,7.839946"
},
{
"path": "test/data/golden/hierarchical_maxoid/wide.csv",
"chars": 1319324,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.0,3397.676129"
},
{
"path": "test/data/golden/hierarchical_maxoid/with_zero_column.csv",
"chars": 585368,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,356.93016501,7.93672748,4.6008199,0.0\n2010-01-01 00:30:00,0.0,348.07359234"
},
{
"path": "test/data/golden/hierarchical_mean/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_mean/testdata.csv",
"chars": 554502,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,396.39708568,2.76923077,4.87884615\n2010-01-01 00:30:00,0.0,384.37710609,2.64038"
},
{
"path": "test/data/golden/hierarchical_mean/wide.csv",
"chars": 1426497,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,0.84541179,3806"
},
{
"path": "test/data/golden/hierarchical_mean/with_zero_column.csv",
"chars": 589547,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,396.39708568,2.76923077,4.87884615,0.0\n2010-01-01 00:30:00,0.0,384.3771060"
},
{
"path": "test/data/golden/hierarchical_no_rescale/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_no_rescale/testdata.csv",
"chars": 396835,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,386.0571701,4.6,4.6\n2010-01-01 00:30:00,0.0,373.7099053,4.2,4.5\n2010-01-01 01:3"
},
{
"path": "test/data/golden/hierarchical_no_rescale/wide.csv",
"chars": 1362698,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,1.42643509,3605"
},
{
"path": "test/data/golden/hierarchical_no_rescale/with_zero_column.csv",
"chars": 431880,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,386.0571701,4.6,4.6,0.0\n2010-01-01 00:30:00,0.0,373.7099053,4.2,4.5,0.0\n20"
},
{
"path": "test/data/golden/hierarchical_rescale_exclude/testdata.csv",
"chars": 526978,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.51392561,4.34027932,5.36582519\n2010-01-01 00:30:00,0.0,367.86279335,3.94791"
},
{
"path": "test/data/golden/hierarchical_round/testdata.csv",
"chars": 371716,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.51,4.34,5.37\n2010-01-01 00:30:00,0.0,367.86,3.95,5.25\n2010-01-01 01:30:00,0"
},
{
"path": "test/data/golden/hierarchical_segmentation/constant.csv",
"chars": 6965,
"preview": ",A,B\n2020-01-01 00:00:00,42.0,7.0\n2020-01-01 01:00:00,42.0,7.0\n2020-01-01 02:00:00,42.0,7.0\n2020-01-01 03:00:00,42.0,7.0"
},
{
"path": "test/data/golden/hierarchical_segmentation/testdata.csv",
"chars": 573814,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,374.08930059,3.71904082,5.19085263\n2010-01-01 00:30:00,0.0,374.08930059,3.71904"
},
{
"path": "test/data/golden/hierarchical_segmentation/wide.csv",
"chars": 1403002,
"preview": ",DE_GHI,DE_Load,DE_T,DE_Wind,FR_GHI,FR_Load,FR_T,FR_Wind,NL_GHI,NL_Load,NL_T,NL_Wind\n2009-12-31 23:30:00,1.0682792,3439."
},
{
"path": "test/data/golden/hierarchical_segmentation/with_zero_column.csv",
"chars": 608859,
"preview": ",GHI,Load,T,Wind,Zero\n2009-12-31 23:30:00,0.0,374.08930059,3.71904082,5.19085263,0.0\n2010-01-01 00:30:00,0.0,374.0893005"
},
{
"path": "test/data/golden/hierarchical_weighted/testdata.csv",
"chars": 552972,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,334.62935809,7.96905754,3.33330568\n2010-01-01 00:30:00,0.0,347.08397253,7.96905"
},
{
"path": "test/data/golden/hierarchical_weighted_duration_curves/testdata.csv",
"chars": 558110,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,360.75709013,2.64046044,5.87380084\n2010-01-01 00:30:00,0.0,347.62838736,2.04857"
},
{
"path": "test/data/golden/hierarchical_weighted_extremes/testdata.csv",
"chars": 553642,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,380.36127658,1.39835768,3.09004425\n2010-01-01 00:30:00,0.0,370.98913378,1.50555"
},
{
"path": "test/data/golden/hierarchical_weighted_no_rescale/testdata.csv",
"chars": 395170,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,379.2082967,0.7,3.0\n2010-01-01 00:30:00,0.0,369.9340677,0.8,2.0\n2010-01-01 01:3"
},
{
"path": "test/data/golden/hierarchical_weighted_rescale_exclude/testdata.csv",
"chars": 526246,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,380.37662596,1.40439079,3.09087272\n2010-01-01 00:30:00,0.0,371.00317965,1.51165"
},
{
"path": "test/data/golden/hierarchical_weighted_samemean/testdata.csv",
"chars": 556453,
"preview": ",GHI,Load,T,Wind\n2009-12-31 23:30:00,0.0,337.4943979,8.84437501,3.40619462\n2010-01-01 00:30:00,0.0,350.50112932,8.844375"
}
]
// ... and 67 more files
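The clustering_e2e fixtures listed in the preview above come in pairs: meta_<case>.json records the config (method, representation, n_clusters, ...) plus cluster weights, and expected_<case>.csv holds the representative periods keyed by cluster and timestep. A sketch of loading such a pair, based only on the column headers and JSON keys visible in the previews:

```python
# Sketch of how the clustering_e2e fixtures pair up, inferred from the
# previews above: meta_<case>.json stores config and cluster weights,
# expected_<case>.csv stores representatives with a (cluster, timestep)
# MultiIndex in its first two columns.
import json
from pathlib import Path

import pandas as pd

case = "hierarchical_medoid_8clusters"
root = Path("test/data/clustering_e2e")

meta = json.loads((root / f"meta_{case}.json").read_text())
expected = pd.read_csv(root / f"expected_{case}.csv", index_col=[0, 1])

print(meta["config"])          # {"method": "hierarchical", "representation": ...}
print(expected.loc[0].head())  # timesteps of the first representative period
```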
About this extraction
This page contains the full source code of the FZJ-IEK3-VSA/tsam GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 267 files (64.6 MB), approximately 17.0M tokens, and a symbol index of 433 extracted functions, classes, methods, constants, and types.
Extracted by GitExtract.